1use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use bstr::{BStr, BString, ByteSlice};
5use chrono::{DateTime, Utc};
6use mailparsing::{BStringUtf8, Header, HeaderParseResult, MimePart};
7use rfc5321::parser::EnvelopeAddress;
8use serde::{Deserialize, Serialize};
9use serde_with::serde_as;
10use std::collections::BTreeMap;
11use std::str::FromStr;
12
13#[serde_as]
14#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
15pub struct ARFReport {
16 pub feedback_type: String,
17 pub user_agent: String,
18 pub version: String,
19
20 #[serde(default)]
21 pub arrival_date: Option<DateTime<Utc>>,
22 #[serde(default)]
23 pub incidents: Option<u32>,
24 #[serde(default)]
25 pub original_envelope_id: Option<String>,
26 #[serde(default)]
27 pub original_mail_from: Option<String>,
28 #[serde(default)]
29 pub reporting_mta: Option<RemoteMta>,
30 #[serde(default)]
31 pub source_ip: Option<String>,
32
33 #[serde(default)]
34 pub authentication_results: Vec<String>,
35 #[serde(default)]
36 pub original_rcpto_to: Vec<String>,
37 #[serde(default)]
38 pub reported_domain: Vec<String>,
39 #[serde(default)]
40 pub reported_uri: Vec<String>,
41
42 #[serde_as(as = "BTreeMap<_, Vec<BStringUtf8>>")]
43 pub extensions: BTreeMap<String, Vec<BString>>,
44
45 #[serde_as(as = "Option<BStringUtf8>")]
46 pub original_message: Option<BString>,
47 pub supplemental_trace: Option<serde_json::Value>,
48}
49
50impl ARFReport {
51 pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
52 let mail = MimePart::parse(input)?;
53 let ct = mail.headers().content_type()?;
54 let ct = match ct {
55 None => return Ok(None),
56 Some(ct) => ct,
57 };
58
59 if ct.value != "multipart/report" {
60 return Ok(None);
61 }
62
63 if ct.get("report-type").as_ref().map(|b| b.as_bstr()) != Some(BStr::new("feedback-report"))
64 {
65 return Ok(None);
66 }
67
68 let mut original_message = None;
69 let mut supplemental_trace = None;
70
71 for part in mail.child_parts() {
72 let ct = content_type(part);
73 let ct = ct.as_ref().map(|b| b.as_bstr());
74 if ct == Some(BStr::new("message/rfc822"))
75 || ct == Some(BStr::new("text/rfc822-headers"))
76 {
77 if let Ok(HeaderParseResult { headers, .. }) =
78 Header::parse_headers(part.raw_body())
79 {
80 for hdr in headers.iter() {
82 if !(hdr.get_name().starts_with_str("X-")
83 || hdr.get_name().starts_with_str("x-"))
84 {
85 continue;
86 }
87 if let Ok(decoded) =
88 data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
89 {
90 #[derive(Deserialize)]
91 struct Wrap {
92 #[serde(rename = "_@_")]
93 marker: String,
94 #[serde(flatten)]
95 payload: serde_json::Value,
96 }
97 if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
98 if obj.marker == "\\_/" {
102 supplemental_trace.replace(obj.payload);
103 break;
104 }
105 }
106 }
107 }
108 }
109
110 original_message = Some(BString::new(
111 part.raw_body().as_bytes().replace("\r\n", "\n"),
112 ));
113 }
114 }
115
116 for part in mail.child_parts() {
117 let ct = content_type(part);
118 let ct = ct.as_ref().map(|b| b.as_bstr());
119 if ct == Some(BStr::new("message/feedback-report")) {
120 return Ok(Some(Self::parse_inner(
121 part,
122 original_message,
123 supplemental_trace,
124 )?));
125 }
126 }
127
128 anyhow::bail!("feedback-report part missing");
129 }
130
131 fn parse_inner(
132 part: &MimePart,
133 original_message: Option<BString>,
134 supplemental_trace: Option<serde_json::Value>,
135 ) -> anyhow::Result<Self> {
136 let body = part.raw_body();
137 let mut extensions = extract_headers(body.as_bytes())?;
138
139 let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
140 let user_agent = extract_single_req("user-agent", &mut extensions)?;
141 let version = extract_single_req("version", &mut extensions)?;
142 let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
143 "arrival-date",
144 "received-date",
145 &mut extensions,
146 );
147 let incidents = extract_single("incidents", &mut extensions)?;
148 let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
149 let original_mail_from =
150 extract_single::<EnvelopeAddress>("original-mail-from", &mut extensions)?
151 .map(|a| a.to_string());
152 let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
153 let source_ip = extract_single("source-ip", &mut extensions)?;
154 let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
155 let original_rcpto_to =
156 extract_multiple::<EnvelopeAddress>("original-rcpt-to", &mut extensions)?
157 .into_iter()
158 .map(|a| a.to_string())
159 .collect();
160 let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
161 let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
162
163 Ok(Self {
164 feedback_type,
165 user_agent,
166 version,
167 arrival_date,
168 incidents,
169 original_envelope_id,
170 original_mail_from,
171 reporting_mta,
172 source_ip,
173 authentication_results,
174 original_rcpto_to,
175 reported_domain,
176 reported_uri,
177 extensions,
178 original_message,
179 supplemental_trace,
180 })
181 }
182}
183
184pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<BString>>> {
185 let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
186
187 let mut extensions = BTreeMap::new();
188
189 for hdr in headers.iter() {
190 let name = String::from_utf8_lossy(&hdr.get_name()).to_ascii_lowercase();
191 extensions
192 .entry(name)
193 .or_insert_with(std::vec::Vec::new)
194 .push(hdr.as_unstructured()?);
195 }
196 Ok(extensions)
197}
198
199pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
200
201impl FromStr for DateTimeRfc2822 {
202 type Err = anyhow::Error;
203 fn from_str(input: &str) -> anyhow::Result<Self> {
204 let date = DateTime::parse_from_rfc2822(input)?;
205 Ok(Self(date.into()))
206 }
207}
208
209impl From<DateTimeRfc2822> for DateTime<Utc> {
210 fn from(val: DateTimeRfc2822) -> Self {
211 val.0
212 }
213}
214
215pub(crate) fn extract_single_req<R>(
216 name: &str,
217 extensions: &mut BTreeMap<String, Vec<BString>>,
218) -> anyhow::Result<R>
219where
220 R: FromStr,
221 <R as FromStr>::Err: std::fmt::Display,
222{
223 extract_single(name, extensions)?
224 .ok_or_else(|| anyhow!("required header {name} is not present"))
225}
226
227pub(crate) fn extract_single<R>(
228 name: &str,
229 extensions: &mut BTreeMap<String, Vec<BString>>,
230) -> anyhow::Result<Option<R>>
231where
232 R: FromStr,
233 <R as FromStr>::Err: std::fmt::Display,
234{
235 match extensions.remove(name) {
236 Some(mut hdrs) if hdrs.len() == 1 => {
237 let value = hdrs.remove(0);
238 let value = value
239 .to_str()
240 .map_err(|err| anyhow!("{value} could not be converted to UTF-8: {err:#}"))?;
241 let converted = value
242 .parse::<R>()
243 .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
244 Ok(Some(converted))
245 }
246 Some(_) => anyhow::bail!("header {name} should have only a single value"),
247 None => Ok(None),
248 }
249}
250
251pub(crate) fn extract_single_conv<R, T>(
252 name: &str,
253 extensions: &mut BTreeMap<String, Vec<BString>>,
254) -> anyhow::Result<Option<T>>
255where
256 R: FromStr,
257 <R as FromStr>::Err: std::fmt::Display,
258 R: Into<T>,
259{
260 Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
261}
262
263pub(crate) fn extract_single_conv_fallback<R, T>(
264 name: &str,
265 fallback: &str,
266 extensions: &mut BTreeMap<String, Vec<BString>>,
267) -> Option<T>
268where
269 R: FromStr,
270 <R as FromStr>::Err: std::fmt::Display,
271 R: Into<T>,
272{
273 let maybe = extract_single::<R>(name, extensions).ok()?;
274 match maybe {
275 Some(value) => Some(value.into()),
276 None => extract_single::<R>(fallback, extensions)
277 .ok()?
278 .map(Into::into),
279 }
280}
281
282pub(crate) fn extract_multiple<R>(
283 name: &str,
284 extensions: &mut BTreeMap<String, Vec<BString>>,
285) -> anyhow::Result<Vec<R>>
286where
287 R: FromStr,
288 <R as FromStr>::Err: std::fmt::Display,
289{
290 match extensions.remove(name) {
291 Some(hdrs) => {
292 let mut results = vec![];
293 for h in hdrs {
294 let value = h
295 .to_str()
296 .map_err(|err| anyhow!("{h} could not be converted to UTF-8: {err:#}"))?;
297 let converted = value
298 .parse::<R>()
299 .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
300 results.push(converted);
301 }
302 Ok(results)
303 }
304 None => Ok(vec![]),
305 }
306}
307
// Snapshot tests for ARFReport::parse. Each case parses one sample message
// from ../data/rfc5965/ and compares the Debug rendering of the result with
// an inline k9 snapshot.
308#[cfg(test)]
309mod test {
310 use super::*;
311
 // Sample 1: only the three required fields are populated; the original
 // message is captured and no supplemental trace is expected.
312 #[test]
313 fn rfc5965_1() {
314 let result = ARFReport::parse(include_bytes!("../data/rfc5965/1.eml")).unwrap();
315 k9::snapshot!(
316 result,
317 r#"
318Some(
319 ARFReport {
320 feedback_type: "abuse",
321 user_agent: "SomeGenerator/1.0",
322 version: "1",
323 arrival_date: None,
324 incidents: None,
325 original_envelope_id: None,
326 original_mail_from: None,
327 reporting_mta: None,
328 source_ip: None,
329 authentication_results: [],
330 original_rcpto_to: [],
331 reported_domain: [],
332 reported_uri: [],
333 extensions: {},
334 original_message: Some(
335 "Received: from mailserver.example.net
336 (mailserver.example.net [192.0.2.1])
337 by example.com with ESMTP id M63d4137594e46;
338 Thu, 08 Mar 2005 14:00:00 -0400
339From: <somespammer@example.net>
340To: <Undisclosed Recipients>
341Subject: Earn money
342MIME-Version: 1.0
343Content-type: text/plain
344Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
345Date: Thu, 02 Sep 2004 12:31:03 -0500
346
347Spam Spam Spam
348Spam Spam Spam
349Spam Spam Spam
350Spam Spam Spam
351",
352 ),
353 supplemental_trace: None,
354 },
355)
356"#
357 );
358 }
359
 // Sample 2: optional fields are populated, the unrecognized
 // removal-recipient field is kept in extensions, and the X-KumoRef header
 // of the original message is expected to yield the supplemental trace.
360 #[test]
361 fn rfc5965_2() {
362 let result = ARFReport::parse(include_bytes!("../data/rfc5965/2.eml")).unwrap();
363 k9::snapshot!(
364 result,
365 r#"
366Some(
367 ARFReport {
368 feedback_type: "abuse",
369 user_agent: "SomeGenerator/1.0",
370 version: "1",
371 arrival_date: None,
372 incidents: None,
373 original_envelope_id: None,
374 original_mail_from: Some(
375 "somespammer@example.net",
376 ),
377 reporting_mta: Some(
378 RemoteMta {
379 mta_type: "dns",
380 name: "mail.example.com",
381 },
382 ),
383 source_ip: Some(
384 "192.0.2.1",
385 ),
386 authentication_results: [
387 "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
388 ],
389 original_rcpto_to: [
390 "user@example.com",
391 ],
392 reported_domain: [
393 "example.net",
394 ],
395 reported_uri: [
396 "http://example.net/earn_money.html",
397 "mailto:user@example.com",
398 ],
399 extensions: {
400 "removal-recipient": [
401 "user@example.com",
402 ],
403 },
404 original_message: Some(
405 "From: <somespammer@example.net>
406Received: from mailserver.example.net (mailserver.example.net
407 [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
408 Tue, 08 Mar 2005 14:00:00 -0400
409X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
410To: <Undisclosed Recipients>
411Subject: Earn money
412MIME-Version: 1.0
413Content-type: text/plain
414Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
415Date: Thu, 02 Sep 2004 12:31:03 -0500
416
417Spam Spam Spam
418Spam Spam Spam
419Spam Spam Spam
420Spam Spam Spam
421",
422 ),
423 supplemental_trace: Some(
424 Object {
425 "recipient": String("test@example.com"),
426 },
427 ),
428 },
429)
430"#
431 );
432 }
433
 // Sample 3: a report with version "0.1" whose arrival date is expected to
 // be populated; no unrecognized fields remain in extensions.
434 #[test]
435 fn rfc5965_3() {
436 let result = ARFReport::parse(include_bytes!("../data/rfc5965/3.eml")).unwrap();
437 k9::snapshot!(
438 result,
439 r#"
440Some(
441 ARFReport {
442 feedback_type: "abuse",
443 user_agent: "Yahoo!-Mail-Feedback/2.0",
444 version: "0.1",
445 arrival_date: Some(
446 2023-12-14T16:16:15Z,
447 ),
448 incidents: None,
449 original_envelope_id: None,
450 original_mail_from: Some(
451 "test1@example.com",
452 ),
453 reporting_mta: None,
454 source_ip: None,
455 authentication_results: [
456 "authentication result string is not available",
457 ],
458 original_rcpto_to: [
459 "user@example.com",
460 ],
461 reported_domain: [
462 "bounce.kumo.example.com",
463 ],
464 reported_uri: [],
465 extensions: {},
466 original_message: Some(
467 "Date: Thu, 14 Dec 2023 16:16:14 +0000
468To: user@example.com
469Subject: test Thu, 14 Dec 2023 16:16:14 +0000
470
471This is a test mailing
472
473",
474 ),
475 supplemental_trace: None,
476 },
477)
478"#
479 );
480 }
481
 // Sample 4: the unrecognized fields abuse-type, source and
 // subscription-link are expected in extensions, and source_ip is kept as
 // the literal string found in the report.
482 #[test]
483 fn rfc5965_4() {
484 let result = ARFReport::parse(include_bytes!("../data/rfc5965/4.eml")).unwrap();
485 k9::snapshot!(
486 result,
487 r#"
488Some(
489 ARFReport {
490 feedback_type: "abuse",
491 user_agent: "ReturnPathFBL/2.0",
492 version: "1",
493 arrival_date: Some(
494 2023-12-13T19:03:30Z,
495 ),
496 incidents: None,
497 original_envelope_id: None,
498 original_mail_from: Some(
499 "foo@bounce.example.com",
500 ),
501 reporting_mta: None,
502 source_ip: Some(
503 "x.x.x.x",
504 ),
505 authentication_results: [],
506 original_rcpto_to: [
507 "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
508 ],
509 reported_domain: [
510 "bounce.example.com",
511 ],
512 reported_uri: [],
513 extensions: {
514 "abuse-type": [
515 "complaint",
516 ],
517 "source": [
518 "Comcast",
519 ],
520 "subscription-link": [
521 "https://fbl.returnpath.net/manage/subscriptions/xxxx",
522 ],
523 },
524 original_message: Some(
525 "Date: Thu, 14 Dec 2023 16:16:14 +0000
526To: user@example.com
527Subject: test Thu, 14 Dec 2023 16:16:14 +0000
528
529This is a test mailing
530
531",
532 ),
533 supplemental_trace: None,
534 },
535)
536"#
537 );
538 }
539}