1use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use bstr::{BStr, BString, ByteSlice};
5use chrono::{DateTime, Utc};
6use mailparsing::{Header, HeaderParseResult, MimePart};
7use rfc5321::parser::EnvelopeAddress;
8use serde::{Deserialize, Serialize};
9use std::collections::BTreeMap;
10use std::str::FromStr;
11
/// A parsed Abuse Reporting Format (ARF) feedback report.
///
/// Values are produced by [`ARFReport::parse`]. The named fields are taken
/// from the header-style fields of the `message/feedback-report` MIME part;
/// any field of that part that is not consumed by a named field is kept in
/// `extensions`.
#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
pub struct ARFReport {
    /// Value of the `feedback-type` field (required by the parser).
    pub feedback_type: String,
    /// Value of the `user-agent` field (required by the parser).
    pub user_agent: String,
    /// Value of the `version` field (required by the parser).
    pub version: String,

    /// Date parsed (RFC 2822 syntax) from `arrival-date`, or from
    /// `received-date` when `arrival-date` is absent. `None` when the
    /// consulted field is missing, repeated, or cannot be parsed.
    #[serde(default)]
    pub arrival_date: Option<DateTime<Utc>>,
    /// Value of the optional `incidents` field.
    #[serde(default)]
    pub incidents: Option<u32>,
    /// Value of the optional `original-envelope-id` field.
    #[serde(default)]
    pub original_envelope_id: Option<String>,
    /// Value of the optional `original-mail-from` field, parsed as an
    /// envelope address and rendered back to a string.
    #[serde(default)]
    pub original_mail_from: Option<String>,
    /// Value of the optional `reporting-mta` field.
    #[serde(default)]
    pub reporting_mta: Option<RemoteMta>,
    /// Value of the optional `source-ip` field, kept as text.
    #[serde(default)]
    pub source_ip: Option<String>,

    /// Every `authentication-results` field, in order of appearance.
    #[serde(default)]
    pub authentication_results: Vec<String>,
    /// Every `original-rcpt-to` field, each parsed as an envelope address and
    /// rendered back to a string. The field name's spelling is part of the
    /// public and serialized interface.
    #[serde(default)]
    pub original_rcpto_to: Vec<String>,
    /// Every `reported-domain` field, in order of appearance.
    #[serde(default)]
    pub reported_domain: Vec<String>,
    /// Every `reported-uri` field, in order of appearance.
    #[serde(default)]
    pub reported_uri: Vec<String>,

    /// Fields of the feedback-report part that were not consumed above,
    /// keyed by lowercased field name; each entry holds all values seen for
    /// that name.
    pub extensions: BTreeMap<String, Vec<BString>>,

    /// Raw body of the `message/rfc822` or `text/rfc822-headers` part, with
    /// CRLF sequences rewritten to LF. `None` when no such part exists.
    pub original_message: Option<BString>,
    /// JSON payload recovered from a base64-encoded `X-` header of the
    /// original message whose `_@_` marker equals `\_/`; the marker itself is
    /// not part of the payload. `None` when no such header is found.
    pub supplemental_trace: Option<serde_json::Value>,
}
45
46impl ARFReport {
47 pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
48 let mail = MimePart::parse(input)?;
49 let ct = mail.headers().content_type()?;
50 let ct = match ct {
51 None => return Ok(None),
52 Some(ct) => ct,
53 };
54
55 if ct.value != "multipart/report" {
56 return Ok(None);
57 }
58
59 if ct.get("report-type").as_ref().map(|b| b.as_bstr()) != Some(BStr::new("feedback-report"))
60 {
61 return Ok(None);
62 }
63
64 let mut original_message = None;
65 let mut supplemental_trace = None;
66
67 for part in mail.child_parts() {
68 let ct = content_type(part);
69 let ct = ct.as_ref().map(|b| b.as_bstr());
70 if ct == Some(BStr::new("message/rfc822"))
71 || ct == Some(BStr::new("text/rfc822-headers"))
72 {
73 if let Ok(HeaderParseResult { headers, .. }) =
74 Header::parse_headers(part.raw_body())
75 {
76 for hdr in headers.iter() {
78 if !(hdr.get_name().starts_with_str("X-")
79 || hdr.get_name().starts_with_str("x-"))
80 {
81 continue;
82 }
83 if let Ok(decoded) =
84 data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
85 {
86 #[derive(Deserialize)]
87 struct Wrap {
88 #[serde(rename = "_@_")]
89 marker: String,
90 #[serde(flatten)]
91 payload: serde_json::Value,
92 }
93 if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
94 if obj.marker == "\\_/" {
98 supplemental_trace.replace(obj.payload);
99 break;
100 }
101 }
102 }
103 }
104 }
105
106 original_message = Some(BString::new(
107 part.raw_body().as_bytes().replace("\r\n", "\n"),
108 ));
109 }
110 }
111
112 for part in mail.child_parts() {
113 let ct = content_type(part);
114 let ct = ct.as_ref().map(|b| b.as_bstr());
115 if ct == Some(BStr::new("message/feedback-report")) {
116 return Ok(Some(Self::parse_inner(
117 part,
118 original_message,
119 supplemental_trace,
120 )?));
121 }
122 }
123
124 anyhow::bail!("feedback-report part missing");
125 }
126
127 fn parse_inner(
128 part: &MimePart,
129 original_message: Option<BString>,
130 supplemental_trace: Option<serde_json::Value>,
131 ) -> anyhow::Result<Self> {
132 let body = part.raw_body();
133 let mut extensions = extract_headers(body.as_bytes())?;
134
135 let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
136 let user_agent = extract_single_req("user-agent", &mut extensions)?;
137 let version = extract_single_req("version", &mut extensions)?;
138 let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
139 "arrival-date",
140 "received-date",
141 &mut extensions,
142 );
143 let incidents = extract_single("incidents", &mut extensions)?;
144 let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
145 let original_mail_from =
146 extract_single::<EnvelopeAddress>("original-mail-from", &mut extensions)?
147 .map(|a| a.to_string());
148 let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
149 let source_ip = extract_single("source-ip", &mut extensions)?;
150 let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
151 let original_rcpto_to =
152 extract_multiple::<EnvelopeAddress>("original-rcpt-to", &mut extensions)?
153 .into_iter()
154 .map(|a| a.to_string())
155 .collect();
156 let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
157 let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
158
159 Ok(Self {
160 feedback_type,
161 user_agent,
162 version,
163 arrival_date,
164 incidents,
165 original_envelope_id,
166 original_mail_from,
167 reporting_mta,
168 source_ip,
169 authentication_results,
170 original_rcpto_to,
171 reported_domain,
172 reported_uri,
173 extensions,
174 original_message,
175 supplemental_trace,
176 })
177 }
178}
179
180pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<BString>>> {
181 let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
182
183 let mut extensions = BTreeMap::new();
184
185 for hdr in headers.iter() {
186 let name = String::from_utf8_lossy(&hdr.get_name()).to_ascii_lowercase();
187 extensions
188 .entry(name)
189 .or_insert_with(std::vec::Vec::new)
190 .push(hdr.as_unstructured()?);
191 }
192 Ok(extensions)
193}
194
/// Newtype around `DateTime<Utc>` whose `FromStr` implementation accepts
/// RFC 2822 formatted dates, so that it can be used with the generic
/// `extract_*` helpers in this module.
pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
196
197impl FromStr for DateTimeRfc2822 {
198 type Err = anyhow::Error;
199 fn from_str(input: &str) -> anyhow::Result<Self> {
200 let date = DateTime::parse_from_rfc2822(input)?;
201 Ok(Self(date.into()))
202 }
203}
204
205impl From<DateTimeRfc2822> for DateTime<Utc> {
206 fn from(val: DateTimeRfc2822) -> Self {
207 val.0
208 }
209}
210
211pub(crate) fn extract_single_req<R>(
212 name: &str,
213 extensions: &mut BTreeMap<String, Vec<BString>>,
214) -> anyhow::Result<R>
215where
216 R: FromStr,
217 <R as FromStr>::Err: std::fmt::Display,
218{
219 extract_single(name, extensions)?
220 .ok_or_else(|| anyhow!("required header {name} is not present"))
221}
222
223pub(crate) fn extract_single<R>(
224 name: &str,
225 extensions: &mut BTreeMap<String, Vec<BString>>,
226) -> anyhow::Result<Option<R>>
227where
228 R: FromStr,
229 <R as FromStr>::Err: std::fmt::Display,
230{
231 match extensions.remove(name) {
232 Some(mut hdrs) if hdrs.len() == 1 => {
233 let value = hdrs.remove(0);
234 let value = value
235 .to_str()
236 .map_err(|err| anyhow!("{value} could not be converted to UTF-8: {err:#}"))?;
237 let converted = value
238 .parse::<R>()
239 .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
240 Ok(Some(converted))
241 }
242 Some(_) => anyhow::bail!("header {name} should have only a single value"),
243 None => Ok(None),
244 }
245}
246
247pub(crate) fn extract_single_conv<R, T>(
248 name: &str,
249 extensions: &mut BTreeMap<String, Vec<BString>>,
250) -> anyhow::Result<Option<T>>
251where
252 R: FromStr,
253 <R as FromStr>::Err: std::fmt::Display,
254 R: Into<T>,
255{
256 Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
257}
258
259pub(crate) fn extract_single_conv_fallback<R, T>(
260 name: &str,
261 fallback: &str,
262 extensions: &mut BTreeMap<String, Vec<BString>>,
263) -> Option<T>
264where
265 R: FromStr,
266 <R as FromStr>::Err: std::fmt::Display,
267 R: Into<T>,
268{
269 let maybe = extract_single::<R>(name, extensions).ok()?;
270 match maybe {
271 Some(value) => Some(value.into()),
272 None => extract_single::<R>(fallback, extensions)
273 .ok()?
274 .map(Into::into),
275 }
276}
277
278pub(crate) fn extract_multiple<R>(
279 name: &str,
280 extensions: &mut BTreeMap<String, Vec<BString>>,
281) -> anyhow::Result<Vec<R>>
282where
283 R: FromStr,
284 <R as FromStr>::Err: std::fmt::Display,
285{
286 match extensions.remove(name) {
287 Some(hdrs) => {
288 let mut results = vec![];
289 for h in hdrs {
290 let value = h
291 .to_str()
292 .map_err(|err| anyhow!("{h} could not be converted to UTF-8: {err:#}"))?;
293 let converted = value
294 .parse::<R>()
295 .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
296 results.push(converted);
297 }
298 Ok(results)
299 }
300 None => Ok(vec![]),
301 }
302}
303
// Snapshot tests: each one parses a fixture message from `../data/rfc5965/`
// and compares the Debug rendering of the result with the inline snapshot.
#[cfg(test)]
mod test {
    use super::*;

    // Fixture 1: only the three required fields are present; the original
    // message is captured and there is no supplemental trace.
    #[test]
    fn rfc5965_1() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/1.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "SomeGenerator/1.0",
 version: "1",
 arrival_date: None,
 incidents: None,
 original_envelope_id: None,
 original_mail_from: None,
 reporting_mta: None,
 source_ip: None,
 authentication_results: [],
 original_rcpto_to: [],
 reported_domain: [],
 reported_uri: [],
 extensions: {},
 original_message: Some(
 "Received: from mailserver.example.net
 (mailserver.example.net [192.0.2.1])
 by example.com with ESMTP id M63d4137594e46;
 Thu, 08 Mar 2005 14:00:00 -0400
From: <somespammer@example.net>
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
 ),
 supplemental_trace: None,
 },
)
"#
        );
    }

    // Fixture 2: optional fields are populated, an unrecognized field
    // (`removal-recipient`) lands in `extensions`, and the `X-KumoRef`
    // header of the original message is decoded into `supplemental_trace`.
    #[test]
    fn rfc5965_2() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/2.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "SomeGenerator/1.0",
 version: "1",
 arrival_date: None,
 incidents: None,
 original_envelope_id: None,
 original_mail_from: Some(
 "somespammer@example.net",
 ),
 reporting_mta: Some(
 RemoteMta {
 mta_type: "dns",
 name: "mail.example.com",
 },
 ),
 source_ip: Some(
 "192.0.2.1",
 ),
 authentication_results: [
 "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
 ],
 original_rcpto_to: [
 "user@example.com",
 ],
 reported_domain: [
 "example.net",
 ],
 reported_uri: [
 "http://example.net/earn_money.html",
 "mailto:user@example.com",
 ],
 extensions: {
 "removal-recipient": [
 "user@example.com",
 ],
 },
 original_message: Some(
 "From: <somespammer@example.net>
Received: from mailserver.example.net (mailserver.example.net
 [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
 Tue, 08 Mar 2005 14:00:00 -0400
X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
 ),
 supplemental_trace: Some(
 Object {
 "recipient": String("test@example.com"),
 },
 ),
 },
)
"#
        );
    }

    // Fixture 3: a report whose user agent is `Yahoo!-Mail-Feedback/2.0`;
    // the arrival date is parsed and no extension fields remain.
    #[test]
    fn rfc5965_3() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/3.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "Yahoo!-Mail-Feedback/2.0",
 version: "0.1",
 arrival_date: Some(
 2023-12-14T16:16:15Z,
 ),
 incidents: None,
 original_envelope_id: None,
 original_mail_from: Some(
 "test1@example.com",
 ),
 reporting_mta: None,
 source_ip: None,
 authentication_results: [
 "authentication result string is not available",
 ],
 original_rcpto_to: [
 "user@example.com",
 ],
 reported_domain: [
 "bounce.kumo.example.com",
 ],
 reported_uri: [],
 extensions: {},
 original_message: Some(
 "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
 ),
 supplemental_trace: None,
 },
)
"#
        );
    }

    // Fixture 4: a report whose user agent is `ReturnPathFBL/2.0`; several
    // fields unknown to the parser are preserved in `extensions`.
    #[test]
    fn rfc5965_4() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/4.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "ReturnPathFBL/2.0",
 version: "1",
 arrival_date: Some(
 2023-12-13T19:03:30Z,
 ),
 incidents: None,
 original_envelope_id: None,
 original_mail_from: Some(
 "foo@bounce.example.com",
 ),
 reporting_mta: None,
 source_ip: Some(
 "x.x.x.x",
 ),
 authentication_results: [],
 original_rcpto_to: [
 "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
 ],
 reported_domain: [
 "bounce.example.com",
 ],
 reported_uri: [],
 extensions: {
 "abuse-type": [
 "complaint",
 ],
 "source": [
 "Comcast",
 ],
 "subscription-link": [
 "https://fbl.returnpath.net/manage/subscriptions/xxxx",
 ],
 },
 original_message: Some(
 "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
 ),
 supplemental_trace: None,
 },
)
"#
        );
    }
}