1use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use chrono::{DateTime, Utc};
5use mailparsing::{Header, HeaderParseResult, MimePart};
6use serde::{Deserialize, Serialize};
7use std::collections::BTreeMap;
8use std::str::FromStr;
9
10#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
11pub struct ARFReport {
12 pub feedback_type: String,
13 pub user_agent: String,
14 pub version: String,
15
16 #[serde(default)]
17 pub arrival_date: Option<DateTime<Utc>>,
18 #[serde(default)]
19 pub incidents: Option<u32>,
20 #[serde(default)]
21 pub original_envelope_id: Option<String>,
22 #[serde(default)]
23 pub original_mail_from: Option<String>,
24 #[serde(default)]
25 pub reporting_mta: Option<RemoteMta>,
26 #[serde(default)]
27 pub source_ip: Option<String>,
28
29 #[serde(default)]
30 pub authentication_results: Vec<String>,
31 #[serde(default)]
32 pub original_rcpto_to: Vec<String>,
33 #[serde(default)]
34 pub reported_domain: Vec<String>,
35 #[serde(default)]
36 pub reported_uri: Vec<String>,
37
38 pub extensions: BTreeMap<String, Vec<String>>,
39
40 pub original_message: Option<String>,
41 pub supplemental_trace: Option<serde_json::Value>,
42}
43
44impl ARFReport {
45 pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
46 let mail = MimePart::parse(input)?;
47 let ct = mail.headers().content_type()?;
48 let ct = match ct {
49 None => return Ok(None),
50 Some(ct) => ct,
51 };
52
53 if ct.value != "multipart/report" {
54 return Ok(None);
55 }
56
57 if ct.get("report-type").as_deref() != Some("feedback-report") {
58 return Ok(None);
59 }
60
61 let mut original_message = None;
62 let mut supplemental_trace = None;
63
64 for part in mail.child_parts() {
65 let ct = content_type(part);
66 let ct = ct.as_deref();
67 if ct == Some("message/rfc822") || ct == Some("text/rfc822-headers") {
68 if let Ok(HeaderParseResult { headers, .. }) =
69 Header::parse_headers(part.raw_body())
70 {
71 for hdr in headers.iter() {
73 if !(hdr.get_name().starts_with("X-") || hdr.get_name().starts_with("x-")) {
74 continue;
75 }
76 if let Ok(decoded) =
77 data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
78 {
79 #[derive(Deserialize)]
80 struct Wrap {
81 #[serde(rename = "_@_")]
82 marker: String,
83 #[serde(flatten)]
84 payload: serde_json::Value,
85 }
86 if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
87 if obj.marker == "\\_/" {
91 supplemental_trace.replace(obj.payload);
92 break;
93 }
94 }
95 }
96 }
97 }
98
99 original_message = Some(part.raw_body().replace("\r\n", "\n"));
100 }
101 }
102
103 for part in mail.child_parts() {
104 let ct = content_type(part);
105 let ct = ct.as_deref();
106 if ct == Some("message/feedback-report") {
107 return Ok(Some(Self::parse_inner(
108 part,
109 original_message,
110 supplemental_trace,
111 )?));
112 }
113 }
114
115 anyhow::bail!("feedback-report part missing");
116 }
117
118 fn parse_inner(
119 part: &MimePart,
120 original_message: Option<String>,
121 supplemental_trace: Option<serde_json::Value>,
122 ) -> anyhow::Result<Self> {
123 let body = part.raw_body();
124 let mut extensions = extract_headers(body.as_bytes())?;
125
126 let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
127 let user_agent = extract_single_req("user-agent", &mut extensions)?;
128 let version = extract_single_req("version", &mut extensions)?;
129 let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
130 "arrival-date",
131 "received-date",
132 &mut extensions,
133 );
134 let incidents = extract_single("incidents", &mut extensions)?;
135 let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
136 let original_mail_from = extract_single("original-mail-from", &mut extensions)?;
137 let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
138 let source_ip = extract_single("source-ip", &mut extensions)?;
139 let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
140 let original_rcpto_to = extract_multiple("original-rcpt-to", &mut extensions)?;
141 let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
142 let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
143
144 Ok(Self {
145 feedback_type,
146 user_agent,
147 version,
148 arrival_date,
149 incidents,
150 original_envelope_id,
151 original_mail_from,
152 reporting_mta,
153 source_ip,
154 authentication_results,
155 original_rcpto_to,
156 reported_domain,
157 reported_uri,
158 extensions,
159 original_message,
160 supplemental_trace,
161 })
162 }
163}
164
165pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<String>>> {
166 let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
167
168 let mut extensions = BTreeMap::new();
169
170 for hdr in headers.iter() {
171 let name = hdr.get_name().to_ascii_lowercase();
172 extensions
173 .entry(name)
174 .or_insert_with(std::vec::Vec::new)
175 .push(hdr.as_unstructured()?);
176 }
177 Ok(extensions)
178}
179
180pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
181
182impl FromStr for DateTimeRfc2822 {
183 type Err = anyhow::Error;
184 fn from_str(input: &str) -> anyhow::Result<Self> {
185 let date = DateTime::parse_from_rfc2822(input)?;
186 Ok(Self(date.into()))
187 }
188}
189
190impl From<DateTimeRfc2822> for DateTime<Utc> {
191 fn from(val: DateTimeRfc2822) -> Self {
192 val.0
193 }
194}
195
196pub(crate) fn extract_single_req<R>(
197 name: &str,
198 extensions: &mut BTreeMap<String, Vec<String>>,
199) -> anyhow::Result<R>
200where
201 R: FromStr,
202 <R as FromStr>::Err: std::fmt::Display,
203{
204 extract_single(name, extensions)?
205 .ok_or_else(|| anyhow!("required header {name} is not present"))
206}
207
208pub(crate) fn extract_single<R>(
209 name: &str,
210 extensions: &mut BTreeMap<String, Vec<String>>,
211) -> anyhow::Result<Option<R>>
212where
213 R: FromStr,
214 <R as FromStr>::Err: std::fmt::Display,
215{
216 match extensions.remove(name) {
217 Some(mut hdrs) if hdrs.len() == 1 => {
218 let value = hdrs.remove(0);
219 let converted = value
220 .parse::<R>()
221 .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
222 Ok(Some(converted))
223 }
224 Some(_) => anyhow::bail!("header {name} should have only a single value"),
225 None => Ok(None),
226 }
227}
228
229pub(crate) fn extract_single_conv<R, T>(
230 name: &str,
231 extensions: &mut BTreeMap<String, Vec<String>>,
232) -> anyhow::Result<Option<T>>
233where
234 R: FromStr,
235 <R as FromStr>::Err: std::fmt::Display,
236 R: Into<T>,
237{
238 Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
239}
240
241pub(crate) fn extract_single_conv_fallback<R, T>(
242 name: &str,
243 fallback: &str,
244 extensions: &mut BTreeMap<String, Vec<String>>,
245) -> Option<T>
246where
247 R: FromStr,
248 <R as FromStr>::Err: std::fmt::Display,
249 R: Into<T>,
250{
251 let maybe = extract_single::<R>(name, extensions).ok()?;
252 match maybe {
253 Some(value) => Some(value.into()),
254 None => extract_single::<R>(fallback, extensions)
255 .ok()?
256 .map(Into::into),
257 }
258}
259
260pub(crate) fn extract_multiple<R>(
261 name: &str,
262 extensions: &mut BTreeMap<String, Vec<String>>,
263) -> anyhow::Result<Vec<R>>
264where
265 R: FromStr,
266 <R as FromStr>::Err: std::fmt::Display,
267{
268 match extensions.remove(name) {
269 Some(hdrs) => {
270 let mut results = vec![];
271 for h in hdrs {
272 let converted = h
273 .parse::<R>()
274 .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
275 results.push(converted);
276 }
277 Ok(results)
278 }
279 None => Ok(vec![]),
280 }
281}
282
// Snapshot tests: each one parses a sample message stored under
// ../data/rfc5965/ and compares the parse result with an inline k9 snapshot.
283#[cfg(test)]
284mod test {
285 use super::*;
286
    // Sample 1: only the three required fields and the original message are
    // expected; every optional field is empty and there is no trace.
287 #[test]
288 fn rfc5965_1() {
289 let result = ARFReport::parse(include_bytes!("../data/rfc5965/1.eml")).unwrap();
290 k9::snapshot!(
291 result,
292 r#"
293Some(
294 ARFReport {
295 feedback_type: "abuse",
296 user_agent: "SomeGenerator/1.0",
297 version: "1",
298 arrival_date: None,
299 incidents: None,
300 original_envelope_id: None,
301 original_mail_from: None,
302 reporting_mta: None,
303 source_ip: None,
304 authentication_results: [],
305 original_rcpto_to: [],
306 reported_domain: [],
307 reported_uri: [],
308 extensions: {},
309 original_message: Some(
310 "Received: from mailserver.example.net
311 (mailserver.example.net [192.0.2.1])
312 by example.com with ESMTP id M63d4137594e46;
313 Thu, 08 Mar 2005 14:00:00 -0400
314From: <somespammer@example.net>
315To: <Undisclosed Recipients>
316Subject: Earn money
317MIME-Version: 1.0
318Content-type: text/plain
319Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
320Date: Thu, 02 Sep 2004 12:31:03 -0500
321
322Spam Spam Spam
323Spam Spam Spam
324Spam Spam Spam
325Spam Spam Spam
326",
327 ),
328 supplemental_trace: None,
329 },
330)
331"#
332 );
333 }
334
    // Sample 2: optional fields are populated, the unrecognized
    // removal-recipient field is expected in `extensions`, and the original
    // message carries an X-KumoRef header for which the snapshot expects a
    // decoded `supplemental_trace`.
335 #[test]
336 fn rfc5965_2() {
337 let result = ARFReport::parse(include_bytes!("../data/rfc5965/2.eml")).unwrap();
338 k9::snapshot!(
339 result,
340 r#"
341Some(
342 ARFReport {
343 feedback_type: "abuse",
344 user_agent: "SomeGenerator/1.0",
345 version: "1",
346 arrival_date: None,
347 incidents: None,
348 original_envelope_id: None,
349 original_mail_from: Some(
350 "<somespammer@example.net>",
351 ),
352 reporting_mta: Some(
353 RemoteMta {
354 mta_type: "dns",
355 name: "mail.example.com",
356 },
357 ),
358 source_ip: Some(
359 "192.0.2.1",
360 ),
361 authentication_results: [
362 "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
363 ],
364 original_rcpto_to: [
365 "<user@example.com>",
366 ],
367 reported_domain: [
368 "example.net",
369 ],
370 reported_uri: [
371 "http://example.net/earn_money.html",
372 "mailto:user@example.com",
373 ],
374 extensions: {
375 "removal-recipient": [
376 "user@example.com",
377 ],
378 },
379 original_message: Some(
380 "From: <somespammer@example.net>
381Received: from mailserver.example.net (mailserver.example.net
382 [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
383 Tue, 08 Mar 2005 14:00:00 -0400
384X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
385To: <Undisclosed Recipients>
386Subject: Earn money
387MIME-Version: 1.0
388Content-type: text/plain
389Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
390Date: Thu, 02 Sep 2004 12:31:03 -0500
391
392Spam Spam Spam
393Spam Spam Spam
394Spam Spam Spam
395Spam Spam Spam
396",
397 ),
398 supplemental_trace: Some(
399 Object {
400 "recipient": String("test@example.com"),
401 },
402 ),
403 },
404)
405"#
406 );
407 }
408
    // Sample 3: the snapshot expects a populated `arrival_date` and an empty
    // `extensions` map.
409 #[test]
410 fn rfc5965_3() {
411 let result = ARFReport::parse(include_bytes!("../data/rfc5965/3.eml")).unwrap();
412 k9::snapshot!(
413 result,
414 r#"
415Some(
416 ARFReport {
417 feedback_type: "abuse",
418 user_agent: "Yahoo!-Mail-Feedback/2.0",
419 version: "0.1",
420 arrival_date: Some(
421 2023-12-14T16:16:15Z,
422 ),
423 incidents: None,
424 original_envelope_id: None,
425 original_mail_from: Some(
426 "<test1@example.com>",
427 ),
428 reporting_mta: None,
429 source_ip: None,
430 authentication_results: [
431 "authentication result string is not available",
432 ],
433 original_rcpto_to: [
434 "user@example.com",
435 ],
436 reported_domain: [
437 "bounce.kumo.example.com",
438 ],
439 reported_uri: [],
440 extensions: {},
441 original_message: Some(
442 "Date: Thu, 14 Dec 2023 16:16:14 +0000
443To: user@example.com
444Subject: test Thu, 14 Dec 2023 16:16:14 +0000
445
446This is a test mailing
447
448",
449 ),
450 supplemental_trace: None,
451 },
452)
453"#
454 );
455 }
456
    // Sample 4: the fields the parser does not consume (abuse-type, source,
    // subscription-link) are expected to be preserved in `extensions`.
457 #[test]
458 fn rfc5965_4() {
459 let result = ARFReport::parse(include_bytes!("../data/rfc5965/4.eml")).unwrap();
460 k9::snapshot!(
461 result,
462 r#"
463Some(
464 ARFReport {
465 feedback_type: "abuse",
466 user_agent: "ReturnPathFBL/2.0",
467 version: "1",
468 arrival_date: Some(
469 2023-12-13T19:03:30Z,
470 ),
471 incidents: None,
472 original_envelope_id: None,
473 original_mail_from: Some(
474 "foo@bounce.example.com",
475 ),
476 reporting_mta: None,
477 source_ip: Some(
478 "x.x.x.x",
479 ),
480 authentication_results: [],
481 original_rcpto_to: [
482 "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
483 ],
484 reported_domain: [
485 "bounce.example.com",
486 ],
487 reported_uri: [],
488 extensions: {
489 "abuse-type": [
490 "complaint",
491 ],
492 "source": [
493 "Comcast",
494 ],
495 "subscription-link": [
496 "https://fbl.returnpath.net/manage/subscriptions/xxxx",
497 ],
498 },
499 original_message: Some(
500 "Date: Thu, 14 Dec 2023 16:16:14 +0000
501To: user@example.com
502Subject: test Thu, 14 Dec 2023 16:16:14 +0000
503
504This is a test mailing
505
506",
507 ),
508 supplemental_trace: None,
509 },
510)
511"#
512 );
513 }
514}