1use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use chrono::{DateTime, Utc};
5use mailparsing::{Header, HeaderParseResult, MimePart};
6use rfc5321::parse_envelope_address;
7use serde::{Deserialize, Serialize};
8use std::collections::BTreeMap;
9use std::str::FromStr;
10
/// Newtype around the `String` form of an envelope address.
///
/// `#[serde(transparent)]` makes it (de)serialize as the bare inner string
/// rather than as a wrapper object.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(transparent)]
pub(crate) struct EnvelopeAddress(String);
14
15impl EnvelopeAddress {
16 pub fn into_inner(self) -> String {
17 self.0
18 }
19}
20
21impl FromStr for EnvelopeAddress {
22 type Err = anyhow::Error;
23 fn from_str(input: &str) -> anyhow::Result<Self> {
24 Ok(Self(parse_envelope_address(input).map_err(|err| {
25 anyhow!("failed to parse {input} as EnvelopeAddress: {err}")
26 })?))
27 }
28}
29
/// A parsed Abuse Reporting Format (RFC 5965) feedback report.
///
/// The named fields are filled from the like-named headers of the
/// `message/feedback-report` part (header names are matched after being
/// lowercased); headers not claimed by a named field are kept in
/// `extensions`.
#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
pub struct ARFReport {
    /// Value of the required `feedback-type` header.
    pub feedback_type: String,
    /// Value of the required `user-agent` header.
    pub user_agent: String,
    /// Value of the required `version` header.
    pub version: String,

    /// Taken from the `arrival-date` header, with `received-date` as a
    /// fallback name; `None` when no usable value is found.
    #[serde(default)]
    pub arrival_date: Option<DateTime<Utc>>,
    /// Value of the optional `incidents` header.
    #[serde(default)]
    pub incidents: Option<u32>,
    /// Value of the optional `original-envelope-id` header.
    #[serde(default)]
    pub original_envelope_id: Option<String>,
    /// Value of the optional `original-mail-from` header, after parsing it
    /// as an envelope address.
    #[serde(default)]
    pub original_mail_from: Option<String>,
    /// Value of the optional `reporting-mta` header.
    #[serde(default)]
    pub reporting_mta: Option<RemoteMta>,
    /// Value of the optional `source-ip` header, kept as text.
    #[serde(default)]
    pub source_ip: Option<String>,

    /// Every `authentication-results` header value, in order of appearance.
    #[serde(default)]
    pub authentication_results: Vec<String>,
    /// Every `original-rcpt-to` header value, each parsed as an envelope
    /// address. NOTE(review): the field name is spelled `rcpto_to`; it is
    /// part of the public and serialized interface, so it is left as is.
    #[serde(default)]
    pub original_rcpto_to: Vec<String>,
    /// Every `reported-domain` header value.
    #[serde(default)]
    pub reported_domain: Vec<String>,
    /// Every `reported-uri` header value.
    #[serde(default)]
    pub reported_uri: Vec<String>,

    /// Remaining headers of the feedback-report part, keyed by lowercased
    /// header name, that were not consumed by one of the fields above.
    pub extensions: BTreeMap<String, Vec<String>>,

    /// Body of the `message/rfc822` or `text/rfc822-headers` part, with
    /// CRLF line endings rewritten to LF.
    pub original_message: Option<String>,
    /// JSON payload recovered from a base64-encoded `X-` header of the
    /// original message, when one carrying the expected marker is present.
    pub supplemental_trace: Option<serde_json::Value>,
}
63
64impl ARFReport {
65 pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
66 let mail = MimePart::parse(input)?;
67 let ct = mail.headers().content_type()?;
68 let ct = match ct {
69 None => return Ok(None),
70 Some(ct) => ct,
71 };
72
73 if ct.value != "multipart/report" {
74 return Ok(None);
75 }
76
77 if ct.get("report-type").as_deref() != Some("feedback-report") {
78 return Ok(None);
79 }
80
81 let mut original_message = None;
82 let mut supplemental_trace = None;
83
84 for part in mail.child_parts() {
85 let ct = content_type(part);
86 let ct = ct.as_deref();
87 if ct == Some("message/rfc822") || ct == Some("text/rfc822-headers") {
88 if let Ok(HeaderParseResult { headers, .. }) =
89 Header::parse_headers(part.raw_body())
90 {
91 for hdr in headers.iter() {
93 if !(hdr.get_name().starts_with("X-") || hdr.get_name().starts_with("x-")) {
94 continue;
95 }
96 if let Ok(decoded) =
97 data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
98 {
99 #[derive(Deserialize)]
100 struct Wrap {
101 #[serde(rename = "_@_")]
102 marker: String,
103 #[serde(flatten)]
104 payload: serde_json::Value,
105 }
106 if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
107 if obj.marker == "\\_/" {
111 supplemental_trace.replace(obj.payload);
112 break;
113 }
114 }
115 }
116 }
117 }
118
119 original_message = Some(part.raw_body().replace("\r\n", "\n"));
120 }
121 }
122
123 for part in mail.child_parts() {
124 let ct = content_type(part);
125 let ct = ct.as_deref();
126 if ct == Some("message/feedback-report") {
127 return Ok(Some(Self::parse_inner(
128 part,
129 original_message,
130 supplemental_trace,
131 )?));
132 }
133 }
134
135 anyhow::bail!("feedback-report part missing");
136 }
137
138 fn parse_inner(
139 part: &MimePart,
140 original_message: Option<String>,
141 supplemental_trace: Option<serde_json::Value>,
142 ) -> anyhow::Result<Self> {
143 let body = part.raw_body();
144 let mut extensions = extract_headers(body.as_bytes())?;
145
146 let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
147 let user_agent = extract_single_req("user-agent", &mut extensions)?;
148 let version = extract_single_req("version", &mut extensions)?;
149 let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
150 "arrival-date",
151 "received-date",
152 &mut extensions,
153 );
154 let incidents = extract_single("incidents", &mut extensions)?;
155 let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
156 let original_mail_from =
157 extract_single::<EnvelopeAddress>("original-mail-from", &mut extensions)?.map(|a| a.0);
158 let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
159 let source_ip = extract_single("source-ip", &mut extensions)?;
160 let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
161 let original_rcpto_to =
162 extract_multiple::<EnvelopeAddress>("original-rcpt-to", &mut extensions)?
163 .into_iter()
164 .map(|a| a.0)
165 .collect();
166 let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
167 let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
168
169 Ok(Self {
170 feedback_type,
171 user_agent,
172 version,
173 arrival_date,
174 incidents,
175 original_envelope_id,
176 original_mail_from,
177 reporting_mta,
178 source_ip,
179 authentication_results,
180 original_rcpto_to,
181 reported_domain,
182 reported_uri,
183 extensions,
184 original_message,
185 supplemental_trace,
186 })
187 }
188}
189
190pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<String>>> {
191 let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
192
193 let mut extensions = BTreeMap::new();
194
195 for hdr in headers.iter() {
196 let name = hdr.get_name().to_ascii_lowercase();
197 extensions
198 .entry(name)
199 .or_insert_with(std::vec::Vec::new)
200 .push(hdr.as_unstructured()?);
201 }
202 Ok(extensions)
203}
204
/// Wrapper that lets a `DateTime<Utc>` be obtained through `FromStr` from an
/// RFC 2822 formatted date string.
pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
206
207impl FromStr for DateTimeRfc2822 {
208 type Err = anyhow::Error;
209 fn from_str(input: &str) -> anyhow::Result<Self> {
210 let date = DateTime::parse_from_rfc2822(input)?;
211 Ok(Self(date.into()))
212 }
213}
214
215impl From<DateTimeRfc2822> for DateTime<Utc> {
216 fn from(val: DateTimeRfc2822) -> Self {
217 val.0
218 }
219}
220
221pub(crate) fn extract_single_req<R>(
222 name: &str,
223 extensions: &mut BTreeMap<String, Vec<String>>,
224) -> anyhow::Result<R>
225where
226 R: FromStr,
227 <R as FromStr>::Err: std::fmt::Display,
228{
229 extract_single(name, extensions)?
230 .ok_or_else(|| anyhow!("required header {name} is not present"))
231}
232
233pub(crate) fn extract_single<R>(
234 name: &str,
235 extensions: &mut BTreeMap<String, Vec<String>>,
236) -> anyhow::Result<Option<R>>
237where
238 R: FromStr,
239 <R as FromStr>::Err: std::fmt::Display,
240{
241 match extensions.remove(name) {
242 Some(mut hdrs) if hdrs.len() == 1 => {
243 let value = hdrs.remove(0);
244 let converted = value
245 .parse::<R>()
246 .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
247 Ok(Some(converted))
248 }
249 Some(_) => anyhow::bail!("header {name} should have only a single value"),
250 None => Ok(None),
251 }
252}
253
254pub(crate) fn extract_single_conv<R, T>(
255 name: &str,
256 extensions: &mut BTreeMap<String, Vec<String>>,
257) -> anyhow::Result<Option<T>>
258where
259 R: FromStr,
260 <R as FromStr>::Err: std::fmt::Display,
261 R: Into<T>,
262{
263 Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
264}
265
266pub(crate) fn extract_single_conv_fallback<R, T>(
267 name: &str,
268 fallback: &str,
269 extensions: &mut BTreeMap<String, Vec<String>>,
270) -> Option<T>
271where
272 R: FromStr,
273 <R as FromStr>::Err: std::fmt::Display,
274 R: Into<T>,
275{
276 let maybe = extract_single::<R>(name, extensions).ok()?;
277 match maybe {
278 Some(value) => Some(value.into()),
279 None => extract_single::<R>(fallback, extensions)
280 .ok()?
281 .map(Into::into),
282 }
283}
284
285pub(crate) fn extract_multiple<R>(
286 name: &str,
287 extensions: &mut BTreeMap<String, Vec<String>>,
288) -> anyhow::Result<Vec<R>>
289where
290 R: FromStr,
291 <R as FromStr>::Err: std::fmt::Display,
292{
293 match extensions.remove(name) {
294 Some(hdrs) => {
295 let mut results = vec![];
296 for h in hdrs {
297 let converted = h
298 .parse::<R>()
299 .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
300 results.push(converted);
301 }
302 Ok(results)
303 }
304 None => Ok(vec![]),
305 }
306}
307
// Snapshot tests: each one parses a sample message from `../data/rfc5965/`
// and compares the `Debug` rendering of the result with the literal below.
// The closing `"#` of every snapshot must stay at column 0, because anything
// before it would become part of the expected text.
#[cfg(test)]
mod test {
    use super::*;

    // Report carrying only the three required fields plus the original
    // message; no optional fields, extensions or trace data.
    #[test]
    fn rfc5965_1() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/1.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "SomeGenerator/1.0",
 version: "1",
 arrival_date: None,
 incidents: None,
 original_envelope_id: None,
 original_mail_from: None,
 reporting_mta: None,
 source_ip: None,
 authentication_results: [],
 original_rcpto_to: [],
 reported_domain: [],
 reported_uri: [],
 extensions: {},
 original_message: Some(
 "Received: from mailserver.example.net
 (mailserver.example.net [192.0.2.1])
 by example.com with ESMTP id M63d4137594e46;
 Thu, 08 Mar 2005 14:00:00 -0400
From: <somespammer@example.net>
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
 ),
 supplemental_trace: None,
 },
)
"#
        );
    }

    // Report with optional fields, a repeated `reported_uri`, one unclaimed
    // header left in `extensions`, and supplemental trace data decoded from
    // the `X-KumoRef` header of the original message.
    #[test]
    fn rfc5965_2() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/2.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "SomeGenerator/1.0",
 version: "1",
 arrival_date: None,
 incidents: None,
 original_envelope_id: None,
 original_mail_from: Some(
 "somespammer@example.net",
 ),
 reporting_mta: Some(
 RemoteMta {
 mta_type: "dns",
 name: "mail.example.com",
 },
 ),
 source_ip: Some(
 "192.0.2.1",
 ),
 authentication_results: [
 "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
 ],
 original_rcpto_to: [
 "user@example.com",
 ],
 reported_domain: [
 "example.net",
 ],
 reported_uri: [
 "http://example.net/earn_money.html",
 "mailto:user@example.com",
 ],
 extensions: {
 "removal-recipient": [
 "user@example.com",
 ],
 },
 original_message: Some(
 "From: <somespammer@example.net>
Received: from mailserver.example.net (mailserver.example.net
 [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
 Tue, 08 Mar 2005 14:00:00 -0400
X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
 ),
 supplemental_trace: Some(
 Object {
 "recipient": String("test@example.com"),
 },
 ),
 },
)
"#
        );
    }

    // Report whose date field parses successfully and whose version is not
    // a plain integer ("0.1"); no extensions remain afterwards.
    #[test]
    fn rfc5965_3() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/3.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "Yahoo!-Mail-Feedback/2.0",
 version: "0.1",
 arrival_date: Some(
 2023-12-14T16:16:15Z,
 ),
 incidents: None,
 original_envelope_id: None,
 original_mail_from: Some(
 "test1@example.com",
 ),
 reporting_mta: None,
 source_ip: None,
 authentication_results: [
 "authentication result string is not available",
 ],
 original_rcpto_to: [
 "user@example.com",
 ],
 reported_domain: [
 "bounce.kumo.example.com",
 ],
 reported_uri: [],
 extensions: {},
 original_message: Some(
 "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
 ),
 supplemental_trace: None,
 },
)
"#
        );
    }

    // Report with several vendor-specific headers that end up in
    // `extensions`, and a `source_ip` that is kept as opaque text.
    #[test]
    fn rfc5965_4() {
        let result = ARFReport::parse(include_bytes!("../data/rfc5965/4.eml")).unwrap();
        k9::snapshot!(
            result,
            r#"
Some(
 ARFReport {
 feedback_type: "abuse",
 user_agent: "ReturnPathFBL/2.0",
 version: "1",
 arrival_date: Some(
 2023-12-13T19:03:30Z,
 ),
 incidents: None,
 original_envelope_id: None,
 original_mail_from: Some(
 "foo@bounce.example.com",
 ),
 reporting_mta: None,
 source_ip: Some(
 "x.x.x.x",
 ),
 authentication_results: [],
 original_rcpto_to: [
 "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
 ],
 reported_domain: [
 "bounce.example.com",
 ],
 reported_uri: [],
 extensions: {
 "abuse-type": [
 "complaint",
 ],
 "source": [
 "Comcast",
 ],
 "subscription-link": [
 "https://fbl.returnpath.net/manage/subscriptions/xxxx",
 ],
 },
 original_message: Some(
 "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
 ),
 supplemental_trace: None,
 },
)
"#
        );
    }
}