kumo_log_types/
rfc5965.rs

//! Parsing of ARF (Abuse Reporting Format) feedback reports, as described by RFC 5965.
2use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use chrono::{DateTime, Utc};
5use mailparsing::{Header, HeaderParseResult, MimePart};
6use serde::{Deserialize, Serialize};
7use std::collections::BTreeMap;
8use std::str::FromStr;
9
10#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
11pub struct ARFReport {
12    pub feedback_type: String,
13    pub user_agent: String,
14    pub version: String,
15
16    #[serde(default)]
17    pub arrival_date: Option<DateTime<Utc>>,
18    #[serde(default)]
19    pub incidents: Option<u32>,
20    #[serde(default)]
21    pub original_envelope_id: Option<String>,
22    #[serde(default)]
23    pub original_mail_from: Option<String>,
24    #[serde(default)]
25    pub reporting_mta: Option<RemoteMta>,
26    #[serde(default)]
27    pub source_ip: Option<String>,
28
29    #[serde(default)]
30    pub authentication_results: Vec<String>,
31    #[serde(default)]
32    pub original_rcpto_to: Vec<String>,
33    #[serde(default)]
34    pub reported_domain: Vec<String>,
35    #[serde(default)]
36    pub reported_uri: Vec<String>,
37
38    pub extensions: BTreeMap<String, Vec<String>>,
39
40    pub original_message: Option<String>,
41    pub supplemental_trace: Option<serde_json::Value>,
42}
43
44impl ARFReport {
45    pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
46        let mail = MimePart::parse(input)?;
47        let ct = mail.headers().content_type()?;
48        let ct = match ct {
49            None => return Ok(None),
50            Some(ct) => ct,
51        };
52
53        if ct.value != "multipart/report" {
54            return Ok(None);
55        }
56
57        if ct.get("report-type").as_deref() != Some("feedback-report") {
58            return Ok(None);
59        }
60
61        let mut original_message = None;
62        let mut supplemental_trace = None;
63
64        for part in mail.child_parts() {
65            let ct = content_type(part);
66            let ct = ct.as_deref();
67            if ct == Some("message/rfc822") || ct == Some("text/rfc822-headers") {
68                if let Ok(HeaderParseResult { headers, .. }) =
69                    Header::parse_headers(part.raw_body())
70                {
71                    // Look for x-headers that might be our supplemental trace headers
72                    for hdr in headers.iter() {
73                        if !(hdr.get_name().starts_with("X-") || hdr.get_name().starts_with("x-")) {
74                            continue;
75                        }
76                        if let Ok(decoded) =
77                            data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
78                        {
79                            #[derive(Deserialize)]
80                            struct Wrap {
81                                #[serde(rename = "_@_")]
82                                marker: String,
83                                #[serde(flatten)]
84                                payload: serde_json::Value,
85                            }
86                            if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
87                                // Sanity check that it is our encoded data, rather than
88                                // some other random header that may have been inserted
89                                // somewhere along the way
90                                if obj.marker == "\\_/" {
91                                    supplemental_trace.replace(obj.payload);
92                                    break;
93                                }
94                            }
95                        }
96                    }
97                }
98
99                original_message = Some(part.raw_body().replace("\r\n", "\n"));
100            }
101        }
102
103        for part in mail.child_parts() {
104            let ct = content_type(part);
105            let ct = ct.as_deref();
106            if ct == Some("message/feedback-report") {
107                return Ok(Some(Self::parse_inner(
108                    part,
109                    original_message,
110                    supplemental_trace,
111                )?));
112            }
113        }
114
115        anyhow::bail!("feedback-report part missing");
116    }
117
118    fn parse_inner(
119        part: &MimePart,
120        original_message: Option<String>,
121        supplemental_trace: Option<serde_json::Value>,
122    ) -> anyhow::Result<Self> {
123        let body = part.raw_body();
124        let mut extensions = extract_headers(body.as_bytes())?;
125
126        let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
127        let user_agent = extract_single_req("user-agent", &mut extensions)?;
128        let version = extract_single_req("version", &mut extensions)?;
129        let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
130            "arrival-date",
131            "received-date",
132            &mut extensions,
133        );
134        let incidents = extract_single("incidents", &mut extensions)?;
135        let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
136        let original_mail_from = extract_single("original-mail-from", &mut extensions)?;
137        let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
138        let source_ip = extract_single("source-ip", &mut extensions)?;
139        let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
140        let original_rcpto_to = extract_multiple("original-rcpt-to", &mut extensions)?;
141        let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
142        let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
143
144        Ok(Self {
145            feedback_type,
146            user_agent,
147            version,
148            arrival_date,
149            incidents,
150            original_envelope_id,
151            original_mail_from,
152            reporting_mta,
153            source_ip,
154            authentication_results,
155            original_rcpto_to,
156            reported_domain,
157            reported_uri,
158            extensions,
159            original_message,
160            supplemental_trace,
161        })
162    }
163}
164
165pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<String>>> {
166    let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
167
168    let mut extensions = BTreeMap::new();
169
170    for hdr in headers.iter() {
171        let name = hdr.get_name().to_ascii_lowercase();
172        extensions
173            .entry(name)
174            .or_insert_with(std::vec::Vec::new)
175            .push(hdr.as_unstructured()?);
176    }
177    Ok(extensions)
178}
179
180pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
181
182impl FromStr for DateTimeRfc2822 {
183    type Err = anyhow::Error;
184    fn from_str(input: &str) -> anyhow::Result<Self> {
185        let date = DateTime::parse_from_rfc2822(input)?;
186        Ok(Self(date.into()))
187    }
188}
189
190impl From<DateTimeRfc2822> for DateTime<Utc> {
191    fn from(val: DateTimeRfc2822) -> Self {
192        val.0
193    }
194}
195
196pub(crate) fn extract_single_req<R>(
197    name: &str,
198    extensions: &mut BTreeMap<String, Vec<String>>,
199) -> anyhow::Result<R>
200where
201    R: FromStr,
202    <R as FromStr>::Err: std::fmt::Display,
203{
204    extract_single(name, extensions)?
205        .ok_or_else(|| anyhow!("required header {name} is not present"))
206}
207
208pub(crate) fn extract_single<R>(
209    name: &str,
210    extensions: &mut BTreeMap<String, Vec<String>>,
211) -> anyhow::Result<Option<R>>
212where
213    R: FromStr,
214    <R as FromStr>::Err: std::fmt::Display,
215{
216    match extensions.remove(name) {
217        Some(mut hdrs) if hdrs.len() == 1 => {
218            let value = hdrs.remove(0);
219            let converted = value
220                .parse::<R>()
221                .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
222            Ok(Some(converted))
223        }
224        Some(_) => anyhow::bail!("header {name} should have only a single value"),
225        None => Ok(None),
226    }
227}
228
229pub(crate) fn extract_single_conv<R, T>(
230    name: &str,
231    extensions: &mut BTreeMap<String, Vec<String>>,
232) -> anyhow::Result<Option<T>>
233where
234    R: FromStr,
235    <R as FromStr>::Err: std::fmt::Display,
236    R: Into<T>,
237{
238    Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
239}
240
241pub(crate) fn extract_single_conv_fallback<R, T>(
242    name: &str,
243    fallback: &str,
244    extensions: &mut BTreeMap<String, Vec<String>>,
245) -> Option<T>
246where
247    R: FromStr,
248    <R as FromStr>::Err: std::fmt::Display,
249    R: Into<T>,
250{
251    let maybe = extract_single::<R>(name, extensions).ok()?;
252    match maybe {
253        Some(value) => Some(value.into()),
254        None => extract_single::<R>(fallback, extensions)
255            .ok()?
256            .map(Into::into),
257    }
258}
259
260pub(crate) fn extract_multiple<R>(
261    name: &str,
262    extensions: &mut BTreeMap<String, Vec<String>>,
263) -> anyhow::Result<Vec<R>>
264where
265    R: FromStr,
266    <R as FromStr>::Err: std::fmt::Display,
267{
268    match extensions.remove(name) {
269        Some(hdrs) => {
270            let mut results = vec![];
271            for h in hdrs {
272                let converted = h
273                    .parse::<R>()
274                    .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
275                results.push(converted);
276            }
277            Ok(results)
278        }
279        None => Ok(vec![]),
280    }
281}
282
/// Snapshot tests over the sample reports stored in `../data/rfc5965/`.
#[cfg(test)]
mod test {
    use super::*;

    /// Parses a fixture, panicking if parsing reports an error.
    fn parse_fixture(raw: &[u8]) -> Option<ARFReport> {
        ARFReport::parse(raw).unwrap()
    }

    /// Report that carries only the required fields.
    #[test]
    fn rfc5965_1() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/1.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: None,
        reporting_mta: None,
        source_ip: None,
        authentication_results: [],
        original_rcpto_to: [],
        reported_domain: [],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Received: from mailserver.example.net
    (mailserver.example.net [192.0.2.1])
    by example.com with ESMTP id M63d4137594e46;
    Thu, 08 Mar 2005 14:00:00 -0400
From: <somespammer@example.net>
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// Report with optional fields, an extension header and an embedded
    /// supplemental trace header in the original message.
    #[test]
    fn rfc5965_2() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/2.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "<somespammer@example.net>",
        ),
        reporting_mta: Some(
            RemoteMta {
                mta_type: "dns",
                name: "mail.example.com",
            },
        ),
        source_ip: Some(
            "192.0.2.1",
        ),
        authentication_results: [
            "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
        ],
        original_rcpto_to: [
            "<user@example.com>",
        ],
        reported_domain: [
            "example.net",
        ],
        reported_uri: [
            "http://example.net/earn_money.html",
            "mailto:user@example.com",
        ],
        extensions: {
            "removal-recipient": [
                "user@example.com",
            ],
        },
        original_message: Some(
            "From: <somespammer@example.net>
Received: from mailserver.example.net (mailserver.example.net
    [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
    Tue, 08 Mar 2005 14:00:00 -0400
X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: Some(
            Object {
                "recipient": String("test@example.com"),
            },
        ),
    },
)
"#
        );
    }

    /// Report generated by the Yahoo feedback loop.
    #[test]
    fn rfc5965_3() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/3.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "Yahoo!-Mail-Feedback/2.0",
        version: "0.1",
        arrival_date: Some(
            2023-12-14T16:16:15Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "<test1@example.com>",
        ),
        reporting_mta: None,
        source_ip: None,
        authentication_results: [
            "authentication result string is not available",
        ],
        original_rcpto_to: [
            "user@example.com",
        ],
        reported_domain: [
            "bounce.kumo.example.com",
        ],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// Report generated by the ReturnPath feedback loop, which adds several
    /// non-standard headers that must land in `extensions`.
    #[test]
    fn rfc5965_4() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/4.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "ReturnPathFBL/2.0",
        version: "1",
        arrival_date: Some(
            2023-12-13T19:03:30Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "foo@bounce.example.com",
        ),
        reporting_mta: None,
        source_ip: Some(
            "x.x.x.x",
        ),
        authentication_results: [],
        original_rcpto_to: [
            "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
        ],
        reported_domain: [
            "bounce.example.com",
        ],
        reported_uri: [],
        extensions: {
            "abuse-type": [
                "complaint",
            ],
            "source": [
                "Comcast",
            ],
            "subscription-link": [
                "https://fbl.returnpath.net/manage/subscriptions/xxxx",
            ],
        },
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }
}
514}