kumo_log_types/
rfc5965.rs

1//! ARF reports
2use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use chrono::{DateTime, Utc};
5use mailparsing::{Header, HeaderParseResult, MimePart};
6use rfc5321::parse_envelope_address;
7use serde::{Deserialize, Serialize};
8use std::collections::BTreeMap;
9use std::str::FromStr;
10
11#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
12#[serde(transparent)]
13pub(crate) struct EnvelopeAddress(String);
14
15impl EnvelopeAddress {
16    pub fn into_inner(self) -> String {
17        self.0
18    }
19}
20
21impl FromStr for EnvelopeAddress {
22    type Err = anyhow::Error;
23    fn from_str(input: &str) -> anyhow::Result<Self> {
24        Ok(Self(parse_envelope_address(input).map_err(|err| {
25            anyhow!("failed to parse {input} as EnvelopeAddress: {err}")
26        })?))
27    }
28}
29
30#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
31pub struct ARFReport {
32    pub feedback_type: String,
33    pub user_agent: String,
34    pub version: String,
35
36    #[serde(default)]
37    pub arrival_date: Option<DateTime<Utc>>,
38    #[serde(default)]
39    pub incidents: Option<u32>,
40    #[serde(default)]
41    pub original_envelope_id: Option<String>,
42    #[serde(default)]
43    pub original_mail_from: Option<String>,
44    #[serde(default)]
45    pub reporting_mta: Option<RemoteMta>,
46    #[serde(default)]
47    pub source_ip: Option<String>,
48
49    #[serde(default)]
50    pub authentication_results: Vec<String>,
51    #[serde(default)]
52    pub original_rcpto_to: Vec<String>,
53    #[serde(default)]
54    pub reported_domain: Vec<String>,
55    #[serde(default)]
56    pub reported_uri: Vec<String>,
57
58    pub extensions: BTreeMap<String, Vec<String>>,
59
60    pub original_message: Option<String>,
61    pub supplemental_trace: Option<serde_json::Value>,
62}
63
64impl ARFReport {
65    pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
66        let mail = MimePart::parse(input)?;
67        let ct = mail.headers().content_type()?;
68        let ct = match ct {
69            None => return Ok(None),
70            Some(ct) => ct,
71        };
72
73        if ct.value != "multipart/report" {
74            return Ok(None);
75        }
76
77        if ct.get("report-type").as_deref() != Some("feedback-report") {
78            return Ok(None);
79        }
80
81        let mut original_message = None;
82        let mut supplemental_trace = None;
83
84        for part in mail.child_parts() {
85            let ct = content_type(part);
86            let ct = ct.as_deref();
87            if ct == Some("message/rfc822") || ct == Some("text/rfc822-headers") {
88                if let Ok(HeaderParseResult { headers, .. }) =
89                    Header::parse_headers(part.raw_body())
90                {
91                    // Look for x-headers that might be our supplemental trace headers
92                    for hdr in headers.iter() {
93                        if !(hdr.get_name().starts_with("X-") || hdr.get_name().starts_with("x-")) {
94                            continue;
95                        }
96                        if let Ok(decoded) =
97                            data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
98                        {
99                            #[derive(Deserialize)]
100                            struct Wrap {
101                                #[serde(rename = "_@_")]
102                                marker: String,
103                                #[serde(flatten)]
104                                payload: serde_json::Value,
105                            }
106                            if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
107                                // Sanity check that it is our encoded data, rather than
108                                // some other random header that may have been inserted
109                                // somewhere along the way
110                                if obj.marker == "\\_/" {
111                                    supplemental_trace.replace(obj.payload);
112                                    break;
113                                }
114                            }
115                        }
116                    }
117                }
118
119                original_message = Some(part.raw_body().replace("\r\n", "\n"));
120            }
121        }
122
123        for part in mail.child_parts() {
124            let ct = content_type(part);
125            let ct = ct.as_deref();
126            if ct == Some("message/feedback-report") {
127                return Ok(Some(Self::parse_inner(
128                    part,
129                    original_message,
130                    supplemental_trace,
131                )?));
132            }
133        }
134
135        anyhow::bail!("feedback-report part missing");
136    }
137
138    fn parse_inner(
139        part: &MimePart,
140        original_message: Option<String>,
141        supplemental_trace: Option<serde_json::Value>,
142    ) -> anyhow::Result<Self> {
143        let body = part.raw_body();
144        let mut extensions = extract_headers(body.as_bytes())?;
145
146        let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
147        let user_agent = extract_single_req("user-agent", &mut extensions)?;
148        let version = extract_single_req("version", &mut extensions)?;
149        let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
150            "arrival-date",
151            "received-date",
152            &mut extensions,
153        );
154        let incidents = extract_single("incidents", &mut extensions)?;
155        let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
156        let original_mail_from =
157            extract_single::<EnvelopeAddress>("original-mail-from", &mut extensions)?.map(|a| a.0);
158        let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
159        let source_ip = extract_single("source-ip", &mut extensions)?;
160        let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
161        let original_rcpto_to =
162            extract_multiple::<EnvelopeAddress>("original-rcpt-to", &mut extensions)?
163                .into_iter()
164                .map(|a| a.0)
165                .collect();
166        let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
167        let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
168
169        Ok(Self {
170            feedback_type,
171            user_agent,
172            version,
173            arrival_date,
174            incidents,
175            original_envelope_id,
176            original_mail_from,
177            reporting_mta,
178            source_ip,
179            authentication_results,
180            original_rcpto_to,
181            reported_domain,
182            reported_uri,
183            extensions,
184            original_message,
185            supplemental_trace,
186        })
187    }
188}
189
190pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<String>>> {
191    let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
192
193    let mut extensions = BTreeMap::new();
194
195    for hdr in headers.iter() {
196        let name = hdr.get_name().to_ascii_lowercase();
197        extensions
198            .entry(name)
199            .or_insert_with(std::vec::Vec::new)
200            .push(hdr.as_unstructured()?);
201    }
202    Ok(extensions)
203}
204
205pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
206
207impl FromStr for DateTimeRfc2822 {
208    type Err = anyhow::Error;
209    fn from_str(input: &str) -> anyhow::Result<Self> {
210        let date = DateTime::parse_from_rfc2822(input)?;
211        Ok(Self(date.into()))
212    }
213}
214
215impl From<DateTimeRfc2822> for DateTime<Utc> {
216    fn from(val: DateTimeRfc2822) -> Self {
217        val.0
218    }
219}
220
221pub(crate) fn extract_single_req<R>(
222    name: &str,
223    extensions: &mut BTreeMap<String, Vec<String>>,
224) -> anyhow::Result<R>
225where
226    R: FromStr,
227    <R as FromStr>::Err: std::fmt::Display,
228{
229    extract_single(name, extensions)?
230        .ok_or_else(|| anyhow!("required header {name} is not present"))
231}
232
233pub(crate) fn extract_single<R>(
234    name: &str,
235    extensions: &mut BTreeMap<String, Vec<String>>,
236) -> anyhow::Result<Option<R>>
237where
238    R: FromStr,
239    <R as FromStr>::Err: std::fmt::Display,
240{
241    match extensions.remove(name) {
242        Some(mut hdrs) if hdrs.len() == 1 => {
243            let value = hdrs.remove(0);
244            let converted = value
245                .parse::<R>()
246                .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
247            Ok(Some(converted))
248        }
249        Some(_) => anyhow::bail!("header {name} should have only a single value"),
250        None => Ok(None),
251    }
252}
253
254pub(crate) fn extract_single_conv<R, T>(
255    name: &str,
256    extensions: &mut BTreeMap<String, Vec<String>>,
257) -> anyhow::Result<Option<T>>
258where
259    R: FromStr,
260    <R as FromStr>::Err: std::fmt::Display,
261    R: Into<T>,
262{
263    Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
264}
265
266pub(crate) fn extract_single_conv_fallback<R, T>(
267    name: &str,
268    fallback: &str,
269    extensions: &mut BTreeMap<String, Vec<String>>,
270) -> Option<T>
271where
272    R: FromStr,
273    <R as FromStr>::Err: std::fmt::Display,
274    R: Into<T>,
275{
276    let maybe = extract_single::<R>(name, extensions).ok()?;
277    match maybe {
278        Some(value) => Some(value.into()),
279        None => extract_single::<R>(fallback, extensions)
280            .ok()?
281            .map(Into::into),
282    }
283}
284
285pub(crate) fn extract_multiple<R>(
286    name: &str,
287    extensions: &mut BTreeMap<String, Vec<String>>,
288) -> anyhow::Result<Vec<R>>
289where
290    R: FromStr,
291    <R as FromStr>::Err: std::fmt::Display,
292{
293    match extensions.remove(name) {
294        Some(hdrs) => {
295            let mut results = vec![];
296            for h in hdrs {
297                let converted = h
298                    .parse::<R>()
299                    .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
300                results.push(converted);
301            }
302            Ok(results)
303        }
304        None => Ok(vec![]),
305    }
306}
307
#[cfg(test)]
mod test {
    use super::*;

    /// Report carrying only the required fields; the original message has
    /// no trace header.
    #[test]
    fn rfc5965_1() {
        let raw: &[u8] = include_bytes!("../data/rfc5965/1.eml");
        let report = ARFReport::parse(raw).unwrap();
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: None,
        reporting_mta: None,
        source_ip: None,
        authentication_results: [],
        original_rcpto_to: [],
        reported_domain: [],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Received: from mailserver.example.net
    (mailserver.example.net [192.0.2.1])
    by example.com with ESMTP id M63d4137594e46;
    Thu, 08 Mar 2005 14:00:00 -0400
From: <somespammer@example.net>
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// Report with optional fields, an extension header, and an `X-KumoRef`
    /// header that decodes into the supplemental trace.
    #[test]
    fn rfc5965_2() {
        let raw: &[u8] = include_bytes!("../data/rfc5965/2.eml");
        let report = ARFReport::parse(raw).unwrap();
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "somespammer@example.net",
        ),
        reporting_mta: Some(
            RemoteMta {
                mta_type: "dns",
                name: "mail.example.com",
            },
        ),
        source_ip: Some(
            "192.0.2.1",
        ),
        authentication_results: [
            "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
        ],
        original_rcpto_to: [
            "user@example.com",
        ],
        reported_domain: [
            "example.net",
        ],
        reported_uri: [
            "http://example.net/earn_money.html",
            "mailto:user@example.com",
        ],
        extensions: {
            "removal-recipient": [
                "user@example.com",
            ],
        },
        original_message: Some(
            "From: <somespammer@example.net>
Received: from mailserver.example.net (mailserver.example.net
    [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
    Tue, 08 Mar 2005 14:00:00 -0400
X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: Some(
            Object {
                "recipient": String("test@example.com"),
            },
        ),
    },
)
"#
        );
    }

    /// Yahoo-style report; exercises the arrival date handling.
    #[test]
    fn rfc5965_3() {
        let raw: &[u8] = include_bytes!("../data/rfc5965/3.eml");
        let report = ARFReport::parse(raw).unwrap();
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "Yahoo!-Mail-Feedback/2.0",
        version: "0.1",
        arrival_date: Some(
            2023-12-14T16:16:15Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "test1@example.com",
        ),
        reporting_mta: None,
        source_ip: None,
        authentication_results: [
            "authentication result string is not available",
        ],
        original_rcpto_to: [
            "user@example.com",
        ],
        reported_domain: [
            "bounce.kumo.example.com",
        ],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// ReturnPath-style report; unrecognised headers land in `extensions`.
    #[test]
    fn rfc5965_4() {
        let raw: &[u8] = include_bytes!("../data/rfc5965/4.eml");
        let report = ARFReport::parse(raw).unwrap();
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "ReturnPathFBL/2.0",
        version: "1",
        arrival_date: Some(
            2023-12-13T19:03:30Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "foo@bounce.example.com",
        ),
        reporting_mta: None,
        source_ip: Some(
            "x.x.x.x",
        ),
        authentication_results: [],
        original_rcpto_to: [
            "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
        ],
        reported_domain: [
            "bounce.example.com",
        ],
        reported_uri: [],
        extensions: {
            "abuse-type": [
                "complaint",
            ],
            "source": [
                "Comcast",
            ],
            "subscription-link": [
                "https://fbl.returnpath.net/manage/subscriptions/xxxx",
            ],
        },
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }
}