kumo_log_types/
rfc5965.rs

//! Parsing of ARF (Abuse Reporting Format) feedback reports, as described by RFC 5965.
2use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use bstr::{BStr, BString, ByteSlice};
5use chrono::{DateTime, Utc};
6use mailparsing::{BStringUtf8, Header, HeaderParseResult, MimePart};
7use rfc5321::parser::EnvelopeAddress;
8use serde::{Deserialize, Serialize};
9use serde_with::serde_as;
10use std::collections::BTreeMap;
11use std::str::FromStr;
12
13#[serde_as]
14#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
15pub struct ARFReport {
16    pub feedback_type: String,
17    pub user_agent: String,
18    pub version: String,
19
20    #[serde(default)]
21    pub arrival_date: Option<DateTime<Utc>>,
22    #[serde(default)]
23    pub incidents: Option<u32>,
24    #[serde(default)]
25    pub original_envelope_id: Option<String>,
26    #[serde(default)]
27    pub original_mail_from: Option<String>,
28    #[serde(default)]
29    pub reporting_mta: Option<RemoteMta>,
30    #[serde(default)]
31    pub source_ip: Option<String>,
32
33    #[serde(default)]
34    pub authentication_results: Vec<String>,
35    #[serde(default)]
36    pub original_rcpto_to: Vec<String>,
37    #[serde(default)]
38    pub reported_domain: Vec<String>,
39    #[serde(default)]
40    pub reported_uri: Vec<String>,
41
42    #[serde_as(as = "BTreeMap<_, Vec<BStringUtf8>>")]
43    pub extensions: BTreeMap<String, Vec<BString>>,
44
45    #[serde_as(as = "Option<BStringUtf8>")]
46    pub original_message: Option<BString>,
47    pub supplemental_trace: Option<serde_json::Value>,
48}
49
50impl ARFReport {
51    pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
52        let mail = MimePart::parse(input)?;
53        let ct = mail.headers().content_type()?;
54        let ct = match ct {
55            None => return Ok(None),
56            Some(ct) => ct,
57        };
58
59        if ct.value != "multipart/report" {
60            return Ok(None);
61        }
62
63        if ct.get("report-type").as_ref().map(|b| b.as_bstr()) != Some(BStr::new("feedback-report"))
64        {
65            return Ok(None);
66        }
67
68        let mut original_message = None;
69        let mut supplemental_trace = None;
70
71        for part in mail.child_parts() {
72            let ct = content_type(part);
73            let ct = ct.as_ref().map(|b| b.as_bstr());
74            if ct == Some(BStr::new("message/rfc822"))
75                || ct == Some(BStr::new("text/rfc822-headers"))
76            {
77                if let Ok(HeaderParseResult { headers, .. }) =
78                    Header::parse_headers(part.raw_body())
79                {
80                    // Look for x-headers that might be our supplemental trace headers
81                    for hdr in headers.iter() {
82                        if !(hdr.get_name().starts_with_str("X-")
83                            || hdr.get_name().starts_with_str("x-"))
84                        {
85                            continue;
86                        }
87                        if let Ok(decoded) =
88                            data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
89                        {
90                            #[derive(Deserialize)]
91                            struct Wrap {
92                                #[serde(rename = "_@_")]
93                                marker: String,
94                                #[serde(flatten)]
95                                payload: serde_json::Value,
96                            }
97                            if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
98                                // Sanity check that it is our encoded data, rather than
99                                // some other random header that may have been inserted
100                                // somewhere along the way
101                                if obj.marker == "\\_/" {
102                                    supplemental_trace.replace(obj.payload);
103                                    break;
104                                }
105                            }
106                        }
107                    }
108                }
109
110                original_message = Some(BString::new(
111                    part.raw_body().as_bytes().replace("\r\n", "\n"),
112                ));
113            }
114        }
115
116        for part in mail.child_parts() {
117            let ct = content_type(part);
118            let ct = ct.as_ref().map(|b| b.as_bstr());
119            if ct == Some(BStr::new("message/feedback-report")) {
120                return Ok(Some(Self::parse_inner(
121                    part,
122                    original_message,
123                    supplemental_trace,
124                )?));
125            }
126        }
127
128        anyhow::bail!("feedback-report part missing");
129    }
130
131    fn parse_inner(
132        part: &MimePart,
133        original_message: Option<BString>,
134        supplemental_trace: Option<serde_json::Value>,
135    ) -> anyhow::Result<Self> {
136        let body = part.raw_body();
137        let mut extensions = extract_headers(body.as_bytes())?;
138
139        let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
140        let user_agent = extract_single_req("user-agent", &mut extensions)?;
141        let version = extract_single_req("version", &mut extensions)?;
142        let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
143            "arrival-date",
144            "received-date",
145            &mut extensions,
146        );
147        let incidents = extract_single("incidents", &mut extensions)?;
148        let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
149        let original_mail_from =
150            extract_single::<EnvelopeAddress>("original-mail-from", &mut extensions)?
151                .map(|a| a.to_string());
152        let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
153        let source_ip = extract_single("source-ip", &mut extensions)?;
154        let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
155        let original_rcpto_to =
156            extract_multiple::<EnvelopeAddress>("original-rcpt-to", &mut extensions)?
157                .into_iter()
158                .map(|a| a.to_string())
159                .collect();
160        let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
161        let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
162
163        Ok(Self {
164            feedback_type,
165            user_agent,
166            version,
167            arrival_date,
168            incidents,
169            original_envelope_id,
170            original_mail_from,
171            reporting_mta,
172            source_ip,
173            authentication_results,
174            original_rcpto_to,
175            reported_domain,
176            reported_uri,
177            extensions,
178            original_message,
179            supplemental_trace,
180        })
181    }
182}
183
184pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<BString>>> {
185    let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
186
187    let mut extensions = BTreeMap::new();
188
189    for hdr in headers.iter() {
190        let name = String::from_utf8_lossy(&hdr.get_name()).to_ascii_lowercase();
191        extensions
192            .entry(name)
193            .or_insert_with(std::vec::Vec::new)
194            .push(hdr.as_unstructured()?);
195    }
196    Ok(extensions)
197}
198
199pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
200
201impl FromStr for DateTimeRfc2822 {
202    type Err = anyhow::Error;
203    fn from_str(input: &str) -> anyhow::Result<Self> {
204        let date = DateTime::parse_from_rfc2822(input)?;
205        Ok(Self(date.into()))
206    }
207}
208
209impl From<DateTimeRfc2822> for DateTime<Utc> {
210    fn from(val: DateTimeRfc2822) -> Self {
211        val.0
212    }
213}
214
215pub(crate) fn extract_single_req<R>(
216    name: &str,
217    extensions: &mut BTreeMap<String, Vec<BString>>,
218) -> anyhow::Result<R>
219where
220    R: FromStr,
221    <R as FromStr>::Err: std::fmt::Display,
222{
223    extract_single(name, extensions)?
224        .ok_or_else(|| anyhow!("required header {name} is not present"))
225}
226
227pub(crate) fn extract_single<R>(
228    name: &str,
229    extensions: &mut BTreeMap<String, Vec<BString>>,
230) -> anyhow::Result<Option<R>>
231where
232    R: FromStr,
233    <R as FromStr>::Err: std::fmt::Display,
234{
235    match extensions.remove(name) {
236        Some(mut hdrs) if hdrs.len() == 1 => {
237            let value = hdrs.remove(0);
238            let value = value
239                .to_str()
240                .map_err(|err| anyhow!("{value} could not be converted to UTF-8: {err:#}"))?;
241            let converted = value
242                .parse::<R>()
243                .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
244            Ok(Some(converted))
245        }
246        Some(_) => anyhow::bail!("header {name} should have only a single value"),
247        None => Ok(None),
248    }
249}
250
251pub(crate) fn extract_single_conv<R, T>(
252    name: &str,
253    extensions: &mut BTreeMap<String, Vec<BString>>,
254) -> anyhow::Result<Option<T>>
255where
256    R: FromStr,
257    <R as FromStr>::Err: std::fmt::Display,
258    R: Into<T>,
259{
260    Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
261}
262
263pub(crate) fn extract_single_conv_fallback<R, T>(
264    name: &str,
265    fallback: &str,
266    extensions: &mut BTreeMap<String, Vec<BString>>,
267) -> Option<T>
268where
269    R: FromStr,
270    <R as FromStr>::Err: std::fmt::Display,
271    R: Into<T>,
272{
273    let maybe = extract_single::<R>(name, extensions).ok()?;
274    match maybe {
275        Some(value) => Some(value.into()),
276        None => extract_single::<R>(fallback, extensions)
277            .ok()?
278            .map(Into::into),
279    }
280}
281
282pub(crate) fn extract_multiple<R>(
283    name: &str,
284    extensions: &mut BTreeMap<String, Vec<BString>>,
285) -> anyhow::Result<Vec<R>>
286where
287    R: FromStr,
288    <R as FromStr>::Err: std::fmt::Display,
289{
290    match extensions.remove(name) {
291        Some(hdrs) => {
292            let mut results = vec![];
293            for h in hdrs {
294                let value = h
295                    .to_str()
296                    .map_err(|err| anyhow!("{h} could not be converted to UTF-8: {err:#}"))?;
297                let converted = value
298                    .parse::<R>()
299                    .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
300                results.push(converted);
301            }
302            Ok(results)
303        }
304        None => Ok(vec![]),
305    }
306}
307
#[cfg(test)]
mod test {
    use super::*;

    /// Only the three required fields are expected; the original message is
    /// captured and no trace data is found.
    #[test]
    fn rfc5965_1() {
        let fixture = include_bytes!("../data/rfc5965/1.eml");
        let parsed = ARFReport::parse(fixture).unwrap();
        k9::snapshot!(
            parsed,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: None,
        reporting_mta: None,
        source_ip: None,
        authentication_results: [],
        original_rcpto_to: [],
        reported_domain: [],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Received: from mailserver.example.net
    (mailserver.example.net [192.0.2.1])
    by example.com with ESMTP id M63d4137594e46;
    Thu, 08 Mar 2005 14:00:00 -0400
From: <somespammer@example.net>
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// Optional and repeated fields are populated, an unrecognised field
    /// lands in `extensions`, and the `X-KumoRef` header of the original
    /// message is decoded into `supplemental_trace`.
    #[test]
    fn rfc5965_2() {
        let fixture = include_bytes!("../data/rfc5965/2.eml");
        let parsed = ARFReport::parse(fixture).unwrap();
        k9::snapshot!(
            parsed,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "somespammer@example.net",
        ),
        reporting_mta: Some(
            RemoteMta {
                mta_type: "dns",
                name: "mail.example.com",
            },
        ),
        source_ip: Some(
            "192.0.2.1",
        ),
        authentication_results: [
            "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
        ],
        original_rcpto_to: [
            "user@example.com",
        ],
        reported_domain: [
            "example.net",
        ],
        reported_uri: [
            "http://example.net/earn_money.html",
            "mailto:user@example.com",
        ],
        extensions: {
            "removal-recipient": [
                "user@example.com",
            ],
        },
        original_message: Some(
            "From: <somespammer@example.net>
Received: from mailserver.example.net (mailserver.example.net
    [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
    Tue, 08 Mar 2005 14:00:00 -0400
X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: Some(
            Object {
                "recipient": String("test@example.com"),
            },
        ),
    },
)
"#
        );
    }

    /// A report with user agent `Yahoo!-Mail-Feedback/2.0`; an arrival date
    /// is expected to be recovered.
    #[test]
    fn rfc5965_3() {
        let fixture = include_bytes!("../data/rfc5965/3.eml");
        let parsed = ARFReport::parse(fixture).unwrap();
        k9::snapshot!(
            parsed,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "Yahoo!-Mail-Feedback/2.0",
        version: "0.1",
        arrival_date: Some(
            2023-12-14T16:16:15Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "test1@example.com",
        ),
        reporting_mta: None,
        source_ip: None,
        authentication_results: [
            "authentication result string is not available",
        ],
        original_rcpto_to: [
            "user@example.com",
        ],
        reported_domain: [
            "bounce.kumo.example.com",
        ],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// A report with user agent `ReturnPathFBL/2.0`; several unrecognised
    /// fields are expected to be preserved in `extensions`.
    #[test]
    fn rfc5965_4() {
        let fixture = include_bytes!("../data/rfc5965/4.eml");
        let parsed = ARFReport::parse(fixture).unwrap();
        k9::snapshot!(
            parsed,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "ReturnPathFBL/2.0",
        version: "1",
        arrival_date: Some(
            2023-12-13T19:03:30Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "foo@bounce.example.com",
        ),
        reporting_mta: None,
        source_ip: Some(
            "x.x.x.x",
        ),
        authentication_results: [],
        original_rcpto_to: [
            "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
        ],
        reported_domain: [
            "bounce.example.com",
        ],
        reported_uri: [],
        extensions: {
            "abuse-type": [
                "complaint",
            ],
            "source": [
                "Comcast",
            ],
            "subscription-link": [
                "https://fbl.returnpath.net/manage/subscriptions/xxxx",
            ],
        },
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }
}
539}