kumo_log_types/
rfc5965.rs

//! Parsing of ARF (Abuse Reporting Format) email feedback reports, as described by RFC 5965.
2use crate::rfc3464::{content_type, RemoteMta};
3use anyhow::anyhow;
4use bstr::{BStr, BString, ByteSlice};
5use chrono::{DateTime, Utc};
6use mailparsing::{Header, HeaderParseResult, MimePart};
7use rfc5321::parser::EnvelopeAddress;
8use serde::{Deserialize, Serialize};
9use std::collections::BTreeMap;
10use std::str::FromStr;
11
12#[derive(Debug, Serialize, Deserialize, Clone, Eq, PartialEq)]
13pub struct ARFReport {
14    pub feedback_type: String,
15    pub user_agent: String,
16    pub version: String,
17
18    #[serde(default)]
19    pub arrival_date: Option<DateTime<Utc>>,
20    #[serde(default)]
21    pub incidents: Option<u32>,
22    #[serde(default)]
23    pub original_envelope_id: Option<String>,
24    #[serde(default)]
25    pub original_mail_from: Option<String>,
26    #[serde(default)]
27    pub reporting_mta: Option<RemoteMta>,
28    #[serde(default)]
29    pub source_ip: Option<String>,
30
31    #[serde(default)]
32    pub authentication_results: Vec<String>,
33    #[serde(default)]
34    pub original_rcpto_to: Vec<String>,
35    #[serde(default)]
36    pub reported_domain: Vec<String>,
37    #[serde(default)]
38    pub reported_uri: Vec<String>,
39
40    pub extensions: BTreeMap<String, Vec<BString>>,
41
42    pub original_message: Option<BString>,
43    pub supplemental_trace: Option<serde_json::Value>,
44}
45
46impl ARFReport {
47    pub fn parse(input: &[u8]) -> anyhow::Result<Option<Self>> {
48        let mail = MimePart::parse(input)?;
49        let ct = mail.headers().content_type()?;
50        let ct = match ct {
51            None => return Ok(None),
52            Some(ct) => ct,
53        };
54
55        if ct.value != "multipart/report" {
56            return Ok(None);
57        }
58
59        if ct.get("report-type").as_ref().map(|b| b.as_bstr()) != Some(BStr::new("feedback-report"))
60        {
61            return Ok(None);
62        }
63
64        let mut original_message = None;
65        let mut supplemental_trace = None;
66
67        for part in mail.child_parts() {
68            let ct = content_type(part);
69            let ct = ct.as_ref().map(|b| b.as_bstr());
70            if ct == Some(BStr::new("message/rfc822"))
71                || ct == Some(BStr::new("text/rfc822-headers"))
72            {
73                if let Ok(HeaderParseResult { headers, .. }) =
74                    Header::parse_headers(part.raw_body())
75                {
76                    // Look for x-headers that might be our supplemental trace headers
77                    for hdr in headers.iter() {
78                        if !(hdr.get_name().starts_with_str("X-")
79                            || hdr.get_name().starts_with_str("x-"))
80                        {
81                            continue;
82                        }
83                        if let Ok(decoded) =
84                            data_encoding::BASE64.decode(hdr.get_raw_value().as_bytes())
85                        {
86                            #[derive(Deserialize)]
87                            struct Wrap {
88                                #[serde(rename = "_@_")]
89                                marker: String,
90                                #[serde(flatten)]
91                                payload: serde_json::Value,
92                            }
93                            if let Ok(obj) = serde_json::from_slice::<Wrap>(&decoded) {
94                                // Sanity check that it is our encoded data, rather than
95                                // some other random header that may have been inserted
96                                // somewhere along the way
97                                if obj.marker == "\\_/" {
98                                    supplemental_trace.replace(obj.payload);
99                                    break;
100                                }
101                            }
102                        }
103                    }
104                }
105
106                original_message = Some(BString::new(
107                    part.raw_body().as_bytes().replace("\r\n", "\n"),
108                ));
109            }
110        }
111
112        for part in mail.child_parts() {
113            let ct = content_type(part);
114            let ct = ct.as_ref().map(|b| b.as_bstr());
115            if ct == Some(BStr::new("message/feedback-report")) {
116                return Ok(Some(Self::parse_inner(
117                    part,
118                    original_message,
119                    supplemental_trace,
120                )?));
121            }
122        }
123
124        anyhow::bail!("feedback-report part missing");
125    }
126
127    fn parse_inner(
128        part: &MimePart,
129        original_message: Option<BString>,
130        supplemental_trace: Option<serde_json::Value>,
131    ) -> anyhow::Result<Self> {
132        let body = part.raw_body();
133        let mut extensions = extract_headers(body.as_bytes())?;
134
135        let feedback_type = extract_single_req("feedback-type", &mut extensions)?;
136        let user_agent = extract_single_req("user-agent", &mut extensions)?;
137        let version = extract_single_req("version", &mut extensions)?;
138        let arrival_date = extract_single_conv_fallback::<DateTimeRfc2822, DateTime<Utc>>(
139            "arrival-date",
140            "received-date",
141            &mut extensions,
142        );
143        let incidents = extract_single("incidents", &mut extensions)?;
144        let original_envelope_id = extract_single("original-envelope-id", &mut extensions)?;
145        let original_mail_from =
146            extract_single::<EnvelopeAddress>("original-mail-from", &mut extensions)?
147                .map(|a| a.to_string());
148        let reporting_mta = extract_single("reporting-mta", &mut extensions)?;
149        let source_ip = extract_single("source-ip", &mut extensions)?;
150        let authentication_results = extract_multiple("authentication-results", &mut extensions)?;
151        let original_rcpto_to =
152            extract_multiple::<EnvelopeAddress>("original-rcpt-to", &mut extensions)?
153                .into_iter()
154                .map(|a| a.to_string())
155                .collect();
156        let reported_domain = extract_multiple("reported-domain", &mut extensions)?;
157        let reported_uri = extract_multiple("reported-uri", &mut extensions)?;
158
159        Ok(Self {
160            feedback_type,
161            user_agent,
162            version,
163            arrival_date,
164            incidents,
165            original_envelope_id,
166            original_mail_from,
167            reporting_mta,
168            source_ip,
169            authentication_results,
170            original_rcpto_to,
171            reported_domain,
172            reported_uri,
173            extensions,
174            original_message,
175            supplemental_trace,
176        })
177    }
178}
179
180pub(crate) fn extract_headers(part: &[u8]) -> anyhow::Result<BTreeMap<String, Vec<BString>>> {
181    let HeaderParseResult { headers, .. } = Header::parse_headers(part)?;
182
183    let mut extensions = BTreeMap::new();
184
185    for hdr in headers.iter() {
186        let name = String::from_utf8_lossy(&hdr.get_name()).to_ascii_lowercase();
187        extensions
188            .entry(name)
189            .or_insert_with(std::vec::Vec::new)
190            .push(hdr.as_unstructured()?);
191    }
192    Ok(extensions)
193}
194
195pub(crate) struct DateTimeRfc2822(pub DateTime<Utc>);
196
197impl FromStr for DateTimeRfc2822 {
198    type Err = anyhow::Error;
199    fn from_str(input: &str) -> anyhow::Result<Self> {
200        let date = DateTime::parse_from_rfc2822(input)?;
201        Ok(Self(date.into()))
202    }
203}
204
205impl From<DateTimeRfc2822> for DateTime<Utc> {
206    fn from(val: DateTimeRfc2822) -> Self {
207        val.0
208    }
209}
210
211pub(crate) fn extract_single_req<R>(
212    name: &str,
213    extensions: &mut BTreeMap<String, Vec<BString>>,
214) -> anyhow::Result<R>
215where
216    R: FromStr,
217    <R as FromStr>::Err: std::fmt::Display,
218{
219    extract_single(name, extensions)?
220        .ok_or_else(|| anyhow!("required header {name} is not present"))
221}
222
223pub(crate) fn extract_single<R>(
224    name: &str,
225    extensions: &mut BTreeMap<String, Vec<BString>>,
226) -> anyhow::Result<Option<R>>
227where
228    R: FromStr,
229    <R as FromStr>::Err: std::fmt::Display,
230{
231    match extensions.remove(name) {
232        Some(mut hdrs) if hdrs.len() == 1 => {
233            let value = hdrs.remove(0);
234            let value = value
235                .to_str()
236                .map_err(|err| anyhow!("{value} could not be converted to UTF-8: {err:#}"))?;
237            let converted = value
238                .parse::<R>()
239                .map_err(|err| anyhow!("failed to convert '{value}': {err:#}"))?;
240            Ok(Some(converted))
241        }
242        Some(_) => anyhow::bail!("header {name} should have only a single value"),
243        None => Ok(None),
244    }
245}
246
247pub(crate) fn extract_single_conv<R, T>(
248    name: &str,
249    extensions: &mut BTreeMap<String, Vec<BString>>,
250) -> anyhow::Result<Option<T>>
251where
252    R: FromStr,
253    <R as FromStr>::Err: std::fmt::Display,
254    R: Into<T>,
255{
256    Ok(extract_single::<R>(name, extensions)?.map(|v| v.into()))
257}
258
259pub(crate) fn extract_single_conv_fallback<R, T>(
260    name: &str,
261    fallback: &str,
262    extensions: &mut BTreeMap<String, Vec<BString>>,
263) -> Option<T>
264where
265    R: FromStr,
266    <R as FromStr>::Err: std::fmt::Display,
267    R: Into<T>,
268{
269    let maybe = extract_single::<R>(name, extensions).ok()?;
270    match maybe {
271        Some(value) => Some(value.into()),
272        None => extract_single::<R>(fallback, extensions)
273            .ok()?
274            .map(Into::into),
275    }
276}
277
278pub(crate) fn extract_multiple<R>(
279    name: &str,
280    extensions: &mut BTreeMap<String, Vec<BString>>,
281) -> anyhow::Result<Vec<R>>
282where
283    R: FromStr,
284    <R as FromStr>::Err: std::fmt::Display,
285{
286    match extensions.remove(name) {
287        Some(hdrs) => {
288            let mut results = vec![];
289            for h in hdrs {
290                let value = h
291                    .to_str()
292                    .map_err(|err| anyhow!("{h} could not be converted to UTF-8: {err:#}"))?;
293                let converted = value
294                    .parse::<R>()
295                    .map_err(|err| anyhow!("failed to convert {h}: {err:#}"))?;
296                results.push(converted);
297            }
298            Ok(results)
299        }
300        None => Ok(vec![]),
301    }
302}
303
#[cfg(test)]
mod test {
    use super::*;

    /// Parses a fixture message, panicking if the parser reports an error.
    fn parse_fixture(raw: &[u8]) -> Option<ARFReport> {
        ARFReport::parse(raw).unwrap()
    }

    /// A report carrying only the required fields.
    #[test]
    fn rfc5965_1() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/1.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: None,
        reporting_mta: None,
        source_ip: None,
        authentication_results: [],
        original_rcpto_to: [],
        reported_domain: [],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Received: from mailserver.example.net
    (mailserver.example.net [192.0.2.1])
    by example.com with ESMTP id M63d4137594e46;
    Thu, 08 Mar 2005 14:00:00 -0400
From: <somespammer@example.net>
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// A report with optional fields, an extension field, and a supplemental
    /// trace recovered from the `X-KumoRef` header of the original message.
    #[test]
    fn rfc5965_2() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/2.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "SomeGenerator/1.0",
        version: "1",
        arrival_date: None,
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "somespammer@example.net",
        ),
        reporting_mta: Some(
            RemoteMta {
                mta_type: "dns",
                name: "mail.example.com",
            },
        ),
        source_ip: Some(
            "192.0.2.1",
        ),
        authentication_results: [
            "mail.example.com; spf=fail smtp.mail=somespammer@example.com",
        ],
        original_rcpto_to: [
            "user@example.com",
        ],
        reported_domain: [
            "example.net",
        ],
        reported_uri: [
            "http://example.net/earn_money.html",
            "mailto:user@example.com",
        ],
        extensions: {
            "removal-recipient": [
                "user@example.com",
            ],
        },
        original_message: Some(
            "From: <somespammer@example.net>
Received: from mailserver.example.net (mailserver.example.net
    [192.0.2.1]) by example.com with ESMTP id M63d4137594e46;
    Tue, 08 Mar 2005 14:00:00 -0400
X-KumoRef: eyJfQF8iOiJcXF8vIiwicmVjaXBpZW50IjoidGVzdEBleGFtcGxlLmNvbSJ9
To: <Undisclosed Recipients>
Subject: Earn money
MIME-Version: 1.0
Content-type: text/plain
Message-ID: 8787KJKJ3K4J3K4J3K4J3.mail@example.net
Date: Thu, 02 Sep 2004 12:31:03 -0500

Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
Spam Spam Spam
",
        ),
        supplemental_trace: Some(
            Object {
                "recipient": String("test@example.com"),
            },
        ),
    },
)
"#
        );
    }

    /// A report whose user agent identifies it as Yahoo mail feedback.
    #[test]
    fn rfc5965_3() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/3.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "Yahoo!-Mail-Feedback/2.0",
        version: "0.1",
        arrival_date: Some(
            2023-12-14T16:16:15Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "test1@example.com",
        ),
        reporting_mta: None,
        source_ip: None,
        authentication_results: [
            "authentication result string is not available",
        ],
        original_rcpto_to: [
            "user@example.com",
        ],
        reported_domain: [
            "bounce.kumo.example.com",
        ],
        reported_uri: [],
        extensions: {},
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }

    /// A report whose user agent identifies it as ReturnPath FBL, with
    /// several fields that land in `extensions`.
    #[test]
    fn rfc5965_4() {
        let report = parse_fixture(include_bytes!("../data/rfc5965/4.eml"));
        k9::snapshot!(
            report,
            r#"
Some(
    ARFReport {
        feedback_type: "abuse",
        user_agent: "ReturnPathFBL/2.0",
        version: "1",
        arrival_date: Some(
            2023-12-13T19:03:30Z,
        ),
        incidents: None,
        original_envelope_id: None,
        original_mail_from: Some(
            "foo@bounce.example.com",
        ),
        reporting_mta: None,
        source_ip: Some(
            "x.x.x.x",
        ),
        authentication_results: [],
        original_rcpto_to: [
            "cb4a01a48251d4765f489076aa81e2a4@comcast.net",
        ],
        reported_domain: [
            "bounce.example.com",
        ],
        reported_uri: [],
        extensions: {
            "abuse-type": [
                "complaint",
            ],
            "source": [
                "Comcast",
            ],
            "subscription-link": [
                "https://fbl.returnpath.net/manage/subscriptions/xxxx",
            ],
        },
        original_message: Some(
            "Date: Thu, 14 Dec 2023 16:16:14 +0000
To: user@example.com
Subject: test Thu, 14 Dec 2023 16:16:14 +0000

This is a test mailing

",
        ),
        supplemental_trace: None,
    },
)
"#
        );
    }
}