bounce_classify/
lib.rs

1use ordermap::OrderMap;
2use regex::{RegexSet, RegexSetBuilder};
3use serde::{Deserialize, Serialize};
4use std::str::FromStr;
5
6#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
7#[serde(from = "String", into = "String")]
8pub enum BounceClass {
9    PreDefined(PreDefinedBounceClass),
10    UserDefined(String),
11}
12
13impl From<String> for BounceClass {
14    fn from(s: String) -> BounceClass {
15        if let Ok(pre) = PreDefinedBounceClass::from_str(&s) {
16            BounceClass::PreDefined(pre)
17        } else {
18            BounceClass::UserDefined(s)
19        }
20    }
21}
22
23impl From<BounceClass> for String {
24    fn from(val: BounceClass) -> Self {
25        match val {
26            BounceClass::PreDefined(pre) => pre.to_string(),
27            BounceClass::UserDefined(s) => s,
28        }
29    }
30}
31
32impl Default for BounceClass {
33    fn default() -> Self {
34        PreDefinedBounceClass::Uncategorized.into()
35    }
36}
37
38impl From<PreDefinedBounceClass> for BounceClass {
39    fn from(c: PreDefinedBounceClass) -> BounceClass {
40        BounceClass::PreDefined(c)
41    }
42}
43
44#[derive(
45    Serialize,
46    Deserialize,
47    Debug,
48    PartialEq,
49    Eq,
50    Hash,
51    Copy,
52    Clone,
53    Ord,
54    PartialOrd,
55    strum::EnumString,
56    strum::Display,
57)]
58pub enum PreDefinedBounceClass {
59    /// The recipient is invalid
60    InvalidRecipient,
61    /// The message bounced due to a DNS failure.
62    DNSFailure,
63    /// The message was blocked by the receiver as coming from a known spam source.
64    SpamBlock,
65    /// The message was blocked by the receiver as spam
66    SpamContent,
67    /// The message was blocked by the receiver because it contained an attachment
68    ProhibitedAttachment,
69    /// The message was blocked by the receiver because relaying is not allowed.
70    RelayDenied,
71    /// The message is an auto-reply/vacation mail.
72    AutoReply,
73    /// Message transmission has been temporarily delayed.
74    TransientFailure,
75    /// The message is a subscribe request.
76    Subscribe,
77    /// The message is an unsubscribe request.
78    Unsubscribe,
79    /// The message is a challenge-response probe.
80    ChallengeResponse,
81    /// messages rejected due to configuration issues with remote host, 5.X.X error
82    BadConfiguration,
83    /// messages bounced due to bad connection issues with remote host, 4.X.X error
84    BadConnection,
85    /// messages bounced due to invalid or non-existing domains, 5.X.X error
86    BadDomain,
87    /// messages refused or blocked due to content related reasons, 5.X.X error
88    ContentRelated,
89    /// messages rejected due to expired, inactive, or disabled recipient addresses, 5.X.X error
90    InactiveMailbox,
91    /// messages bounced due to invalid DNS or MX entry for sending domain
92    InvalidSender,
93    /// messages bounced due to not being delivered before the bounce-after, 4.X.X error
94    MessageExpired,
95    /// messages bounces due to receiving no response from remote host after connecting, 4.X.X or 5.X.X error
96    NoAnswerFromHost,
97    /// messages refused or blocked due to general policy reasons, 5.X.X error
98    PolicyRelated,
99    /// messages rejected due to SMTP protocol syntax or sequence errors, 5.X.X error
100    ProtocolErrors,
101    /// messages rejected or blocked due to mailbox quota issues, 4.X.X or 5.X.X error
102    QuotaIssues,
103    /// messages refused or blocked due to remote mail server relaying issues, 5.X.X error
104    RelayingIssues,
105    /// messages bounced due to mail routing issues for recipient domain, 5.X.X error
106    RoutingErrors,
107    /// messages refused or blocked due to spam related reasons, 5.X.X error
108    SpamRelated,
109    /// messages refused or blocked due to virus related reasons, 5.X.X error
110    VirusRelated,
111    /// authentication policy was not met
112    AuthenticationFailed,
113    /// A session or batch limit on the number of recipients was hit.
114    /// This is a transient error even if it has a 5xx status code!
115    TooManyRecipients,
116    /// messages rejected due to other reasons, 4.X.X or 5.X.X error
117    Uncategorized,
118}
119
120/// Defines the content of bounce classifier rules files
121#[derive(Deserialize, Serialize, Debug)]
122pub struct BounceClassifierFile {
123    pub rules: OrderMap<BounceClass, Vec<String>>,
124}
125
126/// Holds state for compiling rules files into a classifier
127#[derive(Default)]
128pub struct BounceClassifierBuilder {
129    rules: Vec<(BounceClass, String)>,
130}
131
132impl BounceClassifierBuilder {
133    pub fn new() -> Self {
134        Self::default()
135    }
136
137    pub fn add_rule(&mut self, class: BounceClass, rule: String) {
138        self.rules.push((class, rule));
139    }
140
141    pub fn merge(&mut self, decoded_file: BounceClassifierFile) {
142        for (class, rules) in decoded_file.rules {
143            for rule in rules {
144                self.add_rule(class.clone(), rule);
145            }
146        }
147    }
148
149    pub fn merge_json_file(&mut self, file_name: &str) -> Result<(), String> {
150        let mut f = std::fs::File::open(file_name)
151            .map_err(|err| format!("reading file: {file_name}: {err:#}"))?;
152        let decoded: BounceClassifierFile = serde_json::from_reader(&mut f)
153            .map_err(|err| format!("decoding {file_name} as BounceClassifierFile: {err:#}"))?;
154        self.merge(decoded);
155        Ok(())
156    }
157
158    pub fn merge_toml_file(&mut self, file_name: &str) -> Result<(), String> {
159        let data = std::fs::read_to_string(file_name)
160            .map_err(|err| format!("reading file: {file_name}: {err:#}"))?;
161        let decoded: BounceClassifierFile = toml::from_str(&data)
162            .map_err(|err| format!("decoding {file_name} as BounceClassifierFile: {err:#}"))?;
163        self.merge(decoded);
164        Ok(())
165    }
166
167    pub fn build(self) -> Result<BounceClassifier, String> {
168        let mut pattern_to_class = vec![];
169        let mut patterns = vec![];
170        for (class, rule) in self.rules {
171            // Build a simple implicit reverse map from pattern
172            // index to the bounce classification. This gives
173            // an O(1) mapping from the regex result at the
174            // cost of O(n) memory. If the rules get very large,
175            // this could be changed to a structure that tracks
176            // start/end ranges of pattern indices and uses a
177            // binary search.
178            pattern_to_class.push(class.clone());
179            patterns.push(rule);
180        }
181
182        pattern_to_class.shrink_to_fit();
183
184        let set = RegexSetBuilder::new(patterns)
185            .build()
186            .map_err(|err| format!("compiling rules: {err:#}"))?;
187        Ok(BounceClassifier {
188            set,
189            pattern_to_class,
190        })
191    }
192}
193
194pub struct BounceClassifier {
195    set: RegexSet,
196    pattern_to_class: Vec<BounceClass>,
197}
198
199impl BounceClassifier {
200    pub fn classify_str(&self, s: &str) -> BounceClass {
201        self.set
202            .matches(s)
203            .into_iter()
204            .next()
205            .and_then(|idx| self.pattern_to_class.get(idx))
206            .cloned()
207            .unwrap_or(BounceClass::PreDefined(
208                PreDefinedBounceClass::Uncategorized,
209            ))
210    }
211
212    pub fn classify_response(&self, response: &rfc5321::Response) -> BounceClass {
213        let line = response.to_single_line();
214        self.classify_str(&line)
215    }
216}
217
#[cfg(test)]
mod test {
    use super::*;

    /// When several rules match the same input, the rule that was added
    /// first wins, both within a file and across merged files.
    #[test]
    fn test_rule_order() {
        let f1: BounceClassifierFile = toml::from_str(
            r#"
[rules]
foo = ["woot", "aaa"]
bar = ["woot", "aaa", "bbb"]
        "#,
        )
        .unwrap();

        let f2: BounceClassifierFile = toml::from_str(
            r#"
[rules]
second_file = ["bbb", "ccc"]
        "#,
        )
        .unwrap();

        let mut builder = BounceClassifierBuilder::new();
        builder.merge(f1);
        builder.merge(f2);

        let classifier = builder.build().unwrap();
        assert_eq!(
            classifier.classify_str("woot"),
            BounceClass::UserDefined("foo".to_string()),
            "foo should match rather than bar"
        );
        assert_eq!(
            classifier.classify_str("aaa"),
            BounceClass::UserDefined("foo".to_string()),
            "foo should match rather than bar"
        );
        assert_eq!(
            classifier.classify_str("bbb"),
            BounceClass::UserDefined("bar".to_string()),
        );
        assert_eq!(
            classifier.classify_str("ccc"),
            BounceClass::UserDefined("second_file".to_string()),
        );
    }

    /// Spot-checks the bundled IANA rules file against a small corpus of
    /// SMTP response lines.
    #[test]
    fn test_bounce_classify_iana() {
        let mut builder = BounceClassifierBuilder::new();
        builder
            .merge_toml_file("../../assets/bounce_classifier/iana.toml")
            .unwrap();
        let classifier = builder.build().unwrap();

        // Each entry is (response line, expected class). A duplicated
        // "552 4.2.2" entry was removed; it exercised nothing new.
        let corpus = &[
            (
                "552 5.2.2 mailbox is stuffed",
                PreDefinedBounceClass::QuotaIssues,
            ),
            (
                "552 4.2.2 mailbox is stuffed",
                PreDefinedBounceClass::QuotaIssues,
            ),
            (
                "352 5.2.2 mailbox is stuffed",
                PreDefinedBounceClass::Uncategorized,
            ),
            (
                "525 4.7.13 user account is disabled",
                PreDefinedBounceClass::InactiveMailbox,
            ),
            (
                "551 4.7.17 mailbox owner has changed",
                PreDefinedBounceClass::InvalidRecipient,
            ),
            (
                "551 4.7.18 domain owner has changed",
                PreDefinedBounceClass::BadDomain,
            ),
        ];

        for &(input, output) in corpus {
            assert_eq!(
                classifier.classify_str(input),
                output.into(),
                "expected {input} -> {output:?}"
            );
        }
    }
}