bounce_classify/
lib.rs

1use ordermap::OrderMap;
2use regex::{RegexSet, RegexSetBuilder};
3use serde::{Deserialize, Serialize};
4use std::str::FromStr;
5
6#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
7#[serde(from = "String", into = "String")]
8pub enum BounceClass {
9    PreDefined(PreDefinedBounceClass),
10    UserDefined(String),
11}
12
13impl From<String> for BounceClass {
14    fn from(s: String) -> BounceClass {
15        if let Ok(pre) = PreDefinedBounceClass::from_str(&s) {
16            BounceClass::PreDefined(pre)
17        } else {
18            BounceClass::UserDefined(s)
19        }
20    }
21}
22
23impl From<BounceClass> for String {
24    fn from(val: BounceClass) -> Self {
25        match val {
26            BounceClass::PreDefined(pre) => pre.to_string(),
27            BounceClass::UserDefined(s) => s,
28        }
29    }
30}
31
32impl Default for BounceClass {
33    fn default() -> Self {
34        PreDefinedBounceClass::Uncategorized.into()
35    }
36}
37
38impl From<PreDefinedBounceClass> for BounceClass {
39    fn from(c: PreDefinedBounceClass) -> BounceClass {
40        BounceClass::PreDefined(c)
41    }
42}
43
44#[derive(
45    Serialize,
46    Deserialize,
47    Debug,
48    PartialEq,
49    Eq,
50    Hash,
51    Copy,
52    Clone,
53    Ord,
54    PartialOrd,
55    strum::EnumString,
56    strum::Display,
57)]
58pub enum PreDefinedBounceClass {
59    /// The recipient is invalid
60    InvalidRecipient,
61    /// The message bounced due to a DNS failure.
62    DNSFailure,
63    /// The message was blocked by the receiver as coming from a known spam source.
64    SpamBlock,
65    /// The message was blocked by the receiver as spam
66    SpamContent,
67    /// The message was blocked by the receiver because it contained an attachment
68    ProhibitedAttachment,
69    /// The message was blocked by the receiver because relaying is not allowed.
70    RelayDenied,
71    /// The message is an auto-reply/vacation mail.
72    AutoReply,
73    /// Message transmission has been temporarily delayed.
74    TransientFailure,
75    /// The message is a subscribe request.
76    Subscribe,
77    /// The message is an unsubscribe request.
78    Unsubscribe,
79    /// The message is a challenge-response probe.
80    ChallengeResponse,
81    /// messages rejected due to configuration issues with remote host, 5.X.X error
82    BadConfiguration,
83    /// messages bounced due to bad connection issues with remote host, 4.X.X error
84    BadConnection,
85    /// messages bounced due to invalid or non-existing domains, 5.X.X error
86    BadDomain,
87    /// messages refused or blocked due to content related reasons, 5.X.X error
88    ContentRelated,
89    /// messages rejected due to expired, inactive, or disabled recipient addresses, 5.X.X error
90    InactiveMailbox,
91    /// messages bounced due to invalid DNS or MX entry for sending domain
92    InvalidSender,
93    /// messages bounced due to not being delivered before the bounce-after, 4.X.X error
94    MessageExpired,
95    /// messages bounces due to receiving no response from remote host after connecting, 4.X.X or 5.X.X error
96    NoAnswerFromHost,
97    /// messages refused or blocked due to general policy reasons, 5.X.X error
98    PolicyRelated,
99    /// messages rejected due to SMTP protocol syntax or sequence errors, 5.X.X error
100    ProtocolErrors,
101    /// messages rejected or blocked due to mailbox quota issues, 4.X.X or 5.X.X error
102    QuotaIssues,
103    /// messages refused or blocked due to remote mail server relaying issues, 5.X.X error
104    RelayingIssues,
105    /// messages bounced due to mail routing issues for recipient domain, 5.X.X error
106    RoutingErrors,
107    /// messages refused or blocked due to spam related reasons, 5.X.X error
108    SpamRelated,
109    /// messages refused or blocked due to virus related reasons, 5.X.X error
110    VirusRelated,
111    /// authentication policy was not met
112    AuthenticationFailed,
113    /// messages rejected due to other reasons, 4.X.X or 5.X.X error
114    Uncategorized,
115}
116
117/// Defines the content of bounce classifier rules files
118#[derive(Deserialize, Serialize, Debug)]
119pub struct BounceClassifierFile {
120    pub rules: OrderMap<BounceClass, Vec<String>>,
121}
122
123/// Holds state for compiling rules files into a classifier
124#[derive(Default)]
125pub struct BounceClassifierBuilder {
126    rules: Vec<(BounceClass, String)>,
127}
128
129impl BounceClassifierBuilder {
130    pub fn new() -> Self {
131        Self::default()
132    }
133
134    pub fn add_rule(&mut self, class: BounceClass, rule: String) {
135        self.rules.push((class, rule));
136    }
137
138    pub fn merge(&mut self, decoded_file: BounceClassifierFile) {
139        for (class, rules) in decoded_file.rules {
140            for rule in rules {
141                self.add_rule(class.clone(), rule);
142            }
143        }
144    }
145
146    pub fn merge_json_file(&mut self, file_name: &str) -> Result<(), String> {
147        let mut f = std::fs::File::open(file_name)
148            .map_err(|err| format!("reading file: {file_name}: {err:#}"))?;
149        let decoded: BounceClassifierFile = serde_json::from_reader(&mut f)
150            .map_err(|err| format!("decoding {file_name} as BounceClassifierFile: {err:#}"))?;
151        self.merge(decoded);
152        Ok(())
153    }
154
155    pub fn merge_toml_file(&mut self, file_name: &str) -> Result<(), String> {
156        let data = std::fs::read_to_string(file_name)
157            .map_err(|err| format!("reading file: {file_name}: {err:#}"))?;
158        let decoded: BounceClassifierFile = toml::from_str(&data)
159            .map_err(|err| format!("decoding {file_name} as BounceClassifierFile: {err:#}"))?;
160        self.merge(decoded);
161        Ok(())
162    }
163
164    pub fn build(self) -> Result<BounceClassifier, String> {
165        let mut pattern_to_class = vec![];
166        let mut patterns = vec![];
167        for (class, rule) in self.rules {
168            // Build a simple implicit reverse map from pattern
169            // index to the bounce classification. This gives
170            // an O(1) mapping from the regex result at the
171            // cost of O(n) memory. If the rules get very large,
172            // this could be changed to a structure that tracks
173            // start/end ranges of pattern indices and uses a
174            // binary search.
175            pattern_to_class.push(class.clone());
176            patterns.push(rule);
177        }
178
179        pattern_to_class.shrink_to_fit();
180
181        let set = RegexSetBuilder::new(patterns)
182            .build()
183            .map_err(|err| format!("compiling rules: {err:#}"))?;
184        Ok(BounceClassifier {
185            set,
186            pattern_to_class,
187        })
188    }
189}
190
191pub struct BounceClassifier {
192    set: RegexSet,
193    pattern_to_class: Vec<BounceClass>,
194}
195
196impl BounceClassifier {
197    pub fn classify_str(&self, s: &str) -> BounceClass {
198        self.set
199            .matches(s)
200            .into_iter()
201            .next()
202            .and_then(|idx| self.pattern_to_class.get(idx))
203            .cloned()
204            .unwrap_or(BounceClass::PreDefined(
205                PreDefinedBounceClass::Uncategorized,
206            ))
207    }
208
209    pub fn classify_response(&self, response: &rfc5321::Response) -> BounceClass {
210        let line = response.to_single_line();
211        self.classify_str(&line)
212    }
213}
214
#[cfg(test)]
mod test {
    use super::*;

    /// Decodes an inline TOML rules document, panicking if it is malformed.
    fn parse_rules(toml_text: &str) -> BounceClassifierFile {
        toml::from_str(toml_text).unwrap()
    }

    /// Shorthand for the user-defined class labelled `name`.
    fn user_class(name: &str) -> BounceClass {
        BounceClass::UserDefined(name.to_string())
    }

    /// When several rules match, the rule that was added first wins, both
    /// within a single file and across merged files.
    #[test]
    fn test_rule_order() {
        let first_file = parse_rules(
            r#"
[rules]
foo = ["woot", "aaa"]
bar = ["woot", "aaa", "bbb"]
        "#,
        );

        let second_file = parse_rules(
            r#"
[rules]
second_file = ["bbb", "ccc"]
        "#,
        );

        let mut builder = BounceClassifierBuilder::new();
        for file in [first_file, second_file] {
            builder.merge(file);
        }
        let classifier = builder.build().unwrap();

        assert_eq!(
            classifier.classify_str("woot"),
            user_class("foo"),
            "foo should match rather than bar"
        );
        assert_eq!(
            classifier.classify_str("aaa"),
            user_class("foo"),
            "foo should match rather than bar"
        );
        assert_eq!(classifier.classify_str("bbb"), user_class("bar"));
        assert_eq!(classifier.classify_str("ccc"), user_class("second_file"));
    }

    /// Runs a small corpus of responses through the IANA rules asset and
    /// checks the class assigned to each of them.
    #[test]
    fn test_bounce_classify_iana() {
        let classifier = {
            let mut builder = BounceClassifierBuilder::new();
            builder
                .merge_toml_file("../../assets/bounce_classifier/iana.toml")
                .unwrap();
            builder.build().unwrap()
        };

        let corpus = [
            (
                "552 5.2.2 mailbox is stuffed",
                PreDefinedBounceClass::QuotaIssues,
            ),
            (
                "552 4.2.2 mailbox is stuffed",
                PreDefinedBounceClass::QuotaIssues,
            ),
            (
                "552 4.2.2 mailbox is stuffed",
                PreDefinedBounceClass::QuotaIssues,
            ),
            (
                "352 5.2.2 mailbox is stuffed",
                PreDefinedBounceClass::Uncategorized,
            ),
            (
                "525 4.7.13 user account is disabled",
                PreDefinedBounceClass::InactiveMailbox,
            ),
            (
                "551 4.7.17 mailbox owner has changed",
                PreDefinedBounceClass::InvalidRecipient,
            ),
            (
                "551 4.7.18 domain owner has changed",
                PreDefinedBounceClass::BadDomain,
            ),
        ];

        for (input, output) in corpus.iter().copied() {
            assert_eq!(
                classifier.classify_str(input),
                BounceClass::from(output),
                "expected {input} -> {output:?}"
            );
        }
    }
}