mailparsing/
normalize.rs

1pub fn has_lone_cr_or_lf(data: &[u8]) -> bool {
2    for i in memchr::memchr2_iter(b'\r', b'\n', data) {
3        match data[i] {
4            b'\r' => {
5                if data.get(i + 1).copied() != Some(b'\n') {
6                    return true;
7                }
8            }
9            b'\n' => {
10                if i == 0 || data[i - 1] != b'\r' {
11                    return true;
12                }
13            }
14            _ => unreachable!(),
15        }
16    }
17    false
18}
19
20pub fn normalize_crlf(data: &[u8]) -> Vec<u8> {
21    let mut normalized = Vec::with_capacity(data.len());
22    let mut last_idx = 0;
23
24    for i in memchr::memchr2_iter(b'\r', b'\n', data) {
25        match data[i] {
26            b'\r' => {
27                normalized.extend_from_slice(&data[last_idx..=i]);
28                if data.get(i + 1).copied() != Some(b'\n') {
29                    normalized.push(b'\n');
30                }
31            }
32            b'\n' => {
33                normalized.extend_from_slice(&data[last_idx..i]);
34                let needs_cr = i == 0 || data[i - 1] != b'\r';
35                if needs_cr {
36                    normalized.push(b'\r');
37                }
38                normalized.push(b'\n');
39            }
40            _ => unreachable!(),
41        }
42        last_idx = i + 1;
43    }
44
45    normalized.extend_from_slice(&data[last_idx..]);
46    normalized
47}
48
49pub fn normalize_crlf_in_place(data: &mut Vec<u8>) {
50    let mut idx = 0;
51    'find_again: while idx < data.len() {
52        for i in memchr::memchr2_iter(b'\r', b'\n', &data[idx..]) {
53            match data[idx + i] {
54                b'\r' => {
55                    if data.get(idx + i + 1).copied() != Some(b'\n') {
56                        data.insert(idx + i + 1, b'\n');
57                        idx = idx + i + 2;
58                        continue 'find_again;
59                    }
60                }
61                b'\n' => {
62                    let needs_cr = idx + i == 0 || data[idx + i - 1] != b'\r';
63                    if needs_cr {
64                        data.insert(idx + i, b'\r');
65                        idx = idx + i + 2;
66                        continue 'find_again;
67                    }
68                }
69                _ => unreachable!(),
70            }
71        }
72        return;
73    }
74}
75
#[cfg(test)]
mod test {
    use super::*;

    /// `has_lone_cr_or_lf` accepts CRLF-only input and flags any bare CR or LF.
    #[test]
    fn loner() {
        // Inputs whose line endings are all proper CRLF pairs (or absent).
        let clean: &[&[u8]] = &[
            b"",
            b"hello",
            b"hello\r\nthere",
            b"hello\r\nthere\r\n",
            b"\r\nhello\r\nthere\r\n",
        ];
        for &input in clean {
            assert!(!has_lone_cr_or_lf(input));
        }

        // Inputs containing at least one bare CR or bare LF.
        let dirty: &[&[u8]] = &[
            b"hello\n",
            b"hello\r",
            b"\nhello\r\nthere\r\n",
            b"\rhello\r\nthere\r\n",
            b"hello\nthere\r\n",
            b"hello\r\nthere\n",
            b"hello\r\r\r\nthere\n",
        ];
        for &input in dirty {
            assert!(has_lone_cr_or_lf(input));
        }
    }

    /// Both normalizers must produce the same, expected output.
    #[test]
    fn fix_loner() {
        // (input, expected normalized output)
        let cases: &[(&[u8], &[u8])] = &[
            (b"\nhello\r\nthere\r\n", b"\r\nhello\r\nthere\r\n"),
            (b"hello\r", b"hello\r\n"),
            (b"hello\nthere\r\n", b"hello\r\nthere\r\n"),
            (b"hello\r\nthere\n", b"hello\r\nthere\r\n"),
            (b"hello\r\r\r\nthere\n", b"hello\r\n\r\n\r\nthere\r\n"),
        ];

        for &(input, expect) in cases {
            let mut in_place = input.to_vec();
            normalize_crlf_in_place(&mut in_place);
            assert_eq!(in_place, expect);

            assert_eq!(normalize_crlf(input), expect);
        }
    }

    /// One input mixing an intact CRLF, a bare LF, a bare CR and two
    /// consecutive bare CRs.
    #[test]
    fn test_normalize_crlf() {
        let normalized = normalize_crlf(b"foo\r\nbar\nwoot\rdouble-r\r\rend");
        assert_eq!(normalized, b"foo\r\nbar\r\nwoot\r\ndouble-r\r\n\r\nend");
    }
}