kumo_dkim/
canonicalization.rs

1use crate::hash::LimitHasher;
2use memchr::memmem::Finder;
3use std::sync::LazyLock;
4
/// Selects which DKIM canonicalization algorithm is applied to a header
/// or a body before it is hashed.
///
/// The enum carries no data, so equality is total; `Eq` is derived
/// alongside `PartialEq` to let callers use it wherever full equality
/// is required.
#[derive(PartialEq, Eq, Clone, Debug, Copy)]
pub enum Type {
    /// The "simple" algorithm.
    Simple,
    /// The "relaxed" algorithm.
    Relaxed,
}
10
11impl Type {
12    pub fn canon_name(&self) -> &'static str {
13        match self {
14            Self::Simple => "simple",
15            Self::Relaxed => "relaxed",
16        }
17    }
18
19    pub(crate) fn canon_body(&self, body: &[u8], hasher: &mut LimitHasher) {
20        match self {
21            Self::Simple => body_simple(body, hasher),
22            Self::Relaxed => body_relaxed(body, hasher),
23        }
24    }
25
26    pub(crate) fn canon_header_into(&self, key: &str, value: &[u8], out: &mut Vec<u8>) {
27        match self {
28            Self::Simple => canonicalize_header_simple(key, value, out),
29            Self::Relaxed => canonicalize_header_relaxed(key, value, out),
30        }
31    }
32}
33
/// Returns the portion of `body` that the simple body canonicalization
/// hashes.
///
/// An empty body is replaced by a single CRLF. Otherwise, redundant
/// trailing CRLF pairs are dropped until the body no longer ends in
/// two consecutive CRLFs. A body that does not end in CRLF at all is
/// returned unchanged.
fn do_body_simple(body: &[u8]) -> &[u8] {
    const CRLF: &[u8] = b"\r\n";

    if body.is_empty() {
        return CRLF;
    }

    let mut kept = body;
    // Peel one CRLF off the tail for as long as another CRLF sits
    // immediately in front of it.
    while let Some(shorter) = kept.strip_suffix(CRLF) {
        if !shorter.ends_with(CRLF) {
            break;
        }
        kept = shorter;
    }

    kept
}
45
46/// Canonicalize body using the simple canonicalization algorithm.
47fn body_simple(body: &[u8], hasher: &mut LimitHasher) {
48    let body = do_body_simple(body);
49    hasher.hash(body);
50}
51
52/// Helper for iterating lines using memmem
53struct IterLines<'haystack> {
54    haystack: &'haystack [u8],
55    inner: memchr::memmem::FindIter<'haystack, 'static>,
56    start: usize,
57    done: bool,
58}
59
60impl<'haystack> Iterator for IterLines<'haystack> {
61    type Item = &'haystack [u8];
62
63    fn next(&mut self) -> Option<Self::Item> {
64        if self.done {
65            return None;
66        }
67
68        match self.inner.next() {
69            Some(idx) => {
70                let line = &self.haystack[self.start..idx + 2];
71                self.start = idx + 2;
72                Some(line)
73            }
74            None => {
75                self.done = true;
76                let line = &self.haystack[self.start..];
77                if line.is_empty() {
78                    None
79                } else {
80                    Some(line)
81                }
82            }
83        }
84    }
85}
86
87fn iter_lines(haystack: &[u8]) -> IterLines {
88    static CRLF: LazyLock<Finder> = LazyLock::new(|| memchr::memmem::Finder::new("\r\n"));
89    IterLines {
90        haystack,
91        inner: CRLF.find_iter(haystack),
92        start: 0,
93        done: false,
94    }
95}
96
97/// https://datatracker.ietf.org/doc/html/rfc6376#section-3.4.3
98/// Canonicalize body using the relaxed canonicalization algorithm.
99fn body_relaxed(mut body: &[u8], hasher: &mut LimitHasher) {
100    if body.is_empty() {
101        return;
102    }
103
104    // Ignore empty lines at the end of the message body
105    while body.ends_with(b"\r\n\r\n") {
106        body = &body[..body.len() - 2];
107    }
108
109    for mut line in iter_lines(body) {
110        // Ignore all whitespace at the end of the line
111        line = trim_ws_end(line);
112
113        let mut prior = 0;
114        // Reduce all sequences of WSP within a line to a single SP character.
115        for idx in memchr::memchr2_iter(b' ', b'\t', line) {
116            if prior > 0 && idx == prior {
117                // Part of a run; ignore this one
118                prior = idx + 1;
119                continue;
120            }
121
122            // Found a new run of space(s).
123            // Emit the bytes ahead of this one
124            hasher.hash(&line[prior..idx]);
125            // and emit the canonical space
126            hasher.hash(b" ");
127
128            prior = idx + 1;
129        }
130        // and emit the remainder
131        hasher.hash(&line[prior..]);
132
133        // and canonical newline
134        hasher.hash(b"\r\n");
135    }
136}
137
// https://datatracker.ietf.org/doc/html/rfc6376#section-3.4.1
/// Appends the simple canonical form of a header to `out`: `key`,
/// the separator `": "`, `value`, and a terminating CRLF, with no
/// other transformation. Existing contents of `out` are preserved.
fn canonicalize_header_simple(key: &str, value: &[u8], out: &mut Vec<u8>) {
    let parts: [&[u8]; 4] = [key.as_bytes(), b": ", value, b"\r\n"];
    for part in parts {
        out.extend_from_slice(part);
    }
}
145
146// https://datatracker.ietf.org/doc/html/rfc6376#section-3.4.2
147fn canonicalize_header_relaxed(key: &str, value: &[u8], out: &mut Vec<u8>) {
148    let key = key.to_lowercase();
149    let key = key.trim_end();
150
151    out.extend_from_slice(key.as_bytes());
152    out.extend_from_slice(b":");
153
154    let value = trim_ws_start(trim_ws_end(value));
155    let mut space_run = false;
156    for &c in value {
157        match c {
158            b'\r' | b'\n' => {}
159            b' ' | b'\t' => {
160                if space_run {
161                    continue;
162                }
163                space_run = true;
164                out.push(b' ');
165            }
166            _ => {
167                space_run = false;
168                out.push(c);
169            }
170        }
171    }
172
173    out.extend_from_slice(b"\r\n");
174}
175
/// Returns `line` without its leading run of SP, TAB, CR and LF bytes.
fn trim_ws_start(line: &[u8]) -> &[u8] {
    let leading = line
        .iter()
        .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
        .count();
    &line[leading..]
}
185
/// Returns `line` without its trailing run of SP, TAB, CR and LF bytes.
fn trim_ws_end(line: &[u8]) -> &[u8] {
    // One past the last byte that is not whitespace; zero when every
    // byte is whitespace or the slice is empty.
    let keep = line
        .iter()
        .rposition(|&byte| !matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
        .map_or(0, |last| last + 1);
    &line[..keep]
}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the hasher shared by the body tests, with no effective
    /// length limit. Presumably `HashImpl::copy_data()` records the bytes
    /// it is fed rather than digesting them, since the tests compare
    /// `finalize_bytes()` against literal canonical output.
    fn capturing_hasher() -> LimitHasher {
        LimitHasher {
            hasher: crate::hash::HashImpl::copy_data(),
            limit: usize::MAX,
            hashed: 0,
        }
    }

    /// Runs relaxed header canonicalization into a fresh buffer.
    fn header_relaxed(key: &str, value: &[u8]) -> Vec<u8> {
        let mut canonical = Vec::new();
        canonicalize_header_relaxed(key, value, &mut canonical);
        canonical
    }

    /// Runs relaxed body canonicalization and returns what was hashed.
    fn body_relaxed(data: &[u8]) -> Vec<u8> {
        let mut hasher = capturing_hasher();
        super::body_relaxed(data, &mut hasher);
        hasher.finalize_bytes()
    }

    /// Runs simple body canonicalization and returns what was hashed.
    fn body_simple(data: &[u8]) -> Vec<u8> {
        let mut hasher = capturing_hasher();
        super::body_simple(data, &mut hasher);
        hasher.finalize_bytes()
    }

    #[test]
    fn test_canonicalize_header_relaxed() {
        assert_eq!(header_relaxed("SUBJect", b" AbC\r\n"), b"subject:AbC\r\n");
        assert_eq!(
            header_relaxed("Subject \t", b"\t Your Name\t \r\n"),
            b"subject:Your Name\r\n"
        );
        assert_eq!(
            header_relaxed("Subject \t", b"\t Kimi \t \r\n No \t\r\n Na Wa\r\n"),
            b"subject:Kimi No Na Wa\r\n"
        );
    }

    #[test]
    fn test_canonicalize_body_relaxed() {
        assert_eq!(body_relaxed(b"\r\n"), b"\r\n");
        assert_eq!(body_relaxed(b"hey        \r\n"), b"hey\r\n");
        assert_eq!(body_relaxed(b" C \r\nD \t E\r\n\r\n\r\n"), b" C\r\nD E\r\n");
    }

    #[test]
    fn test_canonicalize_body_simple() {
        assert_eq!(body_simple(b"\r\n"), b"\r\n");
        assert_eq!(body_simple(b"hey        \r\n"), b"hey        \r\n");
        assert_eq!(
            body_simple(b" C \r\nD \t E\r\n\r\n\r\n"),
            b" C \r\nD \t E\r\n"
        );
    }
}
257}