kumo_dkim/
canonicalization.rs

1use crate::hash::LimitHasher;
2use bstr::ByteSlice;
3use memchr::memmem::Finder;
4use std::sync::LazyLock;
5
/// Selects which DKIM canonicalization algorithm to apply.
///
/// The same selector is used for both header fields and the message body;
/// see the methods on this type for the individual entry points.
#[derive(PartialEq, Eq, Clone, Debug, Copy)]
pub enum Type {
    /// The algorithm named `"simple"`.
    Simple,
    /// The algorithm named `"relaxed"`.
    Relaxed,
}
11
12impl Type {
13    pub fn canon_name(&self) -> &'static str {
14        match self {
15            Self::Simple => "simple",
16            Self::Relaxed => "relaxed",
17        }
18    }
19
20    pub(crate) fn canon_body(&self, body: &[u8], hasher: &mut LimitHasher) {
21        match self {
22            Self::Simple => body_simple(body, hasher),
23            Self::Relaxed => body_relaxed(body, hasher),
24        }
25    }
26
27    pub(crate) fn canon_header_into(&self, key: &[u8], value: &[u8], out: &mut Vec<u8>) {
28        match self {
29            Self::Simple => canonicalize_header_simple(key, value, out),
30            Self::Relaxed => canonicalize_header_relaxed(key, value, out),
31        }
32    }
33}
34
/// Computes the slice of `body` that the simple body canonicalization keeps.
///
/// * An empty body is replaced by a single CRLF.
/// * While the body ends in two consecutive CRLFs, the last one is dropped,
///   so a run of trailing empty lines collapses to one line terminator.
/// * Anything else (including a body without a trailing CRLF) is returned
///   unchanged.
fn do_body_simple(body: &[u8]) -> &[u8] {
    const CRLF: &[u8] = b"\r\n";

    if body.is_empty() {
        return CRLF;
    }

    // Walk the logical end of the body backwards, one CRLF at a time.
    let mut end = body.len();
    while body[..end].ends_with(b"\r\n\r\n") {
        end -= CRLF.len();
    }

    &body[..end]
}
46
47/// Canonicalize body using the simple canonicalization algorithm.
48fn body_simple(body: &[u8], hasher: &mut LimitHasher) {
49    let body = do_body_simple(body);
50    hasher.hash(body);
51}
52
53/// Helper for iterating lines using memmem
54struct IterLines<'haystack> {
55    haystack: &'haystack [u8],
56    inner: memchr::memmem::FindIter<'haystack, 'static>,
57    start: usize,
58    done: bool,
59}
60
61impl<'haystack> Iterator for IterLines<'haystack> {
62    type Item = &'haystack [u8];
63
64    fn next(&mut self) -> Option<Self::Item> {
65        if self.done {
66            return None;
67        }
68
69        match self.inner.next() {
70            Some(idx) => {
71                let line = &self.haystack[self.start..idx + 2];
72                self.start = idx + 2;
73                Some(line)
74            }
75            None => {
76                self.done = true;
77                let line = &self.haystack[self.start..];
78                if line.is_empty() {
79                    None
80                } else {
81                    Some(line)
82                }
83            }
84        }
85    }
86}
87
88fn iter_lines(haystack: &'_ [u8]) -> IterLines<'_> {
89    static CRLF: LazyLock<Finder> = LazyLock::new(|| memchr::memmem::Finder::new("\r\n"));
90    IterLines {
91        haystack,
92        inner: CRLF.find_iter(haystack),
93        start: 0,
94        done: false,
95    }
96}
97
98/// https://datatracker.ietf.org/doc/html/rfc6376#section-3.4.3
99/// Canonicalize body using the relaxed canonicalization algorithm.
100fn body_relaxed(mut body: &[u8], hasher: &mut LimitHasher) {
101    if body.is_empty() {
102        return;
103    }
104
105    // Ignore empty lines at the end of the message body
106    while body.ends_with(b"\r\n\r\n") {
107        body = &body[..body.len() - 2];
108    }
109
110    for mut line in iter_lines(body) {
111        // Ignore all whitespace at the end of the line
112        line = trim_ws_end(line);
113
114        let mut prior = 0;
115        // Reduce all sequences of WSP within a line to a single SP character.
116        for idx in memchr::memchr2_iter(b' ', b'\t', line) {
117            if prior > 0 && idx == prior {
118                // Part of a run; ignore this one
119                prior = idx + 1;
120                continue;
121            }
122
123            // Found a new run of space(s).
124            // Emit the bytes ahead of this one
125            hasher.hash(&line[prior..idx]);
126            // and emit the canonical space
127            hasher.hash(b" ");
128
129            prior = idx + 1;
130        }
131        // and emit the remainder
132        hasher.hash(&line[prior..]);
133
134        // and canonical newline
135        hasher.hash(b"\r\n");
136    }
137}
138
// https://datatracker.ietf.org/doc/html/rfc6376#section-3.4.1
//
// Appends `key`, the separator ": ", `value` and a CRLF to `out`, in that
// order, without altering any of the supplied bytes.
fn canonicalize_header_simple(key: &[u8], value: &[u8], out: &mut Vec<u8>) {
    let pieces: [&[u8]; 4] = [key, b": ", value, b"\r\n"];
    for piece in pieces.iter() {
        out.extend_from_slice(piece);
    }
}
146
147// https://datatracker.ietf.org/doc/html/rfc6376#section-3.4.2
148fn canonicalize_header_relaxed(key: &[u8], value: &[u8], out: &mut Vec<u8>) {
149    let key = key.to_ascii_lowercase();
150    let key = key.trim_end();
151
152    out.extend_from_slice(key.as_bytes());
153    out.extend_from_slice(b":");
154
155    let value = trim_ws_start(trim_ws_end(value));
156    let mut space_run = false;
157    for &c in value {
158        match c {
159            b'\r' | b'\n' => {}
160            b' ' | b'\t' => {
161                if space_run {
162                    continue;
163                }
164                space_run = true;
165                out.push(b' ');
166            }
167            _ => {
168                space_run = false;
169                out.push(c);
170            }
171        }
172    }
173
174    out.extend_from_slice(b"\r\n");
175}
176
/// Returns `line` without its leading run of SP, HTAB, CR and LF bytes.
fn trim_ws_start(line: &[u8]) -> &[u8] {
    let skipped = line
        .iter()
        .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
        .count();
    &line[skipped..]
}
186
/// Returns `line` without its trailing run of SP, HTAB, CR and LF bytes.
fn trim_ws_end(line: &[u8]) -> &[u8] {
    // Index just past the last byte that is not one of the trimmed bytes;
    // zero when every byte is trimmed (or the slice is empty).
    let kept = line
        .iter()
        .rposition(|&byte| !matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
        .map_or(0, |last| last + 1);
    &line[..kept]
}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `canon` over `data` with an unlimited hasher built from
    /// `HashImpl::copy_data()` and returns the finalized bytes.
    /// Presumably that hasher just accumulates its input, which is what the
    /// literal expectations below rely on.
    fn canonical_body(canon: fn(&[u8], &mut LimitHasher), data: &[u8]) -> Vec<u8> {
        let mut sink = LimitHasher {
            hasher: crate::hash::HashImpl::copy_data(),
            limit: usize::MAX,
            hashed: 0,
        };
        canon(data, &mut sink);
        sink.finalize_bytes()
    }

    fn body_relaxed(data: &[u8]) -> Vec<u8> {
        canonical_body(super::body_relaxed, data)
    }

    fn body_simple(data: &[u8]) -> Vec<u8> {
        canonical_body(super::body_simple, data)
    }

    fn header_relaxed(key: &str, value: &[u8]) -> Vec<u8> {
        let mut canonical = Vec::new();
        canonicalize_header_relaxed(key.as_bytes(), value, &mut canonical);
        canonical
    }

    #[test]
    fn test_canonicalize_header_relaxed() {
        // (field name, raw value, expected canonical header)
        let cases: [(&str, &[u8], &[u8]); 3] = [
            ("SUBJect", b" AbC\r\n", b"subject:AbC\r\n"),
            ("Subject \t", b"\t Your Name\t \r\n", b"subject:Your Name\r\n"),
            (
                "Subject \t",
                b"\t Kimi \t \r\n No \t\r\n Na Wa\r\n",
                b"subject:Kimi No Na Wa\r\n",
            ),
        ];
        for (key, value, expected) in cases {
            assert_eq!(header_relaxed(key, value), expected);
        }
    }

    #[test]
    fn test_canonicalize_body_relaxed() {
        // (raw body, expected canonical body)
        let cases: [(&[u8], &[u8]); 3] = [
            (b"\r\n", b"\r\n"),
            (b"hey        \r\n", b"hey\r\n"),
            (b" C \r\nD \t E\r\n\r\n\r\n", b" C\r\nD E\r\n"),
        ];
        for (input, expected) in cases {
            assert_eq!(body_relaxed(input), expected);
        }
    }

    #[test]
    fn test_canonicalize_body_simple() {
        // (raw body, expected canonical body)
        let cases: [(&[u8], &[u8]); 3] = [
            (b"\r\n", b"\r\n"),
            (b"hey        \r\n", b"hey        \r\n"),
            (b" C \r\nD \t E\r\n\r\n\r\n", b" C \r\nD \t E\r\n"),
        ];
        for (input, expected) in cases {
            assert_eq!(body_simple(input), expected);
        }
    }
}