kumo_dkim/
canonicalization.rs1use crate::hash::LimitHasher;
2use bstr::ByteSlice;
3use memchr::memmem::Finder;
4use std::sync::LazyLock;
5
/// Selects which canonicalization algorithm is applied to message
/// headers and bodies.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Type {
    /// The algorithm named `simple`.
    Simple,
    /// The algorithm named `relaxed`.
    Relaxed,
}
11
12impl Type {
13 pub fn canon_name(&self) -> &'static str {
14 match self {
15 Self::Simple => "simple",
16 Self::Relaxed => "relaxed",
17 }
18 }
19
20 pub(crate) fn canon_body(&self, body: &[u8], hasher: &mut LimitHasher) {
21 match self {
22 Self::Simple => body_simple(body, hasher),
23 Self::Relaxed => body_relaxed(body, hasher),
24 }
25 }
26
27 pub(crate) fn canon_header_into(&self, key: &[u8], value: &[u8], out: &mut Vec<u8>) {
28 match self {
29 Self::Simple => canonicalize_header_simple(key, value, out),
30 Self::Relaxed => canonicalize_header_relaxed(key, value, out),
31 }
32 }
33}
34
/// Returns the portion of `body` that the "simple" body canonicalization
/// keeps.
///
/// An empty body is represented as a single CRLF. Otherwise, while the body
/// ends in two consecutive CRLF sequences the last one is dropped, so at
/// most one trailing CRLF remains. No bytes are ever added to a non-empty
/// body.
fn do_body_simple(body: &[u8]) -> &[u8] {
    if body.is_empty() {
        return b"\r\n";
    }

    // Move the end marker backwards instead of re-slicing on every pass.
    let mut end = body.len();
    while body[..end].ends_with(b"\r\n\r\n") {
        end -= 2;
    }

    &body[..end]
}
46
47fn body_simple(body: &[u8], hasher: &mut LimitHasher) {
49 let body = do_body_simple(body);
50 hasher.hash(body);
51}
52
53struct IterLines<'haystack> {
55 haystack: &'haystack [u8],
56 inner: memchr::memmem::FindIter<'haystack, 'static>,
57 start: usize,
58 done: bool,
59}
60
61impl<'haystack> Iterator for IterLines<'haystack> {
62 type Item = &'haystack [u8];
63
64 fn next(&mut self) -> Option<Self::Item> {
65 if self.done {
66 return None;
67 }
68
69 match self.inner.next() {
70 Some(idx) => {
71 let line = &self.haystack[self.start..idx + 2];
72 self.start = idx + 2;
73 Some(line)
74 }
75 None => {
76 self.done = true;
77 let line = &self.haystack[self.start..];
78 if line.is_empty() {
79 None
80 } else {
81 Some(line)
82 }
83 }
84 }
85 }
86}
87
88fn iter_lines(haystack: &'_ [u8]) -> IterLines<'_> {
89 static CRLF: LazyLock<Finder> = LazyLock::new(|| memchr::memmem::Finder::new("\r\n"));
90 IterLines {
91 haystack,
92 inner: CRLF.find_iter(haystack),
93 start: 0,
94 done: false,
95 }
96}
97
98fn body_relaxed(mut body: &[u8], hasher: &mut LimitHasher) {
101 if body.is_empty() {
102 return;
103 }
104
105 while body.ends_with(b"\r\n\r\n") {
107 body = &body[..body.len() - 2];
108 }
109
110 for mut line in iter_lines(body) {
111 line = trim_ws_end(line);
113
114 let mut prior = 0;
115 for idx in memchr::memchr2_iter(b' ', b'\t', line) {
117 if prior > 0 && idx == prior {
118 prior = idx + 1;
120 continue;
121 }
122
123 hasher.hash(&line[prior..idx]);
126 hasher.hash(b" ");
128
129 prior = idx + 1;
130 }
131 hasher.hash(&line[prior..]);
133
134 hasher.hash(b"\r\n");
136 }
137}
138
/// Appends the header to `out` in "simple" form: `key`, then `": "`, then
/// `value`, then CRLF. Neither `key` nor `value` is modified.
fn canonicalize_header_simple(key: &[u8], value: &[u8], out: &mut Vec<u8>) {
    let parts: [&[u8]; 4] = [key, b": ", value, b"\r\n"];
    for part in parts {
        out.extend_from_slice(part);
    }
}
146
147fn canonicalize_header_relaxed(key: &[u8], value: &[u8], out: &mut Vec<u8>) {
149 let key = key.to_ascii_lowercase();
150 let key = key.trim_end();
151
152 out.extend_from_slice(key.as_bytes());
153 out.extend_from_slice(b":");
154
155 let value = trim_ws_start(trim_ws_end(value));
156 let mut space_run = false;
157 for &c in value {
158 match c {
159 b'\r' | b'\n' => {}
160 b' ' | b'\t' => {
161 if space_run {
162 continue;
163 }
164 space_run = true;
165 out.push(b' ');
166 }
167 _ => {
168 space_run = false;
169 out.push(c);
170 }
171 }
172 }
173
174 out.extend_from_slice(b"\r\n");
175}
176
/// Returns `line` without any leading space, tab, CR or LF bytes.
fn trim_ws_start(line: &[u8]) -> &[u8] {
    // Index of the first byte that must be kept; the whole slice is dropped
    // when every byte is whitespace.
    let first_kept = line
        .iter()
        .position(|&b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
        .unwrap_or(line.len());
    &line[first_kept..]
}
186
/// Returns `line` without any trailing space, tab, CR or LF bytes.
fn trim_ws_end(line: &[u8]) -> &[u8] {
    // One past the last byte that must be kept; zero when every byte is
    // whitespace (or the slice is empty).
    let kept_len = line
        .iter()
        .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
        .map_or(0, |last| last + 1);
    &line[..kept_len]
}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the hasher used by the body helpers below: no length limit,
    /// nothing hashed yet. Its finalized bytes are what the tests compare
    /// against the expected canonical form.
    fn recording_hasher() -> LimitHasher {
        LimitHasher {
            hasher: crate::hash::HashImpl::copy_data(),
            limit: usize::MAX,
            hashed: 0,
        }
    }

    /// Runs relaxed header canonicalization and returns the produced bytes.
    fn header_relaxed(key: &str, value: &[u8]) -> Vec<u8> {
        let mut canonical = Vec::new();
        canonicalize_header_relaxed(key.as_bytes(), value, &mut canonical);
        canonical
    }

    /// Runs relaxed body canonicalization and returns the finalized bytes.
    fn body_relaxed(data: &[u8]) -> Vec<u8> {
        let mut hasher = recording_hasher();
        super::body_relaxed(data, &mut hasher);
        hasher.finalize_bytes()
    }

    /// Runs simple body canonicalization and returns the finalized bytes.
    fn body_simple(data: &[u8]) -> Vec<u8> {
        let mut hasher = recording_hasher();
        super::body_simple(data, &mut hasher);
        hasher.finalize_bytes()
    }

    #[test]
    fn test_canonicalize_header_relaxed() {
        assert_eq!(header_relaxed("SUBJect", b" AbC\r\n"), b"subject:AbC\r\n");
        assert_eq!(
            header_relaxed("Subject \t", b"\t Your Name\t \r\n"),
            b"subject:Your Name\r\n"
        );
        assert_eq!(
            header_relaxed("Subject \t", b"\t Kimi \t \r\n No \t\r\n Na Wa\r\n"),
            b"subject:Kimi No Na Wa\r\n"
        );
    }

    #[test]
    fn test_canonicalize_body_relaxed() {
        assert_eq!(body_relaxed(b"\r\n"), b"\r\n");
        assert_eq!(body_relaxed(b"hey \r\n"), b"hey\r\n");
        assert_eq!(body_relaxed(b" C \r\nD \t E\r\n\r\n\r\n"), b" C\r\nD E\r\n");
    }

    #[test]
    fn test_canonicalize_body_simple() {
        assert_eq!(body_simple(b"\r\n"), b"\r\n");
        assert_eq!(body_simple(b"hey \r\n"), b"hey \r\n");
        assert_eq!(
            body_simple(b" C \r\nD \t E\r\n\r\n\r\n"),
            b" C \r\nD \t E\r\n"
        );
    }
}