mailparsing/
mimepart.rs

1use crate::header::{HeaderParseResult, MessageConformance};
2use crate::headermap::HeaderMap;
3use crate::strings::IntoSharedString;
4use crate::{
5    has_lone_cr_or_lf, Header, MailParsingError, MessageID, MimeParameterEncoding, MimeParameters,
6    Result, SharedString,
7};
8use bstr::{BStr, BString, ByteSlice};
9use charset_normalizer_rs::entity::NormalizerSettings;
10use charset_normalizer_rs::Encoding;
11use chrono::Utc;
12use serde::{Deserialize, Serialize};
13use std::borrow::Cow;
14use std::str::FromStr;
15use std::sync::Arc;
16
/// Base64 codec used for MIME bodies.
///
/// Define our own because data_encoding::BASE64_MIME, despite its name,
/// is not RFC2045 compliant, and will not ignore spaces.
/// This encoding ignores spaces, tabs, CR and LF in its input, and wraps
/// its output at 76 columns using CRLF as the line separator.
const BASE64_RFC2045: data_encoding::Encoding = data_encoding_macro::new_encoding! {
    symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    padding: '=',
    ignore: " \r\n\t",
    wrap_width: 76,
    wrap_separator: "\r\n",
};
26
/// A node in a parsed (or newly constructed) MIME tree: a set of headers
/// plus either a body or a list of child parts.
#[derive(Debug, Clone, PartialEq)]
pub struct MimePart<'a> {
    /// The bytes that comprise this part, from its beginning to its end
    bytes: SharedString<'a>,
    /// The parsed headers from the start of bytes
    headers: HeaderMap<'a>,
    /// The index into bytes of the first non-header byte.
    body_offset: usize,
    /// Despite the name, this is used as the exclusive end index into
    /// bytes when slicing out the raw body (see `raw_body`).
    body_len: usize,
    /// Conformance flags accumulated for this part
    conformance: MessageConformance,
    /// Child parts; empty unless this is a multipart container
    parts: Vec<Self>,
    /// For multipart, the content that precedes the first boundary
    intro: SharedString<'a>,
    /// For multipart, the content that follows the last boundary
    outro: SharedString<'a>,
}
43
/// Summary of the MIME-related headers of a part, computed by
/// `Rfc2045Info::new`.
#[derive(PartialEq, Debug)]
pub struct Rfc2045Info {
    /// The Content-Transfer-Encoding; `SevenBit` when the header is
    /// absent or could not be parsed
    pub encoding: ContentTransferEncoding,
    /// The result of looking up the `charset` parameter of the
    /// Content-Type (`us-ascii` when not specified)
    pub charset: Result<&'static Encoding>,
    /// The parsed Content-Type header, if present and parseable
    pub content_type: Option<MimeParameters>,
    /// Whether the content type is textual; `true` when there is no
    /// content type
    pub is_text: bool,
    /// Whether the content type is multipart; `false` when there is no
    /// content type
    pub is_multipart: bool,
    /// Populated when the part is inline, or has a file name or a
    /// content id
    pub attachment_options: Option<AttachmentOptions>,
    /// Set when any of the MIME headers examined failed to parse
    pub invalid_mime_headers: bool,
}
54
55impl Rfc2045Info {
56    // This must be infallible so that a basic mime structure can be parsed
57    // even if the mime headers are a bit borked
58    fn new(headers: &HeaderMap) -> Self {
59        let mut invalid_mime_headers = false;
60        let encoding = match headers.content_transfer_encoding() {
61            Ok(Some(cte)) => match cte
62                .value
63                .to_str()
64                .map_err(|_| ())
65                .and_then(|s| ContentTransferEncoding::from_str(s).map_err(|_| ()))
66            {
67                Ok(encoding) => encoding,
68                Err(_) => {
69                    invalid_mime_headers = true;
70                    ContentTransferEncoding::SevenBit
71                }
72            },
73            Ok(None) => ContentTransferEncoding::SevenBit,
74            Err(_) => {
75                invalid_mime_headers = true;
76                ContentTransferEncoding::SevenBit
77            }
78        };
79
80        let content_type = match headers.content_type() {
81            Ok(ct) => ct,
82            Err(_) => {
83                invalid_mime_headers = true;
84                None
85            }
86        };
87
88        let mut ct_name = None;
89        let charset = if let Some(ct) = &content_type {
90            ct_name = ct.get("name");
91            ct.get("charset")
92        } else {
93            None
94        };
95        let charset = charset.unwrap_or_else(|| "us-ascii".into());
96
97        let charset = match charset.to_str() {
98            Ok(charset) => Encoding::by_name(&*charset).ok_or_else(|| {
99                MailParsingError::BodyParse(format!("unsupported charset {charset}"))
100            }),
101            Err(_) => Err(MailParsingError::BodyParse(format!(
102                "non-ascii charset name {charset}"
103            ))),
104        };
105
106        let (is_text, is_multipart) = if let Some(ct) = &content_type {
107            (ct.is_text(), ct.is_multipart())
108        } else {
109            (true, false)
110        };
111
112        let mut inline = false;
113        let mut cd_file_name = None;
114
115        match headers.content_disposition() {
116            Ok(Some(cd)) => {
117                inline = cd.value == "inline";
118                cd_file_name = cd.get("filename");
119            }
120            Ok(None) => {}
121            Err(_) => {
122                invalid_mime_headers = true;
123            }
124        };
125
126        let content_id = match headers.content_id() {
127            Ok(cid) => cid.map(|cid| cid.0),
128            Err(_) => {
129                invalid_mime_headers = true;
130                None
131            }
132        };
133
134        let file_name = match (cd_file_name, ct_name) {
135            (Some(name), _) | (None, Some(name)) => Some(name),
136            (None, None) => None,
137        };
138
139        let attachment_options = if inline || file_name.is_some() || content_id.is_some() {
140            Some(AttachmentOptions {
141                file_name,
142                inline,
143                content_id,
144            })
145        } else {
146            None
147        };
148
149        Self {
150            encoding,
151            charset,
152            content_type,
153            is_text,
154            is_multipart,
155            attachment_options,
156            invalid_mime_headers,
157        }
158    }
159
160    pub fn content_type(&self) -> Option<&str> {
161        self.content_type
162            .as_ref()
163            .and_then(|params| params.value.to_str().ok())
164    }
165}
166
167impl<'a> MimePart<'a> {
168    /// Parse some data into a tree of MimeParts
169    pub fn parse<S>(bytes: S) -> Result<Self>
170    where
171        S: IntoSharedString<'a>,
172    {
173        let (bytes, base_conformance) = bytes.into_shared_string();
174        Self::parse_impl(bytes, base_conformance, true)
175    }
176
177    /// Obtain a version of self that has a static lifetime
178    pub fn to_owned(&self) -> MimePart<'static> {
179        MimePart {
180            bytes: self.bytes.to_owned(),
181            headers: self.headers.to_owned(),
182            body_offset: self.body_offset,
183            body_len: self.body_len,
184            conformance: self.conformance,
185            parts: self.parts.iter().map(|p| p.to_owned()).collect(),
186            intro: self.intro.to_owned(),
187            outro: self.outro.to_owned(),
188        }
189    }
190
191    fn parse_impl(
192        bytes: SharedString<'a>,
193        base_conformance: MessageConformance,
194        is_top_level: bool,
195    ) -> Result<Self> {
196        let HeaderParseResult {
197            headers,
198            body_offset,
199            overall_conformance: mut conformance,
200        } = Header::parse_headers(bytes.clone())?;
201
202        conformance |= base_conformance;
203
204        let body_len = bytes.len();
205
206        if !bytes.as_bytes().is_ascii() {
207            conformance.set(MessageConformance::NEEDS_TRANSFER_ENCODING, true);
208        }
209        {
210            let mut prev = 0;
211            for idx in memchr::memchr_iter(b'\n', bytes.as_bytes()) {
212                if idx - prev > 78 {
213                    conformance.set(MessageConformance::LINE_TOO_LONG, true);
214                    break;
215                }
216                prev = idx;
217            }
218        }
219        conformance.set(
220            MessageConformance::NON_CANONICAL_LINE_ENDINGS,
221            has_lone_cr_or_lf(bytes.as_bytes()),
222        );
223
224        if is_top_level {
225            conformance.set(
226                MessageConformance::MISSING_DATE_HEADER,
227                !matches!(headers.date(), Ok(Some(_))),
228            );
229            conformance.set(
230                MessageConformance::MISSING_MESSAGE_ID_HEADER,
231                !matches!(headers.message_id(), Ok(Some(_))),
232            );
233            conformance.set(
234                MessageConformance::MISSING_MIME_VERSION,
235                match headers.mime_version() {
236                    Ok(Some(v)) => v != "1.0",
237                    _ => true,
238                },
239            );
240        }
241
242        let mut part = Self {
243            bytes,
244            headers,
245            body_offset,
246            body_len,
247            conformance,
248            parts: vec![],
249            intro: SharedString::Borrowed(b""),
250            outro: SharedString::Borrowed(b""),
251        };
252
253        part.recursive_parse()?;
254
255        Ok(part)
256    }
257
    /// Splits a multipart body into its child parts.
    ///
    /// Records `INVALID_MIME_HEADERS` if the MIME headers could not be
    /// parsed cleanly. Then, if the Content-Type is multipart and has a
    /// `boundary` parameter, the body is scanned for `\n--<boundary>`
    /// delimiters: the text before the first delimiter becomes `intro`,
    /// each delimited section is parsed as a child part (merging its
    /// conformance flags into ours), and the text following the closing
    /// `--<boundary>--` line becomes `outro`.
    ///
    /// Returns an error if any child part fails to parse.
    fn recursive_parse(&mut self) -> Result<()> {
        let info = Rfc2045Info::new(&self.headers);
        if info.invalid_mime_headers {
            self.conformance |= MessageConformance::INVALID_MIME_HEADERS;
        }
        // Only proceed when there is a boundary AND the type is multipart;
        // the `true` in the pattern matches `info.is_multipart`.
        if let Some((boundary, true)) = info
            .content_type
            .as_ref()
            .and_then(|ct| ct.get("boundary").map(|b| (b, info.is_multipart)))
        {
            let boundary = format!("\n--{boundary}");
            // Start one byte before the body; presumably so that a
            // delimiter at the very start of the body is still preceded
            // by the newline that the search pattern requires.
            let raw_body = self
                .bytes
                .slice(self.body_offset.saturating_sub(1)..self.bytes.len());

            let mut iter = memchr::memmem::find_iter(raw_body.as_bytes(), &boundary);
            if let Some(first_boundary_pos) = iter.next() {
                self.intro = raw_body.slice(0..first_boundary_pos);

                // When we create parts, we ignore the original body span in
                // favor of what we're parsing out here now
                self.body_len = 0;

                // Index just past the delimiter text that was last matched
                let mut boundary_end = first_boundary_pos + boundary.len();

                // The part starts after the newline that ends the
                // delimiter line
                while let Some(part_start) =
                    memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                        .map(|p| p + boundary_end + 1)
                {
                    // The part runs up to the next delimiter, or to the end
                    // of the data when there are no more delimiters
                    let part_end = iter
                        .next()
                        .map(|p| {
                            // P is the newline; we want to include it in the raw
                            // bytes for this part, so look beyond it
                            p + 1
                        })
                        .unwrap_or(raw_body.len());

                    let child = Self::parse_impl(
                        raw_body.slice(part_start..part_end),
                        MessageConformance::default(),
                        false,
                    )?;
                    self.conformance |= child.conformance;
                    self.parts.push(child);

                    boundary_end = part_end -
                        1 /* newline we adjusted for when assigning part_end */
                        + boundary.len();

                    // Not enough data left to hold a closing "--"
                    if boundary_end + 2 > raw_body.len() {
                        break;
                    }
                    // A delimiter followed by "--" is the closing delimiter;
                    // everything after the end of that line is the outro
                    if &raw_body.as_bytes()[boundary_end..boundary_end + 2] == b"--" {
                        if let Some(after_boundary) =
                            memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                                .map(|p| p + boundary_end + 1)
                        {
                            self.outro = raw_body.slice(after_boundary..raw_body.len());
                        }
                        break;
                    }
                }
            }
        }

        Ok(())
    }
326
327    /// Recursively performs deeper conformance checks on the message.
328    /// At this time that includes attempting to decode any text parts
329    /// into UTF-8 to see if they are correctly annotated, but it may
330    /// include more checks in the future.
331    /// The results of the deep checks are combined with any conformance
332    /// issues detected during parsing, and returned.
333    pub fn deep_conformance_check(&self) -> MessageConformance {
334        if self.parts.is_empty() {
335            match self.extract_body(None) {
336                Ok((_, conformance)) => conformance,
337                Err(_) => self.conformance | MessageConformance::NEEDS_TRANSFER_ENCODING,
338            }
339        } else {
340            let mut conformance = self.conformance;
341            for p in &self.parts {
342                conformance |= p.deep_conformance_check();
343            }
344            conformance
345        }
346    }
347
    /// Returns the conformance flags determined during parsing.
    /// This does not perform any body decoding; see
    /// `deep_conformance_check` for that.
    pub fn conformance(&self) -> MessageConformance {
        self.conformance
    }
352
353    /// Obtain a reference to the child parts
354    pub fn child_parts(&self) -> &[Self] {
355        &self.parts
356    }
357
    /// Obtain a mutable reference to the child parts, allowing parts to
    /// be added, removed or modified in place
    pub fn child_parts_mut(&mut self) -> &mut Vec<Self> {
        &mut self.parts
    }
362
    /// Obtains a reference to the headers of this part
    pub fn headers(&'_ self) -> &'_ HeaderMap<'_> {
        &self.headers
    }
367
    /// Obtain a mutable reference to the headers of this part
    pub fn headers_mut<'b>(&'b mut self) -> &'b mut HeaderMap<'a> {
        &mut self.headers
    }
372
373    /// Get the raw, transfer-encoded body
374    pub fn raw_body(&'_ self) -> SharedString<'_> {
375        self.bytes
376            .slice(self.body_offset..self.body_len.max(self.body_offset))
377    }
378
    /// Computes the MIME metadata (transfer encoding, charset, content
    /// type, attachment options) from the headers of this part.
    /// The information is recomputed on every call.
    pub fn rfc2045_info(&self) -> Rfc2045Info {
        Rfc2045Info::new(&self.headers)
    }
382
383    /// Decode transfer decoding and return the body
384    pub fn body(&'_ self) -> Result<DecodedBody<'_>> {
385        let (body, _conformance) = self.extract_body(None)?;
386        Ok(body)
387    }
388
389    fn extract_body(
390        &'_ self,
391        options: Option<&CheckFixSettings>,
392    ) -> Result<(DecodedBody<'_>, MessageConformance)> {
393        let info = Rfc2045Info::new(&self.headers);
394
395        let bytes = match info.encoding {
396            ContentTransferEncoding::Base64 => {
397                let data = self.raw_body();
398                let bytes = data.as_bytes();
399                BASE64_RFC2045.decode(bytes).map_err(|err| {
400                    let b = bytes[err.position] as char;
401                    let region =
402                        &bytes[err.position.saturating_sub(8)..(err.position + 8).min(bytes.len())];
403                    let region = String::from_utf8_lossy(region);
404                    MailParsingError::BodyParse(format!(
405                        "base64 decode: {err:#} b={b:?} in {region}"
406                    ))
407                })?
408            }
409            ContentTransferEncoding::QuotedPrintable => quoted_printable::decode(
410                self.raw_body().as_bytes(),
411                quoted_printable::ParseMode::Robust,
412            )
413            .map_err(|err| {
414                MailParsingError::BodyParse(format!("quoted printable decode: {err:#}"))
415            })?,
416            ContentTransferEncoding::SevenBit
417            | ContentTransferEncoding::EightBit
418            | ContentTransferEncoding::Binary => self.raw_body().as_bytes().to_vec(),
419        };
420
421        if info.is_text {
422            let charset = info.charset?;
423
424            match charset.decode_simple(&bytes) {
425                Ok(decoded) => Ok((
426                    DecodedBody::Text(decoded.to_string().into()),
427                    self.conformance,
428                )),
429                Err(_err) => {
430                    if let Some(settings) = options {
431                        if settings.detect_encoding {
432                            let norm_settings = NormalizerSettings {
433                                include_encodings: settings.include_encodings.clone(),
434                                exclude_encodings: settings.exclude_encodings.clone(),
435                                ..Default::default()
436                            };
437
438                            if let Ok(guess) =
439                                charset_normalizer_rs::from_bytes(&*bytes, Some(norm_settings))
440                            {
441                                if let Some(decoded) =
442                                    guess.get_best().and_then(|best| best.decoded_payload())
443                                {
444                                    return Ok((
445                                        DecodedBody::Text(decoded.to_string().into()),
446                                        MessageConformance::NEEDS_TRANSFER_ENCODING
447                                            | self.conformance,
448                                    ));
449                                }
450                            }
451
452                            // No charset was detected.  This is a strong indicator
453                            // that the content is actually binary, according to
454                            // the docs of the detector, but we know that it should
455                            // be text.  Regardless, we can't represent it as UTF-8
456                            // here.
457                            // We'll return it as a binary part and let the caller
458                            // decide if that is an issue
459                            return Ok((
460                                DecodedBody::Binary(bytes),
461                                MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
462                            ));
463                        }
464                    }
465
466                    // We don't know what the charset is, just that this should
467                    // be some kind of text.  For the sake of compatibility with
468                    // international email, let's try it as UTF-8, and if that
469                    // sticks, we'll use it.
470                    if let Ok(decoded) = std::str::from_utf8(&bytes) {
471                        return Ok((
472                            DecodedBody::Text(decoded.to_string().into()),
473                            MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
474                        ));
475                    }
476
477                    // Who knows what it is? Return it as binary and leave the
478                    // final decision on what to do with it to our caller.
479                    Ok((
480                        DecodedBody::Binary(bytes),
481                        MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
482                    ))
483                }
484            }
485        } else {
486            Ok((DecodedBody::Binary(bytes), self.conformance))
487        }
488    }
489
490    /// Re-constitute the message.
491    /// Each element will be parsed out, and the parsed form used
492    /// to build a new message.
493    /// This has the side effect of "fixing" non-conforming elements,
494    /// but may come at the cost of "losing" the non-sensical or otherwise
495    /// out of spec elements in the rebuilt message
496    pub fn rebuild(&self, settings: Option<&CheckFixSettings>) -> Result<Self> {
497        let info = Rfc2045Info::new(&self.headers);
498
499        let mut children = vec![];
500        for part in &self.parts {
501            children.push(part.rebuild(settings)?);
502        }
503
504        let mut rebuilt = if children.is_empty() {
505            let (body, _conformance) = self.extract_body(settings)?;
506            match body {
507                DecodedBody::Text(text) => {
508                    let ct = info
509                        .content_type
510                        .as_ref()
511                        .map(|ct| ct.value.as_bstr())
512                        .unwrap_or_else(|| BStr::new("text/plain"));
513                    Self::new_text(ct, text.as_bytes())?
514                }
515                DecodedBody::Binary(data) => {
516                    let ct = info
517                        .content_type
518                        .as_ref()
519                        .map(|ct| ct.value.as_bstr())
520                        .unwrap_or_else(|| BStr::new("application/octet-stream"));
521                    Self::new_binary(ct, &data, info.attachment_options.as_ref())?
522                }
523            }
524        } else {
525            let ct = info.content_type.ok_or_else(|| {
526                MailParsingError::BodyParse(
527                    "multipart message has no content-type information!?".to_string(),
528                )
529            })?;
530            Self::new_multipart(
531                &ct.value,
532                children,
533                ct.get("boundary").as_deref().map(|b| b.as_bytes()),
534            )?
535        };
536
537        for hdr in self.headers.iter() {
538            let name = hdr.get_name();
539            if name.eq_ignore_ascii_case(b"Content-ID") {
540                continue;
541            }
542
543            // Merge in any MimeParameters that we might otherwise have lost
544            // in the rebuild
545            if name.eq_ignore_ascii_case(b"Content-Type") {
546                if let Ok(params) = hdr.as_content_type() {
547                    let Some(mut dest) = rebuilt.headers_mut().content_type()? else {
548                        continue;
549                    };
550
551                    for (k, v) in params.parameter_map() {
552                        if dest.get(&k).is_none() {
553                            dest.set(&k, &v);
554                        }
555                    }
556
557                    rebuilt.headers_mut().set_content_type(dest)?;
558                }
559                continue;
560            }
561            if name.eq_ignore_ascii_case(b"Content-Transfer-Encoding") {
562                if let Ok(params) = hdr.as_content_transfer_encoding() {
563                    let Some(mut dest) = rebuilt.headers_mut().content_transfer_encoding()? else {
564                        continue;
565                    };
566
567                    for (k, v) in params.parameter_map() {
568                        if dest.get(&k).is_none() {
569                            dest.set(&k, &v);
570                        }
571                    }
572
573                    rebuilt.headers_mut().set_content_transfer_encoding(dest)?;
574                }
575                continue;
576            }
577            if name.eq_ignore_ascii_case(b"Content-Disposition") {
578                if let Ok(params) = hdr.as_content_disposition() {
579                    let Some(mut dest) = rebuilt.headers_mut().content_disposition()? else {
580                        continue;
581                    };
582
583                    for (k, v) in params.parameter_map() {
584                        if dest.get(&k).is_none() {
585                            dest.set(&k, &v);
586                        }
587                    }
588
589                    rebuilt.headers_mut().set_content_disposition(dest)?;
590                }
591                continue;
592            }
593
594            if let Ok(hdr) = hdr.rebuild() {
595                rebuilt.headers_mut().push(hdr);
596            }
597        }
598
599        Ok(rebuilt)
600    }
601
602    /// Write the message content to the provided output stream
603    pub fn write_message<W: std::io::Write>(&self, out: &mut W) -> Result<()> {
604        let line_ending = if self
605            .conformance
606            .contains(MessageConformance::NON_CANONICAL_LINE_ENDINGS)
607        {
608            "\n"
609        } else {
610            "\r\n"
611        };
612
613        for hdr in self.headers.iter() {
614            hdr.write_header(out)
615                .map_err(|_| MailParsingError::WriteMessageIOError)?;
616        }
617        out.write_all(line_ending.as_bytes())
618            .map_err(|_| MailParsingError::WriteMessageIOError)?;
619
620        if self.parts.is_empty() {
621            out.write_all(self.raw_body().as_bytes())
622                .map_err(|_| MailParsingError::WriteMessageIOError)?;
623        } else {
624            let info = Rfc2045Info::new(&self.headers);
625            let ct = info.content_type.ok_or({
626                MailParsingError::WriteMessageWtf(
627                    "expected to have Content-Type when there are child parts",
628                )
629            })?;
630            let boundary = ct.get("boundary").ok_or({
631                MailParsingError::WriteMessageWtf("expected Content-Type to have a boundary")
632            })?;
633            out.write_all(self.intro.as_bytes())
634                .map_err(|_| MailParsingError::WriteMessageIOError)?;
635            for p in &self.parts {
636                write!(out, "--{boundary}{line_ending}")
637                    .map_err(|_| MailParsingError::WriteMessageIOError)?;
638                p.write_message(out)?;
639            }
640            write!(out, "--{boundary}--{line_ending}")
641                .map_err(|_| MailParsingError::WriteMessageIOError)?;
642            out.write_all(self.outro.as_bytes())
643                .map_err(|_| MailParsingError::WriteMessageIOError)?;
644        }
645        Ok(())
646    }
647
648    /// Convenience method wrapping write_message that returns
649    /// the formatted message as a standalone string
650    pub fn to_message_bytes(&self) -> Vec<u8> {
651        let mut out = vec![];
652        self.write_message(&mut out).unwrap();
653        out
654    }
655
656    pub fn replace_text_body(
657        &mut self,
658        content_type: impl AsRef<[u8]>,
659        content: impl AsRef<BStr>,
660    ) -> Result<()> {
661        let mut new_part = Self::new_text(content_type, content)?;
662        self.bytes = new_part.bytes;
663        self.body_offset = new_part.body_offset;
664        self.body_len = new_part.body_len;
665        // Remove any rfc2047 headers that might reflect how the content
666        // is encoded. Note that we preserve Content-Disposition as that
667        // isn't related purely to the how the content is encoded
668        self.headers.remove_all_named("Content-Type");
669        self.headers.remove_all_named("Content-Transfer-Encoding");
670        // And add any from the new part
671        self.headers.append(&mut new_part.headers.headers);
672        Ok(())
673    }
674
675    pub fn replace_binary_body(&mut self, content_type: &[u8], content: &[u8]) -> Result<()> {
676        let mut new_part = Self::new_binary(content_type, content, None)?;
677        self.bytes = new_part.bytes;
678        self.body_offset = new_part.body_offset;
679        self.body_len = new_part.body_len;
680        // Remove any rfc2047 headers that might reflect how the content
681        // is encoded. Note that we preserve Content-Disposition as that
682        // isn't related purely to the how the content is encoded
683        self.headers.remove_all_named("Content-Type");
684        self.headers.remove_all_named("Content-Transfer-Encoding");
685        // And add any from the new part
686        self.headers.append(&mut new_part.headers.headers);
687        Ok(())
688    }
689
690    pub fn new_no_transfer_encoding(content_type: &str, bytes: &[u8]) -> Result<Self> {
691        if bytes.iter().any(|b| !b.is_ascii()) {
692            return Err(MailParsingError::EightBit);
693        }
694
695        let mut headers = HeaderMap::default();
696
697        let ct = MimeParameters::new(content_type);
698        headers.set_content_type(ct)?;
699
700        let bytes = String::from_utf8_lossy(bytes).to_string();
701        let body_len = bytes.len();
702
703        Ok(Self {
704            bytes: bytes.into(),
705            headers,
706            body_offset: 0,
707            body_len,
708            conformance: MessageConformance::default(),
709            parts: vec![],
710            intro: "".into(),
711            outro: "".into(),
712        })
713    }
714
715    /// Constructs a new part with textual utf8 content.
716    /// quoted-printable transfer encoding will be applied,
717    /// unless it is smaller to represent the text in base64
718    pub fn new_text(content_type: impl AsRef<[u8]>, content: impl AsRef<BStr>) -> Result<Self> {
719        let content = content.as_ref();
720        // We'll probably use qp, so speculatively do the work
721        let qp_encoded = quoted_printable::encode(content);
722
723        let (mut encoded, encoding) = if qp_encoded == content {
724            (qp_encoded, None)
725        } else if qp_encoded.len() <= BASE64_RFC2045.encode_len(content.len()) {
726            (qp_encoded, Some("quoted-printable"))
727        } else {
728            // Turns out base64 will be smaller; perhaps the content
729            // is dominated by non-ASCII text?
730            (BASE64_RFC2045.encode(content).into_bytes(), Some("base64"))
731        };
732
733        if !encoded.ends_with(b"\r\n") {
734            encoded.extend_from_slice(b"\r\n");
735        }
736        let mut headers = HeaderMap::default();
737
738        let mut ct = MimeParameters::new(content_type);
739        ct.set(
740            "charset",
741            if content.is_ascii() {
742                "us-ascii"
743            } else {
744                "utf-8"
745            },
746        );
747        headers.set_content_type(ct)?;
748
749        if let Some(encoding) = encoding {
750            headers.set_content_transfer_encoding(MimeParameters::new(encoding))?;
751        }
752
753        let body_len = encoded.len();
754        let bytes =
755            String::from_utf8(encoded).expect("transfer encoder to produce valid ASCII output");
756
757        Ok(Self {
758            bytes: bytes.into(),
759            headers,
760            body_offset: 0,
761            body_len,
762            conformance: MessageConformance::default(),
763            parts: vec![],
764            intro: "".into(),
765            outro: "".into(),
766        })
767    }
768
    /// Constructs a new `text/plain` part from the given content.
    /// See `new_text` for how the content is encoded.
    pub fn new_text_plain(content: impl AsRef<BStr>) -> Result<Self> {
        Self::new_text("text/plain", content)
    }
772
    /// Constructs a new `text/html` part from the given content.
    /// See `new_text` for how the content is encoded.
    pub fn new_html(content: impl AsRef<BStr>) -> Result<Self> {
        Self::new_text("text/html", content)
    }
776
777    pub fn new_multipart(
778        content_type: impl AsRef<[u8]>,
779        parts: Vec<Self>,
780        boundary: Option<&[u8]>,
781    ) -> Result<Self> {
782        let mut headers = HeaderMap::default();
783
784        let mut ct = MimeParameters::new(content_type);
785        match boundary {
786            Some(b) => {
787                ct.set("boundary", b);
788            }
789            None => {
790                // Generate a random boundary
791                let uuid = uuid::Uuid::new_v4();
792                let boundary = data_encoding::BASE64_NOPAD.encode(uuid.as_bytes());
793                ct.set("boundary", &boundary);
794            }
795        }
796        headers.set_content_type(ct)?;
797
798        Ok(Self {
799            bytes: "".into(),
800            headers,
801            body_offset: 0,
802            body_len: 0,
803            conformance: MessageConformance::default(),
804            parts,
805            intro: "".into(),
806            outro: "".into(),
807        })
808    }
809
810    pub fn new_binary(
811        content_type: impl AsRef<[u8]>,
812        content: &[u8],
813        options: Option<&AttachmentOptions>,
814    ) -> Result<Self> {
815        let mut encoded = BASE64_RFC2045.encode(content);
816        if !encoded.ends_with("\r\n") {
817            encoded.push_str("\r\n");
818        }
819        let mut headers = HeaderMap::default();
820
821        let mut ct = MimeParameters::new(content_type);
822
823        if let Some(opts) = options {
824            let mut cd = MimeParameters::new(if opts.inline { "inline" } else { "attachment" });
825            if let Some(name) = &opts.file_name {
826                cd.set("filename", name);
827                let encoding = if name.chars().any(|c| !c.is_ascii()) {
828                    MimeParameterEncoding::QuotedRfc2047
829                } else {
830                    MimeParameterEncoding::None
831                };
832                ct.set_with_encoding("name", name, encoding);
833            }
834            headers.set_content_disposition(cd)?;
835
836            if let Some(id) = &opts.content_id {
837                headers.set_content_id(MessageID(id.clone()))?;
838            }
839        }
840
841        headers.set_content_type(ct)?;
842        headers.set_content_transfer_encoding(MimeParameters::new("base64"))?;
843
844        let body_len = encoded.len();
845
846        Ok(Self {
847            bytes: encoded.into(),
848            headers,
849            body_offset: 0,
850            body_len,
851            conformance: MessageConformance::default(),
852            parts: vec![],
853            intro: "".into(),
854            outro: "".into(),
855        })
856    }
857
858    /// Returns a SimplifiedStructure representation of the mime tree,
859    /// with the (probable) primary text/plain and text/html parts
860    /// pulled out, and the remaining parts recorded as a flat
861    /// attachments array
862    pub fn simplified_structure(&'a self) -> Result<SimplifiedStructure<'a>> {
863        let parts = self.simplified_structure_pointers()?;
864
865        let mut text = None;
866        let mut html = None;
867        let mut amp_html = None;
868
869        let headers = &self
870            .resolve_ptr(parts.header_part)
871            .expect("header part to always be valid")
872            .headers;
873
874        if let Some(p) = parts.text_part.and_then(|p| self.resolve_ptr(p)) {
875            text = match p.body()? {
876                DecodedBody::Text(t) => Some(t),
877                DecodedBody::Binary(_) => {
878                    return Err(MailParsingError::BodyParse(
879                        "expected text/plain part to be text, but it is binary".to_string(),
880                    ))
881                }
882            };
883        }
884        if let Some(p) = parts.html_part.and_then(|p| self.resolve_ptr(p)) {
885            html = match p.body()? {
886                DecodedBody::Text(t) => Some(t),
887                DecodedBody::Binary(_) => {
888                    return Err(MailParsingError::BodyParse(
889                        "expected text/html part to be text, but it is binary".to_string(),
890                    ))
891                }
892            };
893        }
894        if let Some(p) = parts.amp_html_part.and_then(|p| self.resolve_ptr(p)) {
895            amp_html = match p.body()? {
896                DecodedBody::Text(t) => Some(t),
897                DecodedBody::Binary(_) => {
898                    return Err(MailParsingError::BodyParse(
899                        "expected text/x-amp-html part to be text, but it is binary".to_string(),
900                    ))
901                }
902            };
903        }
904
905        let mut attachments = vec![];
906        for ptr in parts.attachments {
907            attachments.push(self.resolve_ptr(ptr).expect("pointer to be valid").clone());
908        }
909
910        Ok(SimplifiedStructure {
911            text,
912            html,
913            amp_html,
914            headers,
915            attachments,
916        })
917    }
918
919    /// Resolve a PartPointer to the corresponding MimePart
920    pub fn resolve_ptr(&self, ptr: PartPointer) -> Option<&Self> {
921        let mut current = self;
922        let mut cursor = ptr.0.as_slice();
923
924        loop {
925            match cursor.first() {
926                Some(&idx) => {
927                    current = current.parts.get(idx as usize)?;
928                    cursor = &cursor[1..];
929                }
930                None => {
931                    // We have completed the walk
932                    return Some(current);
933                }
934            }
935        }
936    }
937
938    /// Resolve a PartPointer to the corresponding MimePart, for mutable access
939    pub fn resolve_ptr_mut(&mut self, ptr: PartPointer) -> Option<&mut Self> {
940        let mut current = self;
941        let mut cursor = ptr.0.as_slice();
942
943        loop {
944            match cursor.first() {
945                Some(&idx) => {
946                    current = current.parts.get_mut(idx as usize)?;
947                    cursor = &cursor[1..];
948                }
949                None => {
950                    // We have completed the walk
951                    return Some(current);
952                }
953            }
954        }
955    }
956
    /// Returns a set of PartPointers that locate the (probable) primary
    /// text/plain, text/html and text/x-amp-html parts, with the
    /// remaining parts recorded as a flat attachments array.
    ///
    /// The resulting PartPointers can be resolved to their actual
    /// instances for both immutable and mutable operations via
    /// `resolve_ptr` and `resolve_ptr_mut`.
    ///
    /// This is the entry point for the recursive walk: it invokes
    /// `simplified_structure_pointers_impl` with `None`, which marks
    /// `self` as the root of the tree.
    pub fn simplified_structure_pointers(&self) -> Result<SimplifiedStructurePointers> {
        self.simplified_structure_pointers_impl(None)
    }
965
966    fn simplified_structure_pointers_impl(
967        &self,
968        my_idx: Option<u8>,
969    ) -> Result<SimplifiedStructurePointers> {
970        let info = Rfc2045Info::new(&self.headers);
971        let is_inline = info
972            .attachment_options
973            .as_ref()
974            .map(|ao| ao.inline)
975            .unwrap_or(true);
976
977        if let Some(ct) = &info.content_type {
978            if is_inline {
979                if ct.value == "text/plain" {
980                    return Ok(SimplifiedStructurePointers {
981                        amp_html_part: None,
982                        text_part: Some(PartPointer::root_or_nth(my_idx)),
983                        html_part: None,
984                        header_part: PartPointer::root_or_nth(my_idx),
985                        attachments: vec![],
986                    });
987                }
988                if ct.value == "text/html" {
989                    return Ok(SimplifiedStructurePointers {
990                        amp_html_part: None,
991                        html_part: Some(PartPointer::root_or_nth(my_idx)),
992                        text_part: None,
993                        header_part: PartPointer::root_or_nth(my_idx),
994                        attachments: vec![],
995                    });
996                }
997                if ct.value == "text/x-amp-html" {
998                    return Ok(SimplifiedStructurePointers {
999                        amp_html_part: Some(PartPointer::root_or_nth(my_idx)),
1000                        html_part: None,
1001                        text_part: None,
1002                        header_part: PartPointer::root_or_nth(my_idx),
1003                        attachments: vec![],
1004                    });
1005                }
1006            }
1007
1008            if ct.value.starts_with_str("multipart/") {
1009                let mut text_part = None;
1010                let mut html_part = None;
1011                let mut amp_html_part = None;
1012                let mut attachments = vec![];
1013
1014                for (i, p) in self.parts.iter().enumerate() {
1015                    let part_idx = i.try_into().map_err(|_| MailParsingError::TooManyParts)?;
1016                    if let Ok(mut s) = p.simplified_structure_pointers_impl(Some(part_idx)) {
1017                        if let Some(p) = s.text_part {
1018                            if text_part.is_none() {
1019                                text_part.replace(PartPointer::root_or_nth(my_idx).append(p));
1020                            } else {
1021                                attachments.push(p);
1022                            }
1023                        }
1024                        if let Some(p) = s.html_part {
1025                            if html_part.is_none() {
1026                                html_part.replace(PartPointer::root_or_nth(my_idx).append(p));
1027                            } else {
1028                                attachments.push(p);
1029                            }
1030                        }
1031                        if let Some(p) = s.amp_html_part {
1032                            if amp_html_part.is_none() {
1033                                amp_html_part.replace(PartPointer::root_or_nth(my_idx).append(p));
1034                            } else {
1035                                attachments.push(p);
1036                            }
1037                        }
1038                        attachments.append(&mut s.attachments);
1039                    }
1040                }
1041
1042                return Ok(SimplifiedStructurePointers {
1043                    amp_html_part,
1044                    html_part,
1045                    text_part,
1046                    header_part: PartPointer::root_or_nth(my_idx),
1047                    attachments,
1048                });
1049            }
1050
1051            return Ok(SimplifiedStructurePointers {
1052                html_part: None,
1053                text_part: None,
1054                amp_html_part: None,
1055                header_part: PartPointer::root_or_nth(my_idx),
1056                attachments: vec![PartPointer::root_or_nth(my_idx)],
1057            });
1058        }
1059
1060        // Assume text/plain content-type
1061        Ok(SimplifiedStructurePointers {
1062            text_part: Some(PartPointer::root_or_nth(my_idx)),
1063            html_part: None,
1064            amp_html_part: None,
1065            header_part: PartPointer::root_or_nth(my_idx),
1066            attachments: vec![],
1067        })
1068    }
1069
1070    pub fn check_fix_conformance(
1071        &self,
1072        check: MessageConformance,
1073        fix: MessageConformance,
1074        settings: CheckFixSettings,
1075    ) -> Result<Option<Self>> {
1076        let mut msg = self.clone();
1077        let conformance = msg.deep_conformance_check();
1078
1079        // Don't raise errors for things that we're going to fix anyway
1080        let check = check - fix;
1081
1082        if check.intersects(conformance) {
1083            let problems = check.intersection(conformance);
1084            return Err(MailParsingError::ConformanceIssues(problems));
1085        }
1086
1087        if !fix.intersects(conformance) {
1088            return Ok(None);
1089        }
1090
1091        let to_fix = fix.intersection(conformance);
1092
1093        let missing_headers_only = to_fix
1094            .difference(
1095                MessageConformance::MISSING_DATE_HEADER
1096                    | MessageConformance::MISSING_MIME_VERSION
1097                    | MessageConformance::MISSING_MESSAGE_ID_HEADER,
1098            )
1099            .is_empty();
1100
1101        if !missing_headers_only {
1102            if to_fix.contains(MessageConformance::NEEDS_TRANSFER_ENCODING) {
1103                // Something is 8-bit. If we're lucky, it's simply UTF-8,
1104                // but it could be some other "legacy" charset encoding.
1105                // If we've been asked to detect an encoding, try that now,
1106                // and re-parse the message with the re-coded input.
1107                // Otherwise, we'll attempt a lossy conversion to UTF-8
1108                // and the resulting message will likely include unicode
1109                // replacement characters.
1110
1111                if settings.detect_encoding {
1112                    if let Some(data_bytes) = &settings.data_bytes {
1113                        let norm_settings = NormalizerSettings {
1114                            include_encodings: settings.include_encodings.clone(),
1115                            exclude_encodings: settings.exclude_encodings.clone(),
1116                            ..Default::default()
1117                        };
1118
1119                        let guess =
1120                            charset_normalizer_rs::from_bytes(&*data_bytes, Some(norm_settings))
1121                                .map_err(|err| MailParsingError::CharsetDetectionFailed(err))?;
1122                        if let Some(best) = guess.get_best() {
1123                            if let Some(decoded) = best.decoded_payload() {
1124                                msg = MimePart::parse(decoded.to_string())?;
1125                            }
1126                        }
1127                    }
1128                }
1129            }
1130
1131            msg = msg.rebuild(Some(&settings))?;
1132        }
1133
1134        if to_fix.contains(MessageConformance::MISSING_DATE_HEADER) {
1135            msg.headers_mut().set_date(Utc::now())?;
1136        }
1137
1138        if to_fix.contains(MessageConformance::MISSING_MIME_VERSION) {
1139            msg.headers_mut().set_mime_version("1.0")?;
1140        }
1141
1142        if to_fix.contains(MessageConformance::MISSING_MESSAGE_ID_HEADER) {
1143            if let Some(message_id) = &settings.message_id {
1144                msg.headers_mut()
1145                    .set_message_id(MessageID(message_id.clone().into()))?;
1146            }
1147        }
1148
1149        Ok(Some(msg))
1150    }
1151}
1152
/// Inputs for the repairs performed by `MimePart::check_fix_conformance`.
///
/// Every deserializable field falls back to its `Default` value when it
/// is absent from the input.
#[derive(Default, Debug, Clone, Deserialize)]
pub struct CheckFixSettings {
    /// When true, and `data_bytes` is available, fixing
    /// `NEEDS_TRANSFER_ENCODING` first runs charset detection over the
    /// raw bytes and re-parses the message from the decoded text.
    #[serde(default)]
    pub detect_encoding: bool,
    /// Passed through as `NormalizerSettings::include_encodings` when
    /// charset detection runs.
    #[serde(default)]
    pub include_encodings: Vec<String>,
    /// Passed through as `NormalizerSettings::exclude_encodings` when
    /// charset detection runs.
    #[serde(default)]
    pub exclude_encodings: Vec<String>,
    /// The value to use when adding a missing Message-ID header.
    /// When `None`, a missing Message-ID header is not added.
    #[serde(default)]
    pub message_id: Option<String>,
    /// The raw bytes handed to charset detection.
    /// Never populated by deserialization.
    #[serde(skip)]
    pub data_bytes: Option<Arc<Box<[u8]>>>,
}
1166
/// Identifies a MimePart by the route taken to reach it from the root
/// of the mime tree.  The route is a sequence of child indices, one per
/// level of the tree; once the sequence is exhausted, the node reached
/// is the one being referenced.  eg: `[]` is the root part, `[0]` is
/// the 0th child of the root and `[0, 2]` is child 2 of that child.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartPointer(Vec<u8>);

impl PartPointer {
    /// A pointer to the root node: the empty route
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// A pointer to the nth child when `n` is provided,
    /// otherwise a pointer to the root node
    pub fn root_or_nth(n: Option<u8>) -> Self {
        n.map_or_else(Self::root, Self::nth)
    }

    /// A pointer to the nth child of the root
    pub fn nth(n: u8) -> Self {
        Self(vec![n])
    }

    /// Consume self and other, producing the pointer that is reached
    /// by following self and then following other from there
    pub fn append(mut self, other: Self) -> Self {
        self.0.extend(other.0);
        self
    }

    /// Render the route as its indices in decimal, separated by `.`.
    /// The root pointer renders as the empty string.
    pub fn id_string(&self) -> String {
        let steps: Vec<String> = self.0.iter().map(|step| step.to_string()).collect();
        steps.join(".")
    }
}
1214
/// The result of classifying the parts of a mime tree, expressed as
/// `PartPointer`s rather than references so that the parts can be
/// looked up later with either `MimePart::resolve_ptr` or
/// `MimePart::resolve_ptr_mut`.
#[derive(Debug, Clone)]
pub struct SimplifiedStructurePointers {
    /// The primary text/plain part
    pub text_part: Option<PartPointer>,
    /// The primary text/html part
    pub html_part: Option<PartPointer>,
    /// The primary text/x-amp-html part
    pub amp_html_part: Option<PartPointer>,
    /// The "top level" set of headers for the message
    pub header_part: PartPointer,
    /// all other (terminal) parts are attachments
    pub attachments: Vec<PartPointer>,
}
1228
/// The result of `MimePart::simplified_structure`: the primary text
/// parts already decoded, plus everything else as attachments.
#[derive(Debug, Clone)]
pub struct SimplifiedStructure<'a> {
    /// The decoded body of the primary text/plain part, if there is one
    pub text: Option<SharedString<'a>>,
    /// The decoded body of the primary text/html part, if there is one
    pub html: Option<SharedString<'a>>,
    /// The decoded body of the primary text/x-amp-html part, if there is one
    pub amp_html: Option<SharedString<'a>>,
    /// The headers of the part designated as the header part
    pub headers: &'a HeaderMap<'a>,
    /// Clones of the parts that were classified as attachments
    pub attachments: Vec<MimePart<'a>>,
}
1237
/// Describes how a part should be presented as an attachment.
///
/// Unknown fields are rejected when deserializing, and each field
/// falls back to its `Default` value when absent.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct AttachmentOptions {
    /// The file name.  `MimePart::new_binary` records it as both the
    /// Content-Disposition `filename` parameter and the Content-Type
    /// `name` parameter.
    #[serde(default)]
    pub file_name: Option<BString>,
    /// Selects an `inline` Content-Disposition when true and
    /// `attachment` when false.
    #[serde(default)]
    pub inline: bool,
    /// The content id.  `MimePart::new_binary` passes it to
    /// `set_content_id` when it is present.
    #[serde(default)]
    pub content_id: Option<BString>,
}
1248
/// The transfer encodings recognized by this crate.
/// The `FromStr` implementation maps the textual names, without regard
/// to ASCII case, to these variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// Spelled `7bit`
    SevenBit,
    /// Spelled `8bit`
    EightBit,
    /// Spelled `binary`
    Binary,
    /// Spelled `quoted-printable`
    QuotedPrintable,
    /// Spelled `base64`
    Base64,
}
1257
1258impl FromStr for ContentTransferEncoding {
1259    type Err = MailParsingError;
1260
1261    fn from_str(s: &str) -> Result<Self> {
1262        if s.eq_ignore_ascii_case("7bit") {
1263            Ok(Self::SevenBit)
1264        } else if s.eq_ignore_ascii_case("8bit") {
1265            Ok(Self::EightBit)
1266        } else if s.eq_ignore_ascii_case("binary") {
1267            Ok(Self::Binary)
1268        } else if s.eq_ignore_ascii_case("quoted-printable") {
1269            Ok(Self::QuotedPrintable)
1270        } else if s.eq_ignore_ascii_case("base64") {
1271            Ok(Self::Base64)
1272        } else {
1273            Err(MailParsingError::InvalidContentTransferEncoding(
1274                s.to_string(),
1275            ))
1276        }
1277    }
1278}
1279
/// The body of a part after its transfer encoding has been removed.
#[derive(Debug, PartialEq)]
pub enum DecodedBody<'a> {
    /// The body is textual
    Text(SharedString<'a>),
    /// The body is arbitrary bytes
    Binary(Vec<u8>),
}
1285
impl<'a> DecodedBody<'a> {
    /// Render the body as a string, whichever variant it is.
    ///
    /// `Text` is converted with `to_str_lossy` and `Binary` with
    /// `String::from_utf8_lossy`, which substitutes the unicode
    /// replacement character for invalid UTF-8 sequences.
    pub fn to_string_lossy(&'a self) -> Cow<'a, str> {
        match self {
            Self::Text(s) => s.to_str_lossy(),
            Self::Binary(b) => String::from_utf8_lossy(b),
        }
    }
}
1294
1295#[cfg(test)]
1296mod test {
1297    use super::*;
1298
    /// A minimal message with LF line endings and no MIME headers:
    /// it must serialize back byte-for-byte, expose its body as text,
    /// and gain a Content-Type header and CRLF line endings when rebuilt.
    #[test]
    fn msg_parsing() {
        let message = concat!(
            "Subject: hello there\n",
            "From:  Someone <someone@example.com>\n",
            "\n",
            "I am the body"
        );

        let part = MimePart::parse(message).unwrap();
        // Parsing followed by serializing is lossless
        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
        assert_eq!(part.raw_body(), "I am the body");
        k9::snapshot!(
            part.body(),
            r#"
Ok(
    Text(
        "I am the body",
    ),
)
"#
        );

        // Rebuilding normalizes the message
        k9::snapshot!(
            BString::from(part.rebuild(None).unwrap().to_message_bytes()),
            r#"
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
Subject: hello there\r
From: Someone <someone@example.com>\r
\r
I am the body\r

"#
        );
    }
1335
    /// A part that declares base64 but whose body is not valid base64
    /// still parses; the problem is reported as a `BodyParse` error
    /// when the body is decoded.
    #[test]
    fn mime_bogus_body() {
        let message = concat!(
            "Subject: hello there\n",
            "From: Someone <someone@example.com>\n",
            "Mime-Version: 1.0\n",
            "Content-Type: text/plain\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "hello\n"
        );

        let part = MimePart::parse(message).unwrap();
        assert_eq!(
            part.body().unwrap_err(),
            MailParsingError::BodyParse(
                "base64 decode: invalid length at 4 b='o' in hello\n".to_string()
            )
        );
    }
1356
    /// A base64 encoded text part: the raw body is the encoded text,
    /// the decoded body is the original text, and rebuilding the part
    /// re-encodes it as quoted-printable.
    #[test]
    fn mime_encoded_body() {
        let message = concat!(
            "Subject: hello there\n",
            "From: Someone <someone@example.com>\n",
            "Mime-Version: 1.0\n",
            "Content-Type: text/plain\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "aGVsbG8K\n"
        );

        let part = MimePart::parse(message).unwrap();
        // Parsing followed by serializing is lossless
        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
        assert_eq!(part.raw_body(), "aGVsbG8K\n");
        k9::snapshot!(
            part.body(),
            r#"
Ok(
    Text(
        "hello
",
    ),
)
"#
        );

        k9::snapshot!(
            BString::from(part.rebuild(None).unwrap().to_message_bytes()),
            r#"
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
Content-Transfer-Encoding: quoted-printable\r
Subject: hello there\r
From: Someone <someone@example.com>\r
Mime-Version: 1.0\r
\r
hello=0A\r

"#
        );
    }
1399
    /// A multipart/alternative message with a quoted-printable
    /// text/plain child and a base64 text/html child: the message must
    /// serialize back byte-for-byte, including the text after the final
    /// boundary, and each child must decode to its text.
    #[test]
    fn mime_multipart_1() {
        let message = concat!(
            "Subject: This is a test email\n",
            "Content-Type: multipart/alternative; boundary=foobar\n",
            "Mime-Version: 1.0\n",
            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
            "\n",
            "--foobar\n",
            "Content-Type: text/plain; charset=utf-8\n",
            "Content-Transfer-Encoding: quoted-printable\n",
            "\n",
            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
            "--foobar\n",
            "Content-Type: text/html\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
            "--foobar--\n",
            "After the final boundary stuff gets ignored.\n"
        );

        let part = MimePart::parse(message).unwrap();

        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());

        let children = part.child_parts();
        k9::assert_equal!(children.len(), 2);

        // The quoted-printable text/plain child
        k9::snapshot!(
            children[0].body(),
            r#"
Ok(
    Text(
        "This is the plaintext version, in utf-8. Proof by Euro: €\r
",
    ),
)
"#
        );
        // The base64 text/html child; note the trailing spaces on its
        // encoded lines in the input above
        k9::snapshot!(
            children[1].body(),
            r#"
Ok(
    Text(
        "<html><body>This is the <b>HTML</b> version, in us-ascii. Proof by Euro: &euro;</body></html>
",
    ),
)
"#
        );
    }
1453
    /// Mutating a parsed message: headers are added, inserted and
    /// removed, and then a child part is removed.  After each change
    /// the serialized form must reflect the change while everything
    /// that was not touched is emitted as it was in the input.
    #[test]
    fn mutate_1() {
        let message = concat!(
            "Subject: This is a test email\r\n",
            "Content-Type: multipart/alternative; boundary=foobar\r\n",
            "Mime-Version: 1.0\r\n",
            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
            "\r\n",
            "--foobar\r\n",
            "Content-Type: text/plain; charset=utf-8\r\n",
            "Content-Transfer-Encoding: quoted-printable\r\n",
            "\r\n",
            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r\n",
            "--foobar\r\n",
            "Content-Type: text/html\r\n",
            "Content-Transfer-Encoding: base64\r\n",
            "\r\n",
            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r\n",
            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r\n",
            "--foobar--\r\n",
            "After the final boundary stuff gets ignored.\r\n"
        );

        let mut part = MimePart::parse(message).unwrap();
        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
        // Append one header, insert another at the top and drop Date
        fn munge(part: &mut MimePart) {
            let headers = part.headers_mut();
            headers.push(Header::with_name_value("X-Woot", "Hello"));
            headers.insert(0, Header::with_name_value("X-First", "at the top"));
            headers.retain(|hdr| !hdr.get_name().eq_ignore_ascii_case(b"date"));
        }
        munge(&mut part);

        let re_encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            re_encoded,
            r#"
X-First: at the top\r
Subject: This is a test email\r
Content-Type: multipart/alternative; boundary=foobar\r
Mime-Version: 1.0\r
X-Woot: Hello\r
\r
--foobar\r
Content-Type: text/plain; charset=utf-8\r
Content-Transfer-Encoding: quoted-printable\r
\r
This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r
--foobar\r
Content-Type: text/html\r
Content-Transfer-Encoding: base64\r
\r
PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
--foobar--\r
After the final boundary stuff gets ignored.\r

"#
        );

        eprintln!("part before mutate:\n{part:#?}");

        // Keep only the text/html child
        part.child_parts_mut().retain(|part| {
            let ct = part.headers().content_type().unwrap().unwrap();
            ct.value == "text/html"
        });

        eprintln!("part with html removed is:\n{part:#?}");

        let re_encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            re_encoded,
            r#"
X-First: at the top\r
Subject: This is a test email\r
Content-Type: multipart/alternative; boundary=foobar\r
Mime-Version: 1.0\r
X-Woot: Hello\r
\r
--foobar\r
Content-Type: text/html\r
Content-Transfer-Encoding: base64\r
\r
PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
--foobar--\r
After the final boundary stuff gets ignored.\r

"#
        );
    }
1545
    /// Replacing the body of a text part: the serialized form must
    /// carry the new text, here with the same headers as before since
    /// both texts are short and contain an emoji.
    #[test]
    fn replace_text_body() {
        let mut part = MimePart::new_text_plain("Hello 👻\r\n").unwrap();
        let encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: base64\r
\r
SGVsbG8g8J+Ruw0K\r

"#
        );

        part.replace_text_body("text/plain", "Hello 🚀\r\n")
            .unwrap();
        let encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: base64\r
\r
SGVsbG8g8J+agA0K\r

"#
        );
    }
1577
    /// Constructing a text part from a long line of mostly-ASCII text:
    /// it is emitted as wrapped quoted-printable, survives a round trip
    /// through the parser, decodes back to the input, and is classified
    /// as the primary text part at the root.
    #[test]
    fn construct_1() {
        let input_text = "Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: €, and here are some emoji 👻 🍉 💩 and this long should be long enough that we wrap it in the returned part, let's see how that turns out!\r\n";

        let part = MimePart::new_text_plain(input_text).unwrap();

        let encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: quoted-printable\r
\r
Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: =\r
=E2=82=AC, and here are some emoji =F0=9F=91=BB =F0=9F=8D=89 =F0=9F=92=A9 a=\r
nd this long should be long enough that we wrap it in the returned part, le=\r
t's see how that turns out!\r

"#
        );

        // The serialized form parses and serializes back unchanged
        let parsed_part = MimePart::parse(encoded.clone()).unwrap();
        k9::assert_equal!(encoded, parsed_part.to_message_bytes());
        k9::assert_equal!(part.body().unwrap(), DecodedBody::Text(input_text.into()));
        k9::snapshot!(
            parsed_part.simplified_structure_pointers(),
            "
Ok(
    SimplifiedStructurePointers {
        text_part: Some(
            PartPointer(
                [],
            ),
        ),
        html_part: None,
        amp_html_part: None,
        header_part: PartPointer(
            [],
        ),
        attachments: [],
    },
)
"
        );
    }
1624
    /// Constructing a multipart/mixed message from a plain text part,
    /// an html part and a binary attachment, with an explicit boundary:
    /// checks the serialized form and the classification of the three
    /// children.
    #[test]
    fn construct_2() {
        let msg = MimePart::new_multipart(
            "multipart/mixed",
            vec![
                MimePart::new_text_plain("plain text").unwrap(),
                MimePart::new_html("<b>rich</b> text").unwrap(),
                MimePart::new_binary(
                    "application/octet-stream",
                    &[0, 1, 2, 3],
                    Some(&AttachmentOptions {
                        file_name: Some("woot.bin".into()),
                        inline: false,
                        content_id: Some("woot.id".into()),
                    }),
                )
                .unwrap(),
            ],
            Some(b"my-boundary"),
        )
        .unwrap();
        k9::snapshot!(
            BString::from(msg.to_message_bytes()),
            r#"
Content-Type: multipart/mixed;\r
\tboundary="my-boundary"\r
\r
--my-boundary\r
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
\r
plain text\r
--my-boundary\r
Content-Type: text/html;\r
\tcharset="us-ascii"\r
\r
<b>rich</b> text\r
--my-boundary\r
Content-Disposition: attachment;\r
\tfilename="woot.bin"\r
Content-ID: <woot.id>\r
Content-Type: application/octet-stream;\r
\tname="woot.bin"\r
Content-Transfer-Encoding: base64\r
\r
AAECAw==\r
--my-boundary--\r

"#
        );

        // Child 0 is the text part, child 1 the html part and child 2
        // the sole attachment; the headers are those of the root
        k9::snapshot!(
            msg.simplified_structure_pointers(),
            "
Ok(
    SimplifiedStructurePointers {
        text_part: Some(
            PartPointer(
                [
                    0,
                ],
            ),
        ),
        html_part: Some(
            PartPointer(
                [
                    1,
                ],
            ),
        ),
        amp_html_part: None,
        header_part: PartPointer(
            [],
        ),
        attachments: [
            PartPointer(
                [
                    2,
                ],
            ),
        ],
    },
)
"
        );
    }
1711
    /// When an attachment carries both a Content-Disposition `filename`
    /// and a Content-Type `name`, the reported file name is the one
    /// from Content-Disposition.
    #[test]
    fn attachment_name_order_prefers_content_disposition() {
        let message = concat!(
            "Content-Type: multipart/mixed;\r\n",
            "	boundary=\"woot\"\r\n",
            "\r\n",
            "--woot\r\n",
            "Content-Type: text/plain;\r\n",
            "	charset=\"us-ascii\"\r\n",
            "\r\n",
            "Hello, I am the main message content\r\n",
            "--woot\r\n",
            "Content-Disposition: attachment;\r\n",
            "	filename=cdname\r\n",
            "Content-Type: application/octet-stream;\r\n",
            "	name=ctname\r\n",
            "Content-Transfer-Encoding: base64\r\n",
            "\r\n",
            "u6o=\r\n",
            "--woot--\r\n"
        );
        let part = MimePart::parse(message).unwrap();
        let structure = part.simplified_structure().unwrap();

        k9::assert_equal!(
            structure.attachments[0].rfc2045_info().attachment_options,
            Some(AttachmentOptions {
                content_id: None,
                inline: false,
                file_name: Some("cdname".into()),
            })
        );
    }
1745
1746    #[test]
1747    fn attachment_name_accepts_content_type_name() {
1748        let message = concat!(
1749            "Content-Type: multipart/mixed;\r\n",
1750            "	boundary=\"woot\"\r\n",
1751            "\r\n",
1752            "--woot\r\n",
1753            "Content-Type: text/plain;\r\n",
1754            "	charset=\"us-ascii\"\r\n",
1755            "\r\n",
1756            "Hello, I am the main message content\r\n",
1757            "--woot\r\n",
1758            "Content-Disposition: attachment\r\n",
1759            "Content-Type: application/octet-stream;\r\n",
1760            "	name=ctname\r\n",
1761            "Content-Transfer-Encoding: base64\r\n",
1762            "\r\n",
1763            "u6o=\r\n",
1764            "--woot--\r\n"
1765        );
1766        let part = MimePart::parse(message).unwrap();
1767        let structure = part.simplified_structure().unwrap();
1768
1769        k9::assert_equal!(
1770            structure.attachments[0].rfc2045_info().attachment_options,
1771            Some(AttachmentOptions {
1772                content_id: None,
1773                inline: false,
1774                file_name: Some("ctname".into()),
1775            })
1776        );
1777    }
1778
1779    #[test]
1780    fn funky_headers() {
1781        let message = concat!(
1782            "Subject\r\n",
1783            "Other:\r\n",
1784            "Content-Type: multipart/alternative; boundary=foobar\r\n",
1785            "Mime-Version: 1.0\r\n",
1786            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
1787            "\r\n",
1788            "The body.\r\n"
1789        );
1790
1791        let part = MimePart::parse(message).unwrap();
1792        assert!(part
1793            .conformance()
1794            .contains(MessageConformance::MISSING_COLON_VALUE));
1795    }
1796
1797    /// This is a regression test for an issue where we'd interpret the
1798    /// binary bytes as default windows-1252 codepage charset, and mangle them.
1799    /// The high byte is sufficient to trigger the offending code prior
1800    /// to the fix
1801    #[test]
1802    fn rebuild_binary() {
1803        let expect = &[0, 1, 2, 3, 0xbe, 4, 5];
1804        let part = MimePart::new_binary("applicat/octet-stream", expect, None).unwrap();
1805
1806        let rebuilt = part.rebuild(None).unwrap();
1807        let body = rebuilt.body().unwrap();
1808
1809        assert_eq!(body, DecodedBody::Binary(expect.to_vec()));
1810    }
1811
1812    /// Validate that we don't lose supplemental mime parameters like:
1813    /// `Content-Type: text/calendar; method=REQUEST`
1814    #[test]
1815    fn rebuild_invitation() {
1816        let message = concat!(
1817            "Subject: Test for events 2\r\n",
1818            "Content-Type: multipart/mixed;\r\n",
1819            " boundary=8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1820            "\r\n",
1821            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1822            "Content-Type: multipart/alternative;\r\n",
1823            " boundary=a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1824            "\r\n",
1825            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1826            "Content-Transfer-Encoding: quoted-printable\r\n",
1827            "Content-Type: text/plain; charset=UTF-8\r\n",
1828            "\r\n",
1829            "This is a test for calendar event invitation\r\n",
1830            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1831            "Content-Transfer-Encoding: quoted-printable\r\n",
1832            "Content-Type: text/html; charset=UTF-8\r\n",
1833            "\r\n",
1834            "<p>This is a test for calendar event invitation</p>\r\n",
1835            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f--\r\n",
1836            "\r\n",
1837            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1838            "Content-Disposition: inline; name=\"Invitation.ics\"\r\n",
1839            "Content-Type: text/calendar; method=REQUEST; name=\"Invitation.ics\"\r\n",
1840            "\r\n",
1841            "Invitation\r\n",
1842            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1843            "Content-Disposition: attachment; filename=\"event.ics\"\r\n",
1844            "Content-Type: application/ics\r\n",
1845            "\r\n",
1846            "Event\r\n",
1847            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3--\r\n",
1848            "\r\n"
1849        );
1850
1851        let part = MimePart::parse(message).unwrap();
1852        let rebuilt = part.rebuild(None).unwrap();
1853
1854        k9::snapshot!(
1855            BString::from(rebuilt.to_message_bytes()),
1856            r#"
1857Content-Type: multipart/mixed;\r
1858\tboundary="8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3"\r
1859Subject: Test for events 2\r
1860\r
1861--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1862Content-Type: multipart/alternative;\r
1863\tboundary="a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f"\r
1864\r
1865--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r
1866Content-Type: text/plain;\r
1867\tcharset="us-ascii"\r
1868\r
1869This is a test for calendar event invitation\r
1870--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r
1871Content-Type: text/html;\r
1872\tcharset="us-ascii"\r
1873\r
1874<p>This is a test for calendar event invitation</p>\r
1875--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f--\r
1876--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1877Content-Type: text/calendar;\r
1878\tcharset="us-ascii";\r
1879\tmethod="REQUEST";\r
1880\tname="Invitation.ics"\r
1881\r
1882Invitation\r
1883--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1884Content-Disposition: attachment;\r
1885\tfilename="event.ics"\r
1886Content-Type: application/ics;\r
1887\tname="event.ics"\r
1888Content-Transfer-Encoding: base64\r
1889\r
1890RXZlbnQNCg==\r
1891--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3--\r
1892
1893"#
1894        );
1895    }
1896
1897    #[test]
1898    fn check_conformance_angle_msg_id() {
1899        const DOUBLE_ANGLE_ONLY: &str = "Subject: hello\r
1900Message-ID: <<1234@example.com>>\r
1901\r
1902Hello";
1903        let msg = MimePart::parse(DOUBLE_ANGLE_ONLY).unwrap();
1904        k9::snapshot!(
1905            msg.check_fix_conformance(
1906                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1907                MessageConformance::empty(),
1908                CheckFixSettings::default(),
1909            )
1910            .unwrap_err()
1911            .to_string(),
1912            "Message has conformance issues: MISSING_MESSAGE_ID_HEADER"
1913        );
1914
1915        let rebuilt = BString::from(
1916            msg.check_fix_conformance(
1917                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1918                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1919                CheckFixSettings {
1920                    message_id: Some("id@example.com".to_string()),
1921                    ..Default::default()
1922                },
1923            )
1924            .unwrap()
1925            .unwrap()
1926            .to_message_bytes(),
1927        );
1928
1929        k9::snapshot!(
1930            rebuilt,
1931            r#"
1932Subject: hello\r
1933Message-ID: <id@example.com>\r
1934\r
1935Hello
1936"#
1937        );
1938
1939        const DOUBLE_ANGLE_AND_LONG_LINE: &str = "Subject: hello\r
1940Message-ID: <<1234@example.com>>\r
1941\r
1942Hello this is a really long line Hello this is a really long line \
1943Hello this is a really long line Hello this is a really long line \
1944Hello this is a really long line Hello this is a really long line \
1945Hello this is a really long line Hello this is a really long line \
1946Hello this is a really long line Hello this is a really long line \
1947Hello this is a really long line Hello this is a really long line \
1948Hello this is a really long line Hello this is a really long line
1949";
1950        let msg = MimePart::parse(DOUBLE_ANGLE_AND_LONG_LINE).unwrap();
1951        let rebuilt = BString::from(
1952            msg.check_fix_conformance(
1953                MessageConformance::MISSING_COLON_VALUE,
1954                MessageConformance::MISSING_MESSAGE_ID_HEADER | MessageConformance::LINE_TOO_LONG,
1955                CheckFixSettings {
1956                    message_id: Some("id@example.com".to_string()),
1957                    ..Default::default()
1958                },
1959            )
1960            .unwrap()
1961            .unwrap()
1962            .to_message_bytes(),
1963        );
1964
1965        k9::snapshot!(
1966            rebuilt,
1967            r#"
1968Content-Type: text/plain;\r
1969\tcharset="us-ascii"\r
1970Content-Transfer-Encoding: quoted-printable\r
1971Subject: hello\r
1972Message-ID: <id@example.com>\r
1973\r
1974Hello this is a really long line Hello this is a really long line Hello thi=\r
1975s is a really long line Hello this is a really long line Hello this is a re=\r
1976ally long line Hello this is a really long line Hello this is a really long=\r
1977 line Hello this is a really long line Hello this is a really long line Hel=\r
1978lo this is a really long line Hello this is a really long line Hello this i=\r
1979s a really long line Hello this is a really long line Hello this is a reall=\r
1980y long line=0A\r
1981
1982"#
1983        );
1984    }
1985
1986    #[test]
1987    fn check_conformance() {
1988        const MULTI_HEADER_CONTENT: &str =
1989        "X-Hello: there\r\nX-Header: value\r\nSubject: Hello\r\nX-Header: another value\r\nFrom :Someone@somewhere\r\n\r\nBody";
1990
1991        let msg = MimePart::parse(MULTI_HEADER_CONTENT).unwrap();
1992        let rebuilt = BString::from(
1993            msg.check_fix_conformance(
1994                MessageConformance::default(),
1995                MessageConformance::MISSING_MIME_VERSION,
1996                CheckFixSettings::default(),
1997            )
1998            .unwrap()
1999            .unwrap()
2000            .to_message_bytes(),
2001        );
2002        k9::snapshot!(
2003            rebuilt,
2004            r#"
2005X-Hello: there\r
2006X-Header: value\r
2007Subject: Hello\r
2008X-Header: another value\r
2009From :Someone@somewhere\r
2010Mime-Version: 1.0\r
2011\r
2012Body
2013"#
2014        );
2015
2016        let msg = MimePart::parse(MULTI_HEADER_CONTENT).unwrap();
2017        let rebuilt = BString::from(
2018            msg.check_fix_conformance(
2019                MessageConformance::default(),
2020                MessageConformance::MISSING_MIME_VERSION | MessageConformance::NAME_ENDS_WITH_SPACE,
2021                CheckFixSettings::default(),
2022            )
2023            .unwrap()
2024            .unwrap()
2025            .to_message_bytes(),
2026        );
2027        k9::snapshot!(
2028            rebuilt,
2029            r#"
2030Content-Type: text/plain;\r
2031\tcharset="us-ascii"\r
2032X-Hello: there\r
2033X-Header: value\r
2034Subject: Hello\r
2035X-Header: another value\r
2036From: <Someone@somewhere>\r
2037Mime-Version: 1.0\r
2038\r
2039Body\r
2040
2041"#
2042        );
2043    }
2044
2045    #[test]
2046    fn check_fix_latin_input() {
2047        const POUNDS: &[u8] = b"Subject: \xa3\r\n\r\nGBP\r\n";
2048        let msg = MimePart::parse(POUNDS).unwrap();
2049        assert_eq!(
2050            msg.conformance(),
2051            MessageConformance::NEEDS_TRANSFER_ENCODING
2052                | MessageConformance::MISSING_DATE_HEADER
2053                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2054                | MessageConformance::MISSING_MIME_VERSION
2055        );
2056        let rebuilt = msg
2057            .check_fix_conformance(
2058                MessageConformance::default(),
2059                MessageConformance::NEEDS_TRANSFER_ENCODING,
2060                CheckFixSettings {
2061                    detect_encoding: true,
2062                    include_encodings: vec!["iso-8859-1".to_string()],
2063                    data_bytes: Some(Arc::new(POUNDS.into())),
2064                    ..Default::default()
2065                },
2066            )
2067            .unwrap()
2068            .unwrap();
2069
2070        let subject = rebuilt.headers.subject().unwrap().unwrap();
2071        assert_eq!(subject, "£");
2072    }
2073
2074    // The issue here is that the message is text/plain with no explicit
2075    // charset, and is thus implicitly us-ascii.  But the part is actually
2076    // utf-8 content inside base64. Since the transfer encoding is 7-bit
2077    // it doesn't get flagged as improper encoding during the initial
2078    // parse.
2079    // We want to ensure that it is found during check-fix, and is corrected.
2080    #[test]
2081    fn check_fix_utf8_inside_transfer_encoding() {
2082        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\n2KrYs9iqDQoNCg==\r\n";
2083
2084        let msg = MimePart::parse(CONTENT).unwrap();
2085
2086        // Initial parse cannot see that the content is actually utf-8,
2087        // which conflicts with the implicit us-ascii charset for a text/ part.
2088        assert_eq!(
2089            msg.conformance(),
2090            MessageConformance::MISSING_DATE_HEADER
2091                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2092                | MessageConformance::MISSING_MIME_VERSION
2093        );
2094
2095        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2096        assert_eq!(
2097            msg.deep_conformance_check(),
2098            MessageConformance::NEEDS_TRANSFER_ENCODING
2099                | MessageConformance::MISSING_DATE_HEADER
2100                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2101                | MessageConformance::MISSING_MIME_VERSION
2102        );
2103        let rebuilt = msg
2104            .check_fix_conformance(
2105                MessageConformance::default(),
2106                MessageConformance::NEEDS_TRANSFER_ENCODING,
2107                CheckFixSettings::default(),
2108            )
2109            .unwrap()
2110            .unwrap();
2111
2112        eprintln!("{rebuilt:?}");
2113        assert_eq!(rebuilt.body().unwrap().to_string_lossy().trim(), "تست");
2114    }
2115
2116    #[test]
2117    fn check_fix_latin1_inside_transfer_encoding() {
2118        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\nVGhlIGNvc3QgaXMgozQyLjAwCg==\r\n";
2119
2120        let msg = MimePart::parse(CONTENT).unwrap();
2121
2122        // Initial parse cannot see that the content is actually utf-8,
2123        // which conflicts with the implicit us-ascii charset for a text/ part.
2124        assert_eq!(
2125            msg.conformance(),
2126            MessageConformance::MISSING_DATE_HEADER
2127                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2128                | MessageConformance::MISSING_MIME_VERSION
2129        );
2130
2131        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2132        assert_eq!(
2133            msg.deep_conformance_check(),
2134            MessageConformance::NEEDS_TRANSFER_ENCODING
2135                | MessageConformance::MISSING_DATE_HEADER
2136                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2137                | MessageConformance::MISSING_MIME_VERSION
2138        );
2139        let rebuilt = msg
2140            .check_fix_conformance(
2141                MessageConformance::default(),
2142                MessageConformance::NEEDS_TRANSFER_ENCODING,
2143                CheckFixSettings {
2144                    detect_encoding: true,
2145                    include_encodings: vec!["iso-8859-1".to_string()],
2146                    ..Default::default()
2147                },
2148            )
2149            .unwrap()
2150            .unwrap();
2151
2152        eprintln!("{rebuilt:?}");
2153        assert_eq!(
2154            rebuilt.body().unwrap().to_string_lossy().trim(),
2155            "The cost is £42.00"
2156        );
2157    }
2158
2159    #[test]
2160    fn check_fix_unknown_inside_transfer_encoding() {
2161        // `owo=` is 0xa3 (a UK Sterling/Pound sign in latin-1.
2162        // The length of the data passed to the charset detector
2163        // is insufficient for it to decide the charset, so we
2164        // should not expect to see a valid text part emitted.
2165        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\nowo=\r\n";
2166
2167        let msg = MimePart::parse(CONTENT).unwrap();
2168
2169        // Initial parse cannot see that the content is actually utf-8,
2170        // which conflicts with the implicit us-ascii charset for a text/ part.
2171        assert_eq!(
2172            msg.conformance(),
2173            MessageConformance::MISSING_DATE_HEADER
2174                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2175                | MessageConformance::MISSING_MIME_VERSION
2176        );
2177
2178        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2179        assert_eq!(
2180            msg.deep_conformance_check(),
2181            MessageConformance::NEEDS_TRANSFER_ENCODING
2182                | MessageConformance::MISSING_DATE_HEADER
2183                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2184                | MessageConformance::MISSING_MIME_VERSION
2185        );
2186        let rebuilt = msg
2187            .check_fix_conformance(
2188                MessageConformance::default(),
2189                MessageConformance::NEEDS_TRANSFER_ENCODING,
2190                CheckFixSettings {
2191                    detect_encoding: true,
2192                    include_encodings: vec!["iso-8859-1".to_string()],
2193                    ..Default::default()
2194                },
2195            )
2196            .unwrap()
2197            .unwrap();
2198
2199        eprintln!("{rebuilt:?}");
2200        assert_eq!(rebuilt.body().unwrap().to_string_lossy().trim(), "�");
2201    }
2202}