mailparsing/
mimepart.rs

1use crate::header::{HeaderParseResult, MessageConformance};
2use crate::headermap::HeaderMap;
3use crate::strings::IntoSharedString;
4use crate::{
5    has_lone_cr_or_lf, Header, MailParsingError, MessageID, MimeParameterEncoding, MimeParameters,
6    Result, SharedString,
7};
8use bstr::{BStr, BString, ByteSlice};
9use charset_normalizer_rs::entity::NormalizerSettings;
10use charset_normalizer_rs::Encoding;
11use chrono::Utc;
12use serde::{Deserialize, Serialize};
13use std::borrow::Cow;
14use std::str::FromStr;
15use std::sync::Arc;
16
/// Base64 codec used for MIME bodies.
///
/// We define our own because data_encoding::BASE64_MIME, despite its name,
/// is not RFC2045 compliant, and will not ignore spaces.
///
/// This encoding uses the standard base64 alphabet with `=` padding.
/// When decoding, space, CR, LF and TAB are ignored; when encoding, the
/// output is wrapped every 76 characters with a CRLF separator.
const BASE64_RFC2045: data_encoding::Encoding = data_encoding_macro::new_encoding! {
    symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    padding: '=',
    ignore: " \r\n\t",
    wrap_width: 76,
    wrap_separator: "\r\n",
};
26
/// A single node in a MIME tree: its raw bytes, its parsed headers and,
/// for multipart content, its child parts.
#[derive(Debug, Clone, PartialEq)]
pub struct MimePart<'a> {
    /// The bytes that comprise this part, from its beginning to its end
    bytes: SharedString<'a>,
    /// The parsed headers from the start of bytes
    headers: HeaderMap<'a>,
    /// The index into bytes of the first non-header byte.
    body_offset: usize,
    /// The index into bytes at which the body ends (exclusive); the raw
    /// body is the span `body_offset..max(body_len, body_offset)`.
    body_len: usize,
    /// Conformance flags accumulated for this part
    conformance: MessageConformance,
    /// The child parts; empty for a non-multipart part
    parts: Vec<Self>,
    /// For multipart, the content that precedes the first boundary
    intro: SharedString<'a>,
    /// For multipart, the content that follows the last boundary
    outro: SharedString<'a>,
}
43
/// The MIME-related interpretation of a part's headers, as computed by
/// `Rfc2045Info::new`.
#[derive(PartialEq, Debug)]
pub struct Rfc2045Info {
    /// The transfer encoding from `Content-Transfer-Encoding`; falls back
    /// to `SevenBit` when the header is absent or cannot be parsed.
    pub encoding: ContentTransferEncoding,
    /// The charset named by the `charset` parameter of `Content-Type`
    /// (defaulting to `us-ascii`), or an error if that name is not a
    /// supported charset.
    pub charset: Result<&'static Encoding>,
    /// The parsed `Content-Type` header, if present and valid.
    pub content_type: Option<MimeParameters>,
    /// Whether the content type is textual; `true` when there is no
    /// content type.
    pub is_text: bool,
    /// Whether the content type is multipart; `false` when there is no
    /// content type.
    pub is_multipart: bool,
    /// Present when the part is marked inline, has a file name, or has a
    /// content id.
    pub attachment_options: Option<AttachmentOptions>,
    /// Set when any of the MIME headers consulted failed to parse.
    pub invalid_mime_headers: bool,
}
54
55impl Rfc2045Info {
56    // This must be infallible so that a basic mime structure can be parsed
57    // even if the mime headers are a bit borked
58    fn new(headers: &HeaderMap) -> Self {
59        let mut invalid_mime_headers = false;
60        let encoding = match headers.content_transfer_encoding() {
61            Ok(Some(cte)) => match cte
62                .value
63                .to_str()
64                .map_err(|_| ())
65                .and_then(|s| ContentTransferEncoding::from_str(s).map_err(|_| ()))
66            {
67                Ok(encoding) => encoding,
68                Err(_) => {
69                    invalid_mime_headers = true;
70                    ContentTransferEncoding::SevenBit
71                }
72            },
73            Ok(None) => ContentTransferEncoding::SevenBit,
74            Err(_) => {
75                invalid_mime_headers = true;
76                ContentTransferEncoding::SevenBit
77            }
78        };
79
80        let content_type = match headers.content_type() {
81            Ok(ct) => ct,
82            Err(_) => {
83                invalid_mime_headers = true;
84                None
85            }
86        };
87
88        let mut ct_name = None;
89        let charset = if let Some(ct) = &content_type {
90            ct_name = ct.get("name");
91            ct.get("charset")
92        } else {
93            None
94        };
95        let charset = charset.unwrap_or_else(|| "us-ascii".into());
96
97        let charset = match charset.to_str() {
98            Ok(charset) => Encoding::by_name(&*charset).ok_or_else(|| {
99                MailParsingError::BodyParse(format!("unsupported charset {charset}"))
100            }),
101            Err(_) => Err(MailParsingError::BodyParse(format!(
102                "non-ascii charset name {charset}"
103            ))),
104        };
105
106        let (is_text, is_multipart) = if let Some(ct) = &content_type {
107            (ct.is_text(), ct.is_multipart())
108        } else {
109            (true, false)
110        };
111
112        let mut inline = false;
113        let mut cd_file_name = None;
114
115        match headers.content_disposition() {
116            Ok(Some(cd)) => {
117                inline = cd.value == "inline";
118                cd_file_name = cd.get("filename");
119            }
120            Ok(None) => {}
121            Err(_) => {
122                invalid_mime_headers = true;
123            }
124        };
125
126        let content_id = match headers.content_id() {
127            Ok(cid) => cid.map(|cid| cid.0),
128            Err(_) => {
129                invalid_mime_headers = true;
130                None
131            }
132        };
133
134        let file_name = match (cd_file_name, ct_name) {
135            (Some(name), _) | (None, Some(name)) => Some(name),
136            (None, None) => None,
137        };
138
139        let attachment_options = if inline || file_name.is_some() || content_id.is_some() {
140            Some(AttachmentOptions {
141                file_name,
142                inline,
143                content_id,
144            })
145        } else {
146            None
147        };
148
149        Self {
150            encoding,
151            charset,
152            content_type,
153            is_text,
154            is_multipart,
155            attachment_options,
156            invalid_mime_headers,
157        }
158    }
159
160    pub fn content_type(&self) -> Option<&str> {
161        self.content_type
162            .as_ref()
163            .and_then(|params| params.value.to_str().ok())
164    }
165}
166
167impl<'a> MimePart<'a> {
168    /// Parse some data into a tree of MimeParts
169    pub fn parse<S>(bytes: S) -> Result<Self>
170    where
171        S: IntoSharedString<'a>,
172    {
173        let (bytes, base_conformance) = bytes.into_shared_string();
174        Self::parse_impl(bytes, base_conformance, true)
175    }
176
177    /// Obtain a version of self that has a static lifetime
178    pub fn to_owned(&self) -> MimePart<'static> {
179        MimePart {
180            bytes: self.bytes.to_owned(),
181            headers: self.headers.to_owned(),
182            body_offset: self.body_offset,
183            body_len: self.body_len,
184            conformance: self.conformance,
185            parts: self.parts.iter().map(|p| p.to_owned()).collect(),
186            intro: self.intro.to_owned(),
187            outro: self.outro.to_owned(),
188        }
189    }
190
191    fn parse_impl(
192        bytes: SharedString<'a>,
193        base_conformance: MessageConformance,
194        is_top_level: bool,
195    ) -> Result<Self> {
196        let HeaderParseResult {
197            headers,
198            body_offset,
199            overall_conformance: mut conformance,
200        } = Header::parse_headers(bytes.clone())?;
201
202        conformance |= base_conformance;
203
204        let body_len = bytes.len();
205
206        if !bytes.as_bytes().is_ascii() {
207            conformance.set(MessageConformance::NEEDS_TRANSFER_ENCODING, true);
208        }
209        {
210            let mut prev = 0;
211            for idx in memchr::memchr_iter(b'\n', bytes.as_bytes()) {
212                if idx - prev > 78 {
213                    conformance.set(MessageConformance::LINE_TOO_LONG, true);
214                    break;
215                }
216                prev = idx;
217            }
218        }
219        conformance.set(
220            MessageConformance::NON_CANONICAL_LINE_ENDINGS,
221            has_lone_cr_or_lf(bytes.as_bytes()),
222        );
223
224        if is_top_level {
225            conformance.set(
226                MessageConformance::MISSING_DATE_HEADER,
227                !matches!(headers.date(), Ok(Some(_))),
228            );
229            conformance.set(
230                MessageConformance::MISSING_MESSAGE_ID_HEADER,
231                !matches!(headers.message_id(), Ok(Some(_))),
232            );
233            conformance.set(
234                MessageConformance::MISSING_MIME_VERSION,
235                match headers.mime_version() {
236                    Ok(Some(v)) => v != "1.0",
237                    _ => true,
238                },
239            );
240        }
241
242        let mut part = Self {
243            bytes,
244            headers,
245            body_offset,
246            body_len,
247            conformance,
248            parts: vec![],
249            intro: SharedString::Borrowed(b""),
250            outro: SharedString::Borrowed(b""),
251        };
252
253        part.recursive_parse()?;
254
255        Ok(part)
256    }
257
    /// Splits a multipart body into child parts, parsing each child
    /// recursively.
    ///
    /// The headers are interpreted via `Rfc2045Info::new`; if any MIME
    /// header failed to parse, `INVALID_MIME_HEADERS` is added to this
    /// part's conformance.  Nothing further happens unless the content
    /// type is multipart and carries a `boundary` parameter.
    ///
    /// Once the first boundary is found this populates `intro`, resets
    /// `body_len` to 0, pushes one child per delimited section (merging
    /// each child's conformance flags into this part), and records the
    /// text that follows the closing `--boundary--` line in `outro`.
    ///
    /// # Errors
    /// Propagates any error returned while parsing a child part.
    fn recursive_parse(&mut self) -> Result<()> {
        let info = Rfc2045Info::new(&self.headers);
        if info.invalid_mime_headers {
            self.conformance |= MessageConformance::INVALID_MIME_HEADERS;
        }
        // Only proceed when there is a boundary parameter AND the content
        // type is multipart.
        if let Some((boundary, true)) = info
            .content_type
            .as_ref()
            .and_then(|ct| ct.get("boundary").map(|b| (b, info.is_multipart)))
        {
            // A boundary marker is only recognized at the start of a line,
            // so the preceding newline is made part of the search needle.
            let boundary = format!("\n--{boundary}");
            // Start one byte before the body so that a boundary on the very
            // first body line still has a preceding byte to match against.
            // NOTE(review): as a consequence, a non-empty intro begins with
            // that preceding byte and stops before the newline that starts
            // the boundary needle; confirm that write_message round-trips
            // a preamble as intended.
            let raw_body = self
                .bytes
                .slice(self.body_offset.saturating_sub(1)..self.bytes.len());

            let mut iter = memchr::memmem::find_iter(raw_body.as_bytes(), &boundary);
            if let Some(first_boundary_pos) = iter.next() {
                self.intro = raw_body.slice(0..first_boundary_pos);

                // When we create parts, we ignore the original body span in
                // favor of what we're parsing out here now
                self.body_len = 0;

                // Index just past the "\n--boundary" text of the current
                // boundary line.
                let mut boundary_end = first_boundary_pos + boundary.len();

                // Each child starts right after the newline that ends the
                // current boundary line.
                while let Some(part_start) =
                    memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                        .map(|p| p + boundary_end + 1)
                {
                    // The child runs to the next boundary, or to the end of
                    // the data when there are no more boundaries.
                    let part_end = iter
                        .next()
                        .map(|p| {
                            // P is the newline; we want to include it in the raw
                            // bytes for this part, so look beyond it
                            p + 1
                        })
                        .unwrap_or(raw_body.len());

                    let child = Self::parse_impl(
                        raw_body.slice(part_start..part_end),
                        MessageConformance::default(),
                        false,
                    )?;
                    self.conformance |= child.conformance;
                    self.parts.push(child);

                    boundary_end = part_end -
                        1 /* newline we adjusted for when assigning part_end */
                        + boundary.len();

                    // Not enough data left to hold a closing "--" suffix
                    // (this is also the case when no further boundary was
                    // found), so we are done.
                    if boundary_end + 2 > raw_body.len() {
                        break;
                    }
                    // "--boundary--" is the closing delimiter; whatever
                    // follows that line is the outro.
                    if &raw_body.as_bytes()[boundary_end..boundary_end + 2] == b"--" {
                        if let Some(after_boundary) =
                            memchr::memchr(b'\n', &raw_body.as_bytes()[boundary_end..])
                                .map(|p| p + boundary_end + 1)
                        {
                            self.outro = raw_body.slice(after_boundary..raw_body.len());
                        }
                        break;
                    }
                }
            }
        }

        Ok(())
    }
326
327    /// Recursively performs deeper conformance checks on the message.
328    /// At this time that includes attempting to decode any text parts
329    /// into UTF-8 to see if they are correctly annotated, but it may
330    /// include more checks in the future.
331    /// The results of the deep checks are combined with any conformance
332    /// issues detected during parsing, and returned.
333    pub fn deep_conformance_check(&self) -> MessageConformance {
334        if self.parts.is_empty() {
335            match self.extract_body(None) {
336                Ok((_, conformance)) => conformance,
337                Err(_) => self.conformance | MessageConformance::NEEDS_TRANSFER_ENCODING,
338            }
339        } else {
340            let mut conformance = self.conformance;
341            for p in &self.parts {
342                conformance |= p.deep_conformance_check();
343            }
344            conformance
345        }
346    }
347
    /// Returns the conformance flags determined during parsing.
    ///
    /// This is the value stored on the part; it does not run any of the
    /// deeper checks performed by `deep_conformance_check`.
    pub fn conformance(&self) -> MessageConformance {
        self.conformance
    }
352
353    /// Obtain a reference to the child parts
354    pub fn child_parts(&self) -> &[Self] {
355        &self.parts
356    }
357
    /// Obtain a mutable reference to the child parts.
    ///
    /// The underlying vector is exposed, so children can be added,
    /// removed or reordered as well as modified in place.
    pub fn child_parts_mut(&mut self) -> &mut Vec<Self> {
        &mut self.parts
    }
362
    /// Obtains a shared reference to the headers of this part.
    pub fn headers(&'_ self) -> &'_ HeaderMap<'_> {
        &self.headers
    }
367
    /// Obtain a mutable reference to the headers of this part, allowing
    /// headers to be added, removed or changed.
    pub fn headers_mut<'b>(&'b mut self) -> &'b mut HeaderMap<'a> {
        &mut self.headers
    }
372
373    /// Get the raw, transfer-encoded body
374    pub fn raw_body(&'_ self) -> SharedString<'_> {
375        self.bytes
376            .slice(self.body_offset..self.body_len.max(self.body_offset))
377    }
378
379    pub fn rfc2045_info(&self) -> Rfc2045Info {
380        Rfc2045Info::new(&self.headers)
381    }
382
383    /// Decode transfer decoding and return the body
384    pub fn body(&'_ self) -> Result<DecodedBody<'_>> {
385        let (body, _conformance) = self.extract_body(None)?;
386        Ok(body)
387    }
388
    /// Decodes the body of this part and reports the resulting
    /// conformance flags.
    ///
    /// The raw body is first transfer-decoded according to the
    /// `Content-Transfer-Encoding` (base64, quoted-printable, or copied
    /// verbatim for 7bit/8bit/binary).  For non-text content the decoded
    /// bytes are returned as `DecodedBody::Binary`.  For text content the
    /// bytes are decoded using the declared charset; if that fails, the
    /// fallbacks below are tried and `NEEDS_TRANSFER_ENCODING` is added to
    /// the returned flags:
    ///
    /// * when `options` is provided and has `detect_encoding` set, the
    ///   charset is guessed from the bytes; if no guess can be made the
    ///   bytes are returned as binary;
    /// * otherwise the bytes are tried as UTF-8, and returned as binary
    ///   if that also fails.
    ///
    /// # Errors
    /// Returns `MailParsingError::BodyParse` when base64 or
    /// quoted-printable decoding fails, and propagates the charset error
    /// when the part is text but its declared charset is not usable.
    fn extract_body(
        &'_ self,
        options: Option<&CheckFixSettings>,
    ) -> Result<(DecodedBody<'_>, MessageConformance)> {
        let info = Rfc2045Info::new(&self.headers);

        // Step 1: undo the transfer encoding
        let bytes = match info.encoding {
            ContentTransferEncoding::Base64 => {
                let data = self.raw_body();
                let bytes = data.as_bytes();
                BASE64_RFC2045.decode(bytes).map_err(|err| {
                    // Include the offending byte and a little surrounding
                    // context in the error message
                    let b = bytes[err.position] as char;
                    let region =
                        &bytes[err.position.saturating_sub(8)..(err.position + 8).min(bytes.len())];
                    let region = String::from_utf8_lossy(region);
                    MailParsingError::BodyParse(format!(
                        "base64 decode: {err:#} b={b:?} in {region}"
                    ))
                })?
            }
            ContentTransferEncoding::QuotedPrintable => quoted_printable::decode(
                self.raw_body().as_bytes(),
                quoted_printable::ParseMode::Robust,
            )
            .map_err(|err| {
                MailParsingError::BodyParse(format!("quoted printable decode: {err:#}"))
            })?,
            // These encodings carry the content as-is
            ContentTransferEncoding::SevenBit
            | ContentTransferEncoding::EightBit
            | ContentTransferEncoding::Binary => self.raw_body().as_bytes().to_vec(),
        };

        // Step 2: for text, convert from the declared charset
        if info.is_text {
            let charset = info.charset?;

            match charset.decode_simple(&bytes) {
                Ok(decoded) => Ok((
                    DecodedBody::Text(decoded.to_string().into()),
                    self.conformance,
                )),
                Err(_err) => {
                    // The declared charset doesn't match the content.
                    if let Some(settings) = options {
                        if settings.detect_encoding {
                            let norm_settings = NormalizerSettings {
                                include_encodings: settings.include_encodings.clone(),
                                exclude_encodings: settings.exclude_encodings.clone(),
                                ..Default::default()
                            };

                            if let Ok(guess) =
                                charset_normalizer_rs::from_bytes(&*bytes, Some(norm_settings))
                            {
                                if let Some(decoded) =
                                    guess.get_best().and_then(|best| best.decoded_payload())
                                {
                                    return Ok((
                                        DecodedBody::Text(decoded.to_string().into()),
                                        MessageConformance::NEEDS_TRANSFER_ENCODING
                                            | self.conformance,
                                    ));
                                }
                            }

                            // No charset was detected.  This is a strong indicator
                            // that the content is actually binary, according to
                            // the docs of the detector, but we know that it should
                            // be text.  Regardless, we can't represent it as UTF-8
                            // here.
                            // We'll return it as a binary part and let the caller
                            // decide if that is an issue
                            return Ok((
                                DecodedBody::Binary(bytes),
                                MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
                            ));
                        }
                    }

                    // We don't know what the charset is, just that this should
                    // be some kind of text.  For the sake of compatibility with
                    // international email, let's try it as UTF-8, and if that
                    // sticks, we'll use it.
                    if let Ok(decoded) = std::str::from_utf8(&bytes) {
                        return Ok((
                            DecodedBody::Text(decoded.to_string().into()),
                            MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
                        ));
                    }

                    // Who knows what it is? Return it as binary and leave the
                    // final decision on what to do with it to our caller.
                    Ok((
                        DecodedBody::Binary(bytes),
                        MessageConformance::NEEDS_TRANSFER_ENCODING | self.conformance,
                    ))
                }
            }
        } else {
            Ok((DecodedBody::Binary(bytes), self.conformance))
        }
    }
489
490    /// Re-constitute the message.
491    /// Each element will be parsed out, and the parsed form used
492    /// to build a new message.
493    /// This has the side effect of "fixing" non-conforming elements,
494    /// but may come at the cost of "losing" the non-sensical or otherwise
495    /// out of spec elements in the rebuilt message
496    pub fn rebuild(&self, settings: Option<&CheckFixSettings>) -> Result<Self> {
497        let info = Rfc2045Info::new(&self.headers);
498
499        let mut children = vec![];
500        for part in &self.parts {
501            children.push(part.rebuild(settings)?);
502        }
503
504        let mut rebuilt = if children.is_empty() {
505            let (body, _conformance) = self.extract_body(settings)?;
506            match body {
507                DecodedBody::Text(text) => {
508                    let ct = info
509                        .content_type
510                        .as_ref()
511                        .map(|ct| ct.value.as_bstr())
512                        .unwrap_or_else(|| BStr::new("text/plain"));
513                    Self::new_text(ct, text.as_bytes())?
514                }
515                DecodedBody::Binary(data) => {
516                    let ct = info
517                        .content_type
518                        .as_ref()
519                        .map(|ct| ct.value.as_bstr())
520                        .unwrap_or_else(|| BStr::new("application/octet-stream"));
521                    Self::new_binary(ct, &data, info.attachment_options.as_ref())?
522                }
523            }
524        } else {
525            let ct = info.content_type.ok_or_else(|| {
526                MailParsingError::BodyParse(
527                    "multipart message has no content-type information!?".to_string(),
528                )
529            })?;
530            Self::new_multipart(
531                &ct.value,
532                children,
533                ct.get("boundary").as_deref().map(|b| b.as_bytes()),
534            )?
535        };
536
537        for hdr in self.headers.iter() {
538            let name = hdr.get_name();
539            if name.eq_ignore_ascii_case(b"Content-ID") {
540                continue;
541            }
542
543            // Merge in any MimeParameters that we might otherwise have lost
544            // in the rebuild
545            if name.eq_ignore_ascii_case(b"Content-Type") {
546                if let Ok(params) = hdr.as_content_type() {
547                    let Some(mut dest) = rebuilt.headers_mut().content_type()? else {
548                        continue;
549                    };
550
551                    for (k, v) in params.parameter_map() {
552                        if dest.get(&k).is_none() {
553                            dest.set(&k, &v);
554                        }
555                    }
556
557                    rebuilt.headers_mut().set_content_type(dest)?;
558                }
559                continue;
560            }
561            if name.eq_ignore_ascii_case(b"Content-Transfer-Encoding") {
562                if let Ok(params) = hdr.as_content_transfer_encoding() {
563                    let Some(mut dest) = rebuilt.headers_mut().content_transfer_encoding()? else {
564                        continue;
565                    };
566
567                    for (k, v) in params.parameter_map() {
568                        if dest.get(&k).is_none() {
569                            dest.set(&k, &v);
570                        }
571                    }
572
573                    rebuilt.headers_mut().set_content_transfer_encoding(dest)?;
574                }
575                continue;
576            }
577            if name.eq_ignore_ascii_case(b"Content-Disposition") {
578                if let Ok(params) = hdr.as_content_disposition() {
579                    let Some(mut dest) = rebuilt.headers_mut().content_disposition()? else {
580                        continue;
581                    };
582
583                    for (k, v) in params.parameter_map() {
584                        if dest.get(&k).is_none() {
585                            dest.set(&k, &v);
586                        }
587                    }
588
589                    rebuilt.headers_mut().set_content_disposition(dest)?;
590                }
591                continue;
592            }
593
594            if let Ok(hdr) = hdr.rebuild() {
595                rebuilt.headers_mut().push(hdr);
596            }
597        }
598
599        Ok(rebuilt)
600    }
601
602    /// Write the message content to the provided output stream
603    pub fn write_message<W: std::io::Write>(&self, out: &mut W) -> Result<()> {
604        let line_ending = if self
605            .conformance
606            .contains(MessageConformance::NON_CANONICAL_LINE_ENDINGS)
607        {
608            "\n"
609        } else {
610            "\r\n"
611        };
612
613        for hdr in self.headers.iter() {
614            hdr.write_header(out)
615                .map_err(|_| MailParsingError::WriteMessageIOError)?;
616        }
617        out.write_all(line_ending.as_bytes())
618            .map_err(|_| MailParsingError::WriteMessageIOError)?;
619
620        if self.parts.is_empty() {
621            out.write_all(self.raw_body().as_bytes())
622                .map_err(|_| MailParsingError::WriteMessageIOError)?;
623        } else {
624            let info = Rfc2045Info::new(&self.headers);
625            let ct = info.content_type.ok_or({
626                MailParsingError::WriteMessageWtf(
627                    "expected to have Content-Type when there are child parts",
628                )
629            })?;
630            let boundary = ct.get("boundary").ok_or({
631                MailParsingError::WriteMessageWtf("expected Content-Type to have a boundary")
632            })?;
633            out.write_all(self.intro.as_bytes())
634                .map_err(|_| MailParsingError::WriteMessageIOError)?;
635            for p in &self.parts {
636                write!(out, "--{boundary}{line_ending}")
637                    .map_err(|_| MailParsingError::WriteMessageIOError)?;
638                p.write_message(out)?;
639            }
640            write!(out, "--{boundary}--{line_ending}")
641                .map_err(|_| MailParsingError::WriteMessageIOError)?;
642            out.write_all(self.outro.as_bytes())
643                .map_err(|_| MailParsingError::WriteMessageIOError)?;
644        }
645        Ok(())
646    }
647
648    /// Convenience method wrapping write_message that returns
649    /// the formatted message as a standalone string
650    pub fn to_message_bytes(&self) -> Vec<u8> {
651        let mut out = vec![];
652        self.write_message(&mut out).unwrap();
653        out
654    }
655
656    pub fn replace_text_body(
657        &mut self,
658        content_type: impl AsRef<[u8]>,
659        content: impl AsRef<BStr>,
660    ) -> Result<()> {
661        let mut new_part = Self::new_text(content_type, content)?;
662        self.bytes = new_part.bytes;
663        self.body_offset = new_part.body_offset;
664        self.body_len = new_part.body_len;
665        // Remove any rfc2047 headers that might reflect how the content
666        // is encoded. Note that we preserve Content-Disposition as that
667        // isn't related purely to the how the content is encoded
668        self.headers.remove_all_named("Content-Type");
669        self.headers.remove_all_named("Content-Transfer-Encoding");
670        // And add any from the new part
671        self.headers.append(&mut new_part.headers.headers);
672        Ok(())
673    }
674
675    pub fn replace_binary_body(&mut self, content_type: &[u8], content: &[u8]) -> Result<()> {
676        let mut new_part = Self::new_binary(content_type, content, None)?;
677        self.bytes = new_part.bytes;
678        self.body_offset = new_part.body_offset;
679        self.body_len = new_part.body_len;
680        // Remove any rfc2047 headers that might reflect how the content
681        // is encoded. Note that we preserve Content-Disposition as that
682        // isn't related purely to the how the content is encoded
683        self.headers.remove_all_named("Content-Type");
684        self.headers.remove_all_named("Content-Transfer-Encoding");
685        // And add any from the new part
686        self.headers.append(&mut new_part.headers.headers);
687        Ok(())
688    }
689
690    pub fn new_no_transfer_encoding(content_type: &str, bytes: &[u8]) -> Result<Self> {
691        if bytes.iter().any(|b| !b.is_ascii()) {
692            return Err(MailParsingError::EightBit);
693        }
694
695        let mut headers = HeaderMap::default();
696
697        let ct = MimeParameters::new(content_type);
698        headers.set_content_type(ct)?;
699
700        let bytes = String::from_utf8_lossy(bytes).to_string();
701        let body_len = bytes.len();
702
703        Ok(Self {
704            bytes: bytes.into(),
705            headers,
706            body_offset: 0,
707            body_len,
708            conformance: MessageConformance::default(),
709            parts: vec![],
710            intro: "".into(),
711            outro: "".into(),
712        })
713    }
714
715    /// Constructs a new part with textual utf8 content.
716    /// quoted-printable transfer encoding will be applied,
717    /// unless it is smaller to represent the text in base64
718    pub fn new_text(content_type: impl AsRef<[u8]>, content: impl AsRef<BStr>) -> Result<Self> {
719        let content = content.as_ref();
720        // We'll probably use qp, so speculatively do the work
721        let qp_encoded = quoted_printable::encode(content);
722
723        let (mut encoded, encoding) = if qp_encoded == content {
724            (qp_encoded, None)
725        } else if qp_encoded.len() <= BASE64_RFC2045.encode_len(content.len()) {
726            (qp_encoded, Some("quoted-printable"))
727        } else {
728            // Turns out base64 will be smaller; perhaps the content
729            // is dominated by non-ASCII text?
730            (BASE64_RFC2045.encode(content).into_bytes(), Some("base64"))
731        };
732
733        if !encoded.ends_with(b"\r\n") {
734            encoded.extend_from_slice(b"\r\n");
735        }
736        let mut headers = HeaderMap::default();
737
738        let mut ct = MimeParameters::new(content_type);
739        ct.set(
740            "charset",
741            if content.is_ascii() {
742                "us-ascii"
743            } else {
744                "utf-8"
745            },
746        );
747        headers.set_content_type(ct)?;
748
749        if let Some(encoding) = encoding {
750            headers.set_content_transfer_encoding(MimeParameters::new(encoding))?;
751        }
752
753        let body_len = encoded.len();
754        let bytes =
755            String::from_utf8(encoded).expect("transfer encoder to produce valid ASCII output");
756
757        Ok(Self {
758            bytes: bytes.into(),
759            headers,
760            body_offset: 0,
761            body_len,
762            conformance: MessageConformance::default(),
763            parts: vec![],
764            intro: "".into(),
765            outro: "".into(),
766        })
767    }
768
    /// Constructs a new `text/plain` part from `content`.
    ///
    /// This is a shorthand for calling `new_text` with a content type of
    /// `text/plain`.
    pub fn new_text_plain(content: impl AsRef<BStr>) -> Result<Self> {
        Self::new_text("text/plain", content)
    }
772
    /// Constructs a new `text/html` part from `content`.
    ///
    /// This is a shorthand for calling `new_text` with a content type of
    /// `text/html`.
    pub fn new_html(content: impl AsRef<BStr>) -> Result<Self> {
        Self::new_text("text/html", content)
    }
776
777    pub fn new_multipart(
778        content_type: impl AsRef<[u8]>,
779        parts: Vec<Self>,
780        boundary: Option<&[u8]>,
781    ) -> Result<Self> {
782        let mut headers = HeaderMap::default();
783
784        let mut ct = MimeParameters::new(content_type);
785        match boundary {
786            Some(b) => {
787                ct.set("boundary", b);
788            }
789            None => {
790                // Generate a random boundary
791                let uuid = uuid::Uuid::new_v4();
792                let boundary = data_encoding::BASE64_NOPAD.encode(uuid.as_bytes());
793                ct.set("boundary", &boundary);
794            }
795        }
796        headers.set_content_type(ct)?;
797
798        Ok(Self {
799            bytes: "".into(),
800            headers,
801            body_offset: 0,
802            body_len: 0,
803            conformance: MessageConformance::default(),
804            parts,
805            intro: "".into(),
806            outro: "".into(),
807        })
808    }
809
810    pub fn new_binary(
811        content_type: impl AsRef<[u8]>,
812        content: &[u8],
813        options: Option<&AttachmentOptions>,
814    ) -> Result<Self> {
815        let mut encoded = BASE64_RFC2045.encode(content);
816        if !encoded.ends_with("\r\n") {
817            encoded.push_str("\r\n");
818        }
819        let mut headers = HeaderMap::default();
820
821        let mut ct = MimeParameters::new(content_type);
822
823        if let Some(opts) = options {
824            let mut cd = MimeParameters::new(if opts.inline { "inline" } else { "attachment" });
825            if let Some(name) = &opts.file_name {
826                cd.set("filename", name);
827                let encoding = if name.chars().any(|c| !c.is_ascii()) {
828                    MimeParameterEncoding::QuotedRfc2047
829                } else {
830                    MimeParameterEncoding::None
831                };
832                ct.set_with_encoding("name", name, encoding);
833            }
834            headers.set_content_disposition(cd)?;
835
836            if let Some(id) = &opts.content_id {
837                headers.set_content_id(MessageID(id.clone()))?;
838            }
839        }
840
841        headers.set_content_type(ct)?;
842        headers.set_content_transfer_encoding(MimeParameters::new("base64"))?;
843
844        let body_len = encoded.len();
845
846        Ok(Self {
847            bytes: encoded.into(),
848            headers,
849            body_offset: 0,
850            body_len,
851            conformance: MessageConformance::default(),
852            parts: vec![],
853            intro: "".into(),
854            outro: "".into(),
855        })
856    }
857
858    /// Returns a SimplifiedStructure representation of the mime tree,
859    /// with the (probable) primary text/plain and text/html parts
860    /// pulled out, and the remaining parts recorded as a flat
861    /// attachments array
862    pub fn simplified_structure(&'a self) -> Result<SimplifiedStructure<'a>> {
863        let parts = self.simplified_structure_pointers()?;
864
865        let mut text = None;
866        let mut html = None;
867        let mut amp_html = None;
868
869        let headers = &self
870            .resolve_ptr(parts.header_part)
871            .expect("header part to always be valid")
872            .headers;
873
874        if let Some(p) = parts.text_part.and_then(|p| self.resolve_ptr(p)) {
875            text = match p.body()? {
876                DecodedBody::Text(t) => Some(t),
877                DecodedBody::Binary(_) => {
878                    return Err(MailParsingError::BodyParse(
879                        "expected text/plain part to be text, but it is binary".to_string(),
880                    ))
881                }
882            };
883        }
884        if let Some(p) = parts.html_part.and_then(|p| self.resolve_ptr(p)) {
885            html = match p.body()? {
886                DecodedBody::Text(t) => Some(t),
887                DecodedBody::Binary(_) => {
888                    return Err(MailParsingError::BodyParse(
889                        "expected text/html part to be text, but it is binary".to_string(),
890                    ))
891                }
892            };
893        }
894        if let Some(p) = parts.amp_html_part.and_then(|p| self.resolve_ptr(p)) {
895            amp_html = match p.body()? {
896                DecodedBody::Text(t) => Some(t),
897                DecodedBody::Binary(_) => {
898                    return Err(MailParsingError::BodyParse(
899                        "expected text/x-amp-html part to be text, but it is binary".to_string(),
900                    ))
901                }
902            };
903        }
904
905        let mut attachments = vec![];
906        for ptr in parts.attachments {
907            attachments.push(self.resolve_ptr(ptr).expect("pointer to be valid").clone());
908        }
909
910        Ok(SimplifiedStructure {
911            text,
912            html,
913            amp_html,
914            headers,
915            attachments,
916        })
917    }
918
919    /// Resolve a PartPointer to the corresponding MimePart
920    pub fn resolve_ptr(&self, ptr: PartPointer) -> Option<&Self> {
921        let mut current = self;
922        let mut cursor = ptr.0.as_slice();
923
924        loop {
925            match cursor.first() {
926                Some(&idx) => {
927                    current = current.parts.get(idx as usize)?;
928                    cursor = &cursor[1..];
929                }
930                None => {
931                    // We have completed the walk
932                    return Some(current);
933                }
934            }
935        }
936    }
937
938    /// Resolve a PartPointer to the corresponding MimePart, for mutable access
939    pub fn resolve_ptr_mut(&mut self, ptr: PartPointer) -> Option<&mut Self> {
940        let mut current = self;
941        let mut cursor = ptr.0.as_slice();
942
943        loop {
944            match cursor.first() {
945                Some(&idx) => {
946                    current = current.parts.get_mut(idx as usize)?;
947                    cursor = &cursor[1..];
948                }
949                None => {
950                    // We have completed the walk
951                    return Some(current);
952                }
953            }
954        }
955    }
956
957    /// Returns a set of PartPointers that locate the (probable) primary
958    /// text/plain and text/html parts, and the remaining parts recorded
959    /// as a flat attachments array.  The resulting
960    /// PartPointers can be resolved to their actual instances for both
961    /// immutable and mutable operations via resolve_ptr and resolve_ptr_mut.
962    pub fn simplified_structure_pointers(&self) -> Result<SimplifiedStructurePointers> {
963        self.simplified_structure_pointers_impl(None)
964    }
965
966    fn simplified_structure_pointers_impl(
967        &self,
968        my_idx: Option<u8>,
969    ) -> Result<SimplifiedStructurePointers> {
970        let info = Rfc2045Info::new(&self.headers);
971        let is_inline = info
972            .attachment_options
973            .as_ref()
974            .map(|ao| ao.inline)
975            .unwrap_or(true);
976
977        if let Some(ct) = &info.content_type {
978            if is_inline {
979                if ct.value == "text/plain" {
980                    return Ok(SimplifiedStructurePointers {
981                        amp_html_part: None,
982                        text_part: Some(PartPointer::root_or_nth(my_idx)),
983                        html_part: None,
984                        header_part: PartPointer::root_or_nth(my_idx),
985                        attachments: vec![],
986                    });
987                }
988                if ct.value == "text/html" {
989                    return Ok(SimplifiedStructurePointers {
990                        amp_html_part: None,
991                        html_part: Some(PartPointer::root_or_nth(my_idx)),
992                        text_part: None,
993                        header_part: PartPointer::root_or_nth(my_idx),
994                        attachments: vec![],
995                    });
996                }
997                if ct.value == "text/x-amp-html" {
998                    return Ok(SimplifiedStructurePointers {
999                        amp_html_part: Some(PartPointer::root_or_nth(my_idx)),
1000                        html_part: None,
1001                        text_part: None,
1002                        header_part: PartPointer::root_or_nth(my_idx),
1003                        attachments: vec![],
1004                    });
1005                }
1006            }
1007
1008            if ct.value.starts_with_str("multipart/") {
1009                let mut text_part = None;
1010                let mut html_part = None;
1011                let mut amp_html_part = None;
1012                let mut attachments = vec![];
1013
1014                for (i, p) in self.parts.iter().enumerate() {
1015                    let part_idx = i.try_into().map_err(|_| MailParsingError::TooManyParts)?;
1016                    if let Ok(s) = p.simplified_structure_pointers_impl(Some(part_idx)) {
1017                        if let Some(p) = s.text_part {
1018                            let ptr = PartPointer::root_or_nth(my_idx).append(p);
1019                            if text_part.is_none() {
1020                                text_part.replace(ptr);
1021                            } else {
1022                                attachments.push(ptr);
1023                            }
1024                        }
1025                        if let Some(p) = s.html_part {
1026                            let ptr = PartPointer::root_or_nth(my_idx).append(p);
1027                            if html_part.is_none() {
1028                                html_part.replace(ptr);
1029                            } else {
1030                                attachments.push(ptr);
1031                            }
1032                        }
1033                        if let Some(p) = s.amp_html_part {
1034                            let ptr = PartPointer::root_or_nth(my_idx).append(p);
1035                            if amp_html_part.is_none() {
1036                                amp_html_part.replace(ptr);
1037                            } else {
1038                                attachments.push(ptr);
1039                            }
1040                        }
1041                        for attachment in s.attachments {
1042                            attachments.push(PartPointer::root_or_nth(my_idx).append(attachment));
1043                        }
1044                    }
1045                }
1046
1047                return Ok(SimplifiedStructurePointers {
1048                    amp_html_part,
1049                    html_part,
1050                    text_part,
1051                    header_part: PartPointer::root_or_nth(my_idx),
1052                    attachments,
1053                });
1054            }
1055
1056            return Ok(SimplifiedStructurePointers {
1057                html_part: None,
1058                text_part: None,
1059                amp_html_part: None,
1060                header_part: PartPointer::root_or_nth(my_idx),
1061                attachments: vec![PartPointer::root_or_nth(my_idx)],
1062            });
1063        }
1064
1065        // Assume text/plain content-type
1066        Ok(SimplifiedStructurePointers {
1067            text_part: Some(PartPointer::root_or_nth(my_idx)),
1068            html_part: None,
1069            amp_html_part: None,
1070            header_part: PartPointer::root_or_nth(my_idx),
1071            attachments: vec![],
1072        })
1073    }
1074
1075    pub fn check_fix_conformance(
1076        &self,
1077        check: MessageConformance,
1078        fix: MessageConformance,
1079        settings: CheckFixSettings,
1080    ) -> Result<Option<Self>> {
1081        let mut msg = self.clone();
1082        let conformance = msg.deep_conformance_check();
1083
1084        // Don't raise errors for things that we're going to fix anyway
1085        let check = check - fix;
1086
1087        if check.intersects(conformance) {
1088            let problems = check.intersection(conformance);
1089            return Err(MailParsingError::ConformanceIssues(problems));
1090        }
1091
1092        if !fix.intersects(conformance) {
1093            return Ok(None);
1094        }
1095
1096        let to_fix = fix.intersection(conformance);
1097
1098        let missing_headers_only = to_fix
1099            .difference(
1100                MessageConformance::MISSING_DATE_HEADER
1101                    | MessageConformance::MISSING_MIME_VERSION
1102                    | MessageConformance::MISSING_MESSAGE_ID_HEADER,
1103            )
1104            .is_empty();
1105
1106        if !missing_headers_only {
1107            if to_fix.contains(MessageConformance::NEEDS_TRANSFER_ENCODING) {
1108                // Something is 8-bit. If we're lucky, it's simply UTF-8,
1109                // but it could be some other "legacy" charset encoding.
1110                // If we've been asked to detect an encoding, try that now,
1111                // and re-parse the message with the re-coded input.
1112                // Otherwise, we'll attempt a lossy conversion to UTF-8
1113                // and the resulting message will likely include unicode
1114                // replacement characters.
1115
1116                if settings.detect_encoding {
1117                    if let Some(data_bytes) = &settings.data_bytes {
1118                        let norm_settings = NormalizerSettings {
1119                            include_encodings: settings.include_encodings.clone(),
1120                            exclude_encodings: settings.exclude_encodings.clone(),
1121                            ..Default::default()
1122                        };
1123
1124                        let guess =
1125                            charset_normalizer_rs::from_bytes(&*data_bytes, Some(norm_settings))
1126                                .map_err(|err| MailParsingError::CharsetDetectionFailed(err))?;
1127                        if let Some(best) = guess.get_best() {
1128                            if let Some(decoded) = best.decoded_payload() {
1129                                msg = MimePart::parse(decoded.to_string())?;
1130                            }
1131                        }
1132                    }
1133                }
1134            }
1135
1136            msg = msg.rebuild(Some(&settings))?;
1137        }
1138
1139        if to_fix.contains(MessageConformance::MISSING_DATE_HEADER) {
1140            msg.headers_mut().set_date(Utc::now())?;
1141        }
1142
1143        if to_fix.contains(MessageConformance::MISSING_MIME_VERSION) {
1144            msg.headers_mut().set_mime_version("1.0")?;
1145        }
1146
1147        if to_fix.contains(MessageConformance::MISSING_MESSAGE_ID_HEADER) {
1148            if let Some(message_id) = &settings.message_id {
1149                msg.headers_mut()
1150                    .set_message_id(MessageID(message_id.clone().into()))?;
1151            }
1152        }
1153
1154        Ok(Some(msg))
1155    }
1156}
1157
/// Options that steer how `MimePart::check_fix_conformance` repairs a
/// message. All deserialized fields are optional and fall back to their
/// `Default` value when absent.
#[derive(Default, Debug, Clone, Deserialize)]
pub struct CheckFixSettings {
    /// When true, and the message needs a transfer encoding to be
    /// applied, try to detect the charset of `data_bytes` and re-parse
    /// the message from the decoded text before rebuilding it.
    #[serde(default)]
    pub detect_encoding: bool,
    /// Handed to the charset detector as its `include_encodings` setting.
    #[serde(default)]
    pub include_encodings: Vec<String>,
    /// Handed to the charset detector as its `exclude_encodings` setting.
    #[serde(default)]
    pub exclude_encodings: Vec<String>,
    /// The Message-ID to set when that header is missing and is to be
    /// fixed. When `None`, a missing Message-ID is left as it is.
    #[serde(default)]
    pub message_id: Option<String>,
    /// The raw bytes that charset detection operates on. Never read from
    /// serialized input (`serde(skip)`), so it has to be assigned in code.
    #[serde(skip)]
    pub data_bytes: Option<Arc<Box<[u8]>>>,
}
1171
/// Locates a MimePart inside a mime tree by recording the walk that is
/// needed to reach it from the root. A PartPointer is a sequence of
/// integers, one per level of the tree: each one is the index of the
/// child to step into, and the node that remains once no more indices
/// are left is the target. eg: `[]` indicates the root part, while
/// `[0]` is the 0th child of the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartPointer(Vec<u8>);

impl PartPointer {
    /// Returns the PartPointer that designates the root node
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// Returns a PartPointer to the nth child when `n` is `Some`,
    /// and a PartPointer to the root node when it is `None`
    pub fn root_or_nth(n: Option<u8>) -> Self {
        n.map_or_else(Self::root, Self::nth)
    }

    /// Returns the PartPointer that designates the nth child
    pub fn nth(n: u8) -> Self {
        Self(vec![n])
    }

    /// Consumes both pointers and returns the path of self followed
    /// by the path of other; that is, other expressed relative to
    /// the node that self designates
    pub fn append(mut self, other: Self) -> Self {
        self.0.extend(other.0);
        self
    }

    /// Renders the path as its indices in decimal, joined by `.`.
    /// The root pointer renders as the empty string.
    pub fn id_string(&self) -> String {
        self.0
            .iter()
            .map(u8::to_string)
            .collect::<Vec<_>>()
            .join(".")
    }
}
1219
/// The locations of the notable parts of a mime tree, as computed by
/// `MimePart::simplified_structure_pointers`. Each pointer can be turned
/// back into the part that it designates with `MimePart::resolve_ptr` or
/// `MimePart::resolve_ptr_mut`.
#[derive(Debug, Clone)]
pub struct SimplifiedStructurePointers {
    /// The primary text/plain part
    pub text_part: Option<PartPointer>,
    /// The primary text/html part
    pub html_part: Option<PartPointer>,
    /// The primary text/x-amp-html part
    pub amp_html_part: Option<PartPointer>,
    /// The "top level" set of headers for the message
    pub header_part: PartPointer,
    /// all other (terminal) parts are attachments
    pub attachments: Vec<PartPointer>,
}
1233
/// The result of `MimePart::simplified_structure`: the primary text
/// bodies in decoded form, the top level headers, and the remaining
/// parts as a flat list.
#[derive(Debug, Clone, PartialEq)]
pub struct SimplifiedStructure<'a> {
    /// The decoded body of the primary text/plain part, if there is one
    pub text: Option<SharedString<'a>>,
    /// The decoded body of the primary text/html part, if there is one
    pub html: Option<SharedString<'a>>,
    /// The decoded body of the primary text/x-amp-html part, if there is one
    pub amp_html: Option<SharedString<'a>>,
    /// The headers of the part that the header pointer designates,
    /// borrowed from the mime tree
    pub headers: &'a HeaderMap<'a>,
    /// Clones of the parts that were classified as attachments
    pub attachments: Vec<MimePart<'a>>,
}
1242
/// Describes how a part is to be presented as an attachment.
/// `MimePart::new_binary` uses it to populate the Content-Disposition,
/// Content-Type and Content-ID headers. Unknown fields are rejected
/// when deserializing.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct AttachmentOptions {
    /// The file name for the attachment. `MimePart::new_binary` records it
    /// as the `filename` parameter of Content-Disposition and as the
    /// `name` parameter of Content-Type.
    #[serde(default)]
    pub file_name: Option<BString>,
    /// Selects a Content-Disposition of `inline` when true, and of
    /// `attachment` when false.
    #[serde(default)]
    pub inline: bool,
    /// The value for the Content-ID header, if one is wanted.
    #[serde(default)]
    pub content_id: Option<BString>,
}
1253
/// The recognized values of the Content-Transfer-Encoding header.
/// Use the `FromStr` implementation to obtain one from the header text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// Spelled `7bit` in the header
    SevenBit,
    /// Spelled `8bit` in the header
    EightBit,
    /// Spelled `binary` in the header
    Binary,
    /// Spelled `quoted-printable` in the header
    QuotedPrintable,
    /// Spelled `base64` in the header
    Base64,
}
1262
1263impl FromStr for ContentTransferEncoding {
1264    type Err = MailParsingError;
1265
1266    fn from_str(s: &str) -> Result<Self> {
1267        if s.eq_ignore_ascii_case("7bit") {
1268            Ok(Self::SevenBit)
1269        } else if s.eq_ignore_ascii_case("8bit") {
1270            Ok(Self::EightBit)
1271        } else if s.eq_ignore_ascii_case("binary") {
1272            Ok(Self::Binary)
1273        } else if s.eq_ignore_ascii_case("quoted-printable") {
1274            Ok(Self::QuotedPrintable)
1275        } else if s.eq_ignore_ascii_case("base64") {
1276            Ok(Self::Base64)
1277        } else {
1278            Err(MailParsingError::InvalidContentTransferEncoding(
1279                s.to_string(),
1280            ))
1281        }
1282    }
1283}
1284
/// The decoded form of the body of a part, as returned by
/// `MimePart::body`.
#[derive(Debug, PartialEq)]
pub enum DecodedBody<'a> {
    /// The body decoded to text
    Text(SharedString<'a>),
    /// The body decoded to raw bytes
    Binary(Vec<u8>),
}
1290
1291impl<'a> DecodedBody<'a> {
1292    pub fn to_string_lossy(&'a self) -> Cow<'a, str> {
1293        match self {
1294            Self::Text(s) => s.to_str_lossy(),
1295            Self::Binary(b) => String::from_utf8_lossy(b),
1296        }
1297    }
1298}
1299
1300#[cfg(test)]
1301mod test {
1302    use super::*;
1303
    // A minimal message with no MIME headers: parsing followed by
    // serialization must reproduce the input byte for byte, while an
    // explicit rebuild normalizes the output (see the second snapshot).
    #[test]
    fn msg_parsing() {
        let message = concat!(
            "Subject: hello there\n",
            "From:  Someone <someone@example.com>\n",
            "\n",
            "I am the body"
        );

        let part = MimePart::parse(message).unwrap();
        // Round trip without any modification
        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
        assert_eq!(part.raw_body(), "I am the body");
        k9::snapshot!(
            part.body(),
            r#"
Ok(
    Text(
        "I am the body",
    ),
)
"#
        );

        // The rebuilt form has a Content-Type header and CRLF line endings
        k9::snapshot!(
            BString::from(part.rebuild(None).unwrap().to_message_bytes()),
            r#"
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
Subject: hello there\r
From: Someone <someone@example.com>\r
\r
I am the body\r

"#
        );
    }
1340
    // A body that is declared as base64 but is not valid base64: parsing
    // the message succeeds, and the problem is only reported once the
    // body is decoded.
    #[test]
    fn mime_bogus_body() {
        let message = concat!(
            "Subject: hello there\n",
            "From: Someone <someone@example.com>\n",
            "Mime-Version: 1.0\n",
            "Content-Type: text/plain\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "hello\n"
        );

        let part = MimePart::parse(message).unwrap();
        assert_eq!(
            part.body().unwrap_err(),
            MailParsingError::BodyParse(
                "base64 decode: invalid length at 4 b='o' in hello\n".to_string()
            )
        );
    }
1361
    // A valid base64 text body: the raw body stays encoded, `body()`
    // yields the decoded text, and a rebuild re-encodes the part (the
    // second snapshot shows quoted-printable being used).
    #[test]
    fn mime_encoded_body() {
        let message = concat!(
            "Subject: hello there\n",
            "From: Someone <someone@example.com>\n",
            "Mime-Version: 1.0\n",
            "Content-Type: text/plain\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "aGVsbG8K\n"
        );

        let part = MimePart::parse(message).unwrap();
        // Round trip without any modification
        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
        assert_eq!(part.raw_body(), "aGVsbG8K\n");
        k9::snapshot!(
            part.body(),
            r#"
Ok(
    Text(
        "hello
",
    ),
)
"#
        );

        k9::snapshot!(
            BString::from(part.rebuild(None).unwrap().to_message_bytes()),
            r#"
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
Content-Transfer-Encoding: quoted-printable\r
Subject: hello there\r
From: Someone <someone@example.com>\r
Mime-Version: 1.0\r
\r
hello=0A\r

"#
        );
    }
1404
    // A multipart/alternative message with a quoted-printable text part
    // and a base64 html part: the message must round trip byte for byte
    // (including the text after the final boundary), and each child must
    // decode to the expected text.
    #[test]
    fn mime_multipart_1() {
        let message = concat!(
            "Subject: This is a test email\n",
            "Content-Type: multipart/alternative; boundary=foobar\n",
            "Mime-Version: 1.0\n",
            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
            "\n",
            "--foobar\n",
            "Content-Type: text/plain; charset=utf-8\n",
            "Content-Transfer-Encoding: quoted-printable\n",
            "\n",
            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
            "--foobar\n",
            "Content-Type: text/html\n",
            "Content-Transfer-Encoding: base64\n",
            "\n",
            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
            "--foobar--\n",
            "After the final boundary stuff gets ignored.\n"
        );

        let part = MimePart::parse(message).unwrap();

        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());

        let children = part.child_parts();
        k9::assert_equal!(children.len(), 2);

        // The quoted-printable text/plain child
        k9::snapshot!(
            children[0].body(),
            r#"
Ok(
    Text(
        "This is the plaintext version, in utf-8. Proof by Euro: €\r
",
    ),
)
"#
        );
        // The base64 text/html child
        k9::snapshot!(
            children[1].body(),
            r#"
Ok(
    Text(
        "<html><body>This is the <b>HTML</b> version, in us-ascii. Proof by Euro: &euro;</body></html>
",
    ),
)
"#
        );
    }
1458
    // Edits a parsed multipart message in place: first the top level
    // headers, then the list of child parts. After each edit the
    // serialized form must reflect the change, with everything that was
    // not touched coming out exactly as it went in.
    #[test]
    fn mutate_1() {
        let message = concat!(
            "Subject: This is a test email\r\n",
            "Content-Type: multipart/alternative; boundary=foobar\r\n",
            "Mime-Version: 1.0\r\n",
            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
            "\r\n",
            "--foobar\r\n",
            "Content-Type: text/plain; charset=utf-8\r\n",
            "Content-Transfer-Encoding: quoted-printable\r\n",
            "\r\n",
            "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r\n",
            "--foobar\r\n",
            "Content-Type: text/html\r\n",
            "Content-Transfer-Encoding: base64\r\n",
            "\r\n",
            "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r\n",
            "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r\n",
            "--foobar--\r\n",
            "After the final boundary stuff gets ignored.\r\n"
        );

        let mut part = MimePart::parse(message).unwrap();
        k9::assert_equal!(message.as_bytes(), part.to_message_bytes());
        // Appends one header, inserts another at the very top, and drops
        // the Date header
        fn munge(part: &mut MimePart) {
            let headers = part.headers_mut();
            headers.push(Header::with_name_value("X-Woot", "Hello"));
            headers.insert(0, Header::with_name_value("X-First", "at the top"));
            headers.retain(|hdr| !hdr.get_name().eq_ignore_ascii_case(b"date"));
        }
        munge(&mut part);

        let re_encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            re_encoded,
            r#"
X-First: at the top\r
Subject: This is a test email\r
Content-Type: multipart/alternative; boundary=foobar\r
Mime-Version: 1.0\r
X-Woot: Hello\r
\r
--foobar\r
Content-Type: text/plain; charset=utf-8\r
Content-Transfer-Encoding: quoted-printable\r
\r
This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\r
--foobar\r
Content-Type: text/html\r
Content-Transfer-Encoding: base64\r
\r
PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
--foobar--\r
After the final boundary stuff gets ignored.\r

"#
        );

        eprintln!("part before mutate:\n{part:#?}");

        // Keep only the text/html child; the text/plain child is dropped
        part.child_parts_mut().retain(|part| {
            let ct = part.headers().content_type().unwrap().unwrap();
            ct.value == "text/html"
        });

        eprintln!("part with html removed is:\n{part:#?}");

        let re_encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            re_encoded,
            r#"
X-First: at the top\r
Subject: This is a test email\r
Content-Type: multipart/alternative; boundary=foobar\r
Mime-Version: 1.0\r
X-Woot: Hello\r
\r
--foobar\r
Content-Type: text/html\r
Content-Transfer-Encoding: base64\r
\r
PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \r
dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \r
--foobar--\r
After the final boundary stuff gets ignored.\r

"#
        );
    }
1550
    // Builds a text/plain part whose content is not plain ASCII, then
    // swaps its body for different text via `replace_text_body`. Both
    // snapshots show a utf-8 charset with a base64 encoded body.
    #[test]
    fn replace_text_body() {
        let mut part = MimePart::new_text_plain("Hello 👻\r\n").unwrap();
        let encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: base64\r
\r
SGVsbG8g8J+Ruw0K\r

"#
        );

        // Only the encoded body differs in the second snapshot
        part.replace_text_body("text/plain", "Hello 🚀\r\n")
            .unwrap();
        let encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: base64\r
\r
SGVsbG8g8J+agA0K\r

"#
        );
    }
1582
    // Builds a text/plain part from one long line that holds non-ASCII
    // text. The snapshot shows quoted-printable output with soft line
    // breaks. The serialized form must parse back to the same bytes, the
    // body must decode to the input, and the part (having no children)
    // must be reported as the primary text part at the root.
    #[test]
    fn construct_1() {
        let input_text = "Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: €, and here are some emoji 👻 🍉 💩 and this long should be long enough that we wrap it in the returned part, let's see how that turns out!\r\n";

        let part = MimePart::new_text_plain(input_text).unwrap();

        let encoded = BString::from(part.to_message_bytes());
        k9::snapshot!(
            &encoded,
            r#"
Content-Type: text/plain;\r
\tcharset="utf-8"\r
Content-Transfer-Encoding: quoted-printable\r
\r
Well, hello there! This is the plaintext version, in utf-8. Here's a Euro: =\r
=E2=82=AC, and here are some emoji =F0=9F=91=BB =F0=9F=8D=89 =F0=9F=92=A9 a=\r
nd this long should be long enough that we wrap it in the returned part, le=\r
t's see how that turns out!\r

"#
        );

        // Serialize -> parse -> serialize must be stable
        let parsed_part = MimePart::parse(encoded.clone()).unwrap();
        k9::assert_equal!(encoded, parsed_part.to_message_bytes());
        k9::assert_equal!(part.body().unwrap(), DecodedBody::Text(input_text.into()));
        k9::snapshot!(
            parsed_part.simplified_structure_pointers(),
            "
Ok(
    SimplifiedStructurePointers {
        text_part: Some(
            PartPointer(
                [],
            ),
        ),
        html_part: None,
        amp_html_part: None,
        header_part: PartPointer(
            [],
        ),
        attachments: [],
    },
)
"
        );
    }
1629
    // Assembles a multipart/mixed message out of a plain text part, an
    // html part and a binary attachment, using an explicit boundary.
    // Checks the serialized form and the pointers that the simplified
    // structure reports for it.
    #[test]
    fn construct_2() {
        let msg = MimePart::new_multipart(
            "multipart/mixed",
            vec![
                MimePart::new_text_plain("plain text").unwrap(),
                MimePart::new_html("<b>rich</b> text").unwrap(),
                MimePart::new_binary(
                    "application/octet-stream",
                    &[0, 1, 2, 3],
                    Some(&AttachmentOptions {
                        file_name: Some("woot.bin".into()),
                        inline: false,
                        content_id: Some("woot.id".into()),
                    }),
                )
                .unwrap(),
            ],
            Some(b"my-boundary"),
        )
        .unwrap();
        k9::snapshot!(
            BString::from(msg.to_message_bytes()),
            r#"
Content-Type: multipart/mixed;\r
\tboundary="my-boundary"\r
\r
--my-boundary\r
Content-Type: text/plain;\r
\tcharset="us-ascii"\r
\r
plain text\r
--my-boundary\r
Content-Type: text/html;\r
\tcharset="us-ascii"\r
\r
<b>rich</b> text\r
--my-boundary\r
Content-Disposition: attachment;\r
\tfilename="woot.bin"\r
Content-ID: <woot.id>\r
Content-Type: application/octet-stream;\r
\tname="woot.bin"\r
Content-Transfer-Encoding: base64\r
\r
AAECAw==\r
--my-boundary--\r

"#
        );

        // Children 0 and 1 are the primary text and html parts; child 2
        // is the only attachment
        k9::snapshot!(
            msg.simplified_structure_pointers(),
            "
Ok(
    SimplifiedStructurePointers {
        text_part: Some(
            PartPointer(
                [
                    0,
                ],
            ),
        ),
        html_part: Some(
            PartPointer(
                [
                    1,
                ],
            ),
        ),
        amp_html_part: None,
        header_part: PartPointer(
            [],
        ),
        attachments: [
            PartPointer(
                [
                    2,
                ],
            ),
        ],
    },
)
"
        );
    }
1716
    // The attachment carries two different names: `filename` on
    // Content-Disposition and `name` on Content-Type. The reported file
    // name must be the one from Content-Disposition.
    #[test]
    fn attachment_name_order_prefers_content_disposition() {
        let message = concat!(
            "Content-Type: multipart/mixed;\r\n",
            "	boundary=\"woot\"\r\n",
            "\r\n",
            "--woot\r\n",
            "Content-Type: text/plain;\r\n",
            "	charset=\"us-ascii\"\r\n",
            "\r\n",
            "Hello, I am the main message content\r\n",
            "--woot\r\n",
            "Content-Disposition: attachment;\r\n",
            "	filename=cdname\r\n",
            "Content-Type: application/octet-stream;\r\n",
            "	name=ctname\r\n",
            "Content-Transfer-Encoding: base64\r\n",
            "\r\n",
            "u6o=\r\n",
            "--woot--\r\n"
        );
        let part = MimePart::parse(message).unwrap();
        let structure = part.simplified_structure().unwrap();

        k9::assert_equal!(
            structure.attachments[0].rfc2045_info().attachment_options,
            Some(AttachmentOptions {
                content_id: None,
                inline: false,
                file_name: Some("cdname".into()),
            })
        );
    }
1750
1751    #[test]
1752    fn attachment_name_accepts_content_type_name() {
1753        let message = concat!(
1754            "Content-Type: multipart/mixed;\r\n",
1755            "	boundary=\"woot\"\r\n",
1756            "\r\n",
1757            "--woot\r\n",
1758            "Content-Type: text/plain;\r\n",
1759            "	charset=\"us-ascii\"\r\n",
1760            "\r\n",
1761            "Hello, I am the main message content\r\n",
1762            "--woot\r\n",
1763            "Content-Disposition: attachment\r\n",
1764            "Content-Type: application/octet-stream;\r\n",
1765            "	name=ctname\r\n",
1766            "Content-Transfer-Encoding: base64\r\n",
1767            "\r\n",
1768            "u6o=\r\n",
1769            "--woot--\r\n"
1770        );
1771        let part = MimePart::parse(message).unwrap();
1772        let structure = part.simplified_structure().unwrap();
1773
1774        k9::assert_equal!(
1775            structure.attachments[0].rfc2045_info().attachment_options,
1776            Some(AttachmentOptions {
1777                content_id: None,
1778                inline: false,
1779                file_name: Some("ctname".into()),
1780            })
1781        );
1782    }
1783
1784    #[test]
1785    fn funky_headers() {
1786        let message = concat!(
1787            "Subject\r\n",
1788            "Other:\r\n",
1789            "Content-Type: multipart/alternative; boundary=foobar\r\n",
1790            "Mime-Version: 1.0\r\n",
1791            "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\r\n",
1792            "\r\n",
1793            "The body.\r\n"
1794        );
1795
1796        let part = MimePart::parse(message).unwrap();
1797        assert!(part
1798            .conformance()
1799            .contains(MessageConformance::MISSING_COLON_VALUE));
1800    }
1801
1802    /// This is a regression test for an issue where we'd interpret the
1803    /// binary bytes as default windows-1252 codepage charset, and mangle them.
1804    /// The high byte is sufficient to trigger the offending code prior
1805    /// to the fix
1806    #[test]
1807    fn rebuild_binary() {
1808        let expect = &[0, 1, 2, 3, 0xbe, 4, 5];
1809        let part = MimePart::new_binary("applicat/octet-stream", expect, None).unwrap();
1810
1811        let rebuilt = part.rebuild(None).unwrap();
1812        let body = rebuilt.body().unwrap();
1813
1814        assert_eq!(body, DecodedBody::Binary(expect.to_vec()));
1815    }
1816
1817    /// Validate that we don't lose supplemental mime parameters like:
1818    /// `Content-Type: text/calendar; method=REQUEST`
1819    #[test]
1820    fn rebuild_invitation() {
1821        let message = concat!(
1822            "Subject: Test for events 2\r\n",
1823            "Content-Type: multipart/mixed;\r\n",
1824            " boundary=8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1825            "\r\n",
1826            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1827            "Content-Type: multipart/alternative;\r\n",
1828            " boundary=a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1829            "\r\n",
1830            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1831            "Content-Transfer-Encoding: quoted-printable\r\n",
1832            "Content-Type: text/plain; charset=UTF-8\r\n",
1833            "\r\n",
1834            "This is a test for calendar event invitation\r\n",
1835            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r\n",
1836            "Content-Transfer-Encoding: quoted-printable\r\n",
1837            "Content-Type: text/html; charset=UTF-8\r\n",
1838            "\r\n",
1839            "<p>This is a test for calendar event invitation</p>\r\n",
1840            "--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f--\r\n",
1841            "\r\n",
1842            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1843            "Content-Disposition: inline; name=\"Invitation.ics\"\r\n",
1844            "Content-Type: text/calendar; method=REQUEST; name=\"Invitation.ics\"\r\n",
1845            "\r\n",
1846            "Invitation\r\n",
1847            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r\n",
1848            "Content-Disposition: attachment; filename=\"event.ics\"\r\n",
1849            "Content-Type: application/ics\r\n",
1850            "\r\n",
1851            "Event\r\n",
1852            "--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3--\r\n",
1853            "\r\n"
1854        );
1855
1856        let part = MimePart::parse(message).unwrap();
1857        let rebuilt = part.rebuild(None).unwrap();
1858
1859        k9::snapshot!(
1860            BString::from(rebuilt.to_message_bytes()),
1861            r#"
1862Content-Type: multipart/mixed;\r
1863\tboundary="8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3"\r
1864Subject: Test for events 2\r
1865\r
1866--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1867Content-Type: multipart/alternative;\r
1868\tboundary="a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f"\r
1869\r
1870--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r
1871Content-Type: text/plain;\r
1872\tcharset="us-ascii"\r
1873\r
1874This is a test for calendar event invitation\r
1875--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f\r
1876Content-Type: text/html;\r
1877\tcharset="us-ascii"\r
1878\r
1879<p>This is a test for calendar event invitation</p>\r
1880--a4e0aff9e05c7d94e2e13bd5590302f7802daac1e952c065207790d15a9f--\r
1881--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1882Content-Type: text/calendar;\r
1883\tcharset="us-ascii";\r
1884\tmethod="REQUEST";\r
1885\tname="Invitation.ics"\r
1886\r
1887Invitation\r
1888--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3\r
1889Content-Disposition: attachment;\r
1890\tfilename="event.ics"\r
1891Content-Type: application/ics;\r
1892\tname="event.ics"\r
1893Content-Transfer-Encoding: base64\r
1894\r
1895RXZlbnQNCg==\r
1896--8a54d64d7ad7c04a084478052b36cbe1609b33bf3a41203aaee8dd642cd3--\r
1897
1898"#
1899        );
1900    }
1901
1902    #[test]
1903    fn check_conformance_angle_msg_id() {
1904        const DOUBLE_ANGLE_ONLY: &str = "Subject: hello\r
1905Message-ID: <<1234@example.com>>\r
1906\r
1907Hello";
1908        let msg = MimePart::parse(DOUBLE_ANGLE_ONLY).unwrap();
1909        k9::snapshot!(
1910            msg.check_fix_conformance(
1911                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1912                MessageConformance::empty(),
1913                CheckFixSettings::default(),
1914            )
1915            .unwrap_err()
1916            .to_string(),
1917            "Message has conformance issues: MISSING_MESSAGE_ID_HEADER"
1918        );
1919
1920        let rebuilt = BString::from(
1921            msg.check_fix_conformance(
1922                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1923                MessageConformance::MISSING_MESSAGE_ID_HEADER,
1924                CheckFixSettings {
1925                    message_id: Some("id@example.com".to_string()),
1926                    ..Default::default()
1927                },
1928            )
1929            .unwrap()
1930            .unwrap()
1931            .to_message_bytes(),
1932        );
1933
1934        k9::snapshot!(
1935            rebuilt,
1936            r#"
1937Subject: hello\r
1938Message-ID: <id@example.com>\r
1939\r
1940Hello
1941"#
1942        );
1943
1944        const DOUBLE_ANGLE_AND_LONG_LINE: &str = "Subject: hello\r
1945Message-ID: <<1234@example.com>>\r
1946\r
1947Hello this is a really long line Hello this is a really long line \
1948Hello this is a really long line Hello this is a really long line \
1949Hello this is a really long line Hello this is a really long line \
1950Hello this is a really long line Hello this is a really long line \
1951Hello this is a really long line Hello this is a really long line \
1952Hello this is a really long line Hello this is a really long line \
1953Hello this is a really long line Hello this is a really long line
1954";
1955        let msg = MimePart::parse(DOUBLE_ANGLE_AND_LONG_LINE).unwrap();
1956        let rebuilt = BString::from(
1957            msg.check_fix_conformance(
1958                MessageConformance::MISSING_COLON_VALUE,
1959                MessageConformance::MISSING_MESSAGE_ID_HEADER | MessageConformance::LINE_TOO_LONG,
1960                CheckFixSettings {
1961                    message_id: Some("id@example.com".to_string()),
1962                    ..Default::default()
1963                },
1964            )
1965            .unwrap()
1966            .unwrap()
1967            .to_message_bytes(),
1968        );
1969
1970        k9::snapshot!(
1971            rebuilt,
1972            r#"
1973Content-Type: text/plain;\r
1974\tcharset="us-ascii"\r
1975Content-Transfer-Encoding: quoted-printable\r
1976Subject: hello\r
1977Message-ID: <id@example.com>\r
1978\r
1979Hello this is a really long line Hello this is a really long line Hello thi=\r
1980s is a really long line Hello this is a really long line Hello this is a re=\r
1981ally long line Hello this is a really long line Hello this is a really long=\r
1982 line Hello this is a really long line Hello this is a really long line Hel=\r
1983lo this is a really long line Hello this is a really long line Hello this i=\r
1984s a really long line Hello this is a really long line Hello this is a reall=\r
1985y long line=0A\r
1986
1987"#
1988        );
1989    }
1990
1991    #[test]
1992    fn check_conformance() {
1993        const MULTI_HEADER_CONTENT: &str =
1994        "X-Hello: there\r\nX-Header: value\r\nSubject: Hello\r\nX-Header: another value\r\nFrom :Someone@somewhere\r\n\r\nBody";
1995
1996        let msg = MimePart::parse(MULTI_HEADER_CONTENT).unwrap();
1997        let rebuilt = BString::from(
1998            msg.check_fix_conformance(
1999                MessageConformance::default(),
2000                MessageConformance::MISSING_MIME_VERSION,
2001                CheckFixSettings::default(),
2002            )
2003            .unwrap()
2004            .unwrap()
2005            .to_message_bytes(),
2006        );
2007        k9::snapshot!(
2008            rebuilt,
2009            r#"
2010X-Hello: there\r
2011X-Header: value\r
2012Subject: Hello\r
2013X-Header: another value\r
2014From :Someone@somewhere\r
2015Mime-Version: 1.0\r
2016\r
2017Body
2018"#
2019        );
2020
2021        let msg = MimePart::parse(MULTI_HEADER_CONTENT).unwrap();
2022        let rebuilt = BString::from(
2023            msg.check_fix_conformance(
2024                MessageConformance::default(),
2025                MessageConformance::MISSING_MIME_VERSION | MessageConformance::NAME_ENDS_WITH_SPACE,
2026                CheckFixSettings::default(),
2027            )
2028            .unwrap()
2029            .unwrap()
2030            .to_message_bytes(),
2031        );
2032        k9::snapshot!(
2033            rebuilt,
2034            r#"
2035Content-Type: text/plain;\r
2036\tcharset="us-ascii"\r
2037X-Hello: there\r
2038X-Header: value\r
2039Subject: Hello\r
2040X-Header: another value\r
2041From: <Someone@somewhere>\r
2042Mime-Version: 1.0\r
2043\r
2044Body\r
2045
2046"#
2047        );
2048    }
2049
2050    #[test]
2051    fn check_fix_latin_input() {
2052        const POUNDS: &[u8] = b"Subject: \xa3\r\n\r\nGBP\r\n";
2053        let msg = MimePart::parse(POUNDS).unwrap();
2054        assert_eq!(
2055            msg.conformance(),
2056            MessageConformance::NEEDS_TRANSFER_ENCODING
2057                | MessageConformance::MISSING_DATE_HEADER
2058                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2059                | MessageConformance::MISSING_MIME_VERSION
2060        );
2061        let rebuilt = msg
2062            .check_fix_conformance(
2063                MessageConformance::default(),
2064                MessageConformance::NEEDS_TRANSFER_ENCODING,
2065                CheckFixSettings {
2066                    detect_encoding: true,
2067                    include_encodings: vec!["iso-8859-1".to_string()],
2068                    data_bytes: Some(Arc::new(POUNDS.into())),
2069                    ..Default::default()
2070                },
2071            )
2072            .unwrap()
2073            .unwrap();
2074
2075        let subject = rebuilt.headers.subject().unwrap().unwrap();
2076        assert_eq!(subject, "£");
2077    }
2078
2079    // The issue here is that the message is text/plain with no explicit
2080    // charset, and is thus implicitly us-ascii.  But the part is actually
2081    // utf-8 content inside base64. Since the transfer encoding is 7-bit
2082    // it doesn't get flagged as improper encoding during the initial
2083    // parse.
2084    // We want to ensure that it is found during check-fix, and is corrected.
2085    #[test]
2086    fn check_fix_utf8_inside_transfer_encoding() {
2087        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\n2KrYs9iqDQoNCg==\r\n";
2088
2089        let msg = MimePart::parse(CONTENT).unwrap();
2090
2091        // Initial parse cannot see that the content is actually utf-8,
2092        // which conflicts with the implicit us-ascii charset for a text/ part.
2093        assert_eq!(
2094            msg.conformance(),
2095            MessageConformance::MISSING_DATE_HEADER
2096                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2097                | MessageConformance::MISSING_MIME_VERSION
2098        );
2099
2100        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2101        assert_eq!(
2102            msg.deep_conformance_check(),
2103            MessageConformance::NEEDS_TRANSFER_ENCODING
2104                | MessageConformance::MISSING_DATE_HEADER
2105                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2106                | MessageConformance::MISSING_MIME_VERSION
2107        );
2108        let rebuilt = msg
2109            .check_fix_conformance(
2110                MessageConformance::default(),
2111                MessageConformance::NEEDS_TRANSFER_ENCODING,
2112                CheckFixSettings::default(),
2113            )
2114            .unwrap()
2115            .unwrap();
2116
2117        eprintln!("{rebuilt:?}");
2118        assert_eq!(rebuilt.body().unwrap().to_string_lossy().trim(), "تست");
2119    }
2120
2121    #[test]
2122    fn check_fix_latin1_inside_transfer_encoding() {
2123        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\nVGhlIGNvc3QgaXMgozQyLjAwCg==\r\n";
2124
2125        let msg = MimePart::parse(CONTENT).unwrap();
2126
2127        // Initial parse cannot see that the content is actually utf-8,
2128        // which conflicts with the implicit us-ascii charset for a text/ part.
2129        assert_eq!(
2130            msg.conformance(),
2131            MessageConformance::MISSING_DATE_HEADER
2132                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2133                | MessageConformance::MISSING_MIME_VERSION
2134        );
2135
2136        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2137        assert_eq!(
2138            msg.deep_conformance_check(),
2139            MessageConformance::NEEDS_TRANSFER_ENCODING
2140                | MessageConformance::MISSING_DATE_HEADER
2141                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2142                | MessageConformance::MISSING_MIME_VERSION
2143        );
2144        let rebuilt = msg
2145            .check_fix_conformance(
2146                MessageConformance::default(),
2147                MessageConformance::NEEDS_TRANSFER_ENCODING,
2148                CheckFixSettings {
2149                    detect_encoding: true,
2150                    include_encodings: vec!["iso-8859-1".to_string()],
2151                    ..Default::default()
2152                },
2153            )
2154            .unwrap()
2155            .unwrap();
2156
2157        eprintln!("{rebuilt:?}");
2158        assert_eq!(
2159            rebuilt.body().unwrap().to_string_lossy().trim(),
2160            "The cost is £42.00"
2161        );
2162    }
2163
2164    #[test]
2165    fn check_fix_unknown_inside_transfer_encoding() {
2166        // `owo=` is 0xa3 (a UK Sterling/Pound sign in latin-1.
2167        // The length of the data passed to the charset detector
2168        // is insufficient for it to decide the charset, so we
2169        // should not expect to see a valid text part emitted.
2170        const CONTENT: &str = "Subject: hello\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: base64\r\n\r\nowo=\r\n";
2171
2172        let msg = MimePart::parse(CONTENT).unwrap();
2173
2174        // Initial parse cannot see that the content is actually utf-8,
2175        // which conflicts with the implicit us-ascii charset for a text/ part.
2176        assert_eq!(
2177            msg.conformance(),
2178            MessageConformance::MISSING_DATE_HEADER
2179                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2180                | MessageConformance::MISSING_MIME_VERSION
2181        );
2182
2183        // Deep check flags the invalid charset and sets NEEDS_TRANSFER_ENCODING
2184        assert_eq!(
2185            msg.deep_conformance_check(),
2186            MessageConformance::NEEDS_TRANSFER_ENCODING
2187                | MessageConformance::MISSING_DATE_HEADER
2188                | MessageConformance::MISSING_MESSAGE_ID_HEADER
2189                | MessageConformance::MISSING_MIME_VERSION
2190        );
2191        let rebuilt = msg
2192            .check_fix_conformance(
2193                MessageConformance::default(),
2194                MessageConformance::NEEDS_TRANSFER_ENCODING,
2195                CheckFixSettings {
2196                    detect_encoding: true,
2197                    include_encodings: vec!["iso-8859-1".to_string()],
2198                    ..Default::default()
2199                },
2200            )
2201            .unwrap()
2202            .unwrap();
2203
2204        eprintln!("{rebuilt:?}");
2205        assert_eq!(rebuilt.body().unwrap().to_string_lossy().trim(), "�");
2206    }
2207
2208    #[test]
2209    fn nested_multipart_mixed_related() {
2210        // Reproduces the structure: multipart/mixed -> multipart/related -> [text/html, image/png]
2211        let message = concat!(
2212            "MIME-Version: 1.0\r\n",
2213            "Content-Type: multipart/mixed;\r\n",
2214            "\tboundary=\"----=_Part_602641_1899404624.1775349148919\"\r\n",
2215            "\r\n",
2216            "------=_Part_602641_1899404624.1775349148919\r\n",
2217            "Content-Type: multipart/related;\r\n",
2218            "\tboundary=\"----=_Part_602642_1070442961.1775349148920\"\r\n",
2219            "\r\n",
2220            "------=_Part_602642_1070442961.1775349148920\r\n",
2221            "Content-Type: text/html;charset=UTF-8\r\n",
2222            "Content-Transfer-Encoding: quoted-printable\r\n",
2223            "\r\n",
2224            "<html><body>Test HTML</body></html>\r\n",
2225            "------=_Part_602642_1070442961.1775349148920\r\n",
2226            "Content-Type: image/png; name=inline\r\n",
2227            "Content-Transfer-Encoding: base64\r\n",
2228            "Content-Disposition: inline; filename=inline\r\n",
2229            "Content-ID: <dell-aiops>\r\n",
2230            "\r\n",
2231            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==\r\n",
2232            "------=_Part_602642_1070442961.1775349148920--\r\n",
2233            "------=_Part_602641_1899404624.1775349148919--\r\n"
2234        );
2235
2236        let root = MimePart::parse(message).unwrap();
2237
2238        /// Extract content-type from part
2239        fn ct(p: &MimePart) -> String {
2240            p.headers()
2241                .content_type()
2242                .unwrap()
2243                .unwrap()
2244                .value
2245                .to_string()
2246        }
2247
2248        assert_eq!(ct(&root), "multipart/mixed");
2249
2250        // Structure check: root should have 1 part (multipart/related)
2251        let [related_part] = &root.child_parts()[..] else {
2252            panic!("root must have one child")
2253        };
2254        assert_eq!(ct(related_part), "multipart/related");
2255
2256        // multipart/related should have 2 parts (text/html and image)
2257        let [html_part, image_part] = &related_part.child_parts()[..] else {
2258            panic!("related part must have two children")
2259        };
2260
2261        // Check content types
2262        assert_eq!(ct(html_part), "text/html");
2263        assert_eq!(ct(image_part), "image/png");
2264
2265        // Verify simplified structure can be retrieved (this tests the PartRef resolution path)
2266        let simplified = root.simplified_structure().unwrap();
2267        let DecodedBody::Text(html) = html_part.body().unwrap() else {
2268            panic!("must be text")
2269        };
2270        assert_eq!(
2271            simplified,
2272            SimplifiedStructure {
2273                text: None,
2274                html: Some(html),
2275                amp_html: None,
2276                headers: &root.headers(),
2277                attachments: vec![image_part.clone()],
2278            }
2279        );
2280    }
2281}