aboutsummaryrefslogtreecommitdiff
path: root/docs/scripts/tests
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-02-07 21:41:19 -0600
committerCraig Jennings <c@cjennings.net>2026-02-07 21:41:19 -0600
commit24a681c0696fbdad9c32073ffd24cf7218296ed2 (patch)
treee5b43c8c62e027b7cabffa31b43238027ec284d0 /docs/scripts/tests
parentbf6eef6183df6051b2423c7850c230406861f927 (diff)
downloadarchangel-24a681c0696fbdad9c32073ffd24cf7218296ed2.tar.gz
archangel-24a681c0696fbdad9c32073ffd24cf7218296ed2.zip
docs: sync templates, rename workflows and notes.org
Sync from templates. Rename NOTES.org to notes.org, session-wrap-up to wrap-it-up, retrospective-workflow to retrospective, session-start to startup. Update all references.
Diffstat (limited to 'docs/scripts/tests')
-rw-r--r--docs/scripts/tests/conftest.py77
-rw-r--r--docs/scripts/tests/fixtures/empty-body.eml16
-rw-r--r--docs/scripts/tests/fixtures/html-only.eml20
-rw-r--r--docs/scripts/tests/fixtures/multiple-received-headers.eml12
-rw-r--r--docs/scripts/tests/fixtures/no-received-headers.eml9
-rw-r--r--docs/scripts/tests/fixtures/plain-text.eml15
-rw-r--r--docs/scripts/tests/fixtures/with-attachment.eml27
-rw-r--r--docs/scripts/tests/test_extract_body.py96
-rw-r--r--docs/scripts/tests/test_extract_metadata.py65
-rw-r--r--docs/scripts/tests/test_generate_filenames.py157
-rw-r--r--docs/scripts/tests/test_integration_stdout.py68
-rw-r--r--docs/scripts/tests/test_parse_received_headers.py105
-rw-r--r--docs/scripts/tests/test_process_eml.py129
-rw-r--r--docs/scripts/tests/test_save_attachments.py97
14 files changed, 893 insertions, 0 deletions
diff --git a/docs/scripts/tests/conftest.py b/docs/scripts/tests/conftest.py
new file mode 100644
index 0000000..8d965ab
--- /dev/null
+++ b/docs/scripts/tests/conftest.py
@@ -0,0 +1,77 @@
+"""Shared fixtures for EML extraction tests."""
+
+import os
+from email.message import EmailMessage
+from email.mime.application import MIMEApplication
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+
+import pytest
+
+
+@pytest.fixture
+def fixtures_dir():
+ """Return path to the fixtures/ directory."""
+ return os.path.join(os.path.dirname(__file__), 'fixtures')
+
+
+def make_plain_message(body="Test body", from_="Jonathan Smith <jsmith@example.com>",
+ to="Craig <craig@example.com>",
+ subject="Test Subject",
+ date="Wed, 05 Feb 2026 11:36:00 -0600"):
+ """Create an EmailMessage with text/plain body."""
+ msg = EmailMessage()
+ msg['From'] = from_
+ msg['To'] = to
+ msg['Subject'] = subject
+ msg['Date'] = date
+ msg.set_content(body)
+ return msg
+
+
+def make_html_message(html_body="<p>Test body</p>",
+ from_="Jonathan Smith <jsmith@example.com>",
+ to="Craig <craig@example.com>",
+ subject="Test Subject",
+ date="Wed, 05 Feb 2026 11:36:00 -0600"):
+ """Create an EmailMessage with text/html body only."""
+ msg = EmailMessage()
+ msg['From'] = from_
+ msg['To'] = to
+ msg['Subject'] = subject
+ msg['Date'] = date
+ msg.set_content(html_body, subtype='html')
+ return msg
+
+
+def make_message_with_attachment(body="Test body",
+ from_="Jonathan Smith <jsmith@example.com>",
+ to="Craig <craig@example.com>",
+ subject="Test Subject",
+ date="Wed, 05 Feb 2026 11:36:00 -0600",
+ attachment_filename="document.pdf",
+ attachment_content=b"fake pdf content"):
+ """Create a multipart message with a text body and one attachment."""
+ msg = MIMEMultipart()
+ msg['From'] = from_
+ msg['To'] = to
+ msg['Subject'] = subject
+ msg['Date'] = date
+
+ msg.attach(MIMEText(body, 'plain'))
+
+ att = MIMEApplication(attachment_content, Name=attachment_filename)
+ att['Content-Disposition'] = f'attachment; filename="{attachment_filename}"'
+ msg.attach(att)
+
+ return msg
+
+
+def add_received_headers(msg, headers):
+ """Add Received headers to an existing message.
+
+ headers: list of header strings, added in order (first = most recent).
+ """
+ for header in headers:
+ msg['Received'] = header
+ return msg
diff --git a/docs/scripts/tests/fixtures/empty-body.eml b/docs/scripts/tests/fixtures/empty-body.eml
new file mode 100644
index 0000000..cf008df
--- /dev/null
+++ b/docs/scripts/tests/fixtures/empty-body.eml
@@ -0,0 +1,16 @@
+From: Jonathan Smith <jsmith@example.com>
+To: Craig Jennings <craig@example.com>
+Subject: Empty Body Test
+Date: Thu, 05 Feb 2026 11:36:00 -0600
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="boundary456"
+Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
+
+--boundary456
+Content-Type: application/octet-stream; name="data.bin"
+Content-Disposition: attachment; filename="data.bin"
+Content-Transfer-Encoding: base64
+
+AQIDBA==
+
+--boundary456--
diff --git a/docs/scripts/tests/fixtures/html-only.eml b/docs/scripts/tests/fixtures/html-only.eml
new file mode 100644
index 0000000..4db7645
--- /dev/null
+++ b/docs/scripts/tests/fixtures/html-only.eml
@@ -0,0 +1,20 @@
+From: Jonathan Smith <jsmith@example.com>
+To: Craig Jennings <craig@example.com>
+Subject: HTML Update
+Date: Thu, 05 Feb 2026 11:36:00 -0600
+MIME-Version: 1.0
+Content-Type: text/html; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
+
+<html>
+<body>
+<p>Hi Craig,</p>
+<p>Here is the <strong>HTML</strong> update.</p>
+<ul>
+<li>Item one</li>
+<li>Item two</li>
+</ul>
+<p>Best,<br>Jonathan</p>
+</body>
+</html>
diff --git a/docs/scripts/tests/fixtures/multiple-received-headers.eml b/docs/scripts/tests/fixtures/multiple-received-headers.eml
new file mode 100644
index 0000000..1b8d6a7
--- /dev/null
+++ b/docs/scripts/tests/fixtures/multiple-received-headers.eml
@@ -0,0 +1,12 @@
+From: Jonathan Smith <jsmith@example.com>
+To: Craig Jennings <craig@example.com>
+Subject: Multiple Received Headers Test
+Date: Thu, 05 Feb 2026 11:36:00 -0600
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+Received: by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600
+Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
+Received: from originator.example.com by relay.example.com with SMTP; Thu, 05 Feb 2026 11:35:58 -0600
+
+Test body with multiple received headers.
diff --git a/docs/scripts/tests/fixtures/no-received-headers.eml b/docs/scripts/tests/fixtures/no-received-headers.eml
new file mode 100644
index 0000000..8a05dc7
--- /dev/null
+++ b/docs/scripts/tests/fixtures/no-received-headers.eml
@@ -0,0 +1,9 @@
+From: Jonathan Smith <jsmith@example.com>
+To: Craig Jennings <craig@example.com>
+Subject: No Received Headers
+Date: Thu, 05 Feb 2026 11:36:00 -0600
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+Test body with no received headers at all.
diff --git a/docs/scripts/tests/fixtures/plain-text.eml b/docs/scripts/tests/fixtures/plain-text.eml
new file mode 100644
index 0000000..8cc9d9c
--- /dev/null
+++ b/docs/scripts/tests/fixtures/plain-text.eml
@@ -0,0 +1,15 @@
+From: Jonathan Smith <jsmith@example.com>
+To: Craig Jennings <craig@example.com>
+Subject: Re: Fw: 4319 Danneel Street
+Date: Thu, 05 Feb 2026 11:36:00 -0600
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
+
+Hi Craig,
+
+Here is the update on 4319 Danneel Street.
+
+Best,
+Jonathan
diff --git a/docs/scripts/tests/fixtures/with-attachment.eml b/docs/scripts/tests/fixtures/with-attachment.eml
new file mode 100644
index 0000000..ac49c5d
--- /dev/null
+++ b/docs/scripts/tests/fixtures/with-attachment.eml
@@ -0,0 +1,27 @@
+From: Jonathan Smith <jsmith@example.com>
+To: Craig Jennings <craig@example.com>
+Subject: Ltr from Carrollton
+Date: Thu, 05 Feb 2026 11:36:00 -0600
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="boundary123"
+Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
+
+--boundary123
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+Hi Craig,
+
+Please find the letter attached.
+
+Best,
+Jonathan
+
+--boundary123
+Content-Type: application/octet-stream; name="Ltr Carrollton.pdf"
+Content-Disposition: attachment; filename="Ltr Carrollton.pdf"
+Content-Transfer-Encoding: base64
+
+ZmFrZSBwZGYgY29udGVudA==
+
+--boundary123--
diff --git a/docs/scripts/tests/test_extract_body.py b/docs/scripts/tests/test_extract_body.py
new file mode 100644
index 0000000..7b53cda
--- /dev/null
+++ b/docs/scripts/tests/test_extract_body.py
@@ -0,0 +1,96 @@
+"""Tests for extract_body()."""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from conftest import make_plain_message, make_html_message, make_message_with_attachment
+from email.message import EmailMessage
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.mime.application import MIMEApplication
+
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "eml_script",
+ os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
+)
+eml_script = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(eml_script)
+
+extract_body = eml_script.extract_body
+
+
+class TestPlainText:
+ def test_returns_plain_text(self):
+ msg = make_plain_message(body="Hello, this is plain text.")
+ result = extract_body(msg)
+ assert "Hello, this is plain text." in result
+
+
+class TestHtmlOnly:
+ def test_returns_converted_html(self):
+ msg = make_html_message(html_body="<p>Hello <strong>world</strong></p>")
+ result = extract_body(msg)
+ assert "Hello" in result
+ assert "world" in result
+ # Should not contain raw HTML tags
+ assert "<p>" not in result
+ assert "<strong>" not in result
+
+
+class TestBothPlainAndHtml:
+ def test_prefers_plain_text(self):
+ msg = MIMEMultipart('alternative')
+ msg['From'] = 'test@example.com'
+ msg['To'] = 'dest@example.com'
+ msg['Subject'] = 'Test'
+ msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
+ msg.attach(MIMEText("Plain text version", 'plain'))
+ msg.attach(MIMEText("<p>HTML version</p>", 'html'))
+ result = extract_body(msg)
+ assert "Plain text version" in result
+ assert "HTML version" not in result
+
+
+class TestEmptyBody:
+ def test_returns_empty_string(self):
+ # Multipart with only attachments, no text parts
+ msg = MIMEMultipart()
+ msg['From'] = 'test@example.com'
+ att = MIMEApplication(b"binary data", Name="file.bin")
+ att['Content-Disposition'] = 'attachment; filename="file.bin"'
+ msg.attach(att)
+ result = extract_body(msg)
+ assert result == ""
+
+
+class TestNonUtf8Encoding:
+ def test_decodes_with_errors_ignore(self):
+ msg = EmailMessage()
+ msg['From'] = 'test@example.com'
+ # Body is a normal str with a non-ASCII character (é); no invalid UTF-8 bytes are injected
+ msg.set_content("Valid text with special: café")
+ result = extract_body(msg)
+ assert "Valid text" in result
+
+
+class TestHtmlWithStructure:
+ def test_preserves_list_structure(self):
+ html = "<ul><li>Item one</li><li>Item two</li></ul>"
+ msg = make_html_message(html_body=html)
+ result = extract_body(msg)
+ assert "Item one" in result
+ assert "Item two" in result
+
+
+class TestNoTextParts:
+ def test_returns_empty_string(self):
+ msg = MIMEMultipart()
+ msg['From'] = 'test@example.com'
+ att = MIMEApplication(b"data", Name="image.png")
+ att['Content-Disposition'] = 'attachment; filename="image.png"'
+ msg.attach(att)
+ result = extract_body(msg)
+ assert result == ""
diff --git a/docs/scripts/tests/test_extract_metadata.py b/docs/scripts/tests/test_extract_metadata.py
new file mode 100644
index 0000000..d5ee52e
--- /dev/null
+++ b/docs/scripts/tests/test_extract_metadata.py
@@ -0,0 +1,65 @@
+"""Tests for extract_metadata()."""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from conftest import make_plain_message, add_received_headers
+from email.message import EmailMessage
+
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "eml_script",
+ os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
+)
+eml_script = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(eml_script)
+
+extract_metadata = eml_script.extract_metadata
+
+
+class TestAllHeadersPresent:
+ def test_complete_dict(self):
+ msg = make_plain_message(
+ from_="Jonathan Smith <jsmith@example.com>",
+ to="Craig <craig@example.com>",
+ subject="Test Subject",
+ date="Thu, 05 Feb 2026 11:36:00 -0600"
+ )
+ result = extract_metadata(msg)
+ assert result['from'] == "Jonathan Smith <jsmith@example.com>"
+ assert result['to'] == "Craig <craig@example.com>"
+ assert result['subject'] == "Test Subject"
+ assert result['date'] == "Thu, 05 Feb 2026 11:36:00 -0600"
+ assert 'timing' in result
+
+
+class TestMissingFrom:
+ def test_from_is_none(self):
+ msg = EmailMessage()
+ msg['To'] = 'craig@example.com'
+ msg['Subject'] = 'Test'
+ msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
+ msg.set_content("body")
+ result = extract_metadata(msg)
+ assert result['from'] is None
+
+
+class TestMissingDate:
+ def test_date_is_none(self):
+ msg = EmailMessage()
+ msg['From'] = 'test@example.com'
+ msg['To'] = 'craig@example.com'
+ msg['Subject'] = 'Test'
+ msg.set_content("body")
+ result = extract_metadata(msg)
+ assert result['date'] is None
+
+
+class TestLongSubject:
+ def test_full_subject_returned(self):
+ long_subject = "Re: Fw: This is a very long subject line that spans many words and might be folded"
+ msg = make_plain_message(subject=long_subject)
+ result = extract_metadata(msg)
+ assert result['subject'] == long_subject
diff --git a/docs/scripts/tests/test_generate_filenames.py b/docs/scripts/tests/test_generate_filenames.py
new file mode 100644
index 0000000..07c8f84
--- /dev/null
+++ b/docs/scripts/tests/test_generate_filenames.py
@@ -0,0 +1,157 @@
+"""Tests for generate_basename(), generate_email_filename(), generate_attachment_filename()."""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "eml_script",
+ os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
+)
+eml_script = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(eml_script)
+
+generate_basename = eml_script.generate_basename
+generate_email_filename = eml_script.generate_email_filename
+generate_attachment_filename = eml_script.generate_attachment_filename
+
+
+# --- generate_basename ---
+
+class TestGenerateBasename:
+ def test_standard_from_and_date(self):
+ metadata = {
+ 'from': 'Jonathan Smith <jsmith@example.com>',
+ 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
+ }
+ assert generate_basename(metadata) == "2026-02-05-1136-Jonathan"
+
+ def test_from_with_display_name_first_token(self):
+ metadata = {
+ 'from': 'C Ciarm <cciarm@example.com>',
+ 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
+ }
+ result = generate_basename(metadata)
+ assert result == "2026-02-05-1136-C"
+
+ def test_from_without_display_name(self):
+ metadata = {
+ 'from': 'jsmith@example.com',
+ 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
+ }
+ result = generate_basename(metadata)
+ assert result == "2026-02-05-1136-jsmith"
+
+ def test_missing_date(self):
+ metadata = {
+ 'from': 'Jonathan Smith <jsmith@example.com>',
+ 'date': None,
+ }
+ result = generate_basename(metadata)
+ assert result == "unknown-Jonathan"
+
+ def test_missing_from(self):
+ metadata = {
+ 'from': None,
+ 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
+ }
+ result = generate_basename(metadata)
+ assert result == "2026-02-05-1136-unknown"
+
+ def test_both_missing(self):
+ metadata = {'from': None, 'date': None}
+ result = generate_basename(metadata)
+ assert result == "unknown-unknown"
+
+ def test_unparseable_date(self):
+ metadata = {
+ 'from': 'Jonathan <j@example.com>',
+ 'date': 'not a real date',
+ }
+ result = generate_basename(metadata)
+ assert result == "unknown-Jonathan"
+
+ def test_none_date_no_crash(self):
+ metadata = {'from': 'Test <t@e.com>', 'date': None}
+ # Should not raise
+ result = generate_basename(metadata)
+ assert "unknown" in result
+
+
+# --- generate_email_filename ---
+
+class TestGenerateEmailFilename:
+ def test_standard_subject(self):
+ result = generate_email_filename(
+ "2026-02-05-1136-Jonathan",
+ "Re: Fw: 4319 Danneel Street"
+ )
+ assert result == "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street"
+
+ def test_subject_with_special_chars(self):
+ result = generate_email_filename(
+ "2026-02-05-1136-Jonathan",
+ "Update: Meeting (draft) & notes!"
+ )
+ # Colons, parens, ampersands, exclamation stripped
+ assert "EMAIL" in result
+ assert ":" not in result
+ assert "(" not in result
+ assert ")" not in result
+ assert "&" not in result
+ assert "!" not in result
+
+ def test_none_subject(self):
+ result = generate_email_filename("2026-02-05-1136-Jonathan", None)
+ assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject"
+
+ def test_empty_subject(self):
+ result = generate_email_filename("2026-02-05-1136-Jonathan", "")
+ assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject"
+
+ def test_very_long_subject(self):
+ long_subject = "A" * 100 + " " + "B" * 100
+ result = generate_email_filename("2026-02-05-1136-Jonathan", long_subject)
+ # The cleaned subject part should be truncated
+ # basename (24) + "-EMAIL-" (7) + subject
+ # Subject itself is limited to 80 chars by _clean_for_filename
+ subject_part = result.split("-EMAIL-")[1]
+ assert len(subject_part) <= 80
+
+
+# --- generate_attachment_filename ---
+
+class TestGenerateAttachmentFilename:
+ def test_standard_attachment(self):
+ result = generate_attachment_filename(
+ "2026-02-05-1136-Jonathan",
+ "Ltr Carrollton.pdf"
+ )
+ assert result == "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf"
+
+ def test_filename_with_spaces_and_parens(self):
+ result = generate_attachment_filename(
+ "2026-02-05-1136-Jonathan",
+ "Document (final copy).pdf"
+ )
+ assert " " not in result
+ assert "(" not in result
+ assert ")" not in result
+ assert result.endswith(".pdf")
+
+ def test_preserves_extension(self):
+ result = generate_attachment_filename(
+ "2026-02-05-1136-Jonathan",
+ "photo.jpg"
+ )
+ assert result.endswith(".jpg")
+
+ def test_none_filename(self):
+ result = generate_attachment_filename("2026-02-05-1136-Jonathan", None)
+ assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed"
+
+ def test_empty_filename(self):
+ result = generate_attachment_filename("2026-02-05-1136-Jonathan", "")
+ assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed"
diff --git a/docs/scripts/tests/test_integration_stdout.py b/docs/scripts/tests/test_integration_stdout.py
new file mode 100644
index 0000000..d87478e
--- /dev/null
+++ b/docs/scripts/tests/test_integration_stdout.py
@@ -0,0 +1,68 @@
+"""Integration tests for backwards-compatible stdout mode (no --output-dir)."""
+
+import os
+import shutil
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "eml_script",
+ os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
+)
+eml_script = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(eml_script)
+
+print_email = eml_script.print_email
+
+FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
+
+
+class TestPlainTextStdout:
+ def test_metadata_and_body_printed(self, tmp_path, capsys):
+ eml_src = os.path.join(FIXTURES, 'plain-text.eml')
+ working_eml = tmp_path / "message.eml"
+ shutil.copy2(eml_src, working_eml)
+
+ print_email(str(working_eml))
+ captured = capsys.readouterr()
+
+ assert "From: Jonathan Smith <jsmith@example.com>" in captured.out
+ assert "To: Craig Jennings <craig@example.com>" in captured.out
+ assert "Subject: Re: Fw: 4319 Danneel Street" in captured.out
+ assert "Date:" in captured.out
+ assert "Sent:" in captured.out
+ assert "Received:" in captured.out
+ assert "4319 Danneel Street" in captured.out
+
+
+class TestHtmlFallbackStdout:
+ def test_html_converted_on_stdout(self, tmp_path, capsys):
+ eml_src = os.path.join(FIXTURES, 'html-only.eml')
+ working_eml = tmp_path / "message.eml"
+ shutil.copy2(eml_src, working_eml)
+
+ print_email(str(working_eml))
+ captured = capsys.readouterr()
+
+ # Should see converted text, not raw HTML
+ assert "HTML" in captured.out
+ assert "<p>" not in captured.out
+
+
+class TestAttachmentsStdout:
+ def test_attachment_extracted_alongside_eml(self, tmp_path, capsys):
+ eml_src = os.path.join(FIXTURES, 'with-attachment.eml')
+ working_eml = tmp_path / "message.eml"
+ shutil.copy2(eml_src, working_eml)
+
+ print_email(str(working_eml))
+ captured = capsys.readouterr()
+
+ assert "Extracted attachment:" in captured.out
+ assert "Ltr Carrollton.pdf" in captured.out
+
+ # File should exist alongside the EML
+ extracted = tmp_path / "Ltr Carrollton.pdf"
+ assert extracted.exists()
diff --git a/docs/scripts/tests/test_parse_received_headers.py b/docs/scripts/tests/test_parse_received_headers.py
new file mode 100644
index 0000000..e12e1fb
--- /dev/null
+++ b/docs/scripts/tests/test_parse_received_headers.py
@@ -0,0 +1,105 @@
+"""Tests for parse_received_headers()."""
+
+import email
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from conftest import make_plain_message, add_received_headers
+from email.message import EmailMessage
+
+# Import the function under test
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "eml_script",
+ os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
+)
+eml_script = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(eml_script)
+
+parse_received_headers = eml_script.parse_received_headers
+
+
+class TestSingleHeader:
+ def test_header_with_from_and_by(self):
+ msg = EmailMessage()
+ msg['Received'] = (
+ 'from mail-sender.example.com by mx.receiver.example.com '
+ 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
+ )
+ result = parse_received_headers(msg)
+ assert result['sent_server'] == 'mail-sender.example.com'
+ assert result['received_server'] == 'mx.receiver.example.com'
+ assert result['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
+ assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
+
+
+class TestMultipleHeaders:
+ def test_uses_first_with_both_from_and_by(self):
+ msg = EmailMessage()
+ # Most recent first (by only)
+ msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'
+ # Next: has both from and by — this should be selected
+ msg['Received'] = (
+ 'from mail-sender.example.com by mx.receiver.example.com '
+ 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
+ )
+ # Oldest
+ msg['Received'] = (
+ 'from originator.example.com by relay.example.com '
+ 'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600'
+ )
+ result = parse_received_headers(msg)
+ assert result['sent_server'] == 'mail-sender.example.com'
+ assert result['received_server'] == 'mx.receiver.example.com'
+
+
+class TestNoReceivedHeaders:
+ def test_all_values_none(self):
+ msg = EmailMessage()
+ result = parse_received_headers(msg)
+ assert result['sent_time'] is None
+ assert result['sent_server'] is None
+ assert result['received_time'] is None
+ assert result['received_server'] is None
+
+
+class TestByButNoFrom:
+ def test_falls_back_to_first_header(self):
+ msg = EmailMessage()
+ msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'
+ result = parse_received_headers(msg)
+ assert result['received_server'] == 'internal.example.com'
+ assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:10 -0600'
+ # No from in any header, so sent_server stays None
+ assert result['sent_server'] is None
+
+
+class TestMultilineFoldedHeader:
+ def test_normalizes_whitespace(self):
+ # Use email.message_from_string to parse raw folded headers
+ # (EmailMessage policy rejects embedded CRLF in set values)
+ raw = (
+ "From: test@example.com\r\n"
+ "Received: from mail-sender.example.com\r\n"
+ " by mx.receiver.example.com\r\n"
+ " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n"
+ "\r\n"
+ "body\r\n"
+ )
+ msg = email.message_from_string(raw)
+ result = parse_received_headers(msg)
+ assert result['sent_server'] == 'mail-sender.example.com'
+ assert result['received_server'] == 'mx.receiver.example.com'
+
+
+class TestMalformedTimestamp:
+ def test_no_semicolon(self):
+ msg = EmailMessage()
+ msg['Received'] = 'from sender.example.com by receiver.example.com with SMTP'
+ result = parse_received_headers(msg)
+ assert result['sent_server'] == 'sender.example.com'
+ assert result['received_server'] == 'receiver.example.com'
+ assert result['sent_time'] is None
+ assert result['received_time'] is None
diff --git a/docs/scripts/tests/test_process_eml.py b/docs/scripts/tests/test_process_eml.py
new file mode 100644
index 0000000..26c5ad5
--- /dev/null
+++ b/docs/scripts/tests/test_process_eml.py
@@ -0,0 +1,129 @@
+"""Integration tests for process_eml() — full pipeline with --output-dir."""
+
+import os
+import shutil
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "eml_script",
+ os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
+)
+eml_script = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(eml_script)
+
+process_eml = eml_script.process_eml
+
+import pytest
+
+
+FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
+
+
+class TestPlainTextPipeline:
+ def test_creates_eml_and_txt(self, tmp_path):
+ eml_src = os.path.join(FIXTURES, 'plain-text.eml')
+ # Copy fixture to tmp_path so temp dir can be created as sibling
+ working_eml = tmp_path / "inbox" / "message.eml"
+ working_eml.parent.mkdir()
+ shutil.copy2(eml_src, working_eml)
+
+ output_dir = tmp_path / "output"
+ result = process_eml(str(working_eml), str(output_dir))
+
+ # Should have exactly 2 files: .eml and .txt
+ assert len(result['files']) == 2
+ eml_file = result['files'][0]
+ txt_file = result['files'][1]
+
+ assert eml_file['type'] == 'eml'
+ assert txt_file['type'] == 'txt'
+ assert eml_file['name'].endswith('.eml')
+ assert txt_file['name'].endswith('.txt')
+
+ # Files exist in output dir
+ assert os.path.isfile(eml_file['path'])
+ assert os.path.isfile(txt_file['path'])
+
+ # Filenames contain expected components
+ assert 'Jonathan' in eml_file['name']
+ assert 'EMAIL' in eml_file['name']
+ assert '2026-02-05' in eml_file['name']
+
+ # Temp dir cleaned up (no extract-* dirs in inbox)
+ inbox_contents = os.listdir(str(tmp_path / "inbox"))
+ assert not any(d.startswith('extract-') for d in inbox_contents)
+
+
+class TestHtmlFallbackPipeline:
+ def test_txt_contains_converted_html(self, tmp_path):
+ eml_src = os.path.join(FIXTURES, 'html-only.eml')
+ working_eml = tmp_path / "inbox" / "message.eml"
+ working_eml.parent.mkdir()
+ shutil.copy2(eml_src, working_eml)
+
+ output_dir = tmp_path / "output"
+ result = process_eml(str(working_eml), str(output_dir))
+
+ txt_file = result['files'][1]
+ with open(txt_file['path'], 'r') as f:
+ content = f.read()
+
+ # Should be converted, not raw HTML
+ assert '<p>' not in content
+ assert '<strong>' not in content
+ assert 'HTML' in content
+
+
+class TestAttachmentPipeline:
+ def test_eml_txt_and_attachment_created(self, tmp_path):
+ eml_src = os.path.join(FIXTURES, 'with-attachment.eml')
+ working_eml = tmp_path / "inbox" / "message.eml"
+ working_eml.parent.mkdir()
+ shutil.copy2(eml_src, working_eml)
+
+ output_dir = tmp_path / "output"
+ result = process_eml(str(working_eml), str(output_dir))
+
+ assert len(result['files']) == 3
+ types = [f['type'] for f in result['files']]
+ assert types == ['eml', 'txt', 'attach']
+
+ # Attachment is auto-renamed
+ attach_file = result['files'][2]
+ assert 'ATTACH' in attach_file['name']
+ assert attach_file['name'].endswith('.pdf')
+ assert os.path.isfile(attach_file['path'])
+
+
+class TestCollisionDetection:
+ def test_raises_on_existing_file(self, tmp_path):
+ eml_src = os.path.join(FIXTURES, 'plain-text.eml')
+ working_eml = tmp_path / "inbox" / "message.eml"
+ working_eml.parent.mkdir()
+ shutil.copy2(eml_src, working_eml)
+
+ output_dir = tmp_path / "output"
+ # Run once to create files
+ result = process_eml(str(working_eml), str(output_dir))
+
+ # Run again — should raise FileExistsError
+ with pytest.raises(FileExistsError, match="Collision"):
+ process_eml(str(working_eml), str(output_dir))
+
+
+class TestMissingOutputDir:
+ def test_creates_directory(self, tmp_path):
+ eml_src = os.path.join(FIXTURES, 'plain-text.eml')
+ working_eml = tmp_path / "inbox" / "message.eml"
+ working_eml.parent.mkdir()
+ shutil.copy2(eml_src, working_eml)
+
+ output_dir = tmp_path / "new" / "nested" / "output"
+ assert not output_dir.exists()
+
+ result = process_eml(str(working_eml), str(output_dir))
+ assert output_dir.exists()
+ assert len(result['files']) == 2
diff --git a/docs/scripts/tests/test_save_attachments.py b/docs/scripts/tests/test_save_attachments.py
new file mode 100644
index 0000000..32f02a6
--- /dev/null
+++ b/docs/scripts/tests/test_save_attachments.py
@@ -0,0 +1,97 @@
+"""Tests for save_attachments()."""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from conftest import make_plain_message, make_message_with_attachment
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.mime.application import MIMEApplication
+
+import importlib.util
+spec = importlib.util.spec_from_file_location(
+ "eml_script",
+ os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
+)
+eml_script = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(eml_script)
+
+save_attachments = eml_script.save_attachments
+
+
+class TestSingleAttachment:
+ def test_file_written_and_returned(self, tmp_path):
+ msg = make_message_with_attachment(
+ attachment_filename="report.pdf",
+ attachment_content=b"pdf bytes here"
+ )
+ result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
+
+ assert len(result) == 1
+ assert result[0]['original_name'] == "report.pdf"
+ assert "ATTACH" in result[0]['renamed_name']
+ assert result[0]['renamed_name'].endswith(".pdf")
+
+ # File actually exists and has correct content
+ written_path = result[0]['path']
+ assert os.path.isfile(written_path)
+ with open(written_path, 'rb') as f:
+ assert f.read() == b"pdf bytes here"
+
+
+class TestMultipleAttachments:
+ def test_all_written_and_returned(self, tmp_path):
+ msg = MIMEMultipart()
+ msg['From'] = 'test@example.com'
+ msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
+ msg.attach(MIMEText("body", 'plain'))
+
+ for name, content in [("doc1.pdf", b"pdf1"), ("image.png", b"png1")]:
+ att = MIMEApplication(content, Name=name)
+ att['Content-Disposition'] = f'attachment; filename="{name}"'
+ msg.attach(att)
+
+ result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
+
+ assert len(result) == 2
+ for r in result:
+ assert os.path.isfile(r['path'])
+
+
+class TestNoAttachments:
+ def test_empty_list(self, tmp_path):
+ msg = make_plain_message()
+ result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
+ assert result == []
+
+
+class TestFilenameWithSpaces:
+ def test_cleaned_filename(self, tmp_path):
+ msg = make_message_with_attachment(
+ attachment_filename="My Document (1).pdf",
+ attachment_content=b"data"
+ )
+ result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
+
+ assert len(result) == 1
+ assert " " not in result[0]['renamed_name']
+ assert os.path.isfile(result[0]['path'])
+
+
+class TestNoContentDisposition:
+ def test_skipped(self, tmp_path):
+ msg = MIMEMultipart()
+ msg['From'] = 'test@example.com'
+ msg.attach(MIMEText("body", 'plain'))
+
+ # Add a part without Content-Disposition
+ part = MIMEApplication(b"data", Name="file.bin")
+ # Explicitly remove Content-Disposition if present
+ if 'Content-Disposition' in part:
+ del part['Content-Disposition']
+ msg.attach(part)
+
+ result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
+ assert result == []