about | summary | refs | log | tree | commit | diff
path: root/docs/scripts/tests
diff options
context:
space:
mode:
author: Craig Jennings <c@cjennings.net> 2026-02-22 23:20:56 -0600
committer: Craig Jennings <c@cjennings.net> 2026-02-22 23:20:56 -0600
commit: 3a2445080c880544985f50fb0d916534698cc073 (patch)
tree: 909f98edbbb940aafb95de02457d4d6f7db3cba4 /docs/scripts/tests
parent: 3595aa8a8122da543676717fb5825044eee99a9d (diff)
download: archangel-3a2445080c880544985f50fb0d916534698cc073.tar.gz
download: archangel-3a2445080c880544985f50fb0d916534698cc073.zip
chore: add docs/ to .gitignore and untrack personal files
docs/ contains session history, personal workflows, and private protocols that shouldn't be in a public repository.
Diffstat (limited to 'docs/scripts/tests')
-rw-r--r-- docs/scripts/tests/conftest.py 77
-rw-r--r-- docs/scripts/tests/fixtures/empty-body.eml 16
-rw-r--r-- docs/scripts/tests/fixtures/html-only.eml 20
-rw-r--r-- docs/scripts/tests/fixtures/multiple-received-headers.eml 12
-rw-r--r-- docs/scripts/tests/fixtures/no-received-headers.eml 9
-rw-r--r-- docs/scripts/tests/fixtures/plain-text.eml 15
-rw-r--r-- docs/scripts/tests/fixtures/with-attachment.eml 27
-rw-r--r-- docs/scripts/tests/test_extract_body.py 96
-rw-r--r-- docs/scripts/tests/test_extract_metadata.py 65
-rw-r--r-- docs/scripts/tests/test_generate_filenames.py 157
-rw-r--r-- docs/scripts/tests/test_integration_stdout.py 68
-rw-r--r-- docs/scripts/tests/test_parse_received_headers.py 105
-rw-r--r-- docs/scripts/tests/test_process_eml.py 129
-rw-r--r-- docs/scripts/tests/test_save_attachments.py 97
14 files changed, 0 insertions, 893 deletions
diff --git a/docs/scripts/tests/conftest.py b/docs/scripts/tests/conftest.py
deleted file mode 100644
index 8d965ab..0000000
--- a/docs/scripts/tests/conftest.py
+++ /dev/null
@@ -1,77 +0,0 @@
-"""Shared fixtures for EML extraction tests."""
-
-import os
-from email.message import EmailMessage
-from email.mime.application import MIMEApplication
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-
-import pytest
-
-
-@pytest.fixture
-def fixtures_dir():
- """Return path to the fixtures/ directory."""
- return os.path.join(os.path.dirname(__file__), 'fixtures')
-
-
-def make_plain_message(body="Test body", from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Wed, 05 Feb 2026 11:36:00 -0600"):
- """Create an EmailMessage with text/plain body."""
- msg = EmailMessage()
- msg['From'] = from_
- msg['To'] = to
- msg['Subject'] = subject
- msg['Date'] = date
- msg.set_content(body)
- return msg
-
-
-def make_html_message(html_body="<p>Test body</p>",
- from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Wed, 05 Feb 2026 11:36:00 -0600"):
- """Create an EmailMessage with text/html body only."""
- msg = EmailMessage()
- msg['From'] = from_
- msg['To'] = to
- msg['Subject'] = subject
- msg['Date'] = date
- msg.set_content(html_body, subtype='html')
- return msg
-
-
-def make_message_with_attachment(body="Test body",
- from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Wed, 05 Feb 2026 11:36:00 -0600",
- attachment_filename="document.pdf",
- attachment_content=b"fake pdf content"):
- """Create a multipart message with a text body and one attachment."""
- msg = MIMEMultipart()
- msg['From'] = from_
- msg['To'] = to
- msg['Subject'] = subject
- msg['Date'] = date
-
- msg.attach(MIMEText(body, 'plain'))
-
- att = MIMEApplication(attachment_content, Name=attachment_filename)
- att['Content-Disposition'] = f'attachment; filename="{attachment_filename}"'
- msg.attach(att)
-
- return msg
-
-
-def add_received_headers(msg, headers):
- """Add Received headers to an existing message.
-
- headers: list of header strings, added in order (first = most recent).
- """
- for header in headers:
- msg['Received'] = header
- return msg
diff --git a/docs/scripts/tests/fixtures/empty-body.eml b/docs/scripts/tests/fixtures/empty-body.eml
deleted file mode 100644
index cf008df..0000000
--- a/docs/scripts/tests/fixtures/empty-body.eml
+++ /dev/null
@@ -1,16 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Empty Body Test
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: multipart/mixed; boundary="boundary456"
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
---boundary456
-Content-Type: application/octet-stream; name="data.bin"
-Content-Disposition: attachment; filename="data.bin"
-Content-Transfer-Encoding: base64
-
-AQIDBA==
-
---boundary456--
diff --git a/docs/scripts/tests/fixtures/html-only.eml b/docs/scripts/tests/fixtures/html-only.eml
deleted file mode 100644
index 4db7645..0000000
--- a/docs/scripts/tests/fixtures/html-only.eml
+++ /dev/null
@@ -1,20 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: HTML Update
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/html; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
-<html>
-<body>
-<p>Hi Craig,</p>
-<p>Here is the <strong>HTML</strong> update.</p>
-<ul>
-<li>Item one</li>
-<li>Item two</li>
-</ul>
-<p>Best,<br>Jonathan</p>
-</body>
-</html>
diff --git a/docs/scripts/tests/fixtures/multiple-received-headers.eml b/docs/scripts/tests/fixtures/multiple-received-headers.eml
deleted file mode 100644
index 1b8d6a7..0000000
--- a/docs/scripts/tests/fixtures/multiple-received-headers.eml
+++ /dev/null
@@ -1,12 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Multiple Received Headers Test
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-Received: by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-Received: from originator.example.com by relay.example.com with SMTP; Thu, 05 Feb 2026 11:35:58 -0600
-
-Test body with multiple received headers.
diff --git a/docs/scripts/tests/fixtures/no-received-headers.eml b/docs/scripts/tests/fixtures/no-received-headers.eml
deleted file mode 100644
index 8a05dc7..0000000
--- a/docs/scripts/tests/fixtures/no-received-headers.eml
+++ /dev/null
@@ -1,9 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: No Received Headers
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-
-Test body with no received headers at all.
diff --git a/docs/scripts/tests/fixtures/plain-text.eml b/docs/scripts/tests/fixtures/plain-text.eml
deleted file mode 100644
index 8cc9d9c..0000000
--- a/docs/scripts/tests/fixtures/plain-text.eml
+++ /dev/null
@@ -1,15 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Re: Fw: 4319 Danneel Street
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
-Hi Craig,
-
-Here is the update on 4319 Danneel Street.
-
-Best,
-Jonathan
diff --git a/docs/scripts/tests/fixtures/with-attachment.eml b/docs/scripts/tests/fixtures/with-attachment.eml
deleted file mode 100644
index ac49c5d..0000000
--- a/docs/scripts/tests/fixtures/with-attachment.eml
+++ /dev/null
@@ -1,27 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Ltr from Carrollton
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: multipart/mixed; boundary="boundary123"
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
---boundary123
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-
-Hi Craig,
-
-Please find the letter attached.
-
-Best,
-Jonathan
-
---boundary123
-Content-Type: application/octet-stream; name="Ltr Carrollton.pdf"
-Content-Disposition: attachment; filename="Ltr Carrollton.pdf"
-Content-Transfer-Encoding: base64
-
-ZmFrZSBwZGYgY29udGVudA==
-
---boundary123--
diff --git a/docs/scripts/tests/test_extract_body.py b/docs/scripts/tests/test_extract_body.py
deleted file mode 100644
index 7b53cda..0000000
--- a/docs/scripts/tests/test_extract_body.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""Tests for extract_body()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, make_html_message, make_message_with_attachment
-from email.message import EmailMessage
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-from email.mime.application import MIMEApplication
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-extract_body = eml_script.extract_body
-
-
-class TestPlainText:
- def test_returns_plain_text(self):
- msg = make_plain_message(body="Hello, this is plain text.")
- result = extract_body(msg)
- assert "Hello, this is plain text." in result
-
-
-class TestHtmlOnly:
- def test_returns_converted_html(self):
- msg = make_html_message(html_body="<p>Hello <strong>world</strong></p>")
- result = extract_body(msg)
- assert "Hello" in result
- assert "world" in result
- # Should not contain raw HTML tags
- assert "<p>" not in result
- assert "<strong>" not in result
-
-
-class TestBothPlainAndHtml:
- def test_prefers_plain_text(self):
- msg = MIMEMultipart('alternative')
- msg['From'] = 'test@example.com'
- msg['To'] = 'dest@example.com'
- msg['Subject'] = 'Test'
- msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
- msg.attach(MIMEText("Plain text version", 'plain'))
- msg.attach(MIMEText("<p>HTML version</p>", 'html'))
- result = extract_body(msg)
- assert "Plain text version" in result
- assert "HTML version" not in result
-
-
-class TestEmptyBody:
- def test_returns_empty_string(self):
- # Multipart with only attachments, no text parts
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- att = MIMEApplication(b"binary data", Name="file.bin")
- att['Content-Disposition'] = 'attachment; filename="file.bin"'
- msg.attach(att)
- result = extract_body(msg)
- assert result == ""
-
-
-class TestNonUtf8Encoding:
- def test_decodes_with_errors_ignore(self):
- msg = EmailMessage()
- msg['From'] = 'test@example.com'
- # Set raw bytes that include invalid UTF-8
- msg.set_content("Valid text with special: café")
- result = extract_body(msg)
- assert "Valid text" in result
-
-
-class TestHtmlWithStructure:
- def test_preserves_list_structure(self):
- html = "<ul><li>Item one</li><li>Item two</li></ul>"
- msg = make_html_message(html_body=html)
- result = extract_body(msg)
- assert "Item one" in result
- assert "Item two" in result
-
-
-class TestNoTextParts:
- def test_returns_empty_string(self):
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- att = MIMEApplication(b"data", Name="image.png")
- att['Content-Disposition'] = 'attachment; filename="image.png"'
- msg.attach(att)
- result = extract_body(msg)
- assert result == ""
diff --git a/docs/scripts/tests/test_extract_metadata.py b/docs/scripts/tests/test_extract_metadata.py
deleted file mode 100644
index d5ee52e..0000000
--- a/docs/scripts/tests/test_extract_metadata.py
+++ /dev/null
@@ -1,65 +0,0 @@
-"""Tests for extract_metadata()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, add_received_headers
-from email.message import EmailMessage
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-extract_metadata = eml_script.extract_metadata
-
-
-class TestAllHeadersPresent:
- def test_complete_dict(self):
- msg = make_plain_message(
- from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Thu, 05 Feb 2026 11:36:00 -0600"
- )
- result = extract_metadata(msg)
- assert result['from'] == "Jonathan Smith <jsmith@example.com>"
- assert result['to'] == "Craig <craig@example.com>"
- assert result['subject'] == "Test Subject"
- assert result['date'] == "Thu, 05 Feb 2026 11:36:00 -0600"
- assert 'timing' in result
-
-
-class TestMissingFrom:
- def test_from_is_none(self):
- msg = EmailMessage()
- msg['To'] = 'craig@example.com'
- msg['Subject'] = 'Test'
- msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
- msg.set_content("body")
- result = extract_metadata(msg)
- assert result['from'] is None
-
-
-class TestMissingDate:
- def test_date_is_none(self):
- msg = EmailMessage()
- msg['From'] = 'test@example.com'
- msg['To'] = 'craig@example.com'
- msg['Subject'] = 'Test'
- msg.set_content("body")
- result = extract_metadata(msg)
- assert result['date'] is None
-
-
-class TestLongSubject:
- def test_full_subject_returned(self):
- long_subject = "Re: Fw: This is a very long subject line that spans many words and might be folded"
- msg = make_plain_message(subject=long_subject)
- result = extract_metadata(msg)
- assert result['subject'] == long_subject
diff --git a/docs/scripts/tests/test_generate_filenames.py b/docs/scripts/tests/test_generate_filenames.py
deleted file mode 100644
index 07c8f84..0000000
--- a/docs/scripts/tests/test_generate_filenames.py
+++ /dev/null
@@ -1,157 +0,0 @@
-"""Tests for generate_basename(), generate_email_filename(), generate_attachment_filename()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-generate_basename = eml_script.generate_basename
-generate_email_filename = eml_script.generate_email_filename
-generate_attachment_filename = eml_script.generate_attachment_filename
-
-
-# --- generate_basename ---
-
-class TestGenerateBasename:
- def test_standard_from_and_date(self):
- metadata = {
- 'from': 'Jonathan Smith <jsmith@example.com>',
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- assert generate_basename(metadata) == "2026-02-05-1136-Jonathan"
-
- def test_from_with_display_name_first_token(self):
- metadata = {
- 'from': 'C Ciarm <cciarm@example.com>',
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- result = generate_basename(metadata)
- assert result == "2026-02-05-1136-C"
-
- def test_from_without_display_name(self):
- metadata = {
- 'from': 'jsmith@example.com',
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- result = generate_basename(metadata)
- assert result == "2026-02-05-1136-jsmith"
-
- def test_missing_date(self):
- metadata = {
- 'from': 'Jonathan Smith <jsmith@example.com>',
- 'date': None,
- }
- result = generate_basename(metadata)
- assert result == "unknown-Jonathan"
-
- def test_missing_from(self):
- metadata = {
- 'from': None,
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- result = generate_basename(metadata)
- assert result == "2026-02-05-1136-unknown"
-
- def test_both_missing(self):
- metadata = {'from': None, 'date': None}
- result = generate_basename(metadata)
- assert result == "unknown-unknown"
-
- def test_unparseable_date(self):
- metadata = {
- 'from': 'Jonathan <j@example.com>',
- 'date': 'not a real date',
- }
- result = generate_basename(metadata)
- assert result == "unknown-Jonathan"
-
- def test_none_date_no_crash(self):
- metadata = {'from': 'Test <t@e.com>', 'date': None}
- # Should not raise
- result = generate_basename(metadata)
- assert "unknown" in result
-
-
-# --- generate_email_filename ---
-
-class TestGenerateEmailFilename:
- def test_standard_subject(self):
- result = generate_email_filename(
- "2026-02-05-1136-Jonathan",
- "Re: Fw: 4319 Danneel Street"
- )
- assert result == "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street"
-
- def test_subject_with_special_chars(self):
- result = generate_email_filename(
- "2026-02-05-1136-Jonathan",
- "Update: Meeting (draft) & notes!"
- )
- # Colons, parens, ampersands, exclamation stripped
- assert "EMAIL" in result
- assert ":" not in result
- assert "(" not in result
- assert ")" not in result
- assert "&" not in result
- assert "!" not in result
-
- def test_none_subject(self):
- result = generate_email_filename("2026-02-05-1136-Jonathan", None)
- assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject"
-
- def test_empty_subject(self):
- result = generate_email_filename("2026-02-05-1136-Jonathan", "")
- assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject"
-
- def test_very_long_subject(self):
- long_subject = "A" * 100 + " " + "B" * 100
- result = generate_email_filename("2026-02-05-1136-Jonathan", long_subject)
- # The cleaned subject part should be truncated
- # basename (27) + "-EMAIL-" (7) + subject
- # Subject itself is limited to 80 chars by _clean_for_filename
- subject_part = result.split("-EMAIL-")[1]
- assert len(subject_part) <= 80
-
-
-# --- generate_attachment_filename ---
-
-class TestGenerateAttachmentFilename:
- def test_standard_attachment(self):
- result = generate_attachment_filename(
- "2026-02-05-1136-Jonathan",
- "Ltr Carrollton.pdf"
- )
- assert result == "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf"
-
- def test_filename_with_spaces_and_parens(self):
- result = generate_attachment_filename(
- "2026-02-05-1136-Jonathan",
- "Document (final copy).pdf"
- )
- assert " " not in result
- assert "(" not in result
- assert ")" not in result
- assert result.endswith(".pdf")
-
- def test_preserves_extension(self):
- result = generate_attachment_filename(
- "2026-02-05-1136-Jonathan",
- "photo.jpg"
- )
- assert result.endswith(".jpg")
-
- def test_none_filename(self):
- result = generate_attachment_filename("2026-02-05-1136-Jonathan", None)
- assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed"
-
- def test_empty_filename(self):
- result = generate_attachment_filename("2026-02-05-1136-Jonathan", "")
- assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed"
diff --git a/docs/scripts/tests/test_integration_stdout.py b/docs/scripts/tests/test_integration_stdout.py
deleted file mode 100644
index d87478e..0000000
--- a/docs/scripts/tests/test_integration_stdout.py
+++ /dev/null
@@ -1,68 +0,0 @@
-"""Integration tests for backwards-compatible stdout mode (no --output-dir)."""
-
-import os
-import shutil
-import sys
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-print_email = eml_script.print_email
-
-FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
-
-
-class TestPlainTextStdout:
- def test_metadata_and_body_printed(self, tmp_path, capsys):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- working_eml = tmp_path / "message.eml"
- shutil.copy2(eml_src, working_eml)
-
- print_email(str(working_eml))
- captured = capsys.readouterr()
-
- assert "From: Jonathan Smith <jsmith@example.com>" in captured.out
- assert "To: Craig Jennings <craig@example.com>" in captured.out
- assert "Subject: Re: Fw: 4319 Danneel Street" in captured.out
- assert "Date:" in captured.out
- assert "Sent:" in captured.out
- assert "Received:" in captured.out
- assert "4319 Danneel Street" in captured.out
-
-
-class TestHtmlFallbackStdout:
- def test_html_converted_on_stdout(self, tmp_path, capsys):
- eml_src = os.path.join(FIXTURES, 'html-only.eml')
- working_eml = tmp_path / "message.eml"
- shutil.copy2(eml_src, working_eml)
-
- print_email(str(working_eml))
- captured = capsys.readouterr()
-
- # Should see converted text, not raw HTML
- assert "HTML" in captured.out
- assert "<p>" not in captured.out
-
-
-class TestAttachmentsStdout:
- def test_attachment_extracted_alongside_eml(self, tmp_path, capsys):
- eml_src = os.path.join(FIXTURES, 'with-attachment.eml')
- working_eml = tmp_path / "message.eml"
- shutil.copy2(eml_src, working_eml)
-
- print_email(str(working_eml))
- captured = capsys.readouterr()
-
- assert "Extracted attachment:" in captured.out
- assert "Ltr Carrollton.pdf" in captured.out
-
- # File should exist alongside the EML
- extracted = tmp_path / "Ltr Carrollton.pdf"
- assert extracted.exists()
diff --git a/docs/scripts/tests/test_parse_received_headers.py b/docs/scripts/tests/test_parse_received_headers.py
deleted file mode 100644
index e12e1fb..0000000
--- a/docs/scripts/tests/test_parse_received_headers.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""Tests for parse_received_headers()."""
-
-import email
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, add_received_headers
-from email.message import EmailMessage
-
-# Import the function under test
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-parse_received_headers = eml_script.parse_received_headers
-
-
-class TestSingleHeader:
- def test_header_with_from_and_by(self):
- msg = EmailMessage()
- msg['Received'] = (
- 'from mail-sender.example.com by mx.receiver.example.com '
- 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
- )
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'mail-sender.example.com'
- assert result['received_server'] == 'mx.receiver.example.com'
- assert result['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
- assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
-
-
-class TestMultipleHeaders:
- def test_uses_first_with_both_from_and_by(self):
- msg = EmailMessage()
- # Most recent first (by only)
- msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'
- # Next: has both from and by — this should be selected
- msg['Received'] = (
- 'from mail-sender.example.com by mx.receiver.example.com '
- 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
- )
- # Oldest
- msg['Received'] = (
- 'from originator.example.com by relay.example.com '
- 'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600'
- )
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'mail-sender.example.com'
- assert result['received_server'] == 'mx.receiver.example.com'
-
-
-class TestNoReceivedHeaders:
- def test_all_values_none(self):
- msg = EmailMessage()
- result = parse_received_headers(msg)
- assert result['sent_time'] is None
- assert result['sent_server'] is None
- assert result['received_time'] is None
- assert result['received_server'] is None
-
-
-class TestByButNoFrom:
- def test_falls_back_to_first_header(self):
- msg = EmailMessage()
- msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'
- result = parse_received_headers(msg)
- assert result['received_server'] == 'internal.example.com'
- assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:10 -0600'
- # No from in any header, so sent_server stays None
- assert result['sent_server'] is None
-
-
-class TestMultilineFoldedHeader:
- def test_normalizes_whitespace(self):
- # Use email.message_from_string to parse raw folded headers
- # (EmailMessage policy rejects embedded CRLF in set values)
- raw = (
- "From: test@example.com\r\n"
- "Received: from mail-sender.example.com\r\n"
- " by mx.receiver.example.com\r\n"
- " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n"
- "\r\n"
- "body\r\n"
- )
- msg = email.message_from_string(raw)
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'mail-sender.example.com'
- assert result['received_server'] == 'mx.receiver.example.com'
-
-
-class TestMalformedTimestamp:
- def test_no_semicolon(self):
- msg = EmailMessage()
- msg['Received'] = 'from sender.example.com by receiver.example.com with SMTP'
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'sender.example.com'
- assert result['received_server'] == 'receiver.example.com'
- assert result['sent_time'] is None
- assert result['received_time'] is None
diff --git a/docs/scripts/tests/test_process_eml.py b/docs/scripts/tests/test_process_eml.py
deleted file mode 100644
index 26c5ad5..0000000
--- a/docs/scripts/tests/test_process_eml.py
+++ /dev/null
@@ -1,129 +0,0 @@
-"""Integration tests for process_eml() — full pipeline with --output-dir."""
-
-import os
-import shutil
-import sys
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-process_eml = eml_script.process_eml
-
-import pytest
-
-
-FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
-
-
-class TestPlainTextPipeline:
- def test_creates_eml_and_txt(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- # Copy fixture to tmp_path so temp dir can be created as sibling
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- result = process_eml(str(working_eml), str(output_dir))
-
- # Should have exactly 2 files: .eml and .txt
- assert len(result['files']) == 2
- eml_file = result['files'][0]
- txt_file = result['files'][1]
-
- assert eml_file['type'] == 'eml'
- assert txt_file['type'] == 'txt'
- assert eml_file['name'].endswith('.eml')
- assert txt_file['name'].endswith('.txt')
-
- # Files exist in output dir
- assert os.path.isfile(eml_file['path'])
- assert os.path.isfile(txt_file['path'])
-
- # Filenames contain expected components
- assert 'Jonathan' in eml_file['name']
- assert 'EMAIL' in eml_file['name']
- assert '2026-02-05' in eml_file['name']
-
- # Temp dir cleaned up (no extract-* dirs in inbox)
- inbox_contents = os.listdir(str(tmp_path / "inbox"))
- assert not any(d.startswith('extract-') for d in inbox_contents)
-
-
-class TestHtmlFallbackPipeline:
- def test_txt_contains_converted_html(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'html-only.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- result = process_eml(str(working_eml), str(output_dir))
-
- txt_file = result['files'][1]
- with open(txt_file['path'], 'r') as f:
- content = f.read()
-
- # Should be converted, not raw HTML
- assert '<p>' not in content
- assert '<strong>' not in content
- assert 'HTML' in content
-
-
-class TestAttachmentPipeline:
- def test_eml_txt_and_attachment_created(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'with-attachment.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- result = process_eml(str(working_eml), str(output_dir))
-
- assert len(result['files']) == 3
- types = [f['type'] for f in result['files']]
- assert types == ['eml', 'txt', 'attach']
-
- # Attachment is auto-renamed
- attach_file = result['files'][2]
- assert 'ATTACH' in attach_file['name']
- assert attach_file['name'].endswith('.pdf')
- assert os.path.isfile(attach_file['path'])
-
-
-class TestCollisionDetection:
- def test_raises_on_existing_file(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- # Run once to create files
- result = process_eml(str(working_eml), str(output_dir))
-
- # Run again — should raise FileExistsError
- with pytest.raises(FileExistsError, match="Collision"):
- process_eml(str(working_eml), str(output_dir))
-
-
-class TestMissingOutputDir:
- def test_creates_directory(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "new" / "nested" / "output"
- assert not output_dir.exists()
-
- result = process_eml(str(working_eml), str(output_dir))
- assert output_dir.exists()
- assert len(result['files']) == 2
diff --git a/docs/scripts/tests/test_save_attachments.py b/docs/scripts/tests/test_save_attachments.py
deleted file mode 100644
index 32f02a6..0000000
--- a/docs/scripts/tests/test_save_attachments.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""Tests for save_attachments()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, make_message_with_attachment
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-from email.mime.application import MIMEApplication
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-save_attachments = eml_script.save_attachments
-
-
-class TestSingleAttachment:
- def test_file_written_and_returned(self, tmp_path):
- msg = make_message_with_attachment(
- attachment_filename="report.pdf",
- attachment_content=b"pdf bytes here"
- )
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
-
- assert len(result) == 1
- assert result[0]['original_name'] == "report.pdf"
- assert "ATTACH" in result[0]['renamed_name']
- assert result[0]['renamed_name'].endswith(".pdf")
-
- # File actually exists and has correct content
- written_path = result[0]['path']
- assert os.path.isfile(written_path)
- with open(written_path, 'rb') as f:
- assert f.read() == b"pdf bytes here"
-
-
-class TestMultipleAttachments:
- def test_all_written_and_returned(self, tmp_path):
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
- msg.attach(MIMEText("body", 'plain'))
-
- for name, content in [("doc1.pdf", b"pdf1"), ("image.png", b"png1")]:
- att = MIMEApplication(content, Name=name)
- att['Content-Disposition'] = f'attachment; filename="{name}"'
- msg.attach(att)
-
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
-
- assert len(result) == 2
- for r in result:
- assert os.path.isfile(r['path'])
-
-
-class TestNoAttachments:
- def test_empty_list(self, tmp_path):
- msg = make_plain_message()
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
- assert result == []
-
-
-class TestFilenameWithSpaces:
- def test_cleaned_filename(self, tmp_path):
- msg = make_message_with_attachment(
- attachment_filename="My Document (1).pdf",
- attachment_content=b"data"
- )
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
-
- assert len(result) == 1
- assert " " not in result[0]['renamed_name']
- assert os.path.isfile(result[0]['path'])
-
-
-class TestNoContentDisposition:
- def test_skipped(self, tmp_path):
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- msg.attach(MIMEText("body", 'plain'))
-
- # Add a part without Content-Disposition
- part = MIMEApplication(b"data", Name="file.bin")
- # Explicitly remove Content-Disposition if present
- if 'Content-Disposition' in part:
- del part['Content-Disposition']
- msg.attach(part)
-
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
- assert result == []