diff options
| author | Craig Jennings <c@cjennings.net> | 2026-02-07 21:41:19 -0600 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-02-07 21:41:19 -0600 |
| commit | 6787fb160c80118b80df37c01cb69dcfbebfff17 (patch) | |
| tree | e5b43c8c62e027b7cabffa31b43238027ec284d0 /docs/scripts/tests | |
| parent | cad8146f1bfe6224ad476f33e3087b2e2074c717 (diff) | |
| download | archangel-6787fb160c80118b80df37c01cb69dcfbebfff17.tar.gz archangel-6787fb160c80118b80df37c01cb69dcfbebfff17.zip | |
docs: sync templates, rename workflows and notes.org
Sync from templates. Rename NOTES.org to notes.org,
session-wrap-up to wrap-it-up, retrospective-workflow to
retrospective, session-start to startup. Update all references.
Diffstat (limited to 'docs/scripts/tests')
| -rw-r--r-- | docs/scripts/tests/conftest.py | 77 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/empty-body.eml | 16 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/html-only.eml | 20 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/multiple-received-headers.eml | 12 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/no-received-headers.eml | 9 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/plain-text.eml | 15 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/with-attachment.eml | 27 | ||||
| -rw-r--r-- | docs/scripts/tests/test_extract_body.py | 96 | ||||
| -rw-r--r-- | docs/scripts/tests/test_extract_metadata.py | 65 | ||||
| -rw-r--r-- | docs/scripts/tests/test_generate_filenames.py | 157 | ||||
| -rw-r--r-- | docs/scripts/tests/test_integration_stdout.py | 68 | ||||
| -rw-r--r-- | docs/scripts/tests/test_parse_received_headers.py | 105 | ||||
| -rw-r--r-- | docs/scripts/tests/test_process_eml.py | 129 | ||||
| -rw-r--r-- | docs/scripts/tests/test_save_attachments.py | 97 |
14 files changed, 893 insertions, 0 deletions
diff --git a/docs/scripts/tests/conftest.py b/docs/scripts/tests/conftest.py new file mode 100644 index 0000000..8d965ab --- /dev/null +++ b/docs/scripts/tests/conftest.py @@ -0,0 +1,77 @@ +"""Shared fixtures for EML extraction tests.""" + +import os +from email.message import EmailMessage +from email.mime.application import MIMEApplication +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +import pytest + + +@pytest.fixture +def fixtures_dir(): + """Return path to the fixtures/ directory.""" + return os.path.join(os.path.dirname(__file__), 'fixtures') + + +def make_plain_message(body="Test body", from_="Jonathan Smith <jsmith@example.com>", + to="Craig <craig@example.com>", + subject="Test Subject", + date="Wed, 05 Feb 2026 11:36:00 -0600"): + """Create an EmailMessage with text/plain body.""" + msg = EmailMessage() + msg['From'] = from_ + msg['To'] = to + msg['Subject'] = subject + msg['Date'] = date + msg.set_content(body) + return msg + + +def make_html_message(html_body="<p>Test body</p>", + from_="Jonathan Smith <jsmith@example.com>", + to="Craig <craig@example.com>", + subject="Test Subject", + date="Wed, 05 Feb 2026 11:36:00 -0600"): + """Create an EmailMessage with text/html body only.""" + msg = EmailMessage() + msg['From'] = from_ + msg['To'] = to + msg['Subject'] = subject + msg['Date'] = date + msg.set_content(html_body, subtype='html') + return msg + + +def make_message_with_attachment(body="Test body", + from_="Jonathan Smith <jsmith@example.com>", + to="Craig <craig@example.com>", + subject="Test Subject", + date="Wed, 05 Feb 2026 11:36:00 -0600", + attachment_filename="document.pdf", + attachment_content=b"fake pdf content"): + """Create a multipart message with a text body and one attachment.""" + msg = MIMEMultipart() + msg['From'] = from_ + msg['To'] = to + msg['Subject'] = subject + msg['Date'] = date + + msg.attach(MIMEText(body, 'plain')) + + att = MIMEApplication(attachment_content, Name=attachment_filename) + att['Content-Disposition'] = f'attachment; filename="{attachment_filename}"' + msg.attach(att) + + return msg + + +def add_received_headers(msg, headers): + """Add Received headers to an existing message. + + headers: list of header strings, added in order (first = most recent). + """ + for header in headers: + msg['Received'] = header + return msg diff --git a/docs/scripts/tests/fixtures/empty-body.eml b/docs/scripts/tests/fixtures/empty-body.eml new file mode 100644 index 0000000..cf008df --- /dev/null +++ b/docs/scripts/tests/fixtures/empty-body.eml @@ -0,0 +1,16 @@ +From: Jonathan Smith <jsmith@example.com> +To: Craig Jennings <craig@example.com> +Subject: Empty Body Test +Date: Thu, 05 Feb 2026 11:36:00 -0600 +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="boundary456" +Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 + +--boundary456 +Content-Type: application/octet-stream; name="data.bin" +Content-Disposition: attachment; filename="data.bin" +Content-Transfer-Encoding: base64 + +AQIDBA== + +--boundary456-- diff --git a/docs/scripts/tests/fixtures/html-only.eml b/docs/scripts/tests/fixtures/html-only.eml new file mode 100644 index 0000000..4db7645 --- /dev/null +++ b/docs/scripts/tests/fixtures/html-only.eml @@ -0,0 +1,20 @@ +From: Jonathan Smith <jsmith@example.com> +To: Craig Jennings <craig@example.com> +Subject: HTML Update +Date: Thu, 05 Feb 2026 11:36:00 -0600 +MIME-Version: 1.0 +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: 7bit +Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 + +<html> +<body> +<p>Hi Craig,</p> +<p>Here is the <strong>HTML</strong> update.</p> +<ul> +<li>Item one</li> +<li>Item two</li> +</ul> +<p>Best,<br>Jonathan</p> +</body> +</html> diff --git a/docs/scripts/tests/fixtures/multiple-received-headers.eml b/docs/scripts/tests/fixtures/multiple-received-headers.eml new file mode 100644 index 0000000..1b8d6a7 --- /dev/null +++ b/docs/scripts/tests/fixtures/multiple-received-headers.eml @@ -0,0 +1,12 @@ +From: Jonathan Smith <jsmith@example.com> +To: Craig Jennings <craig@example.com> +Subject: Multiple Received Headers Test +Date: Thu, 05 Feb 2026 11:36:00 -0600 +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit +Received: by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600 +Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 +Received: from originator.example.com by relay.example.com with SMTP; Thu, 05 Feb 2026 11:35:58 -0600 + +Test body with multiple received headers. diff --git a/docs/scripts/tests/fixtures/no-received-headers.eml b/docs/scripts/tests/fixtures/no-received-headers.eml new file mode 100644 index 0000000..8a05dc7 --- /dev/null +++ b/docs/scripts/tests/fixtures/no-received-headers.eml @@ -0,0 +1,9 @@ +From: Jonathan Smith <jsmith@example.com> +To: Craig Jennings <craig@example.com> +Subject: No Received Headers +Date: Thu, 05 Feb 2026 11:36:00 -0600 +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + +Test body with no received headers at all. diff --git a/docs/scripts/tests/fixtures/plain-text.eml b/docs/scripts/tests/fixtures/plain-text.eml new file mode 100644 index 0000000..8cc9d9c --- /dev/null +++ b/docs/scripts/tests/fixtures/plain-text.eml @@ -0,0 +1,15 @@ +From: Jonathan Smith <jsmith@example.com> +To: Craig Jennings <craig@example.com> +Subject: Re: Fw: 4319 Danneel Street +Date: Thu, 05 Feb 2026 11:36:00 -0600 +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit +Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 + +Hi Craig, + +Here is the update on 4319 Danneel Street. + +Best, +Jonathan diff --git a/docs/scripts/tests/fixtures/with-attachment.eml b/docs/scripts/tests/fixtures/with-attachment.eml new file mode 100644 index 0000000..ac49c5d --- /dev/null +++ b/docs/scripts/tests/fixtures/with-attachment.eml @@ -0,0 +1,27 @@ +From: Jonathan Smith <jsmith@example.com> +To: Craig Jennings <craig@example.com> +Subject: Ltr from Carrollton +Date: Thu, 05 Feb 2026 11:36:00 -0600 +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="boundary123" +Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 + +--boundary123 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + +Hi Craig, + +Please find the letter attached. + +Best, +Jonathan + +--boundary123 +Content-Type: application/octet-stream; name="Ltr Carrollton.pdf" +Content-Disposition: attachment; filename="Ltr Carrollton.pdf" +Content-Transfer-Encoding: base64 + +ZmFrZSBwZGYgY29udGVudA== + +--boundary123-- diff --git a/docs/scripts/tests/test_extract_body.py b/docs/scripts/tests/test_extract_body.py new file mode 100644 index 0000000..7b53cda --- /dev/null +++ b/docs/scripts/tests/test_extract_body.py @@ -0,0 +1,96 @@ +"""Tests for extract_body().""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from conftest import make_plain_message, make_html_message, make_message_with_attachment +from email.message import EmailMessage +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from email.mime.application import MIMEApplication + +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +extract_body = eml_script.extract_body + + +class TestPlainText: + def test_returns_plain_text(self): + msg = make_plain_message(body="Hello, this is plain text.") + result = extract_body(msg) + assert "Hello, this is plain text." in result + + +class TestHtmlOnly: + def test_returns_converted_html(self): + msg = make_html_message(html_body="<p>Hello <strong>world</strong></p>") + result = extract_body(msg) + assert "Hello" in result + assert "world" in result + # Should not contain raw HTML tags + assert "<p>" not in result + assert "<strong>" not in result + + +class TestBothPlainAndHtml: + def test_prefers_plain_text(self): + msg = MIMEMultipart('alternative') + msg['From'] = 'test@example.com' + msg['To'] = 'dest@example.com' + msg['Subject'] = 'Test' + msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' + msg.attach(MIMEText("Plain text version", 'plain')) + msg.attach(MIMEText("<p>HTML version</p>", 'html')) + result = extract_body(msg) + assert "Plain text version" in result + assert "HTML version" not in result + + +class TestEmptyBody: + def test_returns_empty_string(self): + # Multipart with only attachments, no text parts + msg = MIMEMultipart() + msg['From'] = 'test@example.com' + att = MIMEApplication(b"binary data", Name="file.bin") + att['Content-Disposition'] = 'attachment; filename="file.bin"' + msg.attach(att) + result = extract_body(msg) + assert result == "" + + +class TestNonUtf8Encoding: + def test_decodes_with_errors_ignore(self): + msg = EmailMessage() + msg['From'] = 'test@example.com' + # Set raw bytes that include invalid UTF-8 + msg.set_content("Valid text with special: café") + result = extract_body(msg) + assert "Valid text" in result + + +class TestHtmlWithStructure: + def test_preserves_list_structure(self): + html = "<ul><li>Item one</li><li>Item two</li></ul>" + msg = make_html_message(html_body=html) + result = extract_body(msg) + assert "Item one" in result + assert "Item two" in result + + +class TestNoTextParts: + def test_returns_empty_string(self): + msg = MIMEMultipart() + msg['From'] = 'test@example.com' + att = MIMEApplication(b"data", Name="image.png") + att['Content-Disposition'] = 'attachment; filename="image.png"' + msg.attach(att) + result = extract_body(msg) + assert result == "" diff --git a/docs/scripts/tests/test_extract_metadata.py b/docs/scripts/tests/test_extract_metadata.py new file mode 100644 index 0000000..d5ee52e --- /dev/null +++ b/docs/scripts/tests/test_extract_metadata.py @@ -0,0 +1,65 @@ +"""Tests for extract_metadata().""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from conftest import make_plain_message, add_received_headers +from email.message import EmailMessage + +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +extract_metadata = eml_script.extract_metadata + + +class TestAllHeadersPresent: + def test_complete_dict(self): + msg = make_plain_message( + from_="Jonathan Smith <jsmith@example.com>", + to="Craig <craig@example.com>", + subject="Test Subject", + date="Thu, 05 Feb 2026 11:36:00 -0600" + ) + result = extract_metadata(msg) + assert result['from'] == "Jonathan Smith <jsmith@example.com>" + assert result['to'] == "Craig <craig@example.com>" + assert result['subject'] == "Test Subject" + assert result['date'] == "Thu, 05 Feb 2026 11:36:00 -0600" + assert 'timing' in result + + +class TestMissingFrom: + def test_from_is_none(self): + msg = EmailMessage() + msg['To'] = 'craig@example.com' + msg['Subject'] = 'Test' + msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' + msg.set_content("body") + result = extract_metadata(msg) + assert result['from'] is None + + +class TestMissingDate: + def test_date_is_none(self): + msg = EmailMessage() + msg['From'] = 'test@example.com' + msg['To'] = 'craig@example.com' + msg['Subject'] = 'Test' + msg.set_content("body") + result = extract_metadata(msg) + assert result['date'] is None + + +class TestLongSubject: + def test_full_subject_returned(self): + long_subject = "Re: Fw: This is a very long subject line that spans many words and might be folded" + msg = make_plain_message(subject=long_subject) + result = extract_metadata(msg) + assert result['subject'] == long_subject diff --git a/docs/scripts/tests/test_generate_filenames.py b/docs/scripts/tests/test_generate_filenames.py new file mode 100644 index 0000000..07c8f84 --- /dev/null +++ b/docs/scripts/tests/test_generate_filenames.py @@ -0,0 +1,157 @@ +"""Tests for generate_basename(), generate_email_filename(), generate_attachment_filename().""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +generate_basename = eml_script.generate_basename +generate_email_filename = eml_script.generate_email_filename +generate_attachment_filename = eml_script.generate_attachment_filename + + +# --- generate_basename --- + +class TestGenerateBasename: + def test_standard_from_and_date(self): + metadata = { + 'from': 'Jonathan Smith <jsmith@example.com>', + 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', + } + assert generate_basename(metadata) == "2026-02-05-1136-Jonathan" + + def test_from_with_display_name_first_token(self): + metadata = { + 'from': 'C Ciarm <cciarm@example.com>', + 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', + } + result = generate_basename(metadata) + assert result == "2026-02-05-1136-C" + + def test_from_without_display_name(self): + metadata = { + 'from': 'jsmith@example.com', + 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', + } + result = generate_basename(metadata) + assert result == "2026-02-05-1136-jsmith" + + def test_missing_date(self): + metadata = { + 'from': 'Jonathan Smith <jsmith@example.com>', + 'date': None, + } + result = generate_basename(metadata) + assert result == "unknown-Jonathan" + + def test_missing_from(self): + metadata = { + 'from': None, + 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', + } + result = generate_basename(metadata) + assert result == "2026-02-05-1136-unknown" + + def test_both_missing(self): + metadata = {'from': None, 'date': None} + result = generate_basename(metadata) + assert result == "unknown-unknown" + + def test_unparseable_date(self): + metadata = { + 'from': 'Jonathan <j@example.com>', + 'date': 'not a real date', + } + result = generate_basename(metadata) + assert result == "unknown-Jonathan" + + def test_none_date_no_crash(self): + metadata = {'from': 'Test <t@e.com>', 'date': None} + # Should not raise + result = generate_basename(metadata) + assert "unknown" in result + + +# --- generate_email_filename --- + +class TestGenerateEmailFilename: + def test_standard_subject(self): + result = generate_email_filename( + "2026-02-05-1136-Jonathan", + "Re: Fw: 4319 Danneel Street" + ) + assert result == "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street" + + def test_subject_with_special_chars(self): + result = generate_email_filename( + "2026-02-05-1136-Jonathan", + "Update: Meeting (draft) & notes!" + ) + # Colons, parens, ampersands, exclamation stripped + assert "EMAIL" in result + assert ":" not in result + assert "(" not in result + assert ")" not in result + assert "&" not in result + assert "!" not in result + + def test_none_subject(self): + result = generate_email_filename("2026-02-05-1136-Jonathan", None) + assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject" + + def test_empty_subject(self): + result = generate_email_filename("2026-02-05-1136-Jonathan", "") + assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject" + + def test_very_long_subject(self): + long_subject = "A" * 100 + " " + "B" * 100 + result = generate_email_filename("2026-02-05-1136-Jonathan", long_subject) + # The cleaned subject part should be truncated + # basename (27) + "-EMAIL-" (7) + subject + # Subject itself is limited to 80 chars by _clean_for_filename + subject_part = result.split("-EMAIL-")[1] + assert len(subject_part) <= 80 + + +# --- generate_attachment_filename --- + +class TestGenerateAttachmentFilename: + def test_standard_attachment(self): + result = generate_attachment_filename( + "2026-02-05-1136-Jonathan", + "Ltr Carrollton.pdf" + ) + assert result == "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf" + + def test_filename_with_spaces_and_parens(self): + result = generate_attachment_filename( + "2026-02-05-1136-Jonathan", + "Document (final copy).pdf" + ) + assert " " not in result + assert "(" not in result + assert ")" not in result + assert result.endswith(".pdf") + + def test_preserves_extension(self): + result = generate_attachment_filename( + "2026-02-05-1136-Jonathan", + "photo.jpg" + ) + assert result.endswith(".jpg") + + def test_none_filename(self): + result = generate_attachment_filename("2026-02-05-1136-Jonathan", None) + assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed" + + def test_empty_filename(self): + result = generate_attachment_filename("2026-02-05-1136-Jonathan", "") + assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed" diff --git a/docs/scripts/tests/test_integration_stdout.py b/docs/scripts/tests/test_integration_stdout.py new file mode 100644 index 0000000..d87478e --- /dev/null +++ b/docs/scripts/tests/test_integration_stdout.py @@ -0,0 +1,68 @@ +"""Integration tests for backwards-compatible stdout mode (no --output-dir).""" + +import os +import shutil +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +print_email = eml_script.print_email + +FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') + + +class TestPlainTextStdout: + def test_metadata_and_body_printed(self, tmp_path, capsys): + eml_src = os.path.join(FIXTURES, 'plain-text.eml') + working_eml = tmp_path / "message.eml" + shutil.copy2(eml_src, working_eml) + + print_email(str(working_eml)) + captured = capsys.readouterr() + + assert "From: Jonathan Smith <jsmith@example.com>" in captured.out + assert "To: Craig Jennings <craig@example.com>" in captured.out + assert "Subject: Re: Fw: 4319 Danneel Street" in captured.out + assert "Date:" in captured.out + assert "Sent:" in captured.out + assert "Received:" in captured.out + assert "4319 Danneel Street" in captured.out + + +class TestHtmlFallbackStdout: + def test_html_converted_on_stdout(self, tmp_path, capsys): + eml_src = os.path.join(FIXTURES, 'html-only.eml') + working_eml = tmp_path / "message.eml" + shutil.copy2(eml_src, working_eml) + + print_email(str(working_eml)) + captured = capsys.readouterr() + + # Should see converted text, not raw HTML + assert "HTML" in captured.out + assert "<p>" not in captured.out + + +class TestAttachmentsStdout: + def test_attachment_extracted_alongside_eml(self, tmp_path, capsys): + eml_src = os.path.join(FIXTURES, 'with-attachment.eml') + working_eml = tmp_path / "message.eml" + shutil.copy2(eml_src, working_eml) + + print_email(str(working_eml)) + captured = capsys.readouterr() + + assert "Extracted attachment:" in captured.out + assert "Ltr Carrollton.pdf" in captured.out + + # File should exist alongside the EML + extracted = tmp_path / "Ltr Carrollton.pdf" + assert extracted.exists() diff --git a/docs/scripts/tests/test_parse_received_headers.py b/docs/scripts/tests/test_parse_received_headers.py new file mode 100644 index 0000000..e12e1fb --- /dev/null +++ b/docs/scripts/tests/test_parse_received_headers.py @@ -0,0 +1,105 @@ +"""Tests for parse_received_headers().""" + +import email +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from conftest import make_plain_message, add_received_headers +from email.message import EmailMessage + +# Import the function under test +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +parse_received_headers = eml_script.parse_received_headers + + +class TestSingleHeader: + def test_header_with_from_and_by(self): + msg = EmailMessage() + msg['Received'] = ( + 'from mail-sender.example.com by mx.receiver.example.com ' + 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' + ) + result = parse_received_headers(msg) + assert result['sent_server'] == 'mail-sender.example.com' + assert result['received_server'] == 'mx.receiver.example.com' + assert result['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' + assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' + + +class TestMultipleHeaders: + def test_uses_first_with_both_from_and_by(self): + msg = EmailMessage() + # Most recent first (by only) + msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' + # Next: has both from and by — this should be selected + msg['Received'] = ( + 'from mail-sender.example.com by mx.receiver.example.com ' + 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' + ) + # Oldest + msg['Received'] = ( + 'from originator.example.com by relay.example.com ' + 'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600' + ) + result = parse_received_headers(msg) + assert result['sent_server'] == 'mail-sender.example.com' + assert result['received_server'] == 'mx.receiver.example.com' + + +class TestNoReceivedHeaders: + def test_all_values_none(self): + msg = EmailMessage() + result = parse_received_headers(msg) + assert result['sent_time'] is None + assert result['sent_server'] is None + assert result['received_time'] is None + assert result['received_server'] is None + + +class TestByButNoFrom: + def test_falls_back_to_first_header(self): + msg = EmailMessage() + msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' + result = parse_received_headers(msg) + assert result['received_server'] == 'internal.example.com' + assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:10 -0600' + # No from in any header, so sent_server stays None + assert result['sent_server'] is None + + +class TestMultilineFoldedHeader: + def test_normalizes_whitespace(self): + # Use email.message_from_string to parse raw folded headers + # (EmailMessage policy rejects embedded CRLF in set values) + raw = ( + "From: test@example.com\r\n" + "Received: from mail-sender.example.com\r\n" + " by mx.receiver.example.com\r\n" + " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n" + "\r\n" + "body\r\n" + ) + msg = email.message_from_string(raw) + result = parse_received_headers(msg) + assert result['sent_server'] == 'mail-sender.example.com' + assert result['received_server'] == 'mx.receiver.example.com' + + +class TestMalformedTimestamp: + def test_no_semicolon(self): + msg = EmailMessage() + msg['Received'] = 'from sender.example.com by receiver.example.com with SMTP' + result = parse_received_headers(msg) + assert result['sent_server'] == 'sender.example.com' + assert result['received_server'] == 'receiver.example.com' + assert result['sent_time'] is None + assert result['received_time'] is None diff --git a/docs/scripts/tests/test_process_eml.py b/docs/scripts/tests/test_process_eml.py new file mode 100644 index 0000000..26c5ad5 --- /dev/null +++ b/docs/scripts/tests/test_process_eml.py @@ -0,0 +1,129 @@ +"""Integration tests for process_eml() — full pipeline with --output-dir.""" + +import os +import shutil +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +process_eml = eml_script.process_eml + +import pytest + + +FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') + + +class TestPlainTextPipeline: + def test_creates_eml_and_txt(self, tmp_path): + eml_src = os.path.join(FIXTURES, 'plain-text.eml') + # Copy fixture to tmp_path so temp dir can be created as sibling + working_eml = tmp_path / "inbox" / "message.eml" + working_eml.parent.mkdir() + shutil.copy2(eml_src, working_eml) + + output_dir = tmp_path / "output" + result = process_eml(str(working_eml), str(output_dir)) + + # Should have exactly 2 files: .eml and .txt + assert len(result['files']) == 2 + eml_file = result['files'][0] + txt_file = result['files'][1] + + assert eml_file['type'] == 'eml' + assert txt_file['type'] == 'txt' + assert eml_file['name'].endswith('.eml') + assert txt_file['name'].endswith('.txt') + + # Files exist in output dir + assert os.path.isfile(eml_file['path']) + assert os.path.isfile(txt_file['path']) + + # Filenames contain expected components + assert 'Jonathan' in eml_file['name'] + assert 'EMAIL' in eml_file['name'] + assert '2026-02-05' in eml_file['name'] + + # Temp dir cleaned up (no extract-* dirs in inbox) + inbox_contents = os.listdir(str(tmp_path / "inbox")) + assert not any(d.startswith('extract-') for d in inbox_contents) + + +class TestHtmlFallbackPipeline: + def test_txt_contains_converted_html(self, tmp_path): + eml_src = os.path.join(FIXTURES, 'html-only.eml') + working_eml = tmp_path / "inbox" / "message.eml" + working_eml.parent.mkdir() + shutil.copy2(eml_src, working_eml) + + output_dir = tmp_path / "output" + result = process_eml(str(working_eml), str(output_dir)) + + txt_file = result['files'][1] + with open(txt_file['path'], 'r') as f: + content = f.read() + + # Should be converted, not raw HTML + assert '<p>' not in content + assert '<strong>' not in content + assert 'HTML' in content + + +class TestAttachmentPipeline: + def test_eml_txt_and_attachment_created(self, tmp_path): + eml_src = os.path.join(FIXTURES, 'with-attachment.eml') + working_eml = tmp_path / "inbox" / "message.eml" + working_eml.parent.mkdir() + shutil.copy2(eml_src, working_eml) + + output_dir = tmp_path / "output" + result = process_eml(str(working_eml), str(output_dir)) + + assert len(result['files']) == 3 + types = [f['type'] for f in result['files']] + assert types == ['eml', 'txt', 'attach'] + + # Attachment is auto-renamed + attach_file = result['files'][2] + assert 'ATTACH' in attach_file['name'] + assert attach_file['name'].endswith('.pdf') + assert os.path.isfile(attach_file['path']) + + +class TestCollisionDetection: + def test_raises_on_existing_file(self, tmp_path): + eml_src = os.path.join(FIXTURES, 'plain-text.eml') + working_eml = tmp_path / "inbox" / "message.eml" + working_eml.parent.mkdir() + shutil.copy2(eml_src, working_eml) + + output_dir = tmp_path / "output" + # Run once to create files + result = process_eml(str(working_eml), str(output_dir)) + + # Run again — should raise FileExistsError + with pytest.raises(FileExistsError, match="Collision"): + process_eml(str(working_eml), str(output_dir)) + + +class TestMissingOutputDir: + def test_creates_directory(self, tmp_path): + eml_src = os.path.join(FIXTURES, 'plain-text.eml') + working_eml = tmp_path / "inbox" / "message.eml" + working_eml.parent.mkdir() + shutil.copy2(eml_src, working_eml) + + output_dir = tmp_path / "new" / "nested" / "output" + assert not output_dir.exists() + + result = process_eml(str(working_eml), str(output_dir)) + assert output_dir.exists() + assert len(result['files']) == 2 diff --git a/docs/scripts/tests/test_save_attachments.py b/docs/scripts/tests/test_save_attachments.py new file mode 100644 index 0000000..32f02a6 --- /dev/null +++ b/docs/scripts/tests/test_save_attachments.py @@ -0,0 +1,97 @@ +"""Tests for save_attachments().""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from conftest import make_plain_message, make_message_with_attachment +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from email.mime.application import MIMEApplication + +import importlib.util +spec = importlib.util.spec_from_file_location( + "eml_script", + os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') +) +eml_script = importlib.util.module_from_spec(spec) +spec.loader.exec_module(eml_script) + +save_attachments = eml_script.save_attachments + + +class TestSingleAttachment: + def test_file_written_and_returned(self, tmp_path): + msg = make_message_with_attachment( + attachment_filename="report.pdf", + attachment_content=b"pdf bytes here" + ) + result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") + + assert len(result) == 1 + assert result[0]['original_name'] == "report.pdf" + assert "ATTACH" in result[0]['renamed_name'] + assert result[0]['renamed_name'].endswith(".pdf") + + # File actually exists and has correct content + written_path = result[0]['path'] + assert os.path.isfile(written_path) + with open(written_path, 'rb') as f: + assert f.read() == b"pdf bytes here" + + +class TestMultipleAttachments: + def test_all_written_and_returned(self, tmp_path): + msg = MIMEMultipart() + msg['From'] = 'test@example.com' + msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' + msg.attach(MIMEText("body", 'plain')) + + for name, content in [("doc1.pdf", b"pdf1"), ("image.png", b"png1")]: + att = MIMEApplication(content, Name=name) + att['Content-Disposition'] = f'attachment; filename="{name}"' + msg.attach(att) + + result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") + + assert len(result) == 2 + for r in result: + assert os.path.isfile(r['path']) + + +class TestNoAttachments: + def test_empty_list(self, tmp_path): + msg = make_plain_message() + result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") + assert result == [] + + +class TestFilenameWithSpaces: + def test_cleaned_filename(self, tmp_path): + msg = make_message_with_attachment( + attachment_filename="My Document (1).pdf", + attachment_content=b"data" + ) + result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") + + assert len(result) == 1 + assert " " not in result[0]['renamed_name'] + assert os.path.isfile(result[0]['path']) + + +class TestNoContentDisposition: + def test_skipped(self, tmp_path): + msg = MIMEMultipart() + msg['From'] = 'test@example.com' + msg.attach(MIMEText("body", 'plain')) + + # Add a part without Content-Disposition + part = MIMEApplication(b"data", Name="file.bin") + # Explicitly remove Content-Disposition if present + if 'Content-Disposition' in part: + del part['Content-Disposition'] + msg.attach(part) + + result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") + assert result == [] |
