From 5e6877e8f3fb552fce3367ff273167d2cf6af75f Mon Sep 17 00:00:00 2001 From: Craig Jennings Date: Sun, 22 Feb 2026 23:20:56 -0600 Subject: chore: add docs/ to .gitignore and untrack personal files docs/ contains session history, personal workflows, and private protocols that shouldn't be in a public repository. --- docs/scripts/tests/conftest.py | 77 ---------- docs/scripts/tests/fixtures/empty-body.eml | 16 --- docs/scripts/tests/fixtures/html-only.eml | 20 --- .../tests/fixtures/multiple-received-headers.eml | 12 -- .../scripts/tests/fixtures/no-received-headers.eml | 9 -- docs/scripts/tests/fixtures/plain-text.eml | 15 -- docs/scripts/tests/fixtures/with-attachment.eml | 27 ---- docs/scripts/tests/test_extract_body.py | 96 ------------- docs/scripts/tests/test_extract_metadata.py | 65 --------- docs/scripts/tests/test_generate_filenames.py | 157 --------------------- docs/scripts/tests/test_integration_stdout.py | 68 --------- docs/scripts/tests/test_parse_received_headers.py | 105 -------------- docs/scripts/tests/test_process_eml.py | 129 ----------------- docs/scripts/tests/test_save_attachments.py | 97 ------------- 14 files changed, 893 deletions(-) delete mode 100644 docs/scripts/tests/conftest.py delete mode 100644 docs/scripts/tests/fixtures/empty-body.eml delete mode 100644 docs/scripts/tests/fixtures/html-only.eml delete mode 100644 docs/scripts/tests/fixtures/multiple-received-headers.eml delete mode 100644 docs/scripts/tests/fixtures/no-received-headers.eml delete mode 100644 docs/scripts/tests/fixtures/plain-text.eml delete mode 100644 docs/scripts/tests/fixtures/with-attachment.eml delete mode 100644 docs/scripts/tests/test_extract_body.py delete mode 100644 docs/scripts/tests/test_extract_metadata.py delete mode 100644 docs/scripts/tests/test_generate_filenames.py delete mode 100644 docs/scripts/tests/test_integration_stdout.py delete mode 100644 docs/scripts/tests/test_parse_received_headers.py delete mode 100644 docs/scripts/tests/test_process_eml.py delete mode 100644 docs/scripts/tests/test_save_attachments.py (limited to 'docs/scripts/tests') diff --git a/docs/scripts/tests/conftest.py b/docs/scripts/tests/conftest.py deleted file mode 100644 index 8d965ab..0000000 --- a/docs/scripts/tests/conftest.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Shared fixtures for EML extraction tests.""" - -import os -from email.message import EmailMessage -from email.mime.application import MIMEApplication -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -import pytest - - -@pytest.fixture -def fixtures_dir(): - """Return path to the fixtures/ directory.""" - return os.path.join(os.path.dirname(__file__), 'fixtures') - - -def make_plain_message(body="Test body", from_="Jonathan Smith ", - to="Craig ", - subject="Test Subject", - date="Wed, 05 Feb 2026 11:36:00 -0600"): - """Create an EmailMessage with text/plain body.""" - msg = EmailMessage() - msg['From'] = from_ - msg['To'] = to - msg['Subject'] = subject - msg['Date'] = date - msg.set_content(body) - return msg - - -def make_html_message(html_body="

Test body

", - from_="Jonathan Smith ", - to="Craig ", - subject="Test Subject", - date="Wed, 05 Feb 2026 11:36:00 -0600"): - """Create an EmailMessage with text/html body only.""" - msg = EmailMessage() - msg['From'] = from_ - msg['To'] = to - msg['Subject'] = subject - msg['Date'] = date - msg.set_content(html_body, subtype='html') - return msg - - -def make_message_with_attachment(body="Test body", - from_="Jonathan Smith ", - to="Craig ", - subject="Test Subject", - date="Wed, 05 Feb 2026 11:36:00 -0600", - attachment_filename="document.pdf", - attachment_content=b"fake pdf content"): - """Create a multipart message with a text body and one attachment.""" - msg = MIMEMultipart() - msg['From'] = from_ - msg['To'] = to - msg['Subject'] = subject - msg['Date'] = date - - msg.attach(MIMEText(body, 'plain')) - - att = MIMEApplication(attachment_content, Name=attachment_filename) - att['Content-Disposition'] = f'attachment; filename="{attachment_filename}"' - msg.attach(att) - - return msg - - -def add_received_headers(msg, headers): - """Add Received headers to an existing message. - - headers: list of header strings, added in order (first = most recent). - """ - for header in headers: - msg['Received'] = header - return msg diff --git a/docs/scripts/tests/fixtures/empty-body.eml b/docs/scripts/tests/fixtures/empty-body.eml deleted file mode 100644 index cf008df..0000000 --- a/docs/scripts/tests/fixtures/empty-body.eml +++ /dev/null @@ -1,16 +0,0 @@ -From: Jonathan Smith -To: Craig Jennings -Subject: Empty Body Test -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="boundary456" -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - ---boundary456 -Content-Type: application/octet-stream; name="data.bin" -Content-Disposition: attachment; filename="data.bin" -Content-Transfer-Encoding: base64 - -AQIDBA== - ---boundary456-- diff --git a/docs/scripts/tests/fixtures/html-only.eml b/docs/scripts/tests/fixtures/html-only.eml deleted file mode 100644 index 4db7645..0000000 --- a/docs/scripts/tests/fixtures/html-only.eml +++ /dev/null @@ -1,20 +0,0 @@ -From: Jonathan Smith -To: Craig Jennings -Subject: HTML Update -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/html; charset="utf-8" -Content-Transfer-Encoding: 7bit -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - - - -

Hi Craig,

-

Here is the HTML update.

-
    -
  • Item one
  • -
  • Item two
  • -
-

Best,
Jonathan

- - diff --git a/docs/scripts/tests/fixtures/multiple-received-headers.eml b/docs/scripts/tests/fixtures/multiple-received-headers.eml deleted file mode 100644 index 1b8d6a7..0000000 --- a/docs/scripts/tests/fixtures/multiple-received-headers.eml +++ /dev/null @@ -1,12 +0,0 @@ -From: Jonathan Smith -To: Craig Jennings -Subject: Multiple Received Headers Test -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit -Received: by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600 -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 -Received: from originator.example.com by relay.example.com with SMTP; Thu, 05 Feb 2026 11:35:58 -0600 - -Test body with multiple received headers. diff --git a/docs/scripts/tests/fixtures/no-received-headers.eml b/docs/scripts/tests/fixtures/no-received-headers.eml deleted file mode 100644 index 8a05dc7..0000000 --- a/docs/scripts/tests/fixtures/no-received-headers.eml +++ /dev/null @@ -1,9 +0,0 @@ -From: Jonathan Smith -To: Craig Jennings -Subject: No Received Headers -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit - -Test body with no received headers at all. diff --git a/docs/scripts/tests/fixtures/plain-text.eml b/docs/scripts/tests/fixtures/plain-text.eml deleted file mode 100644 index 8cc9d9c..0000000 --- a/docs/scripts/tests/fixtures/plain-text.eml +++ /dev/null @@ -1,15 +0,0 @@ -From: Jonathan Smith -To: Craig Jennings -Subject: Re: Fw: 4319 Danneel Street -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - -Hi Craig, - -Here is the update on 4319 Danneel Street. - -Best, -Jonathan diff --git a/docs/scripts/tests/fixtures/with-attachment.eml b/docs/scripts/tests/fixtures/with-attachment.eml deleted file mode 100644 index ac49c5d..0000000 --- a/docs/scripts/tests/fixtures/with-attachment.eml +++ /dev/null @@ -1,27 +0,0 @@ -From: Jonathan Smith -To: Craig Jennings -Subject: Ltr from Carrollton -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="boundary123" -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - ---boundary123 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit - -Hi Craig, - -Please find the letter attached. - -Best, -Jonathan - ---boundary123 -Content-Type: application/octet-stream; name="Ltr Carrollton.pdf" -Content-Disposition: attachment; filename="Ltr Carrollton.pdf" -Content-Transfer-Encoding: base64 - -ZmFrZSBwZGYgY29udGVudA== - ---boundary123-- diff --git a/docs/scripts/tests/test_extract_body.py b/docs/scripts/tests/test_extract_body.py deleted file mode 100644 index 7b53cda..0000000 --- a/docs/scripts/tests/test_extract_body.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Tests for extract_body().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, make_html_message, make_message_with_attachment -from email.message import EmailMessage -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from email.mime.application import MIMEApplication - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -extract_body = eml_script.extract_body - - -class TestPlainText: - def test_returns_plain_text(self): - msg = make_plain_message(body="Hello, this is plain text.") - result = extract_body(msg) - assert "Hello, this is plain text." in result - - -class TestHtmlOnly: - def test_returns_converted_html(self): - msg = make_html_message(html_body="

Hello world

") - result = extract_body(msg) - assert "Hello" in result - assert "world" in result - # Should not contain raw HTML tags - assert "

" not in result - assert "" not in result - - -class TestBothPlainAndHtml: - def test_prefers_plain_text(self): - msg = MIMEMultipart('alternative') - msg['From'] = 'test@example.com' - msg['To'] = 'dest@example.com' - msg['Subject'] = 'Test' - msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' - msg.attach(MIMEText("Plain text version", 'plain')) - msg.attach(MIMEText("

HTML version

", 'html')) - result = extract_body(msg) - assert "Plain text version" in result - assert "HTML version" not in result - - -class TestEmptyBody: - def test_returns_empty_string(self): - # Multipart with only attachments, no text parts - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - att = MIMEApplication(b"binary data", Name="file.bin") - att['Content-Disposition'] = 'attachment; filename="file.bin"' - msg.attach(att) - result = extract_body(msg) - assert result == "" - - -class TestNonUtf8Encoding: - def test_decodes_with_errors_ignore(self): - msg = EmailMessage() - msg['From'] = 'test@example.com' - # Set raw bytes that include invalid UTF-8 - msg.set_content("Valid text with special: café") - result = extract_body(msg) - assert "Valid text" in result - - -class TestHtmlWithStructure: - def test_preserves_list_structure(self): - html = "
  • Item one
  • Item two
" - msg = make_html_message(html_body=html) - result = extract_body(msg) - assert "Item one" in result - assert "Item two" in result - - -class TestNoTextParts: - def test_returns_empty_string(self): - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - att = MIMEApplication(b"data", Name="image.png") - att['Content-Disposition'] = 'attachment; filename="image.png"' - msg.attach(att) - result = extract_body(msg) - assert result == "" diff --git a/docs/scripts/tests/test_extract_metadata.py b/docs/scripts/tests/test_extract_metadata.py deleted file mode 100644 index d5ee52e..0000000 --- a/docs/scripts/tests/test_extract_metadata.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Tests for extract_metadata().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, add_received_headers -from email.message import EmailMessage - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -extract_metadata = eml_script.extract_metadata - - -class TestAllHeadersPresent: - def test_complete_dict(self): - msg = make_plain_message( - from_="Jonathan Smith ", - to="Craig ", - subject="Test Subject", - date="Thu, 05 Feb 2026 11:36:00 -0600" - ) - result = extract_metadata(msg) - assert result['from'] == "Jonathan Smith " - assert result['to'] == "Craig " - assert result['subject'] == "Test Subject" - assert result['date'] == "Thu, 05 Feb 2026 11:36:00 -0600" - assert 'timing' in result - - -class TestMissingFrom: - def test_from_is_none(self): - msg = EmailMessage() - msg['To'] = 'craig@example.com' - msg['Subject'] = 'Test' - msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' - msg.set_content("body") - result = extract_metadata(msg) - assert result['from'] is None - - -class TestMissingDate: - def test_date_is_none(self): - msg = EmailMessage() - msg['From'] = 'test@example.com' - msg['To'] = 'craig@example.com' - msg['Subject'] = 'Test' - msg.set_content("body") - result = extract_metadata(msg) - assert result['date'] is None - - -class TestLongSubject: - def test_full_subject_returned(self): - long_subject = "Re: Fw: This is a very long subject line that spans many words and might be folded" - msg = make_plain_message(subject=long_subject) - result = extract_metadata(msg) - assert result['subject'] == long_subject diff --git a/docs/scripts/tests/test_generate_filenames.py b/docs/scripts/tests/test_generate_filenames.py deleted file mode 100644 index 07c8f84..0000000 --- a/docs/scripts/tests/test_generate_filenames.py +++ /dev/null @@ -1,157 +0,0 @@ -"""Tests for generate_basename(), generate_email_filename(), generate_attachment_filename().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -generate_basename = eml_script.generate_basename -generate_email_filename = eml_script.generate_email_filename -generate_attachment_filename = eml_script.generate_attachment_filename - - -# --- generate_basename --- - -class TestGenerateBasename: - def test_standard_from_and_date(self): - metadata = { - 'from': 'Jonathan Smith ', - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - assert generate_basename(metadata) == "2026-02-05-1136-Jonathan" - - def test_from_with_display_name_first_token(self): - metadata = { - 'from': 'C Ciarm ', - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - result = generate_basename(metadata) - assert result == "2026-02-05-1136-C" - - def test_from_without_display_name(self): - metadata = { - 'from': 'jsmith@example.com', - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - result = generate_basename(metadata) - assert result == "2026-02-05-1136-jsmith" - - def test_missing_date(self): - metadata = { - 'from': 'Jonathan Smith ', - 'date': None, - } - result = generate_basename(metadata) - assert result == "unknown-Jonathan" - - def test_missing_from(self): - metadata = { - 'from': None, - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - result = generate_basename(metadata) - assert result == "2026-02-05-1136-unknown" - - def test_both_missing(self): - metadata = {'from': None, 'date': None} - result = generate_basename(metadata) - assert result == "unknown-unknown" - - def test_unparseable_date(self): - metadata = { - 'from': 'Jonathan ', - 'date': 'not a real date', - } - result = generate_basename(metadata) - assert result == "unknown-Jonathan" - - def test_none_date_no_crash(self): - metadata = {'from': 'Test ', 'date': None} - # Should not raise - result = generate_basename(metadata) - assert "unknown" in result - - -# --- generate_email_filename --- - -class TestGenerateEmailFilename: - def test_standard_subject(self): - result = generate_email_filename( - "2026-02-05-1136-Jonathan", - "Re: Fw: 4319 Danneel Street" - ) - assert result == "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street" - - def test_subject_with_special_chars(self): - result = generate_email_filename( - "2026-02-05-1136-Jonathan", - "Update: Meeting (draft) & notes!" - ) - # Colons, parens, ampersands, exclamation stripped - assert "EMAIL" in result - assert ":" not in result - assert "(" not in result - assert ")" not in result - assert "&" not in result - assert "!" not in result - - def test_none_subject(self): - result = generate_email_filename("2026-02-05-1136-Jonathan", None) - assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject" - - def test_empty_subject(self): - result = generate_email_filename("2026-02-05-1136-Jonathan", "") - assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject" - - def test_very_long_subject(self): - long_subject = "A" * 100 + " " + "B" * 100 - result = generate_email_filename("2026-02-05-1136-Jonathan", long_subject) - # The cleaned subject part should be truncated - # basename (27) + "-EMAIL-" (7) + subject - # Subject itself is limited to 80 chars by _clean_for_filename - subject_part = result.split("-EMAIL-")[1] - assert len(subject_part) <= 80 - - -# --- generate_attachment_filename --- - -class TestGenerateAttachmentFilename: - def test_standard_attachment(self): - result = generate_attachment_filename( - "2026-02-05-1136-Jonathan", - "Ltr Carrollton.pdf" - ) - assert result == "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf" - - def test_filename_with_spaces_and_parens(self): - result = generate_attachment_filename( - "2026-02-05-1136-Jonathan", - "Document (final copy).pdf" - ) - assert " " not in result - assert "(" not in result - assert ")" not in result - assert result.endswith(".pdf") - - def test_preserves_extension(self): - result = generate_attachment_filename( - "2026-02-05-1136-Jonathan", - "photo.jpg" - ) - assert result.endswith(".jpg") - - def test_none_filename(self): - result = generate_attachment_filename("2026-02-05-1136-Jonathan", None) - assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed" - - def test_empty_filename(self): - result = generate_attachment_filename("2026-02-05-1136-Jonathan", "") - assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed" diff --git a/docs/scripts/tests/test_integration_stdout.py b/docs/scripts/tests/test_integration_stdout.py deleted file mode 100644 index d87478e..0000000 --- a/docs/scripts/tests/test_integration_stdout.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Integration tests for backwards-compatible stdout mode (no --output-dir).""" - -import os -import shutil -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -print_email = eml_script.print_email - -FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') - - -class TestPlainTextStdout: - def test_metadata_and_body_printed(self, tmp_path, capsys): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - working_eml = tmp_path / "message.eml" - shutil.copy2(eml_src, working_eml) - - print_email(str(working_eml)) - captured = capsys.readouterr() - - assert "From: Jonathan Smith " in captured.out - assert "To: Craig Jennings " in captured.out - assert "Subject: Re: Fw: 4319 Danneel Street" in captured.out - assert "Date:" in captured.out - assert "Sent:" in captured.out - assert "Received:" in captured.out - assert "4319 Danneel Street" in captured.out - - -class TestHtmlFallbackStdout: - def test_html_converted_on_stdout(self, tmp_path, capsys): - eml_src = os.path.join(FIXTURES, 'html-only.eml') - working_eml = tmp_path / "message.eml" - shutil.copy2(eml_src, working_eml) - - print_email(str(working_eml)) - captured = capsys.readouterr() - - # Should see converted text, not raw HTML - assert "HTML" in captured.out - assert "

" not in captured.out - - -class TestAttachmentsStdout: - def test_attachment_extracted_alongside_eml(self, tmp_path, capsys): - eml_src = os.path.join(FIXTURES, 'with-attachment.eml') - working_eml = tmp_path / "message.eml" - shutil.copy2(eml_src, working_eml) - - print_email(str(working_eml)) - captured = capsys.readouterr() - - assert "Extracted attachment:" in captured.out - assert "Ltr Carrollton.pdf" in captured.out - - # File should exist alongside the EML - extracted = tmp_path / "Ltr Carrollton.pdf" - assert extracted.exists() diff --git a/docs/scripts/tests/test_parse_received_headers.py b/docs/scripts/tests/test_parse_received_headers.py deleted file mode 100644 index e12e1fb..0000000 --- a/docs/scripts/tests/test_parse_received_headers.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Tests for parse_received_headers().""" - -import email -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, add_received_headers -from email.message import EmailMessage - -# Import the function under test -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -parse_received_headers = eml_script.parse_received_headers - - -class TestSingleHeader: - def test_header_with_from_and_by(self): - msg = EmailMessage() - msg['Received'] = ( - 'from mail-sender.example.com by mx.receiver.example.com ' - 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' - ) - result = parse_received_headers(msg) - assert result['sent_server'] == 'mail-sender.example.com' - assert result['received_server'] == 'mx.receiver.example.com' - assert result['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' - assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' - - -class TestMultipleHeaders: - def test_uses_first_with_both_from_and_by(self): - msg = EmailMessage() - # Most recent first (by only) - msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' - # Next: has both from and by — this should be selected - msg['Received'] = ( - 'from mail-sender.example.com by mx.receiver.example.com ' - 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' - ) - # Oldest - msg['Received'] = ( - 'from originator.example.com by relay.example.com ' - 'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600' - ) - result = parse_received_headers(msg) - assert result['sent_server'] == 'mail-sender.example.com' - assert result['received_server'] == 'mx.receiver.example.com' - - -class TestNoReceivedHeaders: - def test_all_values_none(self): - msg = EmailMessage() - result = parse_received_headers(msg) - assert result['sent_time'] is None - assert result['sent_server'] is None - assert result['received_time'] is None - assert result['received_server'] is None - - -class TestByButNoFrom: - def test_falls_back_to_first_header(self): - msg = EmailMessage() - msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' - result = parse_received_headers(msg) - assert result['received_server'] == 'internal.example.com' - assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:10 -0600' - # No from in any header, so sent_server stays None - assert result['sent_server'] is None - - -class TestMultilineFoldedHeader: - def test_normalizes_whitespace(self): - # Use email.message_from_string to parse raw folded headers - # (EmailMessage policy rejects embedded CRLF in set values) - raw = ( - "From: test@example.com\r\n" - "Received: from mail-sender.example.com\r\n" - " by mx.receiver.example.com\r\n" - " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n" - "\r\n" - "body\r\n" - ) - msg = email.message_from_string(raw) - result = parse_received_headers(msg) - assert result['sent_server'] == 'mail-sender.example.com' - assert result['received_server'] == 'mx.receiver.example.com' - - -class TestMalformedTimestamp: - def test_no_semicolon(self): - msg = EmailMessage() - msg['Received'] = 'from sender.example.com by receiver.example.com with SMTP' - result = parse_received_headers(msg) - assert result['sent_server'] == 'sender.example.com' - assert result['received_server'] == 'receiver.example.com' - assert result['sent_time'] is None - assert result['received_time'] is None diff --git a/docs/scripts/tests/test_process_eml.py b/docs/scripts/tests/test_process_eml.py deleted file mode 100644 index 26c5ad5..0000000 --- a/docs/scripts/tests/test_process_eml.py +++ /dev/null @@ -1,129 +0,0 @@ -"""Integration tests for process_eml() — full pipeline with --output-dir.""" - -import os -import shutil -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -process_eml = eml_script.process_eml - -import pytest - - -FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') - - -class TestPlainTextPipeline: - def test_creates_eml_and_txt(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - # Copy fixture to tmp_path so temp dir can be created as sibling - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - result = process_eml(str(working_eml), str(output_dir)) - - # Should have exactly 2 files: .eml and .txt - assert len(result['files']) == 2 - eml_file = result['files'][0] - txt_file = result['files'][1] - - assert eml_file['type'] == 'eml' - assert txt_file['type'] == 'txt' - assert eml_file['name'].endswith('.eml') - assert txt_file['name'].endswith('.txt') - - # Files exist in output dir - assert os.path.isfile(eml_file['path']) - assert os.path.isfile(txt_file['path']) - - # Filenames contain expected components - assert 'Jonathan' in eml_file['name'] - assert 'EMAIL' in eml_file['name'] - assert '2026-02-05' in eml_file['name'] - - # Temp dir cleaned up (no extract-* dirs in inbox) - inbox_contents = os.listdir(str(tmp_path / "inbox")) - assert not any(d.startswith('extract-') for d in inbox_contents) - - -class TestHtmlFallbackPipeline: - def test_txt_contains_converted_html(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'html-only.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - result = process_eml(str(working_eml), str(output_dir)) - - txt_file = result['files'][1] - with open(txt_file['path'], 'r') as f: - content = f.read() - - # Should be converted, not raw HTML - assert '

' not in content - assert '' not in content - assert 'HTML' in content - - -class TestAttachmentPipeline: - def test_eml_txt_and_attachment_created(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'with-attachment.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - result = process_eml(str(working_eml), str(output_dir)) - - assert len(result['files']) == 3 - types = [f['type'] for f in result['files']] - assert types == ['eml', 'txt', 'attach'] - - # Attachment is auto-renamed - attach_file = result['files'][2] - assert 'ATTACH' in attach_file['name'] - assert attach_file['name'].endswith('.pdf') - assert os.path.isfile(attach_file['path']) - - -class TestCollisionDetection: - def test_raises_on_existing_file(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - # Run once to create files - result = process_eml(str(working_eml), str(output_dir)) - - # Run again — should raise FileExistsError - with pytest.raises(FileExistsError, match="Collision"): - process_eml(str(working_eml), str(output_dir)) - - -class TestMissingOutputDir: - def test_creates_directory(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "new" / "nested" / "output" - assert not output_dir.exists() - - result = process_eml(str(working_eml), str(output_dir)) - assert output_dir.exists() - assert len(result['files']) == 2 diff --git a/docs/scripts/tests/test_save_attachments.py b/docs/scripts/tests/test_save_attachments.py deleted file mode 100644 index 32f02a6..0000000 --- a/docs/scripts/tests/test_save_attachments.py +++ /dev/null @@ -1,97 +0,0 @@ -"""Tests for save_attachments().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, make_message_with_attachment -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from email.mime.application import MIMEApplication - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -save_attachments = eml_script.save_attachments - - -class TestSingleAttachment: - def test_file_written_and_returned(self, tmp_path): - msg = make_message_with_attachment( - attachment_filename="report.pdf", - attachment_content=b"pdf bytes here" - ) - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - - assert len(result) == 1 - assert result[0]['original_name'] == "report.pdf" - assert "ATTACH" in result[0]['renamed_name'] - assert result[0]['renamed_name'].endswith(".pdf") - - # File actually exists and has correct content - written_path = result[0]['path'] - assert os.path.isfile(written_path) - with open(written_path, 'rb') as f: - assert f.read() == b"pdf bytes here" - - -class TestMultipleAttachments: - def test_all_written_and_returned(self, tmp_path): - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' - msg.attach(MIMEText("body", 'plain')) - - for name, content in [("doc1.pdf", b"pdf1"), ("image.png", b"png1")]: - att = MIMEApplication(content, Name=name) - att['Content-Disposition'] = f'attachment; filename="{name}"' - msg.attach(att) - - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - - assert len(result) == 2 - for r in result: - assert os.path.isfile(r['path']) - - -class TestNoAttachments: - def test_empty_list(self, tmp_path): - msg = make_plain_message() - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - assert result == [] - - -class TestFilenameWithSpaces: - def test_cleaned_filename(self, tmp_path): - msg = make_message_with_attachment( - attachment_filename="My Document (1).pdf", - attachment_content=b"data" - ) - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - - assert len(result) == 1 - assert " " not in result[0]['renamed_name'] - assert os.path.isfile(result[0]['path']) - - -class TestNoContentDisposition: - def test_skipped(self, tmp_path): - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - msg.attach(MIMEText("body", 'plain')) - - # Add a part without Content-Disposition - part = MIMEApplication(b"data", Name="file.bin") - # Explicitly remove Content-Disposition if present - if 'Content-Disposition' in part: - del part['Content-Disposition'] - msg.attach(part) - - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - assert result == [] -- cgit v1.2.3