diff options
| author | Craig Jennings <c@cjennings.net> | 2026-02-22 23:20:56 -0600 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-02-22 23:20:56 -0600 |
| commit | 5e6877e8f3fb552fce3367ff273167d2cf6af75f (patch) | |
| tree | 909f98edbbb940aafb95de02457d4d6f7db3cba4 /docs/scripts | |
| parent | b104dde43fcc717681a8733a977eb528c60eb13f (diff) | |
| download | archangel-5e6877e8f3fb552fce3367ff273167d2cf6af75f.tar.gz archangel-5e6877e8f3fb552fce3367ff273167d2cf6af75f.zip | |
chore: add docs/ to .gitignore and untrack personal files
docs/ contains session history, personal workflows, and private
protocols that shouldn't be in a public repository.
Diffstat (limited to 'docs/scripts')
| -rw-r--r-- | docs/scripts/eml-view-and-extract-attachments-readme.org | 47 | ||||
| -rw-r--r-- | docs/scripts/eml-view-and-extract-attachments.py | 398 | ||||
| -rwxr-xr-x | docs/scripts/maildir-flag-manager.py | 345 | ||||
| -rw-r--r-- | docs/scripts/tests/conftest.py | 77 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/empty-body.eml | 16 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/html-only.eml | 20 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/multiple-received-headers.eml | 12 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/no-received-headers.eml | 9 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/plain-text.eml | 15 | ||||
| -rw-r--r-- | docs/scripts/tests/fixtures/with-attachment.eml | 27 | ||||
| -rw-r--r-- | docs/scripts/tests/test_extract_body.py | 96 | ||||
| -rw-r--r-- | docs/scripts/tests/test_extract_metadata.py | 65 | ||||
| -rw-r--r-- | docs/scripts/tests/test_generate_filenames.py | 157 | ||||
| -rw-r--r-- | docs/scripts/tests/test_integration_stdout.py | 68 | ||||
| -rw-r--r-- | docs/scripts/tests/test_parse_received_headers.py | 105 | ||||
| -rw-r--r-- | docs/scripts/tests/test_process_eml.py | 129 | ||||
| -rw-r--r-- | docs/scripts/tests/test_save_attachments.py | 97 |
17 files changed, 0 insertions, 1683 deletions
diff --git a/docs/scripts/eml-view-and-extract-attachments-readme.org b/docs/scripts/eml-view-and-extract-attachments-readme.org deleted file mode 100644 index c132df8..0000000 --- a/docs/scripts/eml-view-and-extract-attachments-readme.org +++ /dev/null @@ -1,47 +0,0 @@ -#+TITLE: eml-view-and-extract-attachments.py - -Extract email content and attachments from EML files with auto-renaming. - -* Usage - -#+begin_src bash -# View mode — print metadata and body to stdout, extract attachments alongside EML -python3 docs/scripts/eml-view-and-extract-attachments.py inbox/message.eml - -# Pipeline mode — extract, auto-rename, refile to output dir, clean up -python3 docs/scripts/eml-view-and-extract-attachments.py inbox/message.eml --output-dir assets/ -#+end_src - -* Naming Convention - -Files are auto-renamed as =YYYY-MM-DD-HHMM-Sender-TYPE-Description.ext=: - -- =2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street.eml= -- =2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street.txt= -- =2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf= - -Date and sender are parsed from email headers. Falls back to "unknown" for missing values. - -* Dependencies - -- Python 3 (stdlib only for core functionality) -- =html2text= (optional — used for HTML-only emails, falls back to tag stripping) - -* Pipeline Mode Behavior - -1. Creates a temp directory alongside the source EML -2. Copies and renames the EML, writes a =.txt= of the body, extracts attachments -3. Checks for filename collisions in the output directory -4. Moves all files to the output directory -5. Cleans up the temp directory -6. Prints a summary of created files - -Source EML is never modified or moved. - -* Tests - -#+begin_src bash -python3 -m pytest docs/scripts/tests/ -v -#+end_src - -48 tests: unit tests for parsing, filename generation, and attachment saving; integration tests for both pipeline and stdout modes. Requires =pytest=. diff --git a/docs/scripts/eml-view-and-extract-attachments.py b/docs/scripts/eml-view-and-extract-attachments.py deleted file mode 100644 index 3201c99..0000000 --- a/docs/scripts/eml-view-and-extract-attachments.py +++ /dev/null @@ -1,398 +0,0 @@ -#!/usr/bin/env python3 -"""Extract email content and attachments from EML files. - -Without --output-dir: parse and print to stdout (backwards compatible). -With --output-dir: full pipeline — extract, auto-rename, refile, clean up. -""" - -import argparse -import email -import email.utils -import os -import re -import shutil -import sys -import tempfile - - -# --------------------------------------------------------------------------- -# Parsing functions (no I/O beyond reading the input file) -# --------------------------------------------------------------------------- - -def parse_received_headers(msg): - """Parse Received headers to extract sent/received times and servers.""" - received_headers = msg.get_all('Received', []) - - sent_server = None - sent_time = None - received_server = None - received_time = None - - for header in received_headers: - header = ' '.join(header.split()) - - time_match = re.search(r';\s*(.+)$', header) - timestamp = time_match.group(1).strip() if time_match else None - - from_match = re.search(r'from\s+([\w.-]+)', header) - by_match = re.search(r'by\s+([\w.-]+)', header) - - if from_match and by_match and received_server is None: - received_time = timestamp - received_server = by_match.group(1) - sent_server = from_match.group(1) - sent_time = timestamp - - if received_server is None and received_headers: - header = ' '.join(received_headers[0].split()) - time_match = re.search(r';\s*(.+)$', header) - received_time = time_match.group(1).strip() if time_match else None - by_match = re.search(r'by\s+([\w.-]+)', header) - received_server = by_match.group(1) if by_match else "unknown" - - return { - 'sent_time': sent_time, - 'sent_server': sent_server, - 'received_time': received_time, - 'received_server': received_server - } - - -def extract_body(msg): - """Walk MIME parts, prefer text/plain, fall back to html2text on text/html. - - Returns body text string. - """ - plain_text = None - html_text = None - - for part in msg.walk(): - content_type = part.get_content_type() - if content_type == "text/plain" and plain_text is None: - payload = part.get_payload(decode=True) - if payload is not None: - plain_text = payload.decode('utf-8', errors='ignore') - elif content_type == "text/html" and html_text is None: - payload = part.get_payload(decode=True) - if payload is not None: - html_text = payload.decode('utf-8', errors='ignore') - - if plain_text is not None: - return plain_text - - if html_text is not None: - try: - import html2text - h = html2text.HTML2Text() - h.body_width = 0 - return h.handle(html_text) - except ImportError: - # Strip HTML tags as fallback if html2text not installed - return re.sub(r'<[^>]+>', '', html_text) - - return "" - - -def extract_metadata(msg): - """Extract email metadata from headers. - - Returns dict with from, to, subject, date, and timing info. - """ - return { - 'from': msg.get('From'), - 'to': msg.get('To'), - 'subject': msg.get('Subject'), - 'date': msg.get('Date'), - 'timing': parse_received_headers(msg), - } - - -def generate_basename(metadata): - """Generate date-sender prefix from metadata. - - Returns e.g. "2026-02-05-1136-Jonathan". - Falls back to "unknown" for missing/malformed Date or From. - """ - # Parse date - date_str = metadata.get('date') - date_prefix = "unknown" - if date_str: - try: - parsed = email.utils.parsedate_to_datetime(date_str) - date_prefix = parsed.strftime('%Y-%m-%d-%H%M') - except (ValueError, TypeError): - pass - - # Parse sender first name - from_str = metadata.get('from') - sender = "unknown" - if from_str: - # Extract display name or email local part - display_name, addr = email.utils.parseaddr(from_str) - if display_name: - sender = display_name.split()[0] - elif addr: - sender = addr.split('@')[0] - - return f"{date_prefix}-{sender}" - - -def _clean_for_filename(text, max_length=80): - """Clean text for use in a filename. - - Replace spaces with hyphens, strip chars unsafe for filenames, - collapse multiple hyphens. - """ - text = text.strip() - text = text.replace(' ', '-') - # Keep alphanumeric, hyphens, dots, underscores - text = re.sub(r'[^\w\-.]', '', text) - # Collapse multiple hyphens - text = re.sub(r'-{2,}', '-', text) - # Strip leading/trailing hyphens - text = text.strip('-') - if len(text) > max_length: - text = text[:max_length].rstrip('-') - return text - - -def generate_email_filename(basename, subject): - """Generate email filename from basename and subject. - - Returns e.g. "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street" - (without extension — caller adds .eml or .txt). - """ - if subject: - clean_subject = _clean_for_filename(subject) - else: - clean_subject = "no-subject" - return f"{basename}-EMAIL-{clean_subject}" - - -def generate_attachment_filename(basename, original_filename): - """Generate attachment filename from basename and original filename. - - Returns e.g. "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf". - Preserves original extension. - """ - if not original_filename: - return f"{basename}-ATTACH-unnamed" - - name, ext = os.path.splitext(original_filename) - clean_name = _clean_for_filename(name) - return f"{basename}-ATTACH-{clean_name}{ext}" - - -# --------------------------------------------------------------------------- -# I/O functions (file operations) -# --------------------------------------------------------------------------- - -def save_attachments(msg, output_dir, basename): - """Write attachment files to output_dir with auto-renamed filenames. - - Returns list of dicts: {original_name, renamed_name, path}. - """ - results = [] - for part in msg.walk(): - if part.get_content_maintype() == 'multipart': - continue - if part.get('Content-Disposition') is None: - continue - - filename = part.get_filename() - if filename: - renamed = generate_attachment_filename(basename, filename) - filepath = os.path.join(output_dir, renamed) - with open(filepath, 'wb') as f: - f.write(part.get_payload(decode=True)) - results.append({ - 'original_name': filename, - 'renamed_name': renamed, - 'path': filepath, - }) - - return results - - -def save_text(text, filepath): - """Write body text to a .txt file.""" - with open(filepath, 'w', encoding='utf-8') as f: - f.write(text) - - -# --------------------------------------------------------------------------- -# Pipeline function -# --------------------------------------------------------------------------- - -def process_eml(eml_path, output_dir): - """Full extraction pipeline. - - 1. Create temp extraction dir - 2. Copy EML into temp dir - 3. Parse email (metadata, body, attachments) - 4. Generate filenames from headers - 5. Save renamed .eml, .txt, and attachments to temp dir - 6. Check for collisions in output_dir - 7. Move all files to output_dir - 8. Clean up temp dir - 9. Return results dict - """ - eml_path = os.path.abspath(eml_path) - output_dir = os.path.abspath(output_dir) - os.makedirs(output_dir, exist_ok=True) - - # Create temp dir as sibling of the EML file - eml_dir = os.path.dirname(eml_path) - temp_dir = tempfile.mkdtemp(prefix='extract-', dir=eml_dir) - - try: - # Copy EML to temp dir - temp_eml = os.path.join(temp_dir, os.path.basename(eml_path)) - shutil.copy2(eml_path, temp_eml) - - # Parse - with open(eml_path, 'rb') as f: - msg = email.message_from_binary_file(f) - - metadata = extract_metadata(msg) - body = extract_body(msg) - basename = generate_basename(metadata) - email_stem = generate_email_filename(basename, metadata['subject']) - - # Save renamed EML - renamed_eml = f"{email_stem}.eml" - renamed_eml_path = os.path.join(temp_dir, renamed_eml) - os.rename(temp_eml, renamed_eml_path) - - # Save .txt - renamed_txt = f"{email_stem}.txt" - renamed_txt_path = os.path.join(temp_dir, renamed_txt) - save_text(body, renamed_txt_path) - - # Save attachments - attachment_results = save_attachments(msg, temp_dir, basename) - - # Build file list - files = [ - {'type': 'eml', 'name': renamed_eml, 'path': None}, - {'type': 'txt', 'name': renamed_txt, 'path': None}, - ] - for att in attachment_results: - files.append({ - 'type': 'attach', - 'name': att['renamed_name'], - 'path': None, - }) - - # Check for collisions in output_dir - for file_info in files: - dest = os.path.join(output_dir, file_info['name']) - if os.path.exists(dest): - raise FileExistsError( - f"Collision: '{file_info['name']}' already exists in {output_dir}" - ) - - # Move all files to output_dir - for file_info in files: - src = os.path.join(temp_dir, file_info['name']) - dest = os.path.join(output_dir, file_info['name']) - shutil.move(src, dest) - file_info['path'] = dest - - return { - 'metadata': metadata, - 'body': body, - 'files': files, - } - - finally: - # Clean up temp dir - if os.path.exists(temp_dir): - shutil.rmtree(temp_dir) - - -# --------------------------------------------------------------------------- -# Stdout display (backwards-compatible mode) -# --------------------------------------------------------------------------- - -def print_email(eml_path): - """Parse and print email to stdout. Extract attachments alongside EML. - - This preserves the original script behavior when --output-dir is not given. - """ - with open(eml_path, 'rb') as f: - msg = email.message_from_binary_file(f) - - metadata = extract_metadata(msg) - body = extract_body(msg) - timing = metadata['timing'] - - print(f"From: {metadata['from']}") - print(f"To: {metadata['to']}") - print(f"Subject: {metadata['subject']}") - print(f"Date: {metadata['date']}") - print(f"Sent: {timing['sent_time']} (via {timing['sent_server']})") - print(f"Received: {timing['received_time']} (at {timing['received_server']})") - print() - print(body) - print() - - # Extract attachments alongside the EML file - for part in msg.walk(): - if part.get_content_maintype() == 'multipart': - continue - if part.get('Content-Disposition') is None: - continue - - filename = part.get_filename() - if filename: - filepath = os.path.join(os.path.dirname(eml_path), filename) - with open(filepath, 'wb') as f: - f.write(part.get_payload(decode=True)) - print(f"Extracted attachment: {filename}") - - -def print_pipeline_summary(result): - """Print summary after pipeline extraction.""" - metadata = result['metadata'] - timing = metadata['timing'] - - print(f"From: {metadata['from']}") - print(f"To: {metadata['to']}") - print(f"Subject: {metadata['subject']}") - print(f"Date: {metadata['date']}") - print(f"Sent: {timing['sent_time']} (via {timing['sent_server']})") - print(f"Received: {timing['received_time']} (at {timing['received_server']})") - print() - print("Files created:") - for f in result['files']: - print(f" [{f['type']:>6}] {f['name']}") - print(f"\nOutput directory: {os.path.dirname(result['files'][0]['path'])}") - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Extract email content and attachments from EML files." - ) - parser.add_argument('eml_path', help="Path to source EML file") - parser.add_argument( - '--output-dir', - help="Destination directory for extracted files. " - "Without this flag, prints to stdout only (backwards compatible)." - ) - - args = parser.parse_args() - - if not os.path.isfile(args.eml_path): - print(f"Error: '{args.eml_path}' not found or is not a file.", file=sys.stderr) - sys.exit(1) - - if args.output_dir: - result = process_eml(args.eml_path, args.output_dir) - print_pipeline_summary(result) - else: - print_email(args.eml_path) diff --git a/docs/scripts/maildir-flag-manager.py b/docs/scripts/maildir-flag-manager.py deleted file mode 100755 index 9c4a59c..0000000 --- a/docs/scripts/maildir-flag-manager.py +++ /dev/null @@ -1,345 +0,0 @@ -#!/usr/bin/env python3 -"""Manage maildir flags (read, starred) across email accounts. - -Uses atomic os.rename() for flag operations directly on maildir files. -Safer and more reliable than shell-based approaches (zsh loses PATH in -while-read loops, piped mu move silently fails). - -Supports the same flag semantics as mu4e: maildir files in new/ are moved -to cur/ when the Seen flag is added, and flag changes are persisted to the -filesystem so mbsync picks them up on the next sync. - -Usage: - # Mark all unread INBOX emails as read - maildir-flag-manager.py mark-read - - # Mark specific emails as read (by path) - maildir-flag-manager.py mark-read /path/to/message1 /path/to/message2 - - # Mark all unread INBOX emails as read, then reindex mu - maildir-flag-manager.py mark-read --reindex - - # Star specific emails (by path) - maildir-flag-manager.py star /path/to/message1 /path/to/message2 - - # Star and mark read - maildir-flag-manager.py star --mark-read /path/to/message1 - - # Dry run — show what would change without modifying anything - maildir-flag-manager.py mark-read --dry-run -""" - -import argparse -import os -import shutil -import subprocess -import sys - - -# --------------------------------------------------------------------------- -# Configuration -# --------------------------------------------------------------------------- - -MAILDIR_ACCOUNTS = { - 'gmail': os.path.expanduser('~/.mail/gmail/INBOX'), - 'cmail': os.path.expanduser('~/.mail/cmail/Inbox'), -} - - -# --------------------------------------------------------------------------- -# Core flag operations -# --------------------------------------------------------------------------- - -def parse_maildir_flags(filename): - """Extract flags from a maildir filename. - - Maildir filenames follow the pattern: unique:2,FLAGS - where FLAGS is a sorted string of flag characters (e.g., "FS" for - Flagged+Seen). - - Returns (base, flags_string). If no flags section, returns (filename, ''). - """ - if ':2,' in filename: - base, flags = filename.rsplit(':2,', 1) - return base, flags - return filename, '' - - -def build_flagged_filename(filename, new_flags): - """Build a maildir filename with the given flags. - - Flags are always sorted alphabetically per maildir spec. - """ - base, _ = parse_maildir_flags(filename) - sorted_flags = ''.join(sorted(set(new_flags))) - return f"{base}:2,{sorted_flags}" - - -def rename_with_flag(file_path, flag, dry_run=False): - """Add a flag to a single maildir message file via atomic rename. - - Handles moving from new/ to cur/ when adding the Seen flag. - Returns True if the flag was added, False if already present. - """ - dirname = os.path.dirname(file_path) - filename = os.path.basename(file_path) - maildir_root = os.path.dirname(dirname) - subdir = os.path.basename(dirname) - - _, current_flags = parse_maildir_flags(filename) - - if flag in current_flags: - return False - - new_flags = current_flags + flag - new_filename = build_flagged_filename(filename, new_flags) - - # Messages with the Seen flag belong in cur/, not new/ - if 'S' in new_flags and subdir == 'new': - target_dir = os.path.join(maildir_root, 'cur') - else: - target_dir = dirname - - new_path = os.path.join(target_dir, new_filename) - - if dry_run: - return True - - os.rename(file_path, new_path) - return True - - -def process_maildir(maildir_path, flag, dry_run=False): - """Add a flag to all messages in a maildir that don't have it. - - Scans both new/ and cur/ subdirectories. - Returns (changed_count, skipped_count, error_count). - """ - if not os.path.isdir(maildir_path): - print(f" Skipping {maildir_path} (not found)", file=sys.stderr) - return 0, 0, 0 - - changed = 0 - skipped = 0 - errors = 0 - - for subdir in ('new', 'cur'): - subdir_path = os.path.join(maildir_path, subdir) - if not os.path.isdir(subdir_path): - continue - - for filename in os.listdir(subdir_path): - file_path = os.path.join(subdir_path, filename) - if not os.path.isfile(file_path): - continue - - try: - if rename_with_flag(file_path, flag, dry_run): - changed += 1 - else: - skipped += 1 - except Exception as e: - print(f" Error on {filename}: {e}", file=sys.stderr) - errors += 1 - - return changed, skipped, errors - - -def process_specific_files(paths, flag, dry_run=False): - """Add a flag to specific message files by path. - - Returns (changed_count, skipped_count, error_count). - """ - changed = 0 - skipped = 0 - errors = 0 - - for path in paths: - path = os.path.abspath(path) - if not os.path.isfile(path): - print(f" File not found: {path}", file=sys.stderr) - errors += 1 - continue - - # Verify file is inside a maildir (parent should be cur/ or new/) - parent_dir = os.path.basename(os.path.dirname(path)) - if parent_dir not in ('cur', 'new'): - print(f" Not in a maildir cur/ or new/ dir: {path}", - file=sys.stderr) - errors += 1 - continue - - try: - if rename_with_flag(path, flag, dry_run): - changed += 1 - else: - skipped += 1 - except Exception as e: - print(f" Error on {path}: {e}", file=sys.stderr) - errors += 1 - - return changed, skipped, errors - - -def reindex_mu(): - """Run mu index to update the database after flag changes.""" - mu_path = shutil.which('mu') - if not mu_path: - print("Warning: mu not found in PATH, skipping reindex", - file=sys.stderr) - return False - - try: - result = subprocess.run( - [mu_path, 'index'], - capture_output=True, text=True, timeout=120 - ) - if result.returncode == 0: - print("mu index: database updated") - return True - else: - print(f"mu index failed: {result.stderr}", file=sys.stderr) - return False - except subprocess.TimeoutExpired: - print("mu index timed out after 120s", file=sys.stderr) - return False - - -# --------------------------------------------------------------------------- -# Commands -# --------------------------------------------------------------------------- - -def cmd_mark_read(args): - """Mark emails as read (add Seen flag).""" - flag = 'S' - action = "Marking as read" - if args.dry_run: - action = "Would mark as read" - - total_changed = 0 - total_skipped = 0 - total_errors = 0 - - if args.paths: - print(f"{action}: {len(args.paths)} specific message(s)") - c, s, e = process_specific_files(args.paths, flag, args.dry_run) - total_changed += c - total_skipped += s - total_errors += e - else: - for name, maildir_path in MAILDIR_ACCOUNTS.items(): - print(f"{action} in {name} ({maildir_path})") - c, s, e = process_maildir(maildir_path, flag, args.dry_run) - total_changed += c - total_skipped += s - total_errors += e - if c > 0: - print(f" {c} message(s) marked as read") - if s > 0: - print(f" {s} already read") - - print(f"\nTotal: {total_changed} changed, {total_skipped} already set, " - f"{total_errors} errors") - - if args.reindex and not args.dry_run and total_changed > 0: - reindex_mu() - - return 0 if total_errors == 0 else 1 - - -def cmd_star(args): - """Star/flag emails (add Flagged flag).""" - flag = 'F' - action = "Starring" - if args.dry_run: - action = "Would star" - - if not args.paths: - print("Error: star requires specific message paths", file=sys.stderr) - return 1 - - print(f"{action}: {len(args.paths)} message(s)") - total_changed = 0 - total_skipped = 0 - total_errors = 0 - - c, s, e = process_specific_files(args.paths, flag, args.dry_run) - total_changed += c - total_skipped += s - total_errors += e - - # Also mark as read if requested - if args.mark_read: - print("Also marking as read...") - c2, _, e2 = process_specific_files(args.paths, 'S', args.dry_run) - total_changed += c2 - total_errors += e2 - - print(f"\nTotal: {total_changed} flag(s) changed, {total_skipped} already set, " - f"{total_errors} errors") - - if args.reindex and not args.dry_run and total_changed > 0: - reindex_mu() - - return 0 if total_errors == 0 else 1 - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - -def main(): - parser = argparse.ArgumentParser( - description="Manage maildir flags (read, starred) across email accounts." - ) - subparsers = parser.add_subparsers(dest='command', required=True) - - # mark-read - p_read = subparsers.add_parser( - 'mark-read', - help="Mark emails as read (add Seen flag)" - ) - p_read.add_argument( - 'paths', nargs='*', - help="Specific message file paths. If omitted, marks all unread " - "messages in configured INBOX maildirs." - ) - p_read.add_argument( - '--reindex', action='store_true', - help="Run mu index after changing flags" - ) - p_read.add_argument( - '--dry-run', action='store_true', - help="Show what would change without modifying anything" - ) - p_read.set_defaults(func=cmd_mark_read) - - # star - p_star = subparsers.add_parser( - 'star', - help="Star/flag emails (add Flagged flag)" - ) - p_star.add_argument( - 'paths', nargs='+', - help="Message file paths to star" - ) - p_star.add_argument( - '--mark-read', action='store_true', - help="Also mark starred messages as read" - ) - p_star.add_argument( - '--reindex', action='store_true', - help="Run mu index after changing flags" - ) - p_star.add_argument( - '--dry-run', action='store_true', - help="Show what would change without modifying anything" - ) - p_star.set_defaults(func=cmd_star) - - args = parser.parse_args() - sys.exit(args.func(args)) - - -if __name__ == '__main__': - main() diff --git a/docs/scripts/tests/conftest.py b/docs/scripts/tests/conftest.py deleted file mode 100644 index 8d965ab..0000000 --- a/docs/scripts/tests/conftest.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Shared fixtures for EML extraction tests.""" - -import os -from email.message import EmailMessage -from email.mime.application import MIMEApplication -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -import pytest - - -@pytest.fixture -def fixtures_dir(): - """Return path to the fixtures/ directory.""" - return os.path.join(os.path.dirname(__file__), 'fixtures') - - -def make_plain_message(body="Test body", from_="Jonathan Smith <jsmith@example.com>", - to="Craig <craig@example.com>", - subject="Test Subject", - date="Wed, 05 Feb 2026 11:36:00 -0600"): - """Create an EmailMessage with text/plain body.""" - msg = EmailMessage() - msg['From'] = from_ - msg['To'] = to - msg['Subject'] = subject - msg['Date'] = date - msg.set_content(body) - return msg - - -def make_html_message(html_body="<p>Test body</p>", - from_="Jonathan Smith <jsmith@example.com>", - to="Craig <craig@example.com>", - subject="Test Subject", - date="Wed, 05 Feb 2026 11:36:00 -0600"): - """Create an EmailMessage with text/html body only.""" - msg = EmailMessage() - msg['From'] = from_ - msg['To'] = to - msg['Subject'] = subject - msg['Date'] = date - msg.set_content(html_body, subtype='html') - return msg - - -def make_message_with_attachment(body="Test body", - from_="Jonathan Smith <jsmith@example.com>", - to="Craig <craig@example.com>", - subject="Test Subject", - date="Wed, 05 Feb 2026 11:36:00 -0600", - attachment_filename="document.pdf", - attachment_content=b"fake pdf content"): - """Create a multipart message with a text body and one attachment.""" - msg = MIMEMultipart() - msg['From'] = from_ - msg['To'] = to - msg['Subject'] = subject - msg['Date'] = date - - msg.attach(MIMEText(body, 'plain')) - - att = MIMEApplication(attachment_content, Name=attachment_filename) - att['Content-Disposition'] = f'attachment; filename="{attachment_filename}"' - msg.attach(att) - - return msg - - -def add_received_headers(msg, headers): - """Add Received headers to an existing message. - - headers: list of header strings, added in order (first = most recent). - """ - for header in headers: - msg['Received'] = header - return msg diff --git a/docs/scripts/tests/fixtures/empty-body.eml b/docs/scripts/tests/fixtures/empty-body.eml deleted file mode 100644 index cf008df..0000000 --- a/docs/scripts/tests/fixtures/empty-body.eml +++ /dev/null @@ -1,16 +0,0 @@ -From: Jonathan Smith <jsmith@example.com> -To: Craig Jennings <craig@example.com> -Subject: Empty Body Test -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="boundary456" -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - ---boundary456 -Content-Type: application/octet-stream; name="data.bin" -Content-Disposition: attachment; filename="data.bin" -Content-Transfer-Encoding: base64 - -AQIDBA== - ---boundary456-- diff --git a/docs/scripts/tests/fixtures/html-only.eml b/docs/scripts/tests/fixtures/html-only.eml deleted file mode 100644 index 4db7645..0000000 --- a/docs/scripts/tests/fixtures/html-only.eml +++ /dev/null @@ -1,20 +0,0 @@ -From: Jonathan Smith <jsmith@example.com> -To: Craig Jennings <craig@example.com> -Subject: HTML Update -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/html; charset="utf-8" -Content-Transfer-Encoding: 7bit -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - -<html> -<body> -<p>Hi Craig,</p> -<p>Here is the <strong>HTML</strong> update.</p> -<ul> -<li>Item one</li> -<li>Item two</li> -</ul> -<p>Best,<br>Jonathan</p> -</body> -</html> diff --git a/docs/scripts/tests/fixtures/multiple-received-headers.eml b/docs/scripts/tests/fixtures/multiple-received-headers.eml deleted file mode 100644 index 1b8d6a7..0000000 --- a/docs/scripts/tests/fixtures/multiple-received-headers.eml +++ /dev/null @@ -1,12 +0,0 @@ -From: Jonathan Smith <jsmith@example.com> -To: Craig Jennings <craig@example.com> -Subject: Multiple Received Headers Test -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit -Received: by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600 -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 -Received: from originator.example.com by relay.example.com with SMTP; Thu, 05 Feb 2026 11:35:58 -0600 - -Test body with multiple received headers. diff --git a/docs/scripts/tests/fixtures/no-received-headers.eml b/docs/scripts/tests/fixtures/no-received-headers.eml deleted file mode 100644 index 8a05dc7..0000000 --- a/docs/scripts/tests/fixtures/no-received-headers.eml +++ /dev/null @@ -1,9 +0,0 @@ -From: Jonathan Smith <jsmith@example.com> -To: Craig Jennings <craig@example.com> -Subject: No Received Headers -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit - -Test body with no received headers at all. diff --git a/docs/scripts/tests/fixtures/plain-text.eml b/docs/scripts/tests/fixtures/plain-text.eml deleted file mode 100644 index 8cc9d9c..0000000 --- a/docs/scripts/tests/fixtures/plain-text.eml +++ /dev/null @@ -1,15 +0,0 @@ -From: Jonathan Smith <jsmith@example.com> -To: Craig Jennings <craig@example.com> -Subject: Re: Fw: 4319 Danneel Street -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - -Hi Craig, - -Here is the update on 4319 Danneel Street. - -Best, -Jonathan diff --git a/docs/scripts/tests/fixtures/with-attachment.eml b/docs/scripts/tests/fixtures/with-attachment.eml deleted file mode 100644 index ac49c5d..0000000 --- a/docs/scripts/tests/fixtures/with-attachment.eml +++ /dev/null @@ -1,27 +0,0 @@ -From: Jonathan Smith <jsmith@example.com> -To: Craig Jennings <craig@example.com> -Subject: Ltr from Carrollton -Date: Thu, 05 Feb 2026 11:36:00 -0600 -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="boundary123" -Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600 - ---boundary123 -Content-Type: text/plain; charset="utf-8" -Content-Transfer-Encoding: 7bit - -Hi Craig, - -Please find the letter attached. - -Best, -Jonathan - ---boundary123 -Content-Type: application/octet-stream; name="Ltr Carrollton.pdf" -Content-Disposition: attachment; filename="Ltr Carrollton.pdf" -Content-Transfer-Encoding: base64 - -ZmFrZSBwZGYgY29udGVudA== - ---boundary123-- diff --git a/docs/scripts/tests/test_extract_body.py b/docs/scripts/tests/test_extract_body.py deleted file mode 100644 index 7b53cda..0000000 --- a/docs/scripts/tests/test_extract_body.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Tests for extract_body().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, make_html_message, make_message_with_attachment -from email.message import EmailMessage -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from email.mime.application import MIMEApplication - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -extract_body = eml_script.extract_body - - -class TestPlainText: - def test_returns_plain_text(self): - msg = make_plain_message(body="Hello, this is plain text.") - result = extract_body(msg) - assert "Hello, this is plain text." in result - - -class TestHtmlOnly: - def test_returns_converted_html(self): - msg = make_html_message(html_body="<p>Hello <strong>world</strong></p>") - result = extract_body(msg) - assert "Hello" in result - assert "world" in result - # Should not contain raw HTML tags - assert "<p>" not in result - assert "<strong>" not in result - - -class TestBothPlainAndHtml: - def test_prefers_plain_text(self): - msg = MIMEMultipart('alternative') - msg['From'] = 'test@example.com' - msg['To'] = 'dest@example.com' - msg['Subject'] = 'Test' - msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' - msg.attach(MIMEText("Plain text version", 'plain')) - msg.attach(MIMEText("<p>HTML version</p>", 'html')) - result = extract_body(msg) - assert "Plain text version" in result - assert "HTML version" not in result - - -class TestEmptyBody: - def test_returns_empty_string(self): - # Multipart with only attachments, no text parts - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - att = MIMEApplication(b"binary data", Name="file.bin") - att['Content-Disposition'] = 'attachment; filename="file.bin"' - msg.attach(att) - result = extract_body(msg) - assert result == "" - - -class TestNonUtf8Encoding: - def test_decodes_with_errors_ignore(self): - msg = EmailMessage() - msg['From'] = 'test@example.com' - # Set raw bytes that include invalid UTF-8 - msg.set_content("Valid text with special: café") - result = extract_body(msg) - assert "Valid text" in result - - -class TestHtmlWithStructure: - def test_preserves_list_structure(self): - html = "<ul><li>Item one</li><li>Item two</li></ul>" - msg = make_html_message(html_body=html) - result = extract_body(msg) - assert "Item one" in result - assert "Item two" in result - - -class TestNoTextParts: - def test_returns_empty_string(self): - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - att = MIMEApplication(b"data", Name="image.png") - att['Content-Disposition'] = 'attachment; filename="image.png"' - msg.attach(att) - result = extract_body(msg) - assert result == "" diff --git a/docs/scripts/tests/test_extract_metadata.py b/docs/scripts/tests/test_extract_metadata.py deleted file mode 100644 index d5ee52e..0000000 --- a/docs/scripts/tests/test_extract_metadata.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Tests for extract_metadata().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, add_received_headers -from email.message import EmailMessage - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -extract_metadata = eml_script.extract_metadata - - -class TestAllHeadersPresent: - def test_complete_dict(self): - msg = make_plain_message( - from_="Jonathan Smith <jsmith@example.com>", - to="Craig <craig@example.com>", - subject="Test Subject", - date="Thu, 05 Feb 2026 11:36:00 -0600" - ) - result = extract_metadata(msg) - assert result['from'] == "Jonathan Smith <jsmith@example.com>" - assert result['to'] == "Craig <craig@example.com>" - assert result['subject'] == "Test Subject" - assert result['date'] == "Thu, 05 Feb 2026 11:36:00 -0600" - assert 'timing' in result - - -class TestMissingFrom: - def test_from_is_none(self): - msg = EmailMessage() - msg['To'] = 'craig@example.com' - msg['Subject'] = 'Test' - msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' - msg.set_content("body") - result = extract_metadata(msg) - assert result['from'] is None - - -class TestMissingDate: - def test_date_is_none(self): - msg = EmailMessage() - msg['From'] = 'test@example.com' - msg['To'] = 'craig@example.com' - msg['Subject'] = 'Test' - msg.set_content("body") - result = extract_metadata(msg) - assert result['date'] is None - - -class TestLongSubject: - def test_full_subject_returned(self): - long_subject = "Re: Fw: This is a very long subject line that spans many words and might be folded" - msg = make_plain_message(subject=long_subject) - result = extract_metadata(msg) - assert result['subject'] == long_subject diff --git a/docs/scripts/tests/test_generate_filenames.py b/docs/scripts/tests/test_generate_filenames.py deleted file mode 100644 index 07c8f84..0000000 --- a/docs/scripts/tests/test_generate_filenames.py +++ /dev/null @@ -1,157 +0,0 @@ -"""Tests for generate_basename(), generate_email_filename(), generate_attachment_filename().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -generate_basename = eml_script.generate_basename -generate_email_filename = eml_script.generate_email_filename -generate_attachment_filename = eml_script.generate_attachment_filename - - -# --- generate_basename --- - -class TestGenerateBasename: - def test_standard_from_and_date(self): - metadata = { - 'from': 'Jonathan Smith <jsmith@example.com>', - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - assert generate_basename(metadata) == "2026-02-05-1136-Jonathan" - - def test_from_with_display_name_first_token(self): - metadata = { - 'from': 'C Ciarm <cciarm@example.com>', - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - result = generate_basename(metadata) - assert result == "2026-02-05-1136-C" - - def test_from_without_display_name(self): - metadata = { - 'from': 'jsmith@example.com', - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - result = generate_basename(metadata) - assert result == "2026-02-05-1136-jsmith" - - def test_missing_date(self): - metadata = { - 'from': 'Jonathan Smith <jsmith@example.com>', - 'date': None, - } - result = generate_basename(metadata) - assert result == "unknown-Jonathan" - - def test_missing_from(self): - metadata = { - 'from': None, - 'date': 'Wed, 05 Feb 2026 11:36:00 -0600', - } - result = generate_basename(metadata) - assert result == "2026-02-05-1136-unknown" - - def test_both_missing(self): - metadata = {'from': None, 'date': None} - result = generate_basename(metadata) - assert result == "unknown-unknown" - - def test_unparseable_date(self): - metadata = { - 'from': 'Jonathan <j@example.com>', - 'date': 'not a real date', - } - result = generate_basename(metadata) - assert result == "unknown-Jonathan" - - def test_none_date_no_crash(self): - metadata = {'from': 'Test <t@e.com>', 'date': None} - # Should not raise - result = generate_basename(metadata) - assert "unknown" in result - - -# --- generate_email_filename --- - -class TestGenerateEmailFilename: - def test_standard_subject(self): - result = generate_email_filename( - "2026-02-05-1136-Jonathan", - "Re: Fw: 4319 Danneel Street" - ) - assert result == "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street" - - def test_subject_with_special_chars(self): - result = generate_email_filename( - "2026-02-05-1136-Jonathan", - "Update: Meeting (draft) & notes!" - ) - # Colons, parens, ampersands, exclamation stripped - assert "EMAIL" in result - assert ":" not in result - assert "(" not in result - assert ")" not in result - assert "&" not in result - assert "!" not in result - - def test_none_subject(self): - result = generate_email_filename("2026-02-05-1136-Jonathan", None) - assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject" - - def test_empty_subject(self): - result = generate_email_filename("2026-02-05-1136-Jonathan", "") - assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject" - - def test_very_long_subject(self): - long_subject = "A" * 100 + " " + "B" * 100 - result = generate_email_filename("2026-02-05-1136-Jonathan", long_subject) - # The cleaned subject part should be truncated - # basename (27) + "-EMAIL-" (7) + subject - # Subject itself is limited to 80 chars by _clean_for_filename - subject_part = result.split("-EMAIL-")[1] - assert len(subject_part) <= 80 - - -# --- generate_attachment_filename --- - -class TestGenerateAttachmentFilename: - def test_standard_attachment(self): - result = generate_attachment_filename( - "2026-02-05-1136-Jonathan", - "Ltr Carrollton.pdf" - ) - assert result == "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf" - - def test_filename_with_spaces_and_parens(self): - result = generate_attachment_filename( - "2026-02-05-1136-Jonathan", - "Document (final copy).pdf" - ) - assert " " not in result - assert "(" not in result - assert ")" not in result - assert result.endswith(".pdf") - - def test_preserves_extension(self): - result = generate_attachment_filename( - "2026-02-05-1136-Jonathan", - "photo.jpg" - ) - assert result.endswith(".jpg") - - def test_none_filename(self): - result = generate_attachment_filename("2026-02-05-1136-Jonathan", None) - assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed" - - def test_empty_filename(self): - result = generate_attachment_filename("2026-02-05-1136-Jonathan", "") - assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed" diff --git a/docs/scripts/tests/test_integration_stdout.py b/docs/scripts/tests/test_integration_stdout.py deleted file mode 100644 index d87478e..0000000 --- a/docs/scripts/tests/test_integration_stdout.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Integration tests for backwards-compatible stdout mode (no --output-dir).""" - -import os -import shutil -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -print_email = eml_script.print_email - -FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') - - -class TestPlainTextStdout: - def test_metadata_and_body_printed(self, tmp_path, capsys): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - working_eml = tmp_path / "message.eml" - shutil.copy2(eml_src, working_eml) - - print_email(str(working_eml)) - captured = capsys.readouterr() - - assert "From: Jonathan Smith <jsmith@example.com>" in captured.out - assert "To: Craig Jennings <craig@example.com>" in captured.out - assert "Subject: Re: Fw: 4319 Danneel Street" in captured.out - assert "Date:" in captured.out - assert "Sent:" in captured.out - assert "Received:" in captured.out - assert "4319 Danneel Street" in captured.out - - -class TestHtmlFallbackStdout: - def test_html_converted_on_stdout(self, tmp_path, capsys): - eml_src = os.path.join(FIXTURES, 'html-only.eml') - working_eml = tmp_path / "message.eml" - shutil.copy2(eml_src, working_eml) - - print_email(str(working_eml)) - captured = capsys.readouterr() - - # Should see converted text, not raw HTML - assert "HTML" in captured.out - assert "<p>" not in captured.out - - -class TestAttachmentsStdout: - def test_attachment_extracted_alongside_eml(self, tmp_path, capsys): - eml_src = os.path.join(FIXTURES, 'with-attachment.eml') - working_eml = tmp_path / "message.eml" - shutil.copy2(eml_src, working_eml) - - print_email(str(working_eml)) - captured = capsys.readouterr() - - assert "Extracted attachment:" in captured.out - assert "Ltr Carrollton.pdf" in captured.out - - # File should exist alongside the EML - extracted = tmp_path / "Ltr Carrollton.pdf" - assert extracted.exists() diff --git a/docs/scripts/tests/test_parse_received_headers.py b/docs/scripts/tests/test_parse_received_headers.py deleted file mode 100644 index e12e1fb..0000000 --- a/docs/scripts/tests/test_parse_received_headers.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Tests for parse_received_headers().""" - -import email -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, add_received_headers -from email.message import EmailMessage - -# Import the function under test -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -parse_received_headers = eml_script.parse_received_headers - - -class TestSingleHeader: - def test_header_with_from_and_by(self): - msg = EmailMessage() - msg['Received'] = ( - 'from mail-sender.example.com by mx.receiver.example.com ' - 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' - ) - result = parse_received_headers(msg) - assert result['sent_server'] == 'mail-sender.example.com' - assert result['received_server'] == 'mx.receiver.example.com' - assert result['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' - assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600' - - -class TestMultipleHeaders: - def test_uses_first_with_both_from_and_by(self): - msg = EmailMessage() - # Most recent first (by only) - msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' - # Next: has both from and by — this should be selected - msg['Received'] = ( - 'from mail-sender.example.com by mx.receiver.example.com ' - 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600' - ) - # Oldest - msg['Received'] = ( - 'from originator.example.com by relay.example.com ' - 'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600' - ) - result = parse_received_headers(msg) - assert result['sent_server'] == 'mail-sender.example.com' - assert result['received_server'] == 'mx.receiver.example.com' - - -class TestNoReceivedHeaders: - def test_all_values_none(self): - msg = EmailMessage() - result = parse_received_headers(msg) - assert result['sent_time'] is None - assert result['sent_server'] is None - assert result['received_time'] is None - assert result['received_server'] is None - - -class TestByButNoFrom: - def test_falls_back_to_first_header(self): - msg = EmailMessage() - msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600' - result = parse_received_headers(msg) - assert result['received_server'] == 'internal.example.com' - assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:10 -0600' - # No from in any header, so sent_server stays None - assert result['sent_server'] is None - - -class TestMultilineFoldedHeader: - def test_normalizes_whitespace(self): - # Use email.message_from_string to parse raw folded headers - # (EmailMessage policy rejects embedded CRLF in set values) - raw = ( - "From: test@example.com\r\n" - "Received: from mail-sender.example.com\r\n" - " by mx.receiver.example.com\r\n" - " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n" - "\r\n" - "body\r\n" - ) - msg = email.message_from_string(raw) - result = parse_received_headers(msg) - assert result['sent_server'] == 'mail-sender.example.com' - assert result['received_server'] == 'mx.receiver.example.com' - - -class TestMalformedTimestamp: - def test_no_semicolon(self): - msg = EmailMessage() - msg['Received'] = 'from sender.example.com by receiver.example.com with SMTP' - result = parse_received_headers(msg) - assert result['sent_server'] == 'sender.example.com' - assert result['received_server'] == 'receiver.example.com' - assert result['sent_time'] is None - assert result['received_time'] is None diff --git a/docs/scripts/tests/test_process_eml.py b/docs/scripts/tests/test_process_eml.py deleted file mode 100644 index 26c5ad5..0000000 --- a/docs/scripts/tests/test_process_eml.py +++ /dev/null @@ -1,129 +0,0 @@ -"""Integration tests for process_eml() — full pipeline with --output-dir.""" - -import os -import shutil -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -process_eml = eml_script.process_eml - -import pytest - - -FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') - - -class TestPlainTextPipeline: - def test_creates_eml_and_txt(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - # Copy fixture to tmp_path so temp dir can be created as sibling - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - result = process_eml(str(working_eml), str(output_dir)) - - # Should have exactly 2 files: .eml and .txt - assert len(result['files']) == 2 - eml_file = result['files'][0] - txt_file = result['files'][1] - - assert eml_file['type'] == 'eml' - assert txt_file['type'] == 'txt' - assert eml_file['name'].endswith('.eml') - assert txt_file['name'].endswith('.txt') - - # Files exist in output dir - assert os.path.isfile(eml_file['path']) - assert os.path.isfile(txt_file['path']) - - # Filenames contain expected components - assert 'Jonathan' in eml_file['name'] - assert 'EMAIL' in eml_file['name'] - assert '2026-02-05' in eml_file['name'] - - # Temp dir cleaned up (no extract-* dirs in inbox) - inbox_contents = os.listdir(str(tmp_path / "inbox")) - assert not any(d.startswith('extract-') for d in inbox_contents) - - -class TestHtmlFallbackPipeline: - def test_txt_contains_converted_html(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'html-only.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - result = process_eml(str(working_eml), str(output_dir)) - - txt_file = result['files'][1] - with open(txt_file['path'], 'r') as f: - content = f.read() - - # Should be converted, not raw HTML - assert '<p>' not in content - assert '<strong>' not in content - assert 'HTML' in content - - -class TestAttachmentPipeline: - def test_eml_txt_and_attachment_created(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'with-attachment.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - result = process_eml(str(working_eml), str(output_dir)) - - assert len(result['files']) == 3 - types = [f['type'] for f in result['files']] - assert types == ['eml', 'txt', 'attach'] - - # Attachment is auto-renamed - attach_file = result['files'][2] - assert 'ATTACH' in attach_file['name'] - assert attach_file['name'].endswith('.pdf') - assert os.path.isfile(attach_file['path']) - - -class TestCollisionDetection: - def test_raises_on_existing_file(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "output" - # Run once to create files - result = process_eml(str(working_eml), str(output_dir)) - - # Run again — should raise FileExistsError - with pytest.raises(FileExistsError, match="Collision"): - process_eml(str(working_eml), str(output_dir)) - - -class TestMissingOutputDir: - def test_creates_directory(self, tmp_path): - eml_src = os.path.join(FIXTURES, 'plain-text.eml') - working_eml = tmp_path / "inbox" / "message.eml" - working_eml.parent.mkdir() - shutil.copy2(eml_src, working_eml) - - output_dir = tmp_path / "new" / "nested" / "output" - assert not output_dir.exists() - - result = process_eml(str(working_eml), str(output_dir)) - assert output_dir.exists() - assert len(result['files']) == 2 diff --git a/docs/scripts/tests/test_save_attachments.py b/docs/scripts/tests/test_save_attachments.py deleted file mode 100644 index 32f02a6..0000000 --- a/docs/scripts/tests/test_save_attachments.py +++ /dev/null @@ -1,97 +0,0 @@ -"""Tests for save_attachments().""" - -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from conftest import make_plain_message, make_message_with_attachment -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from email.mime.application import MIMEApplication - -import importlib.util -spec = importlib.util.spec_from_file_location( - "eml_script", - os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py') -) -eml_script = importlib.util.module_from_spec(spec) -spec.loader.exec_module(eml_script) - -save_attachments = eml_script.save_attachments - - -class TestSingleAttachment: - def test_file_written_and_returned(self, tmp_path): - msg = make_message_with_attachment( - attachment_filename="report.pdf", - attachment_content=b"pdf bytes here" - ) - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - - assert len(result) == 1 - assert result[0]['original_name'] == "report.pdf" - assert "ATTACH" in result[0]['renamed_name'] - assert result[0]['renamed_name'].endswith(".pdf") - - # File actually exists and has correct content - written_path = result[0]['path'] - assert os.path.isfile(written_path) - with open(written_path, 'rb') as f: - assert f.read() == b"pdf bytes here" - - -class TestMultipleAttachments: - def test_all_written_and_returned(self, tmp_path): - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600' - msg.attach(MIMEText("body", 'plain')) - - for name, content in [("doc1.pdf", b"pdf1"), ("image.png", b"png1")]: - att = MIMEApplication(content, Name=name) - att['Content-Disposition'] = f'attachment; filename="{name}"' - msg.attach(att) - - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - - assert len(result) == 2 - for r in result: - assert os.path.isfile(r['path']) - - -class TestNoAttachments: - def test_empty_list(self, tmp_path): - msg = make_plain_message() - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - assert result == [] - - -class TestFilenameWithSpaces: - def test_cleaned_filename(self, tmp_path): - msg = make_message_with_attachment( - attachment_filename="My Document (1).pdf", - attachment_content=b"data" - ) - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - - assert len(result) == 1 - assert " " not in result[0]['renamed_name'] - assert os.path.isfile(result[0]['path']) - - -class TestNoContentDisposition: - def test_skipped(self, tmp_path): - msg = MIMEMultipart() - msg['From'] = 'test@example.com' - msg.attach(MIMEText("body", 'plain')) - - # Add a part without Content-Disposition - part = MIMEApplication(b"data", Name="file.bin") - # Explicitly remove Content-Disposition if present - if 'Content-Disposition' in part: - del part['Content-Disposition'] - msg.attach(part) - - result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan") - assert result == [] |
