aboutsummaryrefslogtreecommitdiff
path: root/docs/scripts
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-02-22 23:20:56 -0600
committerCraig Jennings <c@cjennings.net>2026-02-22 23:20:56 -0600
commit5e6877e8f3fb552fce3367ff273167d2cf6af75f (patch)
tree909f98edbbb940aafb95de02457d4d6f7db3cba4 /docs/scripts
parentb104dde43fcc717681a8733a977eb528c60eb13f (diff)
downloadarchangel-5e6877e8f3fb552fce3367ff273167d2cf6af75f.tar.gz
archangel-5e6877e8f3fb552fce3367ff273167d2cf6af75f.zip
chore: add docs/ to .gitignore and untrack personal files
docs/ contains session history, personal workflows, and private protocols that shouldn't be in a public repository.
Diffstat (limited to 'docs/scripts')
-rw-r--r--docs/scripts/eml-view-and-extract-attachments-readme.org47
-rw-r--r--docs/scripts/eml-view-and-extract-attachments.py398
-rwxr-xr-xdocs/scripts/maildir-flag-manager.py345
-rw-r--r--docs/scripts/tests/conftest.py77
-rw-r--r--docs/scripts/tests/fixtures/empty-body.eml16
-rw-r--r--docs/scripts/tests/fixtures/html-only.eml20
-rw-r--r--docs/scripts/tests/fixtures/multiple-received-headers.eml12
-rw-r--r--docs/scripts/tests/fixtures/no-received-headers.eml9
-rw-r--r--docs/scripts/tests/fixtures/plain-text.eml15
-rw-r--r--docs/scripts/tests/fixtures/with-attachment.eml27
-rw-r--r--docs/scripts/tests/test_extract_body.py96
-rw-r--r--docs/scripts/tests/test_extract_metadata.py65
-rw-r--r--docs/scripts/tests/test_generate_filenames.py157
-rw-r--r--docs/scripts/tests/test_integration_stdout.py68
-rw-r--r--docs/scripts/tests/test_parse_received_headers.py105
-rw-r--r--docs/scripts/tests/test_process_eml.py129
-rw-r--r--docs/scripts/tests/test_save_attachments.py97
17 files changed, 0 insertions, 1683 deletions
diff --git a/docs/scripts/eml-view-and-extract-attachments-readme.org b/docs/scripts/eml-view-and-extract-attachments-readme.org
deleted file mode 100644
index c132df8..0000000
--- a/docs/scripts/eml-view-and-extract-attachments-readme.org
+++ /dev/null
@@ -1,47 +0,0 @@
-#+TITLE: eml-view-and-extract-attachments.py
-
-Extract email content and attachments from EML files with auto-renaming.
-
-* Usage
-
-#+begin_src bash
-# View mode — print metadata and body to stdout, extract attachments alongside EML
-python3 docs/scripts/eml-view-and-extract-attachments.py inbox/message.eml
-
-# Pipeline mode — extract, auto-rename, refile to output dir, clean up
-python3 docs/scripts/eml-view-and-extract-attachments.py inbox/message.eml --output-dir assets/
-#+end_src
-
-* Naming Convention
-
-Files are auto-renamed as =YYYY-MM-DD-HHMM-Sender-TYPE-Description.ext=:
-
-- =2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street.eml=
-- =2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street.txt=
-- =2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf=
-
-Date and sender are parsed from email headers. Falls back to "unknown" for missing values.
-
-* Dependencies
-
-- Python 3 (stdlib only for core functionality)
-- =html2text= (optional — used for HTML-only emails, falls back to tag stripping)
-
-* Pipeline Mode Behavior
-
-1. Creates a temp directory alongside the source EML
-2. Copies and renames the EML, writes a =.txt= of the body, extracts attachments
-3. Checks for filename collisions in the output directory
-4. Moves all files to the output directory
-5. Cleans up the temp directory
-6. Prints a summary of created files
-
-Source EML is never modified or moved.
-
-* Tests
-
-#+begin_src bash
-python3 -m pytest docs/scripts/tests/ -v
-#+end_src
-
-48 tests: unit tests for parsing, filename generation, and attachment saving; integration tests for both pipeline and stdout modes. Requires =pytest=.
diff --git a/docs/scripts/eml-view-and-extract-attachments.py b/docs/scripts/eml-view-and-extract-attachments.py
deleted file mode 100644
index 3201c99..0000000
--- a/docs/scripts/eml-view-and-extract-attachments.py
+++ /dev/null
@@ -1,398 +0,0 @@
-#!/usr/bin/env python3
-"""Extract email content and attachments from EML files.
-
-Without --output-dir: parse and print to stdout (backwards compatible).
-With --output-dir: full pipeline — extract, auto-rename, refile, clean up.
-"""
-
-import argparse
-import email
-import email.utils
-import os
-import re
-import shutil
-import sys
-import tempfile
-
-
-# ---------------------------------------------------------------------------
-# Parsing functions (no I/O beyond reading the input file)
-# ---------------------------------------------------------------------------
-
-def parse_received_headers(msg):
- """Parse Received headers to extract sent/received times and servers."""
- received_headers = msg.get_all('Received', [])
-
- sent_server = None
- sent_time = None
- received_server = None
- received_time = None
-
- for header in received_headers:
- header = ' '.join(header.split())
-
- time_match = re.search(r';\s*(.+)$', header)
- timestamp = time_match.group(1).strip() if time_match else None
-
- from_match = re.search(r'from\s+([\w.-]+)', header)
- by_match = re.search(r'by\s+([\w.-]+)', header)
-
- if from_match and by_match and received_server is None:
- received_time = timestamp
- received_server = by_match.group(1)
- sent_server = from_match.group(1)
- sent_time = timestamp
-
- if received_server is None and received_headers:
- header = ' '.join(received_headers[0].split())
- time_match = re.search(r';\s*(.+)$', header)
- received_time = time_match.group(1).strip() if time_match else None
- by_match = re.search(r'by\s+([\w.-]+)', header)
- received_server = by_match.group(1) if by_match else "unknown"
-
- return {
- 'sent_time': sent_time,
- 'sent_server': sent_server,
- 'received_time': received_time,
- 'received_server': received_server
- }
-
-
-def extract_body(msg):
- """Walk MIME parts, prefer text/plain, fall back to html2text on text/html.
-
- Returns body text string.
- """
- plain_text = None
- html_text = None
-
- for part in msg.walk():
- content_type = part.get_content_type()
- if content_type == "text/plain" and plain_text is None:
- payload = part.get_payload(decode=True)
- if payload is not None:
- plain_text = payload.decode('utf-8', errors='ignore')
- elif content_type == "text/html" and html_text is None:
- payload = part.get_payload(decode=True)
- if payload is not None:
- html_text = payload.decode('utf-8', errors='ignore')
-
- if plain_text is not None:
- return plain_text
-
- if html_text is not None:
- try:
- import html2text
- h = html2text.HTML2Text()
- h.body_width = 0
- return h.handle(html_text)
- except ImportError:
- # Strip HTML tags as fallback if html2text not installed
- return re.sub(r'<[^>]+>', '', html_text)
-
- return ""
-
-
-def extract_metadata(msg):
- """Extract email metadata from headers.
-
- Returns dict with from, to, subject, date, and timing info.
- """
- return {
- 'from': msg.get('From'),
- 'to': msg.get('To'),
- 'subject': msg.get('Subject'),
- 'date': msg.get('Date'),
- 'timing': parse_received_headers(msg),
- }
-
-
-def generate_basename(metadata):
- """Generate date-sender prefix from metadata.
-
- Returns e.g. "2026-02-05-1136-Jonathan".
- Falls back to "unknown" for missing/malformed Date or From.
- """
- # Parse date
- date_str = metadata.get('date')
- date_prefix = "unknown"
- if date_str:
- try:
- parsed = email.utils.parsedate_to_datetime(date_str)
- date_prefix = parsed.strftime('%Y-%m-%d-%H%M')
- except (ValueError, TypeError):
- pass
-
- # Parse sender first name
- from_str = metadata.get('from')
- sender = "unknown"
- if from_str:
- # Extract display name or email local part
- display_name, addr = email.utils.parseaddr(from_str)
- if display_name:
- sender = display_name.split()[0]
- elif addr:
- sender = addr.split('@')[0]
-
- return f"{date_prefix}-{sender}"
-
-
-def _clean_for_filename(text, max_length=80):
- """Clean text for use in a filename.
-
- Replace spaces with hyphens, strip chars unsafe for filenames,
- collapse multiple hyphens.
- """
- text = text.strip()
- text = text.replace(' ', '-')
- # Keep alphanumeric, hyphens, dots, underscores
- text = re.sub(r'[^\w\-.]', '', text)
- # Collapse multiple hyphens
- text = re.sub(r'-{2,}', '-', text)
- # Strip leading/trailing hyphens
- text = text.strip('-')
- if len(text) > max_length:
- text = text[:max_length].rstrip('-')
- return text
-
-
-def generate_email_filename(basename, subject):
- """Generate email filename from basename and subject.
-
- Returns e.g. "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street"
- (without extension — caller adds .eml or .txt).
- """
- if subject:
- clean_subject = _clean_for_filename(subject)
- else:
- clean_subject = "no-subject"
- return f"{basename}-EMAIL-{clean_subject}"
-
-
-def generate_attachment_filename(basename, original_filename):
- """Generate attachment filename from basename and original filename.
-
- Returns e.g. "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf".
- Preserves original extension.
- """
- if not original_filename:
- return f"{basename}-ATTACH-unnamed"
-
- name, ext = os.path.splitext(original_filename)
- clean_name = _clean_for_filename(name)
- return f"{basename}-ATTACH-{clean_name}{ext}"
-
-
-# ---------------------------------------------------------------------------
-# I/O functions (file operations)
-# ---------------------------------------------------------------------------
-
-def save_attachments(msg, output_dir, basename):
- """Write attachment files to output_dir with auto-renamed filenames.
-
- Returns list of dicts: {original_name, renamed_name, path}.
- """
- results = []
- for part in msg.walk():
- if part.get_content_maintype() == 'multipart':
- continue
- if part.get('Content-Disposition') is None:
- continue
-
- filename = part.get_filename()
- if filename:
- renamed = generate_attachment_filename(basename, filename)
- filepath = os.path.join(output_dir, renamed)
- with open(filepath, 'wb') as f:
- f.write(part.get_payload(decode=True))
- results.append({
- 'original_name': filename,
- 'renamed_name': renamed,
- 'path': filepath,
- })
-
- return results
-
-
-def save_text(text, filepath):
- """Write body text to a .txt file."""
- with open(filepath, 'w', encoding='utf-8') as f:
- f.write(text)
-
-
-# ---------------------------------------------------------------------------
-# Pipeline function
-# ---------------------------------------------------------------------------
-
-def process_eml(eml_path, output_dir):
- """Full extraction pipeline.
-
- 1. Create temp extraction dir
- 2. Copy EML into temp dir
- 3. Parse email (metadata, body, attachments)
- 4. Generate filenames from headers
- 5. Save renamed .eml, .txt, and attachments to temp dir
- 6. Check for collisions in output_dir
- 7. Move all files to output_dir
- 8. Clean up temp dir
- 9. Return results dict
- """
- eml_path = os.path.abspath(eml_path)
- output_dir = os.path.abspath(output_dir)
- os.makedirs(output_dir, exist_ok=True)
-
- # Create temp dir as sibling of the EML file
- eml_dir = os.path.dirname(eml_path)
- temp_dir = tempfile.mkdtemp(prefix='extract-', dir=eml_dir)
-
- try:
- # Copy EML to temp dir
- temp_eml = os.path.join(temp_dir, os.path.basename(eml_path))
- shutil.copy2(eml_path, temp_eml)
-
- # Parse
- with open(eml_path, 'rb') as f:
- msg = email.message_from_binary_file(f)
-
- metadata = extract_metadata(msg)
- body = extract_body(msg)
- basename = generate_basename(metadata)
- email_stem = generate_email_filename(basename, metadata['subject'])
-
- # Save renamed EML
- renamed_eml = f"{email_stem}.eml"
- renamed_eml_path = os.path.join(temp_dir, renamed_eml)
- os.rename(temp_eml, renamed_eml_path)
-
- # Save .txt
- renamed_txt = f"{email_stem}.txt"
- renamed_txt_path = os.path.join(temp_dir, renamed_txt)
- save_text(body, renamed_txt_path)
-
- # Save attachments
- attachment_results = save_attachments(msg, temp_dir, basename)
-
- # Build file list
- files = [
- {'type': 'eml', 'name': renamed_eml, 'path': None},
- {'type': 'txt', 'name': renamed_txt, 'path': None},
- ]
- for att in attachment_results:
- files.append({
- 'type': 'attach',
- 'name': att['renamed_name'],
- 'path': None,
- })
-
- # Check for collisions in output_dir
- for file_info in files:
- dest = os.path.join(output_dir, file_info['name'])
- if os.path.exists(dest):
- raise FileExistsError(
- f"Collision: '{file_info['name']}' already exists in {output_dir}"
- )
-
- # Move all files to output_dir
- for file_info in files:
- src = os.path.join(temp_dir, file_info['name'])
- dest = os.path.join(output_dir, file_info['name'])
- shutil.move(src, dest)
- file_info['path'] = dest
-
- return {
- 'metadata': metadata,
- 'body': body,
- 'files': files,
- }
-
- finally:
- # Clean up temp dir
- if os.path.exists(temp_dir):
- shutil.rmtree(temp_dir)
-
-
-# ---------------------------------------------------------------------------
-# Stdout display (backwards-compatible mode)
-# ---------------------------------------------------------------------------
-
-def print_email(eml_path):
- """Parse and print email to stdout. Extract attachments alongside EML.
-
- This preserves the original script behavior when --output-dir is not given.
- """
- with open(eml_path, 'rb') as f:
- msg = email.message_from_binary_file(f)
-
- metadata = extract_metadata(msg)
- body = extract_body(msg)
- timing = metadata['timing']
-
- print(f"From: {metadata['from']}")
- print(f"To: {metadata['to']}")
- print(f"Subject: {metadata['subject']}")
- print(f"Date: {metadata['date']}")
- print(f"Sent: {timing['sent_time']} (via {timing['sent_server']})")
- print(f"Received: {timing['received_time']} (at {timing['received_server']})")
- print()
- print(body)
- print()
-
- # Extract attachments alongside the EML file
- for part in msg.walk():
- if part.get_content_maintype() == 'multipart':
- continue
- if part.get('Content-Disposition') is None:
- continue
-
- filename = part.get_filename()
- if filename:
- filepath = os.path.join(os.path.dirname(eml_path), filename)
- with open(filepath, 'wb') as f:
- f.write(part.get_payload(decode=True))
- print(f"Extracted attachment: {filename}")
-
-
-def print_pipeline_summary(result):
- """Print summary after pipeline extraction."""
- metadata = result['metadata']
- timing = metadata['timing']
-
- print(f"From: {metadata['from']}")
- print(f"To: {metadata['to']}")
- print(f"Subject: {metadata['subject']}")
- print(f"Date: {metadata['date']}")
- print(f"Sent: {timing['sent_time']} (via {timing['sent_server']})")
- print(f"Received: {timing['received_time']} (at {timing['received_server']})")
- print()
- print("Files created:")
- for f in result['files']:
- print(f" [{f['type']:>6}] {f['name']}")
- print(f"\nOutput directory: {os.path.dirname(result['files'][0]['path'])}")
-
-
-# ---------------------------------------------------------------------------
-# CLI
-# ---------------------------------------------------------------------------
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(
- description="Extract email content and attachments from EML files."
- )
- parser.add_argument('eml_path', help="Path to source EML file")
- parser.add_argument(
- '--output-dir',
- help="Destination directory for extracted files. "
- "Without this flag, prints to stdout only (backwards compatible)."
- )
-
- args = parser.parse_args()
-
- if not os.path.isfile(args.eml_path):
- print(f"Error: '{args.eml_path}' not found or is not a file.", file=sys.stderr)
- sys.exit(1)
-
- if args.output_dir:
- result = process_eml(args.eml_path, args.output_dir)
- print_pipeline_summary(result)
- else:
- print_email(args.eml_path)
diff --git a/docs/scripts/maildir-flag-manager.py b/docs/scripts/maildir-flag-manager.py
deleted file mode 100755
index 9c4a59c..0000000
--- a/docs/scripts/maildir-flag-manager.py
+++ /dev/null
@@ -1,345 +0,0 @@
-#!/usr/bin/env python3
-"""Manage maildir flags (read, starred) across email accounts.
-
-Uses atomic os.rename() for flag operations directly on maildir files.
-Safer and more reliable than shell-based approaches (zsh loses PATH in
-while-read loops, piped mu move silently fails).
-
-Supports the same flag semantics as mu4e: maildir files in new/ are moved
-to cur/ when the Seen flag is added, and flag changes are persisted to the
-filesystem so mbsync picks them up on the next sync.
-
-Usage:
- # Mark all unread INBOX emails as read
- maildir-flag-manager.py mark-read
-
- # Mark specific emails as read (by path)
- maildir-flag-manager.py mark-read /path/to/message1 /path/to/message2
-
- # Mark all unread INBOX emails as read, then reindex mu
- maildir-flag-manager.py mark-read --reindex
-
- # Star specific emails (by path)
- maildir-flag-manager.py star /path/to/message1 /path/to/message2
-
- # Star and mark read
- maildir-flag-manager.py star --mark-read /path/to/message1
-
- # Dry run — show what would change without modifying anything
- maildir-flag-manager.py mark-read --dry-run
-"""
-
-import argparse
-import os
-import shutil
-import subprocess
-import sys
-
-
-# ---------------------------------------------------------------------------
-# Configuration
-# ---------------------------------------------------------------------------
-
-MAILDIR_ACCOUNTS = {
- 'gmail': os.path.expanduser('~/.mail/gmail/INBOX'),
- 'cmail': os.path.expanduser('~/.mail/cmail/Inbox'),
-}
-
-
-# ---------------------------------------------------------------------------
-# Core flag operations
-# ---------------------------------------------------------------------------
-
-def parse_maildir_flags(filename):
- """Extract flags from a maildir filename.
-
- Maildir filenames follow the pattern: unique:2,FLAGS
- where FLAGS is a sorted string of flag characters (e.g., "FS" for
- Flagged+Seen).
-
- Returns (base, flags_string). If no flags section, returns (filename, '').
- """
- if ':2,' in filename:
- base, flags = filename.rsplit(':2,', 1)
- return base, flags
- return filename, ''
-
-
-def build_flagged_filename(filename, new_flags):
- """Build a maildir filename with the given flags.
-
- Flags are always sorted alphabetically per maildir spec.
- """
- base, _ = parse_maildir_flags(filename)
- sorted_flags = ''.join(sorted(set(new_flags)))
- return f"{base}:2,{sorted_flags}"
-
-
-def rename_with_flag(file_path, flag, dry_run=False):
- """Add a flag to a single maildir message file via atomic rename.
-
- Handles moving from new/ to cur/ when adding the Seen flag.
- Returns True if the flag was added, False if already present.
- """
- dirname = os.path.dirname(file_path)
- filename = os.path.basename(file_path)
- maildir_root = os.path.dirname(dirname)
- subdir = os.path.basename(dirname)
-
- _, current_flags = parse_maildir_flags(filename)
-
- if flag in current_flags:
- return False
-
- new_flags = current_flags + flag
- new_filename = build_flagged_filename(filename, new_flags)
-
- # Messages with the Seen flag belong in cur/, not new/
- if 'S' in new_flags and subdir == 'new':
- target_dir = os.path.join(maildir_root, 'cur')
- else:
- target_dir = dirname
-
- new_path = os.path.join(target_dir, new_filename)
-
- if dry_run:
- return True
-
- os.rename(file_path, new_path)
- return True
-
-
-def process_maildir(maildir_path, flag, dry_run=False):
- """Add a flag to all messages in a maildir that don't have it.
-
- Scans both new/ and cur/ subdirectories.
- Returns (changed_count, skipped_count, error_count).
- """
- if not os.path.isdir(maildir_path):
- print(f" Skipping {maildir_path} (not found)", file=sys.stderr)
- return 0, 0, 0
-
- changed = 0
- skipped = 0
- errors = 0
-
- for subdir in ('new', 'cur'):
- subdir_path = os.path.join(maildir_path, subdir)
- if not os.path.isdir(subdir_path):
- continue
-
- for filename in os.listdir(subdir_path):
- file_path = os.path.join(subdir_path, filename)
- if not os.path.isfile(file_path):
- continue
-
- try:
- if rename_with_flag(file_path, flag, dry_run):
- changed += 1
- else:
- skipped += 1
- except Exception as e:
- print(f" Error on {filename}: {e}", file=sys.stderr)
- errors += 1
-
- return changed, skipped, errors
-
-
-def process_specific_files(paths, flag, dry_run=False):
- """Add a flag to specific message files by path.
-
- Returns (changed_count, skipped_count, error_count).
- """
- changed = 0
- skipped = 0
- errors = 0
-
- for path in paths:
- path = os.path.abspath(path)
- if not os.path.isfile(path):
- print(f" File not found: {path}", file=sys.stderr)
- errors += 1
- continue
-
- # Verify file is inside a maildir (parent should be cur/ or new/)
- parent_dir = os.path.basename(os.path.dirname(path))
- if parent_dir not in ('cur', 'new'):
- print(f" Not in a maildir cur/ or new/ dir: {path}",
- file=sys.stderr)
- errors += 1
- continue
-
- try:
- if rename_with_flag(path, flag, dry_run):
- changed += 1
- else:
- skipped += 1
- except Exception as e:
- print(f" Error on {path}: {e}", file=sys.stderr)
- errors += 1
-
- return changed, skipped, errors
-
-
-def reindex_mu():
- """Run mu index to update the database after flag changes."""
- mu_path = shutil.which('mu')
- if not mu_path:
- print("Warning: mu not found in PATH, skipping reindex",
- file=sys.stderr)
- return False
-
- try:
- result = subprocess.run(
- [mu_path, 'index'],
- capture_output=True, text=True, timeout=120
- )
- if result.returncode == 0:
- print("mu index: database updated")
- return True
- else:
- print(f"mu index failed: {result.stderr}", file=sys.stderr)
- return False
- except subprocess.TimeoutExpired:
- print("mu index timed out after 120s", file=sys.stderr)
- return False
-
-
-# ---------------------------------------------------------------------------
-# Commands
-# ---------------------------------------------------------------------------
-
-def cmd_mark_read(args):
- """Mark emails as read (add Seen flag)."""
- flag = 'S'
- action = "Marking as read"
- if args.dry_run:
- action = "Would mark as read"
-
- total_changed = 0
- total_skipped = 0
- total_errors = 0
-
- if args.paths:
- print(f"{action}: {len(args.paths)} specific message(s)")
- c, s, e = process_specific_files(args.paths, flag, args.dry_run)
- total_changed += c
- total_skipped += s
- total_errors += e
- else:
- for name, maildir_path in MAILDIR_ACCOUNTS.items():
- print(f"{action} in {name} ({maildir_path})")
- c, s, e = process_maildir(maildir_path, flag, args.dry_run)
- total_changed += c
- total_skipped += s
- total_errors += e
- if c > 0:
- print(f" {c} message(s) marked as read")
- if s > 0:
- print(f" {s} already read")
-
- print(f"\nTotal: {total_changed} changed, {total_skipped} already set, "
- f"{total_errors} errors")
-
- if args.reindex and not args.dry_run and total_changed > 0:
- reindex_mu()
-
- return 0 if total_errors == 0 else 1
-
-
-def cmd_star(args):
- """Star/flag emails (add Flagged flag)."""
- flag = 'F'
- action = "Starring"
- if args.dry_run:
- action = "Would star"
-
- if not args.paths:
- print("Error: star requires specific message paths", file=sys.stderr)
- return 1
-
- print(f"{action}: {len(args.paths)} message(s)")
- total_changed = 0
- total_skipped = 0
- total_errors = 0
-
- c, s, e = process_specific_files(args.paths, flag, args.dry_run)
- total_changed += c
- total_skipped += s
- total_errors += e
-
- # Also mark as read if requested
- if args.mark_read:
- print("Also marking as read...")
- c2, _, e2 = process_specific_files(args.paths, 'S', args.dry_run)
- total_changed += c2
- total_errors += e2
-
- print(f"\nTotal: {total_changed} flag(s) changed, {total_skipped} already set, "
- f"{total_errors} errors")
-
- if args.reindex and not args.dry_run and total_changed > 0:
- reindex_mu()
-
- return 0 if total_errors == 0 else 1
-
-
-# ---------------------------------------------------------------------------
-# CLI
-# ---------------------------------------------------------------------------
-
-def main():
- parser = argparse.ArgumentParser(
- description="Manage maildir flags (read, starred) across email accounts."
- )
- subparsers = parser.add_subparsers(dest='command', required=True)
-
- # mark-read
- p_read = subparsers.add_parser(
- 'mark-read',
- help="Mark emails as read (add Seen flag)"
- )
- p_read.add_argument(
- 'paths', nargs='*',
- help="Specific message file paths. If omitted, marks all unread "
- "messages in configured INBOX maildirs."
- )
- p_read.add_argument(
- '--reindex', action='store_true',
- help="Run mu index after changing flags"
- )
- p_read.add_argument(
- '--dry-run', action='store_true',
- help="Show what would change without modifying anything"
- )
- p_read.set_defaults(func=cmd_mark_read)
-
- # star
- p_star = subparsers.add_parser(
- 'star',
- help="Star/flag emails (add Flagged flag)"
- )
- p_star.add_argument(
- 'paths', nargs='+',
- help="Message file paths to star"
- )
- p_star.add_argument(
- '--mark-read', action='store_true',
- help="Also mark starred messages as read"
- )
- p_star.add_argument(
- '--reindex', action='store_true',
- help="Run mu index after changing flags"
- )
- p_star.add_argument(
- '--dry-run', action='store_true',
- help="Show what would change without modifying anything"
- )
- p_star.set_defaults(func=cmd_star)
-
- args = parser.parse_args()
- sys.exit(args.func(args))
-
-
-if __name__ == '__main__':
- main()
diff --git a/docs/scripts/tests/conftest.py b/docs/scripts/tests/conftest.py
deleted file mode 100644
index 8d965ab..0000000
--- a/docs/scripts/tests/conftest.py
+++ /dev/null
@@ -1,77 +0,0 @@
-"""Shared fixtures for EML extraction tests."""
-
-import os
-from email.message import EmailMessage
-from email.mime.application import MIMEApplication
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-
-import pytest
-
-
-@pytest.fixture
-def fixtures_dir():
- """Return path to the fixtures/ directory."""
- return os.path.join(os.path.dirname(__file__), 'fixtures')
-
-
-def make_plain_message(body="Test body", from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Wed, 05 Feb 2026 11:36:00 -0600"):
- """Create an EmailMessage with text/plain body."""
- msg = EmailMessage()
- msg['From'] = from_
- msg['To'] = to
- msg['Subject'] = subject
- msg['Date'] = date
- msg.set_content(body)
- return msg
-
-
-def make_html_message(html_body="<p>Test body</p>",
- from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Wed, 05 Feb 2026 11:36:00 -0600"):
- """Create an EmailMessage with text/html body only."""
- msg = EmailMessage()
- msg['From'] = from_
- msg['To'] = to
- msg['Subject'] = subject
- msg['Date'] = date
- msg.set_content(html_body, subtype='html')
- return msg
-
-
-def make_message_with_attachment(body="Test body",
- from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Wed, 05 Feb 2026 11:36:00 -0600",
- attachment_filename="document.pdf",
- attachment_content=b"fake pdf content"):
- """Create a multipart message with a text body and one attachment."""
- msg = MIMEMultipart()
- msg['From'] = from_
- msg['To'] = to
- msg['Subject'] = subject
- msg['Date'] = date
-
- msg.attach(MIMEText(body, 'plain'))
-
- att = MIMEApplication(attachment_content, Name=attachment_filename)
- att['Content-Disposition'] = f'attachment; filename="{attachment_filename}"'
- msg.attach(att)
-
- return msg
-
-
-def add_received_headers(msg, headers):
- """Add Received headers to an existing message.
-
- headers: list of header strings, added in order (first = most recent).
- """
- for header in headers:
- msg['Received'] = header
- return msg
diff --git a/docs/scripts/tests/fixtures/empty-body.eml b/docs/scripts/tests/fixtures/empty-body.eml
deleted file mode 100644
index cf008df..0000000
--- a/docs/scripts/tests/fixtures/empty-body.eml
+++ /dev/null
@@ -1,16 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Empty Body Test
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: multipart/mixed; boundary="boundary456"
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
---boundary456
-Content-Type: application/octet-stream; name="data.bin"
-Content-Disposition: attachment; filename="data.bin"
-Content-Transfer-Encoding: base64
-
-AQIDBA==
-
---boundary456--
diff --git a/docs/scripts/tests/fixtures/html-only.eml b/docs/scripts/tests/fixtures/html-only.eml
deleted file mode 100644
index 4db7645..0000000
--- a/docs/scripts/tests/fixtures/html-only.eml
+++ /dev/null
@@ -1,20 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: HTML Update
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/html; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
-<html>
-<body>
-<p>Hi Craig,</p>
-<p>Here is the <strong>HTML</strong> update.</p>
-<ul>
-<li>Item one</li>
-<li>Item two</li>
-</ul>
-<p>Best,<br>Jonathan</p>
-</body>
-</html>
diff --git a/docs/scripts/tests/fixtures/multiple-received-headers.eml b/docs/scripts/tests/fixtures/multiple-received-headers.eml
deleted file mode 100644
index 1b8d6a7..0000000
--- a/docs/scripts/tests/fixtures/multiple-received-headers.eml
+++ /dev/null
@@ -1,12 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Multiple Received Headers Test
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-Received: by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-Received: from originator.example.com by relay.example.com with SMTP; Thu, 05 Feb 2026 11:35:58 -0600
-
-Test body with multiple received headers.
diff --git a/docs/scripts/tests/fixtures/no-received-headers.eml b/docs/scripts/tests/fixtures/no-received-headers.eml
deleted file mode 100644
index 8a05dc7..0000000
--- a/docs/scripts/tests/fixtures/no-received-headers.eml
+++ /dev/null
@@ -1,9 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: No Received Headers
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-
-Test body with no received headers at all.
diff --git a/docs/scripts/tests/fixtures/plain-text.eml b/docs/scripts/tests/fixtures/plain-text.eml
deleted file mode 100644
index 8cc9d9c..0000000
--- a/docs/scripts/tests/fixtures/plain-text.eml
+++ /dev/null
@@ -1,15 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Re: Fw: 4319 Danneel Street
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
-Hi Craig,
-
-Here is the update on 4319 Danneel Street.
-
-Best,
-Jonathan
diff --git a/docs/scripts/tests/fixtures/with-attachment.eml b/docs/scripts/tests/fixtures/with-attachment.eml
deleted file mode 100644
index ac49c5d..0000000
--- a/docs/scripts/tests/fixtures/with-attachment.eml
+++ /dev/null
@@ -1,27 +0,0 @@
-From: Jonathan Smith <jsmith@example.com>
-To: Craig Jennings <craig@example.com>
-Subject: Ltr from Carrollton
-Date: Thu, 05 Feb 2026 11:36:00 -0600
-MIME-Version: 1.0
-Content-Type: multipart/mixed; boundary="boundary123"
-Received: from mail-sender.example.com by mx.receiver.example.com with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600
-
---boundary123
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: 7bit
-
-Hi Craig,
-
-Please find the letter attached.
-
-Best,
-Jonathan
-
---boundary123
-Content-Type: application/octet-stream; name="Ltr Carrollton.pdf"
-Content-Disposition: attachment; filename="Ltr Carrollton.pdf"
-Content-Transfer-Encoding: base64
-
-ZmFrZSBwZGYgY29udGVudA==
-
---boundary123--
diff --git a/docs/scripts/tests/test_extract_body.py b/docs/scripts/tests/test_extract_body.py
deleted file mode 100644
index 7b53cda..0000000
--- a/docs/scripts/tests/test_extract_body.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""Tests for extract_body()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, make_html_message, make_message_with_attachment
-from email.message import EmailMessage
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-from email.mime.application import MIMEApplication
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-extract_body = eml_script.extract_body
-
-
-class TestPlainText:
- def test_returns_plain_text(self):
- msg = make_plain_message(body="Hello, this is plain text.")
- result = extract_body(msg)
- assert "Hello, this is plain text." in result
-
-
-class TestHtmlOnly:
- def test_returns_converted_html(self):
- msg = make_html_message(html_body="<p>Hello <strong>world</strong></p>")
- result = extract_body(msg)
- assert "Hello" in result
- assert "world" in result
- # Should not contain raw HTML tags
- assert "<p>" not in result
- assert "<strong>" not in result
-
-
-class TestBothPlainAndHtml:
- def test_prefers_plain_text(self):
- msg = MIMEMultipart('alternative')
- msg['From'] = 'test@example.com'
- msg['To'] = 'dest@example.com'
- msg['Subject'] = 'Test'
- msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
- msg.attach(MIMEText("Plain text version", 'plain'))
- msg.attach(MIMEText("<p>HTML version</p>", 'html'))
- result = extract_body(msg)
- assert "Plain text version" in result
- assert "HTML version" not in result
-
-
-class TestEmptyBody:
- def test_returns_empty_string(self):
- # Multipart with only attachments, no text parts
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- att = MIMEApplication(b"binary data", Name="file.bin")
- att['Content-Disposition'] = 'attachment; filename="file.bin"'
- msg.attach(att)
- result = extract_body(msg)
- assert result == ""
-
-
-class TestNonUtf8Encoding:
- def test_decodes_with_errors_ignore(self):
- msg = EmailMessage()
- msg['From'] = 'test@example.com'
- # Set raw bytes that include invalid UTF-8
- msg.set_content("Valid text with special: café")
- result = extract_body(msg)
- assert "Valid text" in result
-
-
-class TestHtmlWithStructure:
- def test_preserves_list_structure(self):
- html = "<ul><li>Item one</li><li>Item two</li></ul>"
- msg = make_html_message(html_body=html)
- result = extract_body(msg)
- assert "Item one" in result
- assert "Item two" in result
-
-
-class TestNoTextParts:
- def test_returns_empty_string(self):
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- att = MIMEApplication(b"data", Name="image.png")
- att['Content-Disposition'] = 'attachment; filename="image.png"'
- msg.attach(att)
- result = extract_body(msg)
- assert result == ""
diff --git a/docs/scripts/tests/test_extract_metadata.py b/docs/scripts/tests/test_extract_metadata.py
deleted file mode 100644
index d5ee52e..0000000
--- a/docs/scripts/tests/test_extract_metadata.py
+++ /dev/null
@@ -1,65 +0,0 @@
-"""Tests for extract_metadata()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, add_received_headers
-from email.message import EmailMessage
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-extract_metadata = eml_script.extract_metadata
-
-
-class TestAllHeadersPresent:
- def test_complete_dict(self):
- msg = make_plain_message(
- from_="Jonathan Smith <jsmith@example.com>",
- to="Craig <craig@example.com>",
- subject="Test Subject",
- date="Thu, 05 Feb 2026 11:36:00 -0600"
- )
- result = extract_metadata(msg)
- assert result['from'] == "Jonathan Smith <jsmith@example.com>"
- assert result['to'] == "Craig <craig@example.com>"
- assert result['subject'] == "Test Subject"
- assert result['date'] == "Thu, 05 Feb 2026 11:36:00 -0600"
- assert 'timing' in result
-
-
-class TestMissingFrom:
- def test_from_is_none(self):
- msg = EmailMessage()
- msg['To'] = 'craig@example.com'
- msg['Subject'] = 'Test'
- msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
- msg.set_content("body")
- result = extract_metadata(msg)
- assert result['from'] is None
-
-
-class TestMissingDate:
- def test_date_is_none(self):
- msg = EmailMessage()
- msg['From'] = 'test@example.com'
- msg['To'] = 'craig@example.com'
- msg['Subject'] = 'Test'
- msg.set_content("body")
- result = extract_metadata(msg)
- assert result['date'] is None
-
-
-class TestLongSubject:
- def test_full_subject_returned(self):
- long_subject = "Re: Fw: This is a very long subject line that spans many words and might be folded"
- msg = make_plain_message(subject=long_subject)
- result = extract_metadata(msg)
- assert result['subject'] == long_subject
diff --git a/docs/scripts/tests/test_generate_filenames.py b/docs/scripts/tests/test_generate_filenames.py
deleted file mode 100644
index 07c8f84..0000000
--- a/docs/scripts/tests/test_generate_filenames.py
+++ /dev/null
@@ -1,157 +0,0 @@
-"""Tests for generate_basename(), generate_email_filename(), generate_attachment_filename()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-generate_basename = eml_script.generate_basename
-generate_email_filename = eml_script.generate_email_filename
-generate_attachment_filename = eml_script.generate_attachment_filename
-
-
-# --- generate_basename ---
-
-class TestGenerateBasename:
- def test_standard_from_and_date(self):
- metadata = {
- 'from': 'Jonathan Smith <jsmith@example.com>',
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- assert generate_basename(metadata) == "2026-02-05-1136-Jonathan"
-
- def test_from_with_display_name_first_token(self):
- metadata = {
- 'from': 'C Ciarm <cciarm@example.com>',
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- result = generate_basename(metadata)
- assert result == "2026-02-05-1136-C"
-
- def test_from_without_display_name(self):
- metadata = {
- 'from': 'jsmith@example.com',
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- result = generate_basename(metadata)
- assert result == "2026-02-05-1136-jsmith"
-
- def test_missing_date(self):
- metadata = {
- 'from': 'Jonathan Smith <jsmith@example.com>',
- 'date': None,
- }
- result = generate_basename(metadata)
- assert result == "unknown-Jonathan"
-
- def test_missing_from(self):
- metadata = {
- 'from': None,
- 'date': 'Wed, 05 Feb 2026 11:36:00 -0600',
- }
- result = generate_basename(metadata)
- assert result == "2026-02-05-1136-unknown"
-
- def test_both_missing(self):
- metadata = {'from': None, 'date': None}
- result = generate_basename(metadata)
- assert result == "unknown-unknown"
-
- def test_unparseable_date(self):
- metadata = {
- 'from': 'Jonathan <j@example.com>',
- 'date': 'not a real date',
- }
- result = generate_basename(metadata)
- assert result == "unknown-Jonathan"
-
- def test_none_date_no_crash(self):
- metadata = {'from': 'Test <t@e.com>', 'date': None}
- # Should not raise
- result = generate_basename(metadata)
- assert "unknown" in result
-
-
-# --- generate_email_filename ---
-
-class TestGenerateEmailFilename:
- def test_standard_subject(self):
- result = generate_email_filename(
- "2026-02-05-1136-Jonathan",
- "Re: Fw: 4319 Danneel Street"
- )
- assert result == "2026-02-05-1136-Jonathan-EMAIL-Re-Fw-4319-Danneel-Street"
-
- def test_subject_with_special_chars(self):
- result = generate_email_filename(
- "2026-02-05-1136-Jonathan",
- "Update: Meeting (draft) & notes!"
- )
- # Colons, parens, ampersands, exclamation stripped
- assert "EMAIL" in result
- assert ":" not in result
- assert "(" not in result
- assert ")" not in result
- assert "&" not in result
- assert "!" not in result
-
- def test_none_subject(self):
- result = generate_email_filename("2026-02-05-1136-Jonathan", None)
- assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject"
-
- def test_empty_subject(self):
- result = generate_email_filename("2026-02-05-1136-Jonathan", "")
- assert result == "2026-02-05-1136-Jonathan-EMAIL-no-subject"
-
- def test_very_long_subject(self):
- long_subject = "A" * 100 + " " + "B" * 100
- result = generate_email_filename("2026-02-05-1136-Jonathan", long_subject)
- # The cleaned subject part should be truncated
- # basename (27) + "-EMAIL-" (7) + subject
- # Subject itself is limited to 80 chars by _clean_for_filename
- subject_part = result.split("-EMAIL-")[1]
- assert len(subject_part) <= 80
-
-
-# --- generate_attachment_filename ---
-
-class TestGenerateAttachmentFilename:
- def test_standard_attachment(self):
- result = generate_attachment_filename(
- "2026-02-05-1136-Jonathan",
- "Ltr Carrollton.pdf"
- )
- assert result == "2026-02-05-1136-Jonathan-ATTACH-Ltr-Carrollton.pdf"
-
- def test_filename_with_spaces_and_parens(self):
- result = generate_attachment_filename(
- "2026-02-05-1136-Jonathan",
- "Document (final copy).pdf"
- )
- assert " " not in result
- assert "(" not in result
- assert ")" not in result
- assert result.endswith(".pdf")
-
- def test_preserves_extension(self):
- result = generate_attachment_filename(
- "2026-02-05-1136-Jonathan",
- "photo.jpg"
- )
- assert result.endswith(".jpg")
-
- def test_none_filename(self):
- result = generate_attachment_filename("2026-02-05-1136-Jonathan", None)
- assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed"
-
- def test_empty_filename(self):
- result = generate_attachment_filename("2026-02-05-1136-Jonathan", "")
- assert result == "2026-02-05-1136-Jonathan-ATTACH-unnamed"
diff --git a/docs/scripts/tests/test_integration_stdout.py b/docs/scripts/tests/test_integration_stdout.py
deleted file mode 100644
index d87478e..0000000
--- a/docs/scripts/tests/test_integration_stdout.py
+++ /dev/null
@@ -1,68 +0,0 @@
-"""Integration tests for backwards-compatible stdout mode (no --output-dir)."""
-
-import os
-import shutil
-import sys
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-print_email = eml_script.print_email
-
-FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
-
-
-class TestPlainTextStdout:
- def test_metadata_and_body_printed(self, tmp_path, capsys):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- working_eml = tmp_path / "message.eml"
- shutil.copy2(eml_src, working_eml)
-
- print_email(str(working_eml))
- captured = capsys.readouterr()
-
- assert "From: Jonathan Smith <jsmith@example.com>" in captured.out
- assert "To: Craig Jennings <craig@example.com>" in captured.out
- assert "Subject: Re: Fw: 4319 Danneel Street" in captured.out
- assert "Date:" in captured.out
- assert "Sent:" in captured.out
- assert "Received:" in captured.out
- assert "4319 Danneel Street" in captured.out
-
-
-class TestHtmlFallbackStdout:
- def test_html_converted_on_stdout(self, tmp_path, capsys):
- eml_src = os.path.join(FIXTURES, 'html-only.eml')
- working_eml = tmp_path / "message.eml"
- shutil.copy2(eml_src, working_eml)
-
- print_email(str(working_eml))
- captured = capsys.readouterr()
-
- # Should see converted text, not raw HTML
- assert "HTML" in captured.out
- assert "<p>" not in captured.out
-
-
-class TestAttachmentsStdout:
- def test_attachment_extracted_alongside_eml(self, tmp_path, capsys):
- eml_src = os.path.join(FIXTURES, 'with-attachment.eml')
- working_eml = tmp_path / "message.eml"
- shutil.copy2(eml_src, working_eml)
-
- print_email(str(working_eml))
- captured = capsys.readouterr()
-
- assert "Extracted attachment:" in captured.out
- assert "Ltr Carrollton.pdf" in captured.out
-
- # File should exist alongside the EML
- extracted = tmp_path / "Ltr Carrollton.pdf"
- assert extracted.exists()
diff --git a/docs/scripts/tests/test_parse_received_headers.py b/docs/scripts/tests/test_parse_received_headers.py
deleted file mode 100644
index e12e1fb..0000000
--- a/docs/scripts/tests/test_parse_received_headers.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""Tests for parse_received_headers()."""
-
-import email
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, add_received_headers
-from email.message import EmailMessage
-
-# Import the function under test
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-parse_received_headers = eml_script.parse_received_headers
-
-
-class TestSingleHeader:
- def test_header_with_from_and_by(self):
- msg = EmailMessage()
- msg['Received'] = (
- 'from mail-sender.example.com by mx.receiver.example.com '
- 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
- )
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'mail-sender.example.com'
- assert result['received_server'] == 'mx.receiver.example.com'
- assert result['sent_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
- assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:05 -0600'
-
-
-class TestMultipleHeaders:
- def test_uses_first_with_both_from_and_by(self):
- msg = EmailMessage()
- # Most recent first (by only)
- msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'
- # Next: has both from and by — this should be selected
- msg['Received'] = (
- 'from mail-sender.example.com by mx.receiver.example.com '
- 'with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600'
- )
- # Oldest
- msg['Received'] = (
- 'from originator.example.com by relay.example.com '
- 'with SMTP; Thu, 05 Feb 2026 11:35:58 -0600'
- )
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'mail-sender.example.com'
- assert result['received_server'] == 'mx.receiver.example.com'
-
-
-class TestNoReceivedHeaders:
- def test_all_values_none(self):
- msg = EmailMessage()
- result = parse_received_headers(msg)
- assert result['sent_time'] is None
- assert result['sent_server'] is None
- assert result['received_time'] is None
- assert result['received_server'] is None
-
-
-class TestByButNoFrom:
- def test_falls_back_to_first_header(self):
- msg = EmailMessage()
- msg['Received'] = 'by internal.example.com with SMTP; Thu, 05 Feb 2026 11:36:10 -0600'
- result = parse_received_headers(msg)
- assert result['received_server'] == 'internal.example.com'
- assert result['received_time'] == 'Thu, 05 Feb 2026 11:36:10 -0600'
- # No from in any header, so sent_server stays None
- assert result['sent_server'] is None
-
-
-class TestMultilineFoldedHeader:
- def test_normalizes_whitespace(self):
- # Use email.message_from_string to parse raw folded headers
- # (EmailMessage policy rejects embedded CRLF in set values)
- raw = (
- "From: test@example.com\r\n"
- "Received: from mail-sender.example.com\r\n"
- " by mx.receiver.example.com\r\n"
- " with ESMTP; Thu, 05 Feb 2026 11:36:05 -0600\r\n"
- "\r\n"
- "body\r\n"
- )
- msg = email.message_from_string(raw)
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'mail-sender.example.com'
- assert result['received_server'] == 'mx.receiver.example.com'
-
-
-class TestMalformedTimestamp:
- def test_no_semicolon(self):
- msg = EmailMessage()
- msg['Received'] = 'from sender.example.com by receiver.example.com with SMTP'
- result = parse_received_headers(msg)
- assert result['sent_server'] == 'sender.example.com'
- assert result['received_server'] == 'receiver.example.com'
- assert result['sent_time'] is None
- assert result['received_time'] is None
diff --git a/docs/scripts/tests/test_process_eml.py b/docs/scripts/tests/test_process_eml.py
deleted file mode 100644
index 26c5ad5..0000000
--- a/docs/scripts/tests/test_process_eml.py
+++ /dev/null
@@ -1,129 +0,0 @@
-"""Integration tests for process_eml() — full pipeline with --output-dir."""
-
-import os
-import shutil
-import sys
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-process_eml = eml_script.process_eml
-
-import pytest
-
-
-FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
-
-
-class TestPlainTextPipeline:
- def test_creates_eml_and_txt(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- # Copy fixture to tmp_path so temp dir can be created as sibling
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- result = process_eml(str(working_eml), str(output_dir))
-
- # Should have exactly 2 files: .eml and .txt
- assert len(result['files']) == 2
- eml_file = result['files'][0]
- txt_file = result['files'][1]
-
- assert eml_file['type'] == 'eml'
- assert txt_file['type'] == 'txt'
- assert eml_file['name'].endswith('.eml')
- assert txt_file['name'].endswith('.txt')
-
- # Files exist in output dir
- assert os.path.isfile(eml_file['path'])
- assert os.path.isfile(txt_file['path'])
-
- # Filenames contain expected components
- assert 'Jonathan' in eml_file['name']
- assert 'EMAIL' in eml_file['name']
- assert '2026-02-05' in eml_file['name']
-
- # Temp dir cleaned up (no extract-* dirs in inbox)
- inbox_contents = os.listdir(str(tmp_path / "inbox"))
- assert not any(d.startswith('extract-') for d in inbox_contents)
-
-
-class TestHtmlFallbackPipeline:
- def test_txt_contains_converted_html(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'html-only.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- result = process_eml(str(working_eml), str(output_dir))
-
- txt_file = result['files'][1]
- with open(txt_file['path'], 'r') as f:
- content = f.read()
-
- # Should be converted, not raw HTML
- assert '<p>' not in content
- assert '<strong>' not in content
- assert 'HTML' in content
-
-
-class TestAttachmentPipeline:
- def test_eml_txt_and_attachment_created(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'with-attachment.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- result = process_eml(str(working_eml), str(output_dir))
-
- assert len(result['files']) == 3
- types = [f['type'] for f in result['files']]
- assert types == ['eml', 'txt', 'attach']
-
- # Attachment is auto-renamed
- attach_file = result['files'][2]
- assert 'ATTACH' in attach_file['name']
- assert attach_file['name'].endswith('.pdf')
- assert os.path.isfile(attach_file['path'])
-
-
-class TestCollisionDetection:
- def test_raises_on_existing_file(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "output"
- # Run once to create files
- result = process_eml(str(working_eml), str(output_dir))
-
- # Run again — should raise FileExistsError
- with pytest.raises(FileExistsError, match="Collision"):
- process_eml(str(working_eml), str(output_dir))
-
-
-class TestMissingOutputDir:
- def test_creates_directory(self, tmp_path):
- eml_src = os.path.join(FIXTURES, 'plain-text.eml')
- working_eml = tmp_path / "inbox" / "message.eml"
- working_eml.parent.mkdir()
- shutil.copy2(eml_src, working_eml)
-
- output_dir = tmp_path / "new" / "nested" / "output"
- assert not output_dir.exists()
-
- result = process_eml(str(working_eml), str(output_dir))
- assert output_dir.exists()
- assert len(result['files']) == 2
diff --git a/docs/scripts/tests/test_save_attachments.py b/docs/scripts/tests/test_save_attachments.py
deleted file mode 100644
index 32f02a6..0000000
--- a/docs/scripts/tests/test_save_attachments.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""Tests for save_attachments()."""
-
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
-
-from conftest import make_plain_message, make_message_with_attachment
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-from email.mime.application import MIMEApplication
-
-import importlib.util
-spec = importlib.util.spec_from_file_location(
- "eml_script",
- os.path.join(os.path.dirname(__file__), '..', 'eml-view-and-extract-attachments.py')
-)
-eml_script = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(eml_script)
-
-save_attachments = eml_script.save_attachments
-
-
-class TestSingleAttachment:
- def test_file_written_and_returned(self, tmp_path):
- msg = make_message_with_attachment(
- attachment_filename="report.pdf",
- attachment_content=b"pdf bytes here"
- )
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
-
- assert len(result) == 1
- assert result[0]['original_name'] == "report.pdf"
- assert "ATTACH" in result[0]['renamed_name']
- assert result[0]['renamed_name'].endswith(".pdf")
-
- # File actually exists and has correct content
- written_path = result[0]['path']
- assert os.path.isfile(written_path)
- with open(written_path, 'rb') as f:
- assert f.read() == b"pdf bytes here"
-
-
-class TestMultipleAttachments:
- def test_all_written_and_returned(self, tmp_path):
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- msg['Date'] = 'Thu, 05 Feb 2026 11:36:00 -0600'
- msg.attach(MIMEText("body", 'plain'))
-
- for name, content in [("doc1.pdf", b"pdf1"), ("image.png", b"png1")]:
- att = MIMEApplication(content, Name=name)
- att['Content-Disposition'] = f'attachment; filename="{name}"'
- msg.attach(att)
-
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
-
- assert len(result) == 2
- for r in result:
- assert os.path.isfile(r['path'])
-
-
-class TestNoAttachments:
- def test_empty_list(self, tmp_path):
- msg = make_plain_message()
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
- assert result == []
-
-
-class TestFilenameWithSpaces:
- def test_cleaned_filename(self, tmp_path):
- msg = make_message_with_attachment(
- attachment_filename="My Document (1).pdf",
- attachment_content=b"data"
- )
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
-
- assert len(result) == 1
- assert " " not in result[0]['renamed_name']
- assert os.path.isfile(result[0]['path'])
-
-
-class TestNoContentDisposition:
- def test_skipped(self, tmp_path):
- msg = MIMEMultipart()
- msg['From'] = 'test@example.com'
- msg.attach(MIMEText("body", 'plain'))
-
- # Add a part without Content-Disposition
- part = MIMEApplication(b"data", Name="file.bin")
- # Explicitly remove Content-Disposition if present
- if 'Content-Disposition' in part:
- del part['Content-Disposition']
- msg.attach(part)
-
- result = save_attachments(msg, str(tmp_path), "2026-02-05-1136-Jonathan")
- assert result == []