aboutsummaryrefslogtreecommitdiff
path: root/playwright-py/examples/broken_links.py
diff options
context:
space:
mode:
Diffstat (limited to 'playwright-py/examples/broken_links.py')
-rw-r--r--playwright-py/examples/broken_links.py58
1 files changed, 58 insertions, 0 deletions
diff --git a/playwright-py/examples/broken_links.py b/playwright-py/examples/broken_links.py
new file mode 100644
index 0000000..c78520f
--- /dev/null
+++ b/playwright-py/examples/broken_links.py
@@ -0,0 +1,58 @@
+"""Worked example: scan a page's absolute http(s) links for broken URLs.
+
+Env vars used:
+ TARGET_URL (default: http://localhost:5173)
+
+Run:
+ python examples/broken_links.py
+"""
+
+import os
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from playwright.sync_api import sync_playwright
+from scripts.safe_actions import build_context_with_headers
+
+TARGET_URL = os.environ.get("TARGET_URL", "http://localhost:5173")
+
+
def _probe_status(request, url: str, timeout_ms: int = 5000) -> int:
    """Return the HTTP status for *url*, preferring a cheap HEAD request.

    Some servers answer HEAD with 405 (Method Not Allowed) or 501 (Not
    Implemented) although the resource itself is reachable. Those URLs are
    retried with GET so they are not misreported as broken.

    Args:
        request: Object exposing ``head(url, timeout=...)`` and
            ``get(url, timeout=...)`` whose results have ``.status``
            (``page.request`` in this script).
        url: Absolute URL to probe.
        timeout_ms: Per-request timeout in milliseconds.

    Raises:
        Whatever the underlying request raises (timeouts, DNS failures, ...).
    """
    status = request.head(url, timeout=timeout_ms).status
    if status not in (405, 501):
        return status

    resp = request.get(url, timeout=timeout_ms)
    try:
        return resp.status
    finally:
        # Only the status matters; release the downloaded body right away.
        resp.dispose()


def main() -> int:
    """Probe every ``a[href^="http"]`` link on TARGET_URL and report results.

    Prints one line per unique URL (status or error) followed by a summary.

    Returns:
        0 when every link answered with a status below 400; 1 when at least
        one link returned >= 400 or could not be requested at all.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = build_context_with_headers(browser)
            page = context.new_page()

            page.goto(TARGET_URL)
            page.wait_for_load_state("networkidle")

            # Collect unique hrefs starting with "http". The selector does not
            # filter by origin or visibility. Each attribute is read once to
            # avoid a second round trip to the browser per link.
            anchors = page.locator('a[href^="http"]').all()
            hrefs = (anchor.get_attribute("href") for anchor in anchors)
            urls = sorted({href for href in hrefs if href})

            ok, bad, err = 0, 0, 0
            for url in urls:
                try:
                    status = _probe_status(page.request, url)
                except Exception as ex:
                    # Deliberately broad: any failure to reach one URL is
                    # reported and counted, and the scan moves on.
                    err += 1
                    print(f"✗ ERR {url} ({type(ex).__name__}: {ex})")
                    continue

                if status < 400:
                    ok += 1
                    print(f"✓ {status} {url}")
                else:
                    bad += 1
                    print(f"✗ {status} {url}")

            print(f"\n{ok} ok, {bad} broken, {err} errored out of {len(urls)} total")
            return 0 if (bad == 0 and err == 0) else 1
        finally:
            # Close the browser even when navigation or scanning raised.
            browser.close()
+
+
if __name__ == "__main__":
    # Run the scan and hand its result to the shell as the exit status.
    exit_code = main()
    sys.exit(exit_code)