#!/usr/bin/env node
// playwright.mjs — Fetch a JS-rendered page with Playwright, then extract with Readability.
//
// Args:   --url <url>      (required)
//         --timeout <ms>   (optional, default 30000; 0 disables the navigation timeout)
// Output: JSON on stdout (same shape as readability.mjs)
//         On failure the JSON is {"error": "<message>"} and the exit code is 1.
//         Usage errors (missing/invalid arguments) are reported on stderr instead.
//
// Requires: npx playwright install chromium (one-time setup)

import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import { chromium } from "playwright";

const DEFAULT_TIMEOUT_MS = 30000;

// Desktop Chrome user agent sent with every request made by the browser context.
const USER_AGENT =
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

const args = process.argv.slice(2);

/**
 * Return the value that follows a command-line flag.
 *
 * @param {string} name - Flag to look for, e.g. "--url".
 * @param {string|null} fallback - Value returned when the flag is absent or
 *   is the last argument (i.e. has no value after it).
 * @returns {string|null} The argument following `name`, or `fallback`.
 */
function getArg(name, fallback) {
  const idx = args.indexOf(name);
  if (idx === -1) {
    return fallback;
  }
  // A trailing flag has nothing after it; treat that like an absent flag
  // instead of leaking `undefined` to the caller.
  return args[idx + 1] ?? fallback;
}

/**
 * Render the page in headless Chromium, run Readability over the resulting
 * HTML, and write the outcome as a single JSON document to stdout.
 *
 * @returns {Promise<number>} Process exit status: 0 on success, 1 on any failure.
 */
async function main() {
  const url = getArg("--url", null);
  const timeout = Number.parseInt(getArg("--timeout", String(DEFAULT_TIMEOUT_MS)), 10);

  if (!url) {
    process.stderr.write("--url is required\n");
    return 1;
  }
  if (Number.isNaN(timeout) || timeout < 0) {
    process.stderr.write("--timeout must be a non-negative integer (milliseconds)\n");
    return 1;
  }

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({ userAgent: USER_AGENT });
    const page = await context.newPage();

    // "networkidle" waits for network activity to settle so that
    // client-side rendered content is present in the serialized HTML.
    await page.goto(url, { waitUntil: "networkidle", timeout });
    const html = await page.content();

    // Passing the page URL lets jsdom resolve relative links in the document.
    const dom = new JSDOM(html, { url });
    const article = new Readability(dom.window.document).parse();

    if (!article) {
      process.stdout.write(JSON.stringify({ error: "readability_failed" }));
      return 1;
    }

    process.stdout.write(
      JSON.stringify({
        title: article.title || null,
        author: article.byline || null,
        content: article.textContent || "",
        excerpt: article.excerpt || null,
        siteName: article.siteName || null,
        length: article.length || 0,
      })
    );
    return 0;
  } catch (e) {
    // Not every thrown value is an Error; fall back to its string form so
    // the payload always carries an "error" field.
    const message = e instanceof Error ? e.message : String(e);
    process.stdout.write(JSON.stringify({ error: message }));
    return 1;
  } finally {
    // Runs on every path above because failures return instead of calling
    // process.exit(), which would terminate before the browser is closed.
    if (browser) {
      await browser.close();
    }
  }
}

// Set the exit code and let the event loop drain naturally so pending
// stdout/stderr writes are flushed before the process ends.
process.exitCode = await main();