#!/usr/bin/env node
// readability.mjs — Parse HTML with Mozilla Readability.
//
// Input:  HTML on stdin
// Args:   --url <url>  Document URL handed to JSDOM (used for relative URL
//                      resolution). Falls back to https://example.com when the
//                      flag is omitted or has no value.
// Output: JSON on stdout with title, author, content (plain text), excerpt,
//         siteName, length — or { "error": "<message>" } on failure.
//
// Exit code 0 = success, 1 = parse failure or error.

import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import { readFileSync } from "node:fs";

/** URL used when the caller does not supply a usable `--url` value. */
const DEFAULT_URL = "https://example.com";

/**
 * Pick the document URL out of the CLI arguments.
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 * @returns {string} The value following `--url`, or DEFAULT_URL when the flag
 *   is absent or is the last argument (no value after it).
 */
function resolveUrl(argv) {
  const flagIdx = argv.indexOf("--url");
  if (flagIdx === -1) {
    return DEFAULT_URL;
  }
  return argv[flagIdx + 1] ?? DEFAULT_URL;
}

/**
 * Run Readability over an HTML string and build the JSON payload.
 *
 * @param {string} html - Raw HTML document.
 * @param {string} url - Document URL passed to JSDOM.
 * @returns {{ exitCode: number, payload: object }} Exit code plus the object to
 *   serialize on stdout.
 * @throws Whatever JSDOM or Readability throws (e.g. on an invalid URL).
 */
function extractArticle(html, url) {
  const dom = new JSDOM(html, { url });
  try {
    const article = new Readability(dom.window.document).parse();
    if (!article) {
      return { exitCode: 1, payload: { error: "readability_failed" } };
    }
    return {
      exitCode: 0,
      payload: {
        title: article.title || null,
        author: article.byline || null,
        // textContent is the extracted article with markup stripped.
        content: article.textContent || "",
        excerpt: article.excerpt || null,
        siteName: article.siteName || null,
        length: article.length || 0,
      },
    };
  } finally {
    // Release the jsdom window so nothing keeps the process alive after we
    // have copied the plain values we need out of the article.
    dom.window.close();
  }
}

/**
 * Read stdin, extract the article, and convert any failure into an error
 * payload instead of an uncaught exception.
 *
 * @returns {{ exitCode: number, payload: object }}
 */
function run() {
  try {
    // Read file descriptor 0 rather than the "/dev/stdin" path, which does not
    // exist on every platform (e.g. Windows). Reading inside the try block
    // means a read failure is reported as JSON like any other error.
    const html = readFileSync(0, "utf-8");
    return extractArticle(html, resolveUrl(process.argv.slice(2)));
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    return { exitCode: 1, payload: { error: message } };
  }
}

const { exitCode, payload } = run();

// Set exitCode instead of calling process.exit(): exit() can terminate the
// process before a pending stdout write has been flushed, truncating the JSON.
process.exitCode = exitCode;
process.stdout.write(JSON.stringify(payload));