From 8af4265b6e160287aea80f5ea9ea048091adef8f Mon Sep 17 00:00:00 2001
From: vegard
Date: Wed, 18 Mar 2026 18:55:11 +0000
Subject: [PATCH] =?UTF-8?q?synops-clip=20orkestrering-st=C3=B8tte:=20cli?=
 =?UTF-8?q?=5Ftool-registrering=20+=20clip=5Furl=20jobb/API=20(oppgave=202?=
 =?UTF-8?q?5.4)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gjør synops-clip tilgjengelig i orkestreringer ved å:

1. Registrere synops-clip som cli_tool-node (migration 026) med norske
   aliases (clip, klipp, hent artikkel) og args_hints for
   script-kompilatoren. Orkestreringer kan nå skrive
   "1. clip fra event (lagre node, bruker)" som kompileres til
   "synops-clip --url {event.url} --write --created-by ...".

2. Legge til clip_url som jobbtype i jobbkøen (clip.rs) — spawner
   synops-clip med riktige env-variabler (DATABASE_URL, AI_GATEWAY_URL,
   etc).

3. Legge til POST /intentions/clip_url API-endepunkt slik at frontend og
   andre klienter kan trigge URL-klipping direkte.

4. Utvide trigger-konteksten med event.url og event.created_by slik at
   orkestreringer som reagerer på URL-deling kan videresende URL til
   synops-clip via variabel-substitusjon.
---
 maskinrommet/src/clip.rs                | 136 ++++++++++++++++++++++++
 maskinrommet/src/intentions.rs          |  73 +++++++++++++
 maskinrommet/src/jobs.rs                |   4 +
 maskinrommet/src/main.rs                |   2 +
 maskinrommet/src/script_compiler.rs     |  37 +++++++
 maskinrommet/src/script_executor.rs     |  16 +++
 migrations/026_cli_tool_synops_clip.sql |  33 ++++++
 tasks.md                                |   3 +-
 8 files changed, 302 insertions(+), 2 deletions(-)
 create mode 100644 maskinrommet/src/clip.rs
 create mode 100644 migrations/026_cli_tool_synops_clip.sql

diff --git a/maskinrommet/src/clip.rs b/maskinrommet/src/clip.rs
new file mode 100644
index 0000000..e406c3e
--- /dev/null
+++ b/maskinrommet/src/clip.rs
@@ -0,0 +1,136 @@
+// URL clipping dispatcher — delegates to the synops-clip CLI.
+//
+// Maskinrommet orchestrates (payload parsing, security checks);
+// the CLI tool does the work (HTTP fetching, Readability parsing,
+// node creation, AI summarisation).
+//
+// Job type: "clip_url"
+// Payload: { "url": "<url>", "created_by": "<uuid>", "write": true }
+//
+// Ref: docs/retninger/unix_filosofi.md
+
+use uuid::Uuid;
+
+use crate::cli_dispatch;
+use crate::jobs::JobRow;
+
+/// Path to the synops-clip binary.
+///
+/// Taken from `SYNOPS_CLIP_BIN`; falls back to `synops-clip` when the
+/// variable is unset or not valid Unicode.
+fn clip_bin() -> String {
+    std::env::var("SYNOPS_CLIP_BIN")
+        .unwrap_or_else(|_| "synops-clip".to_string())
+}
+
+/// Handles a `clip_url` job.
+///
+/// Spawns synops-clip, which does all the work: HTTP fetching,
+/// Readability parsing, node creation and AI summarisation.
+///
+/// Payload keys read here:
+/// - `url` (string, required) — must start with `http://` or `https://`.
+/// - `write` (bool, default `true`) — adds `--write`.
+/// - `created_by` (UUID string, optional) — adds `--created-by`, only
+///   together with `--write`.
+/// - `playwright` (bool, default `false`) — adds `--playwright`.
+/// - `timeout` (unsigned integer, optional) — adds `--timeout`.
+///
+/// # Errors
+///
+/// Fails when `url` is missing or has another scheme, when `created_by`
+/// is present but not a valid UUID string, or when `cli_dispatch` cannot
+/// prepare or run the tool.
+pub async fn handle_clip_url(
+    job: &JobRow,
+    _db: &sqlx::PgPool,
+) -> Result<serde_json::Value, String> {
+    let url = job
+        .payload
+        .get("url")
+        .and_then(|v| v.as_str())
+        .ok_or("Mangler url i payload")?;
+
+    // Payloads reach the queue through `jobs::enqueue`, not only through
+    // POST /intentions/clip_url, so repeat that endpoint's scheme check.
+    // It also keeps a value starting with `-` away from the CLI's parser.
+    if !url.starts_with("http://") && !url.starts_with("https://") {
+        return Err("URL må starte med http:// eller https://".into());
+    }
+
+    let write = job
+        .payload
+        .get("write")
+        .and_then(|v| v.as_bool())
+        .unwrap_or(true);
+
+    // A `created_by` that is present but unusable is an error: dropping it
+    // silently would run `--write` without `--created-by`.
+    let created_by = job
+        .payload
+        .get("created_by")
+        .filter(|v| !v.is_null())
+        .map(|v| {
+            v.as_str()
+                .and_then(|s| s.parse::<Uuid>().ok())
+                .ok_or("Ugyldig created_by i payload (forventet UUID)")
+        })
+        .transpose()?;
+
+    let playwright = job
+        .payload
+        .get("playwright")
+        .and_then(|v| v.as_bool())
+        .unwrap_or(false);
+
+    let timeout = job
+        .payload
+        .get("timeout")
+        .and_then(|v| v.as_u64());
+
+    // Build the command.
+    let bin = clip_bin();
+    let mut cmd = tokio::process::Command::new(&bin);
+
+    cmd.arg("--url").arg(url);
+
+    if write {
+        cmd.arg("--write");
+        if let Some(uid) = created_by {
+            cmd.arg("--created-by").arg(uid.to_string());
+        }
+    }
+
+    if playwright {
+        cmd.arg("--playwright");
+    }
+
+    if let Some(t) = timeout {
+        cmd.arg("--timeout").arg(t.to_string());
+    }
+
+    // Set the environment variables the CLI tool needs.
+    cli_dispatch::set_database_url(&mut cmd)?;
+    cli_dispatch::forward_env(&mut cmd, "AI_GATEWAY_URL");
+    cli_dispatch::forward_env(&mut cmd, "LITELLM_MASTER_KEY");
+    cli_dispatch::forward_env(&mut cmd, "AI_CLIP_MODEL");
+    cli_dispatch::forward_env(&mut cmd, "SYNOPS_CLIP_SCRIPTS");
+
+    tracing::info!(
+        url = %url,
+        write = write,
+        bin = %bin,
+        "Starter synops-clip"
+    );
+
+    let result = cli_dispatch::run_cli_tool(&bin, &mut cmd).await?;
+
+    tracing::info!(
+        url = %url,
+        title = result["title"].as_str().unwrap_or("n/a"),
+        paywall = result["paywall"].as_bool().unwrap_or(false),
+        "synops-clip fullført"
+    );
+
+    Ok(result)
+}
diff --git a/maskinrommet/src/intentions.rs b/maskinrommet/src/intentions.rs
index 025be8a..f85c54f 100644
--- a/maskinrommet/src/intentions.rs
+++ b/maskinrommet/src/intentions.rs
@@ -4648,6 +4648,79 @@ pub async fn ai_suggest_script(
     }))
 }
 
+// =============================================================================
+// POST /intentions/clip_url — clip a URL and create a content node
+// =============================================================================
+
+/// Request body for `POST /intentions/clip_url`.
+#[derive(Deserialize)]
+pub struct ClipUrlRequest {
+    /// URL to clip.
+    pub url: String,
+    /// Write the result to the database (default: true).
+    #[serde(default = "default_true")]
+    pub write: bool,
+    /// Force use of Playwright (headless browser).
+    #[serde(default)]
+    pub playwright: bool,
+}
+
+/// serde default for `ClipUrlRequest::write`.
+fn default_true() -> bool {
+    true
+}
+
+/// Response body: ID of the enqueued `clip_url` job.
+#[derive(Serialize)]
+pub struct ClipUrlResponse {
+    pub job_id: Uuid,
+}
+
+/// POST /intentions/clip_url
+///
+/// Puts a `clip_url` job on the queue.
+/// synops-clip fetches the article, parses it with Readability,
+/// and creates a content node with an AI summary.
+pub async fn clip_url(
+    State(state): State,
+    user: AuthUser,
+    Json(req): Json,
+) -> Result, (StatusCode, Json)> {
+    // Simple URL validation: only http:// and https:// URLs are accepted.
+    if !req.url.starts_with("http://") && !req.url.starts_with("https://") {
+        return Err(bad_request("URL må starte med http:// eller https://"));
+    }
+
+    let payload = serde_json::json!({
+        "url": req.url,
+        "created_by": user.node_id.to_string(),
+        "write": req.write,
+        "playwright": req.playwright,
+    });
+
+    let job_id = crate::jobs::enqueue(
+        &state.db,
+        "clip_url",
+        payload,
+        None,
+        3, // Low priority — not time-critical
+    )
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Kunne ikke legge clip_url-jobb i kø");
+        internal_error("Kunne ikke starte URL-klipping")
+    })?;
+
+    tracing::info!(
+        job_id = %job_id,
+        url = %req.url,
+        user = %user.node_id,
+        "clip_url-jobb lagt i kø"
+    );
+
+    Ok(Json(ClipUrlResponse { job_id }))
+}
+
 // =============================================================================
 // Tester
 // =============================================================================
diff --git a/maskinrommet/src/jobs.rs b/maskinrommet/src/jobs.rs
index 5f82d36..f12a45c 100644
--- a/maskinrommet/src/jobs.rs
+++ b/maskinrommet/src/jobs.rs
@@ -20,6 +20,7 @@ use crate::ai_process;
 use crate::audio;
 use crate::cas::CasStore;
 use crate::cli_dispatch;
+use crate::clip;
 use crate::maintenance::MaintenanceState;
 use crate::pg_writes;
 use crate::publishing::IndexCache;
@@ -219,6 +220,9 @@ async fn dispatch(
         "pg_delete_edge" => {
             pg_writes::handle_delete_edge(job, db, index_cache).await
         }
+        "clip_url" => {
+            clip::handle_clip_url(job, db).await
+        }
         // Orchestration: trigger-evaluering har lagt jobben i kø.
         // Kompilatoren parser scriptet og validerer det.
         // Utførelse av kompilert script kommer i oppgave 24.5.
diff --git a/maskinrommet/src/main.rs b/maskinrommet/src/main.rs index be91217..ed37937 100644 --- a/maskinrommet/src/main.rs +++ b/maskinrommet/src/main.rs @@ -7,6 +7,7 @@ pub mod bandwidth; mod auth; pub mod cas; pub mod cli_dispatch; +pub mod clip; mod custom_domain; mod intentions; pub mod jobs; @@ -259,6 +260,7 @@ async fn main() { .route("/custom-domain/om", get(custom_domain::serve_custom_domain_about)) .route("/custom-domain/{article_id}", get(custom_domain::serve_custom_domain_article)) // Orkestrering UI (oppgave 24.6) + AI-assistert (oppgave 24.7) + .route("/intentions/clip_url", post(intentions::clip_url)) .route("/intentions/compile_script", post(intentions::compile_script)) .route("/intentions/test_orchestration", post(intentions::test_orchestration)) .route("/intentions/ai_suggest_script", post(intentions::ai_suggest_script)) diff --git a/maskinrommet/src/script_compiler.rs b/maskinrommet/src/script_compiler.rs index 55b3e21..ba1aa13 100644 --- a/maskinrommet/src/script_compiler.rs +++ b/maskinrommet/src/script_compiler.rs @@ -250,6 +250,8 @@ const VALID_EVENT_VARS: &[&str] = &[ "event.cas_hash", "event.communication_id", "event.collection_id", + "event.url", + "event.created_by", ]; /// Sjekk om en variabelreferanse er gyldig. 
@@ -992,4 +994,39 @@ ved feil: opprett oppgave "Pipeline feilet" (bug)
             vec!["--collection-id", "{event.collection_id}"]
         );
     }
+
+    #[test]
+    fn test_compile_clip_url() {
+        let registry = ToolRegistry {
+            tools: vec![ToolDef {
+                binary: "synops-clip".into(),
+                aliases: vec!["clip".into(), "klipp".into(), "hent artikkel".into(), "clip url".into()],
+                description: "Hent og parse webartikler".into(),
+                args_hints: HashMap::from([
+                    ("url".into(), "--url {arg}".into()),
+                    ("fra event".into(), "--url {event.url}".into()),
+                    ("lagre node".into(), "--write".into()),
+                    ("bruker".into(), "--created-by {event.created_by}".into()),
+                    ("med timeout".into(), "--timeout {arg}".into()),
+                    ("force playwright".into(), "--playwright".into()),
+                ]),
+            }],
+        };
+
+        // Script under test: each phrase must map to its args_hints entry.
+        let script = "1. clip fra event (lagre node, bruker)\n";
+        let parsed = parse(script).unwrap();
+        let result = compile(&parsed, &registry);
+        assert!(
+            !result.has_errors(),
+            "clip fra event bør kompilere: {:?}",
+            result.diagnostics
+        );
+        let compiled = result.compiled.unwrap();
+        assert_eq!(compiled.steps[0].binary, "synops-clip");
+        assert_eq!(
+            compiled.steps[0].args,
+            vec!["--url", "{event.url}", "--write", "--created-by", "{event.created_by}"]
+        );
+    }
 }
diff --git a/maskinrommet/src/script_executor.rs b/maskinrommet/src/script_executor.rs
index 4323a31..d50cf24 100644
--- a/maskinrommet/src/script_executor.rs
+++ b/maskinrommet/src/script_executor.rs
@@ -28,6 +28,10 @@ pub struct ExecutionContext {
     pub cas_hash: Option,
     pub communication_id: Option,
     pub collection_id: Option,
+    /// URL fra trigger-kontekst (f.eks.
URL delt i chat) + pub url: Option, + /// Bruker-ID som utløste eventet + pub created_by: Option, /// ID til oppstrøms orkestrering (ved kaskade via triggers-edge) pub upstream_orchestration_id: Option, } @@ -47,6 +51,8 @@ impl ExecutionContext { cas_hash: s("cas_hash"), communication_id: s("communication_id"), collection_id: s("collection_id"), + url: s("url"), + created_by: s("created_by"), upstream_orchestration_id: s("upstream_orchestration_id"), } } @@ -67,6 +73,8 @@ impl ExecutionContext { "event.cas_hash" => self.cas_hash.clone(), "event.communication_id" => self.communication_id.clone(), "event.collection_id" => self.collection_id.clone(), + "event.url" => self.url.clone(), + "event.created_by" => self.created_by.clone(), "event.upstream_orchestration_id" => self.upstream_orchestration_id.clone(), _ => None, } @@ -450,6 +458,8 @@ mod tests { cas_hash: Some("sha256:abc".into()), communication_id: Some("comm-456".into()), collection_id: Some("coll-789".into()), + url: Some("https://example.com/article".into()), + created_by: Some("user-999".into()), upstream_orchestration_id: None, }; @@ -457,6 +467,8 @@ mod tests { assert_eq!(ctx.substitute("{event.cas_hash}"), "sha256:abc"); assert_eq!(ctx.substitute("{event.communication_id}"), "comm-456"); assert_eq!(ctx.substitute("{event.collection_id}"), "coll-789"); + assert_eq!(ctx.substitute("{event.url}"), "https://example.com/article"); + assert_eq!(ctx.substitute("{event.created_by}"), "user-999"); // Ukjent variabel returneres uendret assert_eq!(ctx.substitute("{event.unknown}"), "{event.unknown}"); // Ikke-variabel returneres uendret @@ -476,6 +488,8 @@ mod tests { cas_hash: Some("sha256:abc".into()), communication_id: None, collection_id: None, + url: None, + created_by: None, upstream_orchestration_id: None, }; @@ -501,6 +515,8 @@ mod tests { cas_hash: None, communication_id: None, collection_id: None, + url: None, + created_by: None, upstream_orchestration_id: None, }; diff --git 
a/migrations/026_cli_tool_synops_clip.sql b/migrations/026_cli_tool_synops_clip.sql
new file mode 100644
index 0000000..7138047
--- /dev/null
+++ b/migrations/026_cli_tool_synops_clip.sql
@@ -0,0 +1,33 @@
+-- 026_cli_tool_synops_clip.sql
+-- Task 25.4: register synops-clip as a cli_tool node for orchestrations.
+-- Makes synops-clip available to the script compiler, so an orchestration
+-- can write e.g. "1. clip fra event (lagre node, bruker)", which compiles to
+-- "synops-clip --url {event.url} --write --created-by {event.created_by}".
+--
+-- Ref: docs/retninger/unix_filosofi.md, migrations/022_cli_tool_seeds.sql
+
+BEGIN;
+
+INSERT INTO nodes (id, node_kind, title, visibility, metadata, created_by)
+VALUES (
+    'f0000000-c100-4000-b000-000000000016',
+    'cli_tool',
+    'synops-clip',
+    'discoverable',
+    '{
+        "binary": "synops-clip",
+        "aliases": ["clip", "klipp", "hent artikkel", "clip url"],
+        "description": "Hent og parse webartikler med Readability, opprett content-node med AI-oppsummering",
+        "args_hints": {
+            "url": "--url {arg}",
+            "fra event": "--url {event.url}",
+            "lagre node": "--write",
+            "bruker": "--created-by {event.created_by}",
+            "med timeout": "--timeout {arg}",
+            "force playwright": "--playwright"
+        }
+    }'::jsonb,
+    'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'
+);
+
+COMMIT;
diff --git a/tasks.md b/tasks.md
index ae54812..a65c9ad 100644
--- a/tasks.md
+++ b/tasks.md
@@ -337,8 +337,7 @@ Readability, og oppretter innholdsnode med AI-beriking. Brukes av @bot i chat
 - [x] 25.1 `synops-clip` CLI: hent URL, parse med Readability (mozilla/readability via JS eller Rust-port), returner ren tekst + metadata (tittel, forfatter, dato, ingress). Fallback til headless browser (Playwright) for JS-rendrede sider. Detekter betalingsmur (kort/avkuttet innhold, "logg inn for å lese", kjente paywall-mønstre) — returner `"paywall": true` og tilgjengelig innhold (ingress/utdrag). Output: JSON med `title`, `author`, `date`, `content`, `url`, `paywall`.
- [x] 25.2 Node-opprettelse: `synops-clip --write` oppretter `content`-node med artikkelinnhold, `metadata.source_url`, og `tagged`-edge "clipped". AI-oppsummering via LiteLLM. `mentions`-edges til gjenkjente entiteter i kunnskapsgrafen. - [x] 25.3 @bot-integrasjon: bruker limer inn URL i chat → boten gjenkjenner URL, kaller `synops-clip`, presenterer oppsummering i chatten, oppretter node i bakgrunnen. Ved paywall: "Denne artikkelen er bak betalingsmur. Jeg fikk med tittel og ingress — lim inn innholdet om du vil dele resten." -- [~] 25.4 Orkestrering-støtte: `synops-clip` tilgjengelig som verktøy i orkestreringer. F.eks. "Clip alle URL-er som deles i #Redaksjonen og oppsummer dem". - > Påbegynt: 2026-03-18T18:45 +- [x] 25.4 Orkestrering-støtte: `synops-clip` tilgjengelig som verktøy i orkestreringer. F.eks. "Clip alle URL-er som deles i #Redaksjonen og oppsummer dem". ## Fase 26: Epost — send og motta via synops.no