From e050612dec1c3b7733fd11d1600cfa0e7cb94650 Mon Sep 17 00:00:00 2001 From: vegard Date: Wed, 18 Mar 2026 00:52:58 +0000 Subject: [PATCH] HTML-rendering av enkeltartikler til CAS med SEO-metadata (oppgave 14.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implementerer rendering-pipeline: metadata.document (TipTap JSON) → HTML via Tera-templates → CAS-lagring → metadata.rendered oppdateres. Nye moduler: - tiptap.rs: Konverterer TipTap/ProseMirror JSON til HTML. Støtter paragraph, heading, blockquote, lister, code_block, image, hr, og marks (bold, italic, strike, code, link, underline). XSS-sikker med HTML-escaping. - render_article jobb i jobbkøen: Henter node + samling, konverterer document → HTML, rendrer med Tera + tema, lagrer i CAS, oppdaterer nodens metadata.rendered med html_hash og renderer_version. Endringer: - publishing.rs: SeoData-struct med OG-tags, canonical URL, JSON-LD. render_article_to_cas() for full pipeline. serve_article() serverer fra CAS (immutable cache) hvis pre-rendret, fallback til on-the-fly. RENDERER_VERSION=1 for fremtidig bulk re-rendering. - intentions.rs: Trigger render_article-jobb automatisk når belongs_to edge opprettes til samling med publishing-trait. - Alle 4 artikkel-templates: SEO-block med meta description, OG-tags (type, title, description, url, site_name, image, published_time), canonical URL, RSS-link, og JSON-LD structured data. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/concepts/publisering.md | 4 +- maskinrommet/src/intentions.rs | 47 ++ maskinrommet/src/jobs.rs | 31 ++ maskinrommet/src/main.rs | 1 + maskinrommet/src/publishing.rs | 445 +++++++++++++++++- maskinrommet/src/templates/avis/article.html | 14 + maskinrommet/src/templates/base.html | 1 + maskinrommet/src/templates/blogg/article.html | 14 + .../src/templates/magasin/article.html | 14 + .../src/templates/tidsskrift/article.html | 14 + maskinrommet/src/tiptap.rs | 380 +++++++++++++++ tasks.md | 16 +- 12 files changed, 965 insertions(+), 16 deletions(-) create mode 100644 maskinrommet/src/tiptap.rs diff --git a/docs/concepts/publisering.md b/docs/concepts/publisering.md index 38c5d45..6b7f757 100644 --- a/docs/concepts/publisering.md +++ b/docs/concepts/publisering.md @@ -654,9 +654,9 @@ Noden peker på rendret resultat via metadata: "metadata": { "document": { /* TipTap/ProseMirror JSON */ }, "rendered": { - "html_hash": "cas://sha256-abc123", + "html_hash": "a1b2c3d4e5f6...", // SHA-256 hex-digest, peker til CAS "rendered_at": "2026-03-17T14:30:00Z", - "renderer_version": 2 + "renderer_version": 1 } } } diff --git a/maskinrommet/src/intentions.rs b/maskinrommet/src/intentions.rs index 438f785..78f4add 100644 --- a/maskinrommet/src/intentions.rs +++ b/maskinrommet/src/intentions.rs @@ -1704,6 +1704,11 @@ fn spawn_pg_insert_edge( match result { Ok(_) => { tracing::info!(edge_id = %edge_id, "Edge persistert til PostgreSQL"); + + // Trigger artikkelrendering ved belongs_to til publiseringssamling + if edge_type == "belongs_to" { + trigger_render_if_publishing(&db, source_id, target_id).await; + } } Err(e) => { tracing::error!(edge_id = %edge_id, error = %e, "Kunne ikke persistere edge til PostgreSQL"); @@ -1713,6 +1718,48 @@ fn spawn_pg_insert_edge( }); } +/// Sjekker om target er en samling med publishing-trait, og legger i så fall +/// en `render_article`-jobb i køen for å rendere artikkelens HTML til CAS. +async fn trigger_render_if_publishing(db: &PgPool, source_id: Uuid, target_id: Uuid) { + match crate::publishing::find_publishing_collection_by_id(db, target_id).await { + Ok(Some(_config)) => { + let payload = serde_json::json!({ + "node_id": source_id.to_string(), + "collection_id": target_id.to_string(), + }); + + match crate::jobs::enqueue(db, "render_article", payload, Some(target_id), 5).await { + Ok(job_id) => { + tracing::info!( + job_id = %job_id, + node_id = %source_id, + collection_id = %target_id, + "render_article-jobb lagt i kø" + ); + } + Err(e) => { + tracing::error!( + node_id = %source_id, + collection_id = %target_id, + error = %e, + "Kunne ikke legge render_article-jobb i kø" + ); + } + } + } + Ok(None) => { + // Target er ikke en publiseringssamling — ingen rendering nødvendig + } + Err(e) => { + tracing::error!( + target_id = %target_id, + error = %e, + "Feil ved sjekk av publiseringssamling for rendering-trigger" + ); + } + } +} + /// Synkroniserer node_access-rader for et subject fra PG til STDB. /// Kalles etter recompute_access for å holde STDB i synk. async fn sync_node_access_to_stdb(db: &PgPool, stdb: &crate::stdb::StdbClient, subject_id: Uuid) { diff --git a/maskinrommet/src/jobs.rs b/maskinrommet/src/jobs.rs index 937f97a..19aa3a6 100644 --- a/maskinrommet/src/jobs.rs +++ b/maskinrommet/src/jobs.rs @@ -12,6 +12,7 @@ use crate::agent; use crate::ai_edges; use crate::audio; use crate::cas::CasStore; +use crate::publishing; use crate::stdb::StdbClient; use crate::summarize; use crate::transcribe; @@ -171,10 +172,40 @@ async fn dispatch( "audio_process" => { audio::handle_audio_process_job(job, db, stdb, cas).await } + "render_article" => { + handle_render_article(job, db, cas).await + } other => Err(format!("Ukjent jobbtype: {other}")), } } +/// Handler for `render_article`-jobb. +/// +/// Payload: `{ "node_id": "...", "collection_id": "..." }` +/// Rendrer artikkelens metadata.document til HTML via Tera, lagrer i CAS, +/// oppdaterer nodens metadata.rendered. +async fn handle_render_article( + job: &JobRow, + db: &PgPool, + cas: &CasStore, +) -> Result { + let node_id: Uuid = job + .payload + .get("node_id") + .and_then(|v| v.as_str()) + .and_then(|s| s.parse().ok()) + .ok_or("Mangler node_id i payload")?; + + let collection_id: Uuid = job + .payload + .get("collection_id") + .and_then(|v| v.as_str()) + .and_then(|s| s.parse().ok()) + .ok_or("Mangler collection_id i payload")?; + + publishing::render_article_to_cas(db, cas, node_id, collection_id).await +} + /// Starter worker-loopen som poller job_queue. /// Kjører som en bakgrunnsoppgave i tokio. pub fn start_worker(db: PgPool, stdb: StdbClient, cas: CasStore) { diff --git a/maskinrommet/src/main.rs b/maskinrommet/src/main.rs index f89f5c9..b05cb90 100644 --- a/maskinrommet/src/main.rs +++ b/maskinrommet/src/main.rs @@ -13,6 +13,7 @@ mod rss; mod serving; mod stdb; pub mod summarize; +pub mod tiptap; pub mod transcribe; pub mod tts; mod warmup; diff --git a/maskinrommet/src/publishing.rs b/maskinrommet/src/publishing.rs index 697073d..818ec0e 100644 --- a/maskinrommet/src/publishing.rs +++ b/maskinrommet/src/publishing.rs @@ -4,7 +4,11 @@ //! Hvert tema har artikkelmal + forside-mal. //! CSS-variabler for theme_config-overstyring. //! -//! Ref: docs/concepts/publisering.md § "Temaer" +//! Artikler rendres til HTML via Tera, lagres i CAS med SEO-metadata +//! (OG-tags, canonical, JSON-LD). Noden oppdateres med +//! `metadata.rendered.html_hash` + `renderer_version`. +//! +//! Ref: docs/concepts/publisering.md § "Temaer", "HTML-rendering og CAS" use axum::{ extract::{Path, State}, @@ -17,8 +21,14 @@ use sqlx::PgPool; use tera::{Context, Tera}; use uuid::Uuid; +use crate::cas::CasStore; +use crate::tiptap; use crate::AppState; +/// Renderer-versjon. Økes ved mal-/template-endringer. +/// Brukes for å identifisere artikler som trenger re-rendering (oppgave 14.14). +pub const RENDERER_VERSION: i64 = 1; + // ============================================================================= // Tema-konfigurasjon fra publishing-trait // ============================================================================= @@ -66,6 +76,63 @@ pub struct LayoutConfig { pub max_width: Option, } +// ============================================================================= +// SEO-data +// ============================================================================= + +/// SEO-metadata for artikkelrendering. +#[derive(Serialize, Clone)] +pub struct SeoData { + pub og_title: String, + pub description: String, + pub canonical_url: String, + pub og_image: Option, + pub json_ld: String, +} + +fn build_seo_data( + article: &ArticleData, + collection_title: &str, + canonical_url: &str, +) -> SeoData { + let description = article + .summary + .as_deref() + .unwrap_or("") + .to_string(); + + let json_ld = build_json_ld(article, collection_title, canonical_url); + + SeoData { + og_title: article.title.clone(), + description, + canonical_url: canonical_url.to_string(), + og_image: None, + json_ld, + } +} + +fn build_json_ld( + article: &ArticleData, + publisher_name: &str, + canonical_url: &str, +) -> String { + // Escape for safe JSON embedding i +{% endblock %} + {% block extra_css %} .article { max-width: var(--layout-max-width); diff --git a/maskinrommet/src/templates/base.html b/maskinrommet/src/templates/base.html index ba31429..f777407 100644 --- a/maskinrommet/src/templates/base.html +++ b/maskinrommet/src/templates/base.html @@ -4,6 +4,7 @@ {% block title %}{{ collection_title | default(value="Synops") }}{% endblock %} + {% block seo %}{% endblock %}