From e5c7791dfc1d597d091d713b8a58ff62c10f0355 Mon Sep 17 00:00:00 2001 From: vegard Date: Wed, 18 Mar 2026 00:09:10 +0000 Subject: [PATCH] =?UTF-8?q?Podcast-RSS:=20samlings-node=20med=20publiserin?= =?UTF-8?q?gs-edges=20=E2=86=92=20generert=20RSS-feed=20(oppgave=2011.4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nytt endepunkt GET /pub/{slug}/feed.xml som genererer RSS 2.0 eller Atom 1.0 feed for samlinger med rss-trait. Feeden er offentlig (ingen auth). - Slår opp samling via publishing.slug i metadata.traits - Henter belongs_to-edges (publiserte noder), sortert på publish_at - Podcast-samlinger (med podcast-trait) inkluderer <enclosure>-tags med CAS-URL, MIME-type og filstørrelse fra has_media-edges - Støtter RSS 2.0 (default) og Atom 1.0 via rss.format config - iTunes-namespace for podcast-feeds - Stabile GUID-er basert på node UUID - 5 min cache (Cache-Control: public, max-age=300) Manuell XML-generering uten ekstra avhengigheter — enklere enn å introdusere en RSS-crate for dette omfanget. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- maskinrommet/src/main.rs | 2 + maskinrommet/src/rss.rs | 488 +++++++++++++++++++++++++++++++++++++++ tasks.md | 3 +- 3 files changed, 491 insertions(+), 2 deletions(-) create mode 100644 maskinrommet/src/rss.rs diff --git a/maskinrommet/src/main.rs b/maskinrommet/src/main.rs index 57a8149..17e4bea 100644 --- a/maskinrommet/src/main.rs +++ b/maskinrommet/src/main.rs @@ -7,6 +7,7 @@ pub mod jobs; pub mod livekit; pub mod pruning; mod queries; +mod rss; mod serving; mod stdb; pub mod summarize; @@ -165,6 +166,7 @@ async fn main() { .route("/query/graph", get(queries::query_graph)) .route("/query/transcription_versions", get(queries::query_transcription_versions)) .route("/query/segments_version", get(queries::query_segments_version)) + .route("/pub/{slug}/feed.xml", get(rss::generate_feed)) .layer(TraceLayer::new_for_http()) .with_state(state); diff --git a/maskinrommet/src/rss.rs b/maskinrommet/src/rss.rs new file mode 100644 index 0000000..96da5f8 --- /dev/null +++ b/maskinrommet/src/rss.rs @@ -0,0 +1,488 @@ +//! RSS/Atom-feed: GET /pub/{slug}/feed.xml +//! +//! Genererer RSS 2.0 eller Atom 1.0 feed for samlinger med `rss`-trait. +//! Feeden er offentlig — ingen autentisering kreves. +//! Podcast-samlinger (med `podcast`-trait) inkluderer <enclosure>-tags. +//! +//! Ref: docs/concepts/publisering.md (RSS/Atom-seksjonen) +//! 
docs/primitiver/traits.md (rss-trait) + +use axum::{ + extract::{Path, State}, + http::{header, StatusCode}, + response::Response, +}; +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use sqlx::PgPool; +use uuid::Uuid; + +use crate::AppState; + +// ============================================================================= +// Konfigurasjon fra trait-metadata +// ============================================================================= + +#[derive(Deserialize, Default)] +struct RssTraitConfig { + format: Option, // "rss" (default) eller "atom" + title: Option, + description: Option, + max_items: Option, + language: Option, +} + +#[derive(Deserialize, Default)] +struct PublishingTraitConfig { + #[allow(dead_code)] + slug: Option, + custom_domain: Option, +} + +// ============================================================================= +// Database-modeller +// ============================================================================= + +struct CollectionInfo { + id: Uuid, + title: Option, + rss_config: RssTraitConfig, + publishing_config: PublishingTraitConfig, + is_podcast: bool, +} + +struct FeedItem { + id: Uuid, + title: Option, + content: Option, + created_at: DateTime, + publish_at: Option>, + // Podcast-felt + enclosure_url: Option, + enclosure_mime: Option, + enclosure_size: Option, +} + +// ============================================================================= +// Handler +// ============================================================================= + +/// GET /pub/{slug}/feed.xml — offentlig RSS/Atom-feed. +pub async fn generate_feed( + State(state): State, + Path(slug): Path, +) -> Result { + let collection = find_collection_by_slug(&state.db, &slug) + .await + .map_err(|e| { + tracing::error!(slug = %slug, error = %e, "Feil ved oppslag av samling"); + StatusCode::INTERNAL_SERVER_ERROR + })? 
+ .ok_or(StatusCode::NOT_FOUND)?; + + let max_items = collection.rss_config.max_items.unwrap_or(50); + let items = fetch_feed_items(&state.db, collection.id, max_items, collection.is_podcast) + .await + .map_err(|e| { + tracing::error!(slug = %slug, error = %e, "Feil ved henting av feed-elementer"); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let base_url = collection + .publishing_config + .custom_domain + .as_deref() + .map(|d| format!("https://{d}")) + .unwrap_or_else(|| format!("https://synops.no/pub/{slug}")); + + let format = collection + .rss_config + .format + .as_deref() + .unwrap_or("rss"); + + let xml = match format { + "atom" => build_atom_feed(&collection, &items, &base_url), + _ => build_rss_feed(&collection, &items, &base_url), + }; + + let content_type = match format { + "atom" => "application/atom+xml; charset=utf-8", + _ => "application/rss+xml; charset=utf-8", + }; + + Ok(Response::builder() + .header(header::CONTENT_TYPE, content_type) + .header(header::CACHE_CONTROL, "public, max-age=300") + .body(xml.into()) + .unwrap()) +} + +// ============================================================================= +// Database-spørringer +// ============================================================================= + +/// Finn samling med rss-trait basert på publishing-slug. +async fn find_collection_by_slug( + db: &PgPool, + slug: &str, +) -> Result, sqlx::Error> { + let row: Option<(Uuid, Option, serde_json::Value)> = sqlx::query_as( + r#" + SELECT id, title, metadata + FROM nodes + WHERE node_kind = 'collection' + AND metadata->'traits'->'publishing'->>'slug' = $1 + AND metadata->'traits' ? 
'rss' + LIMIT 1 + "#, + ) + .bind(slug) + .fetch_optional(db) + .await?; + + let Some((id, title, metadata)) = row else { + return Ok(None); + }; + + let traits = metadata + .get("traits") + .cloned() + .unwrap_or(serde_json::Value::Null); + + let rss_config: RssTraitConfig = traits + .get("rss") + .cloned() + .map(|v| serde_json::from_value(v).unwrap_or_default()) + .unwrap_or_default(); + + let publishing_config: PublishingTraitConfig = traits + .get("publishing") + .cloned() + .map(|v| serde_json::from_value(v).unwrap_or_default()) + .unwrap_or_default(); + + let is_podcast = traits.get("podcast").is_some(); + + Ok(Some(CollectionInfo { + id, + title, + rss_config, + publishing_config, + is_podcast, + })) +} + +/// Hent publiserte elementer (belongs_to-edges til samlingen). +/// For podcast-samlinger: inkluder enclosure-data via has_media-edges. +async fn fetch_feed_items( + db: &PgPool, + collection_id: Uuid, + max_items: i64, + is_podcast: bool, +) -> Result, sqlx::Error> { + if is_podcast { + // Podcast: join med has_media for enclosure-data + let rows: Vec<(Uuid, Option, Option, DateTime, Option, Option, Option, Option)> = sqlx::query_as( + r#" + SELECT + n.id, + n.title, + n.content, + n.created_at, + e.metadata, + m.metadata->>'cas_hash' AS cas_hash, + m.metadata->>'mime' AS mime, + (m.metadata->>'size')::bigint AS size + FROM edges e + JOIN nodes n ON n.id = e.source_id + LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media' + LEFT JOIN nodes m ON m.id = me.target_id AND m.node_kind = 'media' + WHERE e.target_id = $1 + AND e.edge_type = 'belongs_to' + ORDER BY COALESCE( + (e.metadata->>'publish_at')::timestamptz, + n.created_at + ) DESC + LIMIT $2 + "#, + ) + .bind(collection_id) + .bind(max_items) + .fetch_all(db) + .await?; + + Ok(rows + .into_iter() + .map(|(id, title, content, created_at, edge_meta, cas_hash, mime, size)| { + let publish_at = edge_meta + .as_ref() + .and_then(|m| m.get("publish_at")) + .and_then(|v| v.as_str()) + 
.and_then(|s| s.parse::>().ok()); + + FeedItem { + id, + title, + content, + created_at, + publish_at, + enclosure_url: cas_hash.map(|h| format!("/cas/{h}")), + enclosure_mime: mime, + enclosure_size: size, + } + }) + .collect()) + } else { + // Vanlig feed: kun noder, ingen enclosures + let rows: Vec<(Uuid, Option, Option, DateTime, Option)> = sqlx::query_as( + r#" + SELECT + n.id, + n.title, + n.content, + n.created_at, + e.metadata + FROM edges e + JOIN nodes n ON n.id = e.source_id + WHERE e.target_id = $1 + AND e.edge_type = 'belongs_to' + ORDER BY COALESCE( + (e.metadata->>'publish_at')::timestamptz, + n.created_at + ) DESC + LIMIT $2 + "#, + ) + .bind(collection_id) + .bind(max_items) + .fetch_all(db) + .await?; + + Ok(rows + .into_iter() + .map(|(id, title, content, created_at, edge_meta)| { + let publish_at = edge_meta + .as_ref() + .and_then(|m| m.get("publish_at")) + .and_then(|v| v.as_str()) + .and_then(|s| s.parse::>().ok()); + + FeedItem { + id, + title, + content, + created_at, + publish_at, + enclosure_url: None, + enclosure_mime: None, + enclosure_size: None, + } + }) + .collect()) + } +} + +// ============================================================================= +// XML-generering +// ============================================================================= + +/// Bygg RSS 2.0 XML-streng. 
+fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String { + let channel_title = xml_escape( + collection + .rss_config + .title + .as_deref() + .or(collection.title.as_deref()) + .unwrap_or("Untitled Feed"), + ); + let channel_desc = xml_escape( + collection + .rss_config + .description + .as_deref() + .unwrap_or(""), + ); + let language = collection + .rss_config + .language + .as_deref() + .unwrap_or("no"); + let feed_url = format!("{base_url}/feed.xml"); + + let mut xml = String::with_capacity(4096); + xml.push_str("\n"); + + // iTunes namespace for podcast-feeds + if collection.is_podcast { + xml.push_str("\n"); + } else { + xml.push_str("\n"); + } + + xml.push_str("\n"); + xml.push_str(&format!(" {channel_title}\n")); + xml.push_str(&format!(" {base_url}\n")); + xml.push_str(&format!(" {channel_desc}\n")); + xml.push_str(&format!(" {language}\n")); + xml.push_str(&format!( + " \n" + )); + + if let Some(item) = items.first() { + let date = item.publish_at.unwrap_or(item.created_at); + xml.push_str(&format!(" {}\n", date.to_rfc2822())); + } + + for item in items { + xml.push_str(" \n"); + let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel")); + xml.push_str(&format!(" {title}\n")); + + let item_url = format!("{base_url}/{}", short_id(item.id)); + xml.push_str(&format!(" {item_url}\n")); + + // GUID er stabil — basert på node-ID + xml.push_str(&format!( + " {}\n", + item.id + )); + + let pub_date = item.publish_at.unwrap_or(item.created_at); + xml.push_str(&format!(" {}\n", pub_date.to_rfc2822())); + + if let Some(ref content) = item.content { + let desc = xml_escape(&truncate_description(content, 500)); + xml.push_str(&format!(" {desc}\n")); + } + + // Podcast enclosure + if let Some(ref enc_path) = item.enclosure_url { + let enc_url = format!("{base_url}{enc_path}"); + let mime = item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"); + let size = item.enclosure_size.unwrap_or(0); + xml.push_str(&format!( + 
" \n" + )); + } + + xml.push_str(" \n"); + } + + xml.push_str("\n"); + xml.push_str("\n"); + xml +} + +/// Bygg Atom 1.0 XML-streng. +fn build_atom_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String { + let feed_title = xml_escape( + collection + .rss_config + .title + .as_deref() + .or(collection.title.as_deref()) + .unwrap_or("Untitled Feed"), + ); + let feed_desc = xml_escape( + collection + .rss_config + .description + .as_deref() + .unwrap_or(""), + ); + let feed_url = format!("{base_url}/feed.xml"); + + let updated = items + .first() + .map(|i| i.publish_at.unwrap_or(i.created_at)) + .unwrap_or_else(Utc::now); + + let mut xml = String::with_capacity(4096); + xml.push_str("\n"); + xml.push_str("\n"); + xml.push_str(&format!(" {feed_title}\n")); + xml.push_str(&format!(" {feed_desc}\n")); + xml.push_str(&format!( + " \n" + )); + xml.push_str(&format!(" \n")); + xml.push_str(&format!(" {base_url}\n")); + xml.push_str(&format!( + " {}\n", + updated.to_rfc3339() + )); + + for item in items { + xml.push_str(" \n"); + let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel")); + xml.push_str(&format!(" {title}\n")); + + let item_url = format!("{base_url}/{}", short_id(item.id)); + xml.push_str(&format!( + " \n" + )); + + xml.push_str(&format!(" urn:uuid:{}\n", item.id)); + + let pub_date = item.publish_at.unwrap_or(item.created_at); + xml.push_str(&format!( + " {}\n", + pub_date.to_rfc3339() + )); + xml.push_str(&format!( + " {}\n", + pub_date.to_rfc3339() + )); + + if let Some(ref content) = item.content { + let summary = xml_escape(&truncate_description(content, 500)); + xml.push_str(&format!(" {summary}\n")); + } + + // Podcast enclosure som link med rel="enclosure" + if let Some(ref enc_path) = item.enclosure_url { + let enc_url = format!("{base_url}{enc_path}"); + let mime = item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"); + let size = item.enclosure_size.unwrap_or(0); + xml.push_str(&format!( + " \n" + )); + } + 
+ xml.push_str(" \n"); + } + + xml.push_str("\n"); + xml +} + +// ============================================================================= +// Hjelpefunksjoner +// ============================================================================= + +/// XML-escape for tekst i elementer. +fn xml_escape(s: &str) -> String { + s.replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +/// Kort ID fra UUID (første 8 tegn) — for URL-er. +fn short_id(id: Uuid) -> String { + id.to_string()[..8].to_string() +} + +/// Trunkér beskrivelse til maks antall tegn, på ordgrense. +fn truncate_description(s: &str, max_len: usize) -> String { + if s.len() <= max_len { + return s.to_string(); + } + match s[..max_len].rfind(' ') { + Some(pos) => format!("{}…", &s[..pos]), + None => format!("{}…", &s[..max_len]), + } +} diff --git a/tasks.md b/tasks.md index 886e17e..2e03c30 100644 --- a/tasks.md +++ b/tasks.md @@ -126,8 +126,7 @@ Uavhengige faser kan fortsatt plukkes. - [x] 11.1 LiveKit oppsett: Docker-container for WebRTC. Ref: `docs/setup/produksjon.md`. - [x] 11.2 Sanntidslyd: kommunikasjonsnode med live-status → LiveKit-rom for deltakere. - [x] 11.3 Pruning-logikk: TTL per modalitet, signaler som forlenger levetid, disk-nødventil. -- [~] 11.4 Podcast-RSS: samlings-node med publiserings-edges → generert RSS-feed. - > Påbegynt: 2026-03-18T00:03 +- [x] 11.4 Podcast-RSS: samlings-node med publiserings-edges → generert RSS-feed. ## Fase 13: Trait-system