// synops-rss — RSS/Atom-feed generering. // // Genererer RSS 2.0 eller Atom 1.0 feed for samlinger med `rss`-trait. // Erstatter RSS-logikken i maskinrommet/src/rss.rs som et frittstående // CLI-verktøy i tråd med unix-filosofien. // // Miljøvariabler: // DATABASE_URL — PostgreSQL-tilkobling (påkrevd) // // Ref: docs/retninger/unix_filosofi.md, docs/concepts/publisering.md use chrono::{DateTime, Utc}; use clap::Parser; use serde::Deserialize; use std::process; use uuid::Uuid; // ============================================================================= // CLI // ============================================================================= /// Generer RSS/Atom-feed for en samling. #[derive(Parser)] #[command(name = "synops-rss", about = "RSS/Atom-feed generering for samlinger")] struct Cli { /// Samlings-ID (UUID) #[arg(long)] collection_id: Option, /// Samlingens slug (alternativ til --collection-id) #[arg(long)] slug: Option, /// Overstyr feed-format: rss eller atom #[arg(long)] format: Option, /// Maks antall elementer i feeden #[arg(long)] max_items: Option, } // ============================================================================= // Konfigurasjon fra trait-metadata // ============================================================================= #[derive(Deserialize, Default)] struct RssTraitConfig { format: Option, title: Option, description: Option, max_items: Option, language: Option, } #[derive(Deserialize, Default)] #[allow(dead_code)] struct PodcastTraitConfig { itunes_author: Option, itunes_category: Option, explicit: Option, language: Option, #[serde(default)] redirect_feed: Option, } #[derive(Deserialize, Default)] struct PublishingTraitConfig { slug: Option, custom_domain: Option, } // ============================================================================= // Datamodeller // ============================================================================= struct CollectionInfo { id: Uuid, title: Option, slug: String, rss_config: RssTraitConfig, publishing_config: PublishingTraitConfig, is_podcast: bool, podcast_config: PodcastTraitConfig, /// CAS-hash for samlingens artwork (fra og_image-edge) artwork_cas_hash: Option, } struct FeedItem { id: Uuid, title: Option, content: Option, created_at: DateTime, publish_at: Option>, enclosure_url: Option, enclosure_mime: Option, enclosure_size: Option, /// Episoden har transkripsjons-segmenter has_transcript: bool, /// Kapittelmarkører (tidspunkt + tittel) chapters: Vec, /// Varighet i sekunder (fra media-metadata) duration_secs: Option, /// Episodens artwork CAS-hash (fra og_image-edge) episode_image_cas: Option, } #[allow(dead_code)] struct Chapter { at: String, title: Option, } // ============================================================================= // Main // ============================================================================= #[tokio::main] async fn main() { synops_common::logging::init("synops_rss"); let cli = Cli::parse(); if cli.collection_id.is_none() && cli.slug.is_none() { eprintln!("Feil: Enten --collection-id eller --slug må oppgis"); process::exit(1); } let db = match synops_common::db::connect().await { Ok(pool) => pool, Err(e) => { eprintln!("Feil: Kunne ikke koble til database: {e}"); process::exit(1); } }; let collection = match find_collection(&db, cli.collection_id, cli.slug.as_deref()).await { Ok(Some(c)) => c, Ok(None) => { eprintln!("Feil: Fant ingen samling med rss-trait"); process::exit(1); } Err(e) => { eprintln!("Feil: Database-feil ved oppslag: {e}"); process::exit(1); } }; let max_items = cli .max_items .or(collection.rss_config.max_items) .unwrap_or(50); let items = match fetch_feed_items(&db, collection.id, max_items, collection.is_podcast).await { Ok(items) => items, Err(e) => { eprintln!("Feil: Kunne ikke hente feed-elementer: {e}"); process::exit(1); } }; let base_url = collection .publishing_config .custom_domain .as_deref() .map(|d| format!("https://{d}")) .unwrap_or_else(|| format!("https://synops.no/pub/{}", collection.slug)); let format = cli .format .as_deref() .or(collection.rss_config.format.as_deref()) .unwrap_or("rss"); let xml = match format { "atom" => build_atom_feed(&collection, &items, &base_url), _ => build_rss_feed(&collection, &items, &base_url), }; tracing::info!( collection_id = %collection.id, slug = %collection.slug, format = format, items = items.len(), "Feed generert" ); print!("{xml}"); } // ============================================================================= // Database-spørringer // ============================================================================= /// Finn samling med rss-trait, enten via UUID eller slug. async fn find_collection( db: &sqlx::PgPool, collection_id: Option, slug: Option<&str>, ) -> Result, sqlx::Error> { let row: Option<(Uuid, Option, serde_json::Value)> = if let Some(id) = collection_id { sqlx::query_as( r#" SELECT id, title, metadata FROM nodes WHERE id = $1 AND node_kind = 'collection' AND metadata->'traits' ? 'rss' LIMIT 1 "#, ) .bind(id) .fetch_optional(db) .await? } else if let Some(slug) = slug { sqlx::query_as( r#" SELECT id, title, metadata FROM nodes WHERE node_kind = 'collection' AND metadata->'traits'->'publishing'->>'slug' = $1 AND metadata->'traits' ? 'rss' LIMIT 1 "#, ) .bind(slug) .fetch_optional(db) .await? } else { return Ok(None); }; let Some((id, title, metadata)) = row else { return Ok(None); }; let traits = metadata .get("traits") .cloned() .unwrap_or(serde_json::Value::Null); let rss_config: RssTraitConfig = traits .get("rss") .cloned() .map(|v| serde_json::from_value(v).unwrap_or_default()) .unwrap_or_default(); let publishing_config: PublishingTraitConfig = traits .get("publishing") .cloned() .map(|v| serde_json::from_value(v).unwrap_or_default()) .unwrap_or_default(); let slug = publishing_config .slug .clone() .unwrap_or_else(|| id.to_string()); let is_podcast = traits.get("podcast").is_some(); let podcast_config: PodcastTraitConfig = traits .get("podcast") .cloned() .map(|v| serde_json::from_value(v).unwrap_or_default()) .unwrap_or_default(); // Hent samlingens artwork via og_image-edge let artwork_cas_hash: Option = sqlx::query_scalar( r#" SELECT m.metadata->>'cas_hash' FROM edges e JOIN nodes m ON m.id = e.target_id WHERE e.source_id = $1 AND e.edge_type = 'og_image' LIMIT 1 "#, ) .bind(id) .fetch_optional(db) .await?; Ok(Some(CollectionInfo { id, title, slug, rss_config, publishing_config, is_podcast, podcast_config, artwork_cas_hash, })) } /// Hent publiserte elementer (belongs_to-edges til samlingen). /// For podcast-samlinger: inkluder enclosure-data, transkripsjoner, kapitler og varighet. async fn fetch_feed_items( db: &sqlx::PgPool, collection_id: Uuid, max_items: i64, is_podcast: bool, ) -> Result, sqlx::Error> { if is_podcast { // Podcast: join med has_media for enclosure, sjekk transkripsjoner, varighet og episode-bilde let rows: Vec<( Uuid, Option, Option, DateTime, Option, Option, Option, Option, Option, Option, Option, )> = sqlx::query_as( r#" SELECT n.id, n.title, n.content, n.created_at, e.metadata, m.metadata->>'cas_hash' AS cas_hash, m.metadata->>'mime' AS mime, COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size, EXISTS( SELECT 1 FROM transcription_segments ts WHERE ts.node_id = n.id LIMIT 1 ) AS has_transcript, (m.metadata->>'duration_secs')::bigint AS duration_secs, (SELECT img.metadata->>'cas_hash' FROM edges ie JOIN nodes img ON img.id = ie.target_id WHERE ie.source_id = n.id AND ie.edge_type = 'og_image' LIMIT 1 ) AS episode_image_cas FROM edges e JOIN nodes n ON n.id = e.source_id LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media' LEFT JOIN nodes m ON m.id = me.target_id AND m.node_kind = 'media' WHERE e.target_id = $1 AND e.edge_type = 'belongs_to' ORDER BY COALESCE( (e.metadata->>'publish_at')::timestamptz, n.created_at ) DESC LIMIT $2 "#, ) .bind(collection_id) .bind(max_items) .fetch_all(db) .await?; // Samle node-IDer for å hente kapitler i én spørring let node_ids: Vec = rows.iter().map(|r| r.0).collect(); let chapter_rows: Vec<(Uuid, String, Option)> = if !node_ids.is_empty() { sqlx::query_as( r#" SELECT e.source_id, COALESCE(e.metadata->>'at', '00:00:00') AS at, e.metadata->>'title' AS title FROM edges e WHERE e.source_id = ANY($1) AND e.edge_type = 'chapter' ORDER BY e.source_id, e.metadata->>'at' "#, ) .bind(&node_ids) .fetch_all(db) .await? } else { vec![] }; // Grupper kapitler per node let mut chapters_map: std::collections::HashMap> = std::collections::HashMap::new(); for (node_id, at, title) in chapter_rows { chapters_map .entry(node_id) .or_default() .push(Chapter { at, title }); } Ok(rows .into_iter() .map( |(id, title, content, created_at, edge_meta, cas_hash, mime, size, has_transcript, duration_secs, episode_image_cas)| { let publish_at = edge_meta .as_ref() .and_then(|m| m.get("publish_at")) .and_then(|v| v.as_str()) .and_then(|s| s.parse::>().ok()); FeedItem { id, title, content, created_at, publish_at, enclosure_url: cas_hash.map(|h| format!("/cas/{h}")), enclosure_mime: mime, enclosure_size: size, has_transcript: has_transcript.unwrap_or(false), chapters: chapters_map.remove(&id).unwrap_or_default(), duration_secs, episode_image_cas, } }, ) .collect()) } else { let rows: Vec<( Uuid, Option, Option, DateTime, Option, )> = sqlx::query_as( r#" SELECT n.id, n.title, n.content, n.created_at, e.metadata FROM edges e JOIN nodes n ON n.id = e.source_id WHERE e.target_id = $1 AND e.edge_type = 'belongs_to' ORDER BY COALESCE( (e.metadata->>'publish_at')::timestamptz, n.created_at ) DESC LIMIT $2 "#, ) .bind(collection_id) .bind(max_items) .fetch_all(db) .await?; Ok(rows .into_iter() .map(|(id, title, content, created_at, edge_meta)| { let publish_at = edge_meta .as_ref() .and_then(|m| m.get("publish_at")) .and_then(|v| v.as_str()) .and_then(|s| s.parse::>().ok()); FeedItem { id, title, content, created_at, publish_at, enclosure_url: None, enclosure_mime: None, enclosure_size: None, has_transcript: false, chapters: vec![], duration_secs: None, episode_image_cas: None, } }) .collect()) } } // ============================================================================= // XML-generering // ============================================================================= /// Bygg RSS 2.0 XML-streng. fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String { let channel_title = xml_escape( collection .rss_config .title .as_deref() .or(collection.title.as_deref()) .unwrap_or("Untitled Feed"), ); let channel_desc = xml_escape( collection .rss_config .description .as_deref() .unwrap_or(""), ); // Podcast-trait language overstyre rss-trait language let language = collection .podcast_config .language .as_deref() .or(collection.rss_config.language.as_deref()) .unwrap_or("no"); let feed_url = format!("{base_url}/feed.xml"); let mut xml = String::with_capacity(4096); xml.push_str("\n"); if collection.is_podcast { xml.push_str("\n"); } else { xml.push_str("\n"); } xml.push_str("\n"); xml.push_str(&format!(" {channel_title}\n")); xml.push_str(&format!(" {base_url}\n")); xml.push_str(&format!(" {channel_desc}\n")); xml.push_str(&format!(" {language}\n")); xml.push_str(&format!( " \n" )); if let Some(item) = items.first() { let date = item.publish_at.unwrap_or(item.created_at); xml.push_str(&format!( " {}\n", date.to_rfc2822() )); } // iTunes og Podcasting 2.0 channel-level tags if collection.is_podcast { let pc = &collection.podcast_config; if let Some(ref author) = pc.itunes_author { xml.push_str(&format!( " {}\n", xml_escape(author) )); } if let Some(ref category) = pc.itunes_category { xml.push_str(&format!( " \n", xml_escape(category) )); } let explicit = pc.explicit.unwrap_or(false); xml.push_str(&format!( " {}\n", if explicit { "true" } else { "false" } )); // Artwork: fra og_image-edge eller samlingens podcast-bilde if let Some(ref cas_hash) = collection.artwork_cas_hash { let artwork_url = format!("{base_url}/cas/{cas_hash}"); xml.push_str(&format!( " \n" )); } xml.push_str(&format!( " episodic\n" )); // Podcasting 2.0: locked xml.push_str(" no\n"); } for item in items { xml.push_str(" \n"); let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel")); xml.push_str(&format!(" {title}\n")); let item_url = format!("{base_url}/{}", short_id(item.id)); xml.push_str(&format!(" {item_url}\n")); xml.push_str(&format!( " {}\n", item.id )); let pub_date = item.publish_at.unwrap_or(item.created_at); xml.push_str(&format!( " {}\n", pub_date.to_rfc2822() )); if let Some(ref content) = item.content { let desc = xml_escape(&truncate_description(content, 500)); xml.push_str(&format!(" {desc}\n")); } if let Some(ref enc_path) = item.enclosure_url { let enc_url = format!("{base_url}{enc_path}"); let mime = item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"); let size = item.enclosure_size.unwrap_or(0); xml.push_str(&format!( " \n" )); } // iTunes og Podcasting 2.0 item-level tags (kun for podcast) if collection.is_podcast { // itunes:title (samme som title, men eksplisitt for iTunes) xml.push_str(&format!(" {title}\n")); // itunes:duration if let Some(secs) = item.duration_secs { let h = secs / 3600; let m = (secs % 3600) / 60; let s = secs % 60; if h > 0 { xml.push_str(&format!( " {h:02}:{m:02}:{s:02}\n" )); } else { xml.push_str(&format!( " {m:02}:{s:02}\n" )); } } // itunes:explicit (per episode, arver fra kanal) let explicit = collection.podcast_config.explicit.unwrap_or(false); xml.push_str(&format!( " {}\n", if explicit { "true" } else { "false" } )); // Episode-bilde if let Some(ref cas) = item.episode_image_cas { let img_url = format!("{base_url}/cas/{cas}"); xml.push_str(&format!( " \n" )); } // podcast:transcript — SRT fra transkripsjons-segmenter if item.has_transcript { let transcript_url = format!("{base_url}/{}/transcript.srt", short_id(item.id)); xml.push_str(&format!( " \n" )); } // podcast:chapters — JSON fra chapter-edges if !item.chapters.is_empty() { let chapters_url = format!("{base_url}/{}/chapters.json", short_id(item.id)); xml.push_str(&format!( " \n" )); } } xml.push_str(" \n"); } xml.push_str("\n"); xml.push_str("\n"); xml } /// Bygg Atom 1.0 XML-streng. fn build_atom_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String { let feed_title = xml_escape( collection .rss_config .title .as_deref() .or(collection.title.as_deref()) .unwrap_or("Untitled Feed"), ); let feed_desc = xml_escape( collection .rss_config .description .as_deref() .unwrap_or(""), ); let feed_url = format!("{base_url}/feed.xml"); let updated = items .first() .map(|i| i.publish_at.unwrap_or(i.created_at)) .unwrap_or_else(Utc::now); let mut xml = String::with_capacity(4096); xml.push_str("\n"); xml.push_str("\n"); xml.push_str(&format!(" {feed_title}\n")); xml.push_str(&format!(" {feed_desc}\n")); xml.push_str(&format!( " \n" )); xml.push_str(&format!( " \n" )); xml.push_str(&format!(" {base_url}\n")); xml.push_str(&format!( " {}\n", updated.to_rfc3339() )); for item in items { xml.push_str(" \n"); let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel")); xml.push_str(&format!(" {title}\n")); let item_url = format!("{base_url}/{}", short_id(item.id)); xml.push_str(&format!( " \n" )); xml.push_str(&format!(" urn:uuid:{}\n", item.id)); let pub_date = item.publish_at.unwrap_or(item.created_at); xml.push_str(&format!( " {}\n", pub_date.to_rfc3339() )); xml.push_str(&format!( " {}\n", pub_date.to_rfc3339() )); if let Some(ref content) = item.content { let summary = xml_escape(&truncate_description(content, 500)); xml.push_str(&format!(" {summary}\n")); } if let Some(ref enc_path) = item.enclosure_url { let enc_url = format!("{base_url}{enc_path}"); let mime = item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"); let size = item.enclosure_size.unwrap_or(0); xml.push_str(&format!( " \n" )); } xml.push_str(" \n"); } xml.push_str("\n"); xml } // ============================================================================= // Hjelpefunksjoner // ============================================================================= /// XML-escape for tekst i elementer. fn xml_escape(s: &str) -> String { s.replace('&', "&") .replace('<', "<") .replace('>', ">") .replace('"', """) .replace('\'', "'") } /// Kort ID fra UUID (første 8 tegn) — for URL-er. fn short_id(id: Uuid) -> String { id.to_string()[..8].to_string() } /// Trunkér beskrivelse til maks antall tegn (chars, ikke bytes), på ordgrense. fn truncate_description(s: &str, max_chars: usize) -> String { let char_count = s.chars().count(); if char_count <= max_chars { return s.to_string(); } let byte_end = s.char_indices().nth(max_chars).map(|(i, _)| i).unwrap_or(s.len()); match s[..byte_end].rfind(' ') { Some(pos) => format!("{}…", &s[..pos]), None => format!("{}…", &s[..byte_end]), } }