synops/tools/synops-rss/src/main.rs
vegard d7f08d439d iTunes/Podcasting 2.0 RSS-tags: komplett implementering (oppgave 30.1)
Utvider synops-rss og maskinrommet/src/rss.rs med iTunes og Podcasting 2.0
namespace for podcast-samlinger.

Channel-level tags:
- itunes:author, itunes:category, itunes:explicit fra podcast-trait metadata
- itunes:image fra samlingens og_image-edge (CAS-hash)
- itunes:type (episodic)
- podcast:locked

Item-level tags:
- itunes:title, itunes:duration (fra media-metadata duration_secs)
- itunes:explicit (arver fra kanal), itunes:image (episode og_image)
- podcast:transcript (SRT-URL hvis transcription_segments finnes)
- podcast:chapters (JSON-URL hvis chapter-edges finnes)

DB-spørringene er utvidet til å hente transkripsjons-eksistens,
varighet, episode-bilde og kapitler i effektive batch-spørringer.

Merk: Transcript/chapters-URL-ene genereres i feeden men krever
offentlige endepunkt for å serveres (fremtidig oppgave).
2026-03-18 23:12:34 +00:00

779 lines
25 KiB
Rust

// synops-rss — RSS/Atom-feed generering.
//
// Genererer RSS 2.0 eller Atom 1.0 feed for samlinger med `rss`-trait.
// Erstatter RSS-logikken i maskinrommet/src/rss.rs som et frittstående
// CLI-verktøy i tråd med unix-filosofien.
//
// Miljøvariabler:
// DATABASE_URL — PostgreSQL-tilkobling (påkrevd)
//
// Ref: docs/retninger/unix_filosofi.md, docs/concepts/publisering.md
use chrono::{DateTime, Utc};
use clap::Parser;
use serde::Deserialize;
use std::process;
use uuid::Uuid;
// =============================================================================
// CLI
// =============================================================================
// Command-line arguments for the tool, parsed with clap's derive API.
//
// NOTE: clap's derive API uses `///` doc comments as help text, so the `///`
// lines in this struct are user-facing runtime strings and are deliberately
// left untranslated. Explanatory notes are written as plain `//` comments.
//
// `main` requires at least one of `collection_id` / `slug`; when both are
// given, `find_collection` looks the collection up by `collection_id`.
/// Generer RSS/Atom-feed for en samling.
#[derive(Parser)]
#[command(name = "synops-rss", about = "RSS/Atom-feed generering for samlinger")]
struct Cli {
// UUID of the collection node.
/// Samlings-ID (UUID)
#[arg(long)]
collection_id: Option<Uuid>,
// Publishing slug of the collection; alternative to `--collection-id`.
/// Samlingens slug (alternativ til --collection-id)
#[arg(long)]
slug: Option<String>,
// Feed format override. `main` treats the exact value "atom" as Atom and
// every other value as RSS.
/// Overstyr feed-format: rss eller atom
#[arg(long)]
format: Option<String>,
// Upper bound on the number of feed items; takes precedence over the
// rss trait's `max_items` (see `main`, which falls back to 50).
/// Maks antall elementer i feeden
#[arg(long)]
max_items: Option<i64>,
}
// =============================================================================
// Konfigurasjon fra trait-metadata
// =============================================================================
/// Settings read from the collection's `rss` trait (`metadata.traits.rss`).
///
/// Every field is optional; `find_collection` falls back to `Default` when the
/// trait value is missing or cannot be deserialized.
#[derive(Deserialize, Default)]
struct RssTraitConfig {
/// Preferred feed format; `main` uses it when `--format` is not given.
format: Option<String>,
/// Feed title; the feed builders fall back to the collection node's title.
title: Option<String>,
/// Feed description; the feed builders fall back to an empty string.
description: Option<String>,
/// Maximum number of items; `main` uses it when `--max-items` is not given.
max_items: Option<i64>,
/// Feed language; overridden by the podcast trait's language in the RSS builder.
language: Option<String>,
}
/// Settings read from the collection's `podcast` trait (`metadata.traits.podcast`).
///
/// `#[allow(dead_code)]` is needed because not every field is read in the
/// visible part of this file (`redirect_feed` is deserialized but never used here).
#[derive(Deserialize, Default)]
#[allow(dead_code)]
struct PodcastTraitConfig {
/// Emitted as `<itunes:author>` on the channel when present.
itunes_author: Option<String>,
/// Emitted as the `text` attribute of `<itunes:category>` when present.
itunes_category: Option<String>,
/// Explicit-content flag; treated as `false` when absent.
explicit: Option<bool>,
/// Feed language; takes precedence over the rss trait's language.
language: Option<String>,
/// Not read anywhere in the visible code.
/// NOTE(review): presumably a feed-redirect target URL — confirm intended use.
#[serde(default)]
redirect_feed: Option<String>,
}
/// Settings read from the collection's `publishing` trait (`metadata.traits.publishing`).
#[derive(Deserialize, Default)]
struct PublishingTraitConfig {
/// Public slug; `find_collection` falls back to the node UUID when absent.
slug: Option<String>,
/// Custom domain; when set, `main` builds the base URL as `https://<domain>`.
custom_domain: Option<String>,
}
// =============================================================================
// Datamodeller
// =============================================================================
/// Everything the feed builders need to know about the collection itself.
struct CollectionInfo {
/// Node id of the collection.
id: Uuid,
/// Title of the collection node (used when the rss trait has no title).
title: Option<String>,
/// Publishing slug, or the node UUID as a string when no slug is configured.
slug: String,
/// Parsed `rss` trait settings.
rss_config: RssTraitConfig,
/// Parsed `publishing` trait settings.
publishing_config: PublishingTraitConfig,
/// True when the collection's traits contain a `podcast` key.
is_podcast: bool,
/// Parsed `podcast` trait settings (defaults when the trait is absent).
podcast_config: PodcastTraitConfig,
/// CAS hash of the collection's artwork (from its og_image edge)
artwork_cas_hash: Option<String>,
}
/// One entry in the generated feed.
struct FeedItem {
/// Node id of the item.
id: Uuid,
/// Item title; the feed builders substitute a placeholder when absent.
title: Option<String>,
/// Item body; the feed builders emit a truncated version as description/summary.
content: Option<String>,
/// Creation time of the node; used as the date when `publish_at` is absent.
created_at: DateTime<Utc>,
/// Publish time parsed from the belongs_to edge metadata, if present and parseable.
publish_at: Option<DateTime<Utc>>,
/// Enclosure path of the form `/cas/<hash>`; the feed builders prefix the base URL.
enclosure_url: Option<String>,
/// MIME type of the enclosure (the feed builders default to `audio/mpeg`).
enclosure_mime: Option<String>,
/// Size of the enclosure as read from media metadata (the feed builders default to 0).
enclosure_size: Option<i64>,
/// The episode has transcription segments
has_transcript: bool,
/// Chapter markers (timestamp + title)
chapters: Vec<Chapter>,
/// Duration in seconds (from media metadata)
duration_secs: Option<i64>,
/// CAS hash of the episode's artwork (from its og_image edge)
episode_image_cas: Option<String>,
}
/// A chapter marker loaded from a `chapter` edge.
///
/// `#[allow(dead_code)]`: the visible code only checks whether an item has any
/// chapters, so the individual fields are never read here.
#[allow(dead_code)]
struct Chapter {
/// Value of the edge's `at` metadata field (`'00:00:00'` when missing).
/// NOTE(review): presumably an HH:MM:SS timestamp — confirm the stored format.
at: String,
/// Optional chapter title from the edge metadata.
title: Option<String>,
}
// =============================================================================
// Main
// =============================================================================
/// Entry point: resolve the collection, load its items and print the feed XML
/// to stdout.
///
/// Every failure is reported on stderr and ends the process with exit code 1.
#[tokio::main]
async fn main() {
    synops_common::logging::init("synops_rss");
    let args = Cli::parse();

    // The collection must be identifiable by at least one of the two options.
    if args.collection_id.is_none() && args.slug.is_none() {
        eprintln!("Feil: Enten --collection-id eller --slug må oppgis");
        process::exit(1);
    }

    let pool = synops_common::db::connect().await.unwrap_or_else(|err| {
        eprintln!("Feil: Kunne ikke koble til database: {err}");
        process::exit(1)
    });

    let lookup = find_collection(&pool, args.collection_id, args.slug.as_deref()).await;
    let collection = match lookup {
        Err(err) => {
            eprintln!("Feil: Database-feil ved oppslag: {err}");
            process::exit(1);
        }
        Ok(None) => {
            eprintln!("Feil: Fant ingen samling med rss-trait");
            process::exit(1);
        }
        Ok(Some(found)) => found,
    };

    // Precedence: command line, then the rss trait, then a default of 50.
    let limit = args
        .max_items
        .or(collection.rss_config.max_items)
        .unwrap_or(50);

    let entries = fetch_feed_items(&pool, collection.id, limit, collection.is_podcast)
        .await
        .unwrap_or_else(|err| {
            eprintln!("Feil: Kunne ikke hente feed-elementer: {err}");
            process::exit(1)
        });

    // A custom domain replaces the default public URL for the collection.
    let base_url = match collection.publishing_config.custom_domain.as_deref() {
        Some(domain) => format!("https://{domain}"),
        None => format!("https://synops.no/pub/{}", collection.slug),
    };

    // Precedence: command line, then the rss trait, then "rss".
    let feed_format = args
        .format
        .as_deref()
        .or(collection.rss_config.format.as_deref())
        .unwrap_or("rss");

    // Only the exact value "atom" selects Atom; anything else yields RSS.
    let xml = if feed_format == "atom" {
        build_atom_feed(&collection, &entries, &base_url)
    } else {
        build_rss_feed(&collection, &entries, &base_url)
    };

    tracing::info!(
        collection_id = %collection.id,
        slug = %collection.slug,
        format = feed_format,
        items = entries.len(),
        "Feed generert"
    );
    print!("{xml}");
}
// =============================================================================
// Database-spørringer
// =============================================================================
/// Finn samling med rss-trait, enten via UUID eller slug.
async fn find_collection(
db: &sqlx::PgPool,
collection_id: Option<Uuid>,
slug: Option<&str>,
) -> Result<Option<CollectionInfo>, sqlx::Error> {
let row: Option<(Uuid, Option<String>, serde_json::Value)> = if let Some(id) = collection_id {
sqlx::query_as(
r#"
SELECT id, title, metadata
FROM nodes
WHERE id = $1
AND node_kind = 'collection'
AND metadata->'traits' ? 'rss'
LIMIT 1
"#,
)
.bind(id)
.fetch_optional(db)
.await?
} else if let Some(slug) = slug {
sqlx::query_as(
r#"
SELECT id, title, metadata
FROM nodes
WHERE node_kind = 'collection'
AND metadata->'traits'->'publishing'->>'slug' = $1
AND metadata->'traits' ? 'rss'
LIMIT 1
"#,
)
.bind(slug)
.fetch_optional(db)
.await?
} else {
return Ok(None);
};
let Some((id, title, metadata)) = row else {
return Ok(None);
};
let traits = metadata
.get("traits")
.cloned()
.unwrap_or(serde_json::Value::Null);
let rss_config: RssTraitConfig = traits
.get("rss")
.cloned()
.map(|v| serde_json::from_value(v).unwrap_or_default())
.unwrap_or_default();
let publishing_config: PublishingTraitConfig = traits
.get("publishing")
.cloned()
.map(|v| serde_json::from_value(v).unwrap_or_default())
.unwrap_or_default();
let slug = publishing_config
.slug
.clone()
.unwrap_or_else(|| id.to_string());
let is_podcast = traits.get("podcast").is_some();
let podcast_config: PodcastTraitConfig = traits
.get("podcast")
.cloned()
.map(|v| serde_json::from_value(v).unwrap_or_default())
.unwrap_or_default();
// Hent samlingens artwork via og_image-edge
let artwork_cas_hash: Option<String> = sqlx::query_scalar(
r#"
SELECT m.metadata->>'cas_hash'
FROM edges e
JOIN nodes m ON m.id = e.target_id
WHERE e.source_id = $1
AND e.edge_type = 'og_image'
LIMIT 1
"#,
)
.bind(id)
.fetch_optional(db)
.await?;
Ok(Some(CollectionInfo {
id,
title,
slug,
rss_config,
publishing_config,
is_podcast,
podcast_config,
artwork_cas_hash,
}))
}
/// Fetch the items of a collection: nodes with a `belongs_to` edge pointing at it.
///
/// Items are ordered newest first by the edge's `publish_at` metadata (falling
/// back to the node's `created_at`) and capped at `max_items`.
///
/// For podcast collections the result also carries enclosure data, transcript
/// presence, chapters, duration and episode artwork; for other collections
/// those fields are left empty.
///
/// NOTE(review): neither query filters on `publish_at`, so items whose
/// `publish_at` lies in the future appear to be included — confirm intended.
async fn fetch_feed_items(
db: &sqlx::PgPool,
collection_id: Uuid,
max_items: i64,
is_podcast: bool,
) -> Result<Vec<FeedItem>, sqlx::Error> {
if is_podcast {
// Podcast: join via has_media for the enclosure, and look up transcript
// presence, duration and episode artwork in the same query.
// The tuple below is decoded positionally, so its element order must match
// the SELECT column order exactly:
// id, title, content, created_at, edge metadata, cas_hash, mime, size,
// has_transcript, duration_secs, episode_image_cas.
let rows: Vec<(
Uuid,
Option<String>,
Option<String>,
DateTime<Utc>,
Option<serde_json::Value>,
Option<String>,
Option<String>,
Option<i64>,
Option<bool>,
Option<i64>,
Option<String>,
)> = sqlx::query_as(
r#"
SELECT
n.id,
n.title,
n.content,
n.created_at,
e.metadata,
m.metadata->>'cas_hash' AS cas_hash,
m.metadata->>'mime' AS mime,
COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size,
EXISTS(
SELECT 1 FROM transcription_segments ts WHERE ts.node_id = n.id LIMIT 1
) AS has_transcript,
(m.metadata->>'duration_secs')::bigint AS duration_secs,
(SELECT img.metadata->>'cas_hash'
FROM edges ie
JOIN nodes img ON img.id = ie.target_id
WHERE ie.source_id = n.id AND ie.edge_type = 'og_image'
LIMIT 1
) AS episode_image_cas
FROM edges e
JOIN nodes n ON n.id = e.source_id
LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media'
LEFT JOIN nodes m ON m.id = me.target_id AND m.node_kind = 'media'
WHERE e.target_id = $1
AND e.edge_type = 'belongs_to'
ORDER BY COALESCE(
(e.metadata->>'publish_at')::timestamptz,
n.created_at
) DESC
LIMIT $2
"#,
)
.bind(collection_id)
.bind(max_items)
.fetch_all(db)
.await?;
// NOTE(review): the LEFT JOIN on has_media yields one row per media edge, so
// an item with several has_media edges would appear more than once — confirm
// that items have at most one such edge.
// Collect the node ids so all chapters can be fetched in a single query.
let node_ids: Vec<Uuid> = rows.iter().map(|r| r.0).collect();
let chapter_rows: Vec<(Uuid, String, Option<String>)> = if !node_ids.is_empty() {
sqlx::query_as(
r#"
SELECT
e.source_id,
COALESCE(e.metadata->>'at', '00:00:00') AS at,
e.metadata->>'title' AS title
FROM edges e
WHERE e.source_id = ANY($1)
AND e.edge_type = 'chapter'
ORDER BY e.source_id, e.metadata->>'at'
"#,
)
.bind(&node_ids)
.fetch_all(db)
.await?
} else {
vec![]
};
// Group the chapters per node.
let mut chapters_map: std::collections::HashMap<Uuid, Vec<Chapter>> =
std::collections::HashMap::new();
for (node_id, at, title) in chapter_rows {
chapters_map
.entry(node_id)
.or_default()
.push(Chapter { at, title });
}
Ok(rows
.into_iter()
.map(
|(id, title, content, created_at, edge_meta, cas_hash, mime, size, has_transcript, duration_secs, episode_image_cas)| {
// publish_at is stored as a string in the edge metadata; a missing or
// unparseable value simply leaves it as None.
let publish_at = edge_meta
.as_ref()
.and_then(|m| m.get("publish_at"))
.and_then(|v| v.as_str())
.and_then(|s| s.parse::<DateTime<Utc>>().ok());
FeedItem {
id,
title,
content,
created_at,
publish_at,
// Relative path only; the feed builders prepend the base URL.
enclosure_url: cas_hash.map(|h| format!("/cas/{h}")),
enclosure_mime: mime,
enclosure_size: size,
has_transcript: has_transcript.unwrap_or(false),
// `remove` hands the chapter list to the first row for this id; any
// later row with the same id gets an empty list.
chapters: chapters_map.remove(&id).unwrap_or_default(),
duration_secs,
episode_image_cas,
}
},
)
.collect())
} else {
// Non-podcast: only the basic node fields and the edge metadata are needed.
let rows: Vec<(
Uuid,
Option<String>,
Option<String>,
DateTime<Utc>,
Option<serde_json::Value>,
)> = sqlx::query_as(
r#"
SELECT
n.id,
n.title,
n.content,
n.created_at,
e.metadata
FROM edges e
JOIN nodes n ON n.id = e.source_id
WHERE e.target_id = $1
AND e.edge_type = 'belongs_to'
ORDER BY COALESCE(
(e.metadata->>'publish_at')::timestamptz,
n.created_at
) DESC
LIMIT $2
"#,
)
.bind(collection_id)
.bind(max_items)
.fetch_all(db)
.await?;
Ok(rows
.into_iter()
.map(|(id, title, content, created_at, edge_meta)| {
// Same publish_at extraction as in the podcast branch.
let publish_at = edge_meta
.as_ref()
.and_then(|m| m.get("publish_at"))
.and_then(|v| v.as_str())
.and_then(|s| s.parse::<DateTime<Utc>>().ok());
FeedItem {
id,
title,
content,
created_at,
publish_at,
enclosure_url: None,
enclosure_mime: None,
enclosure_size: None,
has_transcript: false,
chapters: vec![],
duration_secs: None,
episode_image_cas: None,
}
})
.collect())
}
}
// =============================================================================
// XML-generering
// =============================================================================
/// Build an RSS 2.0 document for `collection` and return it as a string.
///
/// * `collection` — feed-level settings; when `is_podcast` is set, the iTunes
///   and Podcasting 2.0 namespaces and tags are emitted as well.
/// * `items` — feed entries in output order; the first one supplies
///   `lastBuildDate`.
/// * `base_url` — unescaped public base URL; item, enclosure, artwork,
///   transcript and chapter URLs are all built relative to it.
///
/// Every interpolated value — including URLs, the language code, MIME types
/// and CAS hashes — is XML-escaped, since they originate from user-editable
/// metadata and would otherwise be able to break or inject markup.
fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
    let channel_title = xml_escape(
        collection
            .rss_config
            .title
            .as_deref()
            .or(collection.title.as_deref())
            .unwrap_or("Untitled Feed"),
    );
    let channel_desc = xml_escape(collection.rss_config.description.as_deref().unwrap_or(""));
    // The podcast trait's language overrides the rss trait's language.
    let language = xml_escape(
        collection
            .podcast_config
            .language
            .as_deref()
            .or(collection.rss_config.language.as_deref())
            .unwrap_or("no"),
    );
    // Escape the base URL once; every URL below is derived from it.
    let base = xml_escape(base_url);
    let feed_url = format!("{base}/feed.xml");
    // The explicit flag is channel-wide; episodes inherit it unchanged.
    let explicit = if collection.podcast_config.explicit.unwrap_or(false) {
        "true"
    } else {
        "false"
    };

    let mut xml = String::with_capacity(4096);
    xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    if collection.is_podcast {
        xml.push_str("<rss version=\"2.0\" xmlns:itunes=\"http://www.itunes.com/dtds/podcast-1.0.dtd\" xmlns:podcast=\"https://podcastindex.org/namespace/1.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
    } else {
        xml.push_str("<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
    }
    xml.push_str("<channel>\n");
    xml.push_str(&format!(" <title>{channel_title}</title>\n"));
    xml.push_str(&format!(" <link>{base}</link>\n"));
    xml.push_str(&format!(" <description>{channel_desc}</description>\n"));
    xml.push_str(&format!(" <language>{language}</language>\n"));
    xml.push_str(&format!(
        " <atom:link href=\"{feed_url}\" rel=\"self\" type=\"application/rss+xml\"/>\n"
    ));
    // Items arrive newest first, so the first item dates the whole feed.
    if let Some(item) = items.first() {
        let date = item.publish_at.unwrap_or(item.created_at);
        xml.push_str(&format!(
            " <lastBuildDate>{}</lastBuildDate>\n",
            date.to_rfc2822()
        ));
    }

    // iTunes and Podcasting 2.0 channel-level tags.
    if collection.is_podcast {
        let pc = &collection.podcast_config;
        if let Some(author) = pc.itunes_author.as_deref() {
            xml.push_str(&format!(
                " <itunes:author>{}</itunes:author>\n",
                xml_escape(author)
            ));
        }
        if let Some(category) = pc.itunes_category.as_deref() {
            xml.push_str(&format!(
                " <itunes:category text=\"{}\"/>\n",
                xml_escape(category)
            ));
        }
        xml.push_str(&format!(
            " <itunes:explicit>{explicit}</itunes:explicit>\n"
        ));
        // Artwork comes from the collection's og_image edge.
        if let Some(cas_hash) = collection.artwork_cas_hash.as_deref() {
            let artwork_url = format!("{base}/cas/{}", xml_escape(cas_hash));
            xml.push_str(&format!(" <itunes:image href=\"{artwork_url}\"/>\n"));
        }
        xml.push_str(" <itunes:type>episodic</itunes:type>\n");
        // Podcasting 2.0: locked
        xml.push_str(" <podcast:locked>no</podcast:locked>\n");
    }

    for item in items {
        // Built from the escaped base plus hex digits, so already XML-safe.
        let item_url = format!("{base}/{}", short_id(item.id));

        xml.push_str(" <item>\n");
        let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel"));
        xml.push_str(&format!(" <title>{title}</title>\n"));
        xml.push_str(&format!(" <link>{item_url}</link>\n"));
        xml.push_str(&format!(
            " <guid isPermaLink=\"false\">{}</guid>\n",
            item.id
        ));
        let pub_date = item.publish_at.unwrap_or(item.created_at);
        xml.push_str(&format!(
            " <pubDate>{}</pubDate>\n",
            pub_date.to_rfc2822()
        ));
        if let Some(content) = item.content.as_deref() {
            let desc = xml_escape(&truncate_description(content, 500));
            xml.push_str(&format!(" <description>{desc}</description>\n"));
        }
        if let Some(enc_path) = item.enclosure_url.as_deref() {
            let enc_url = format!("{base}{}", xml_escape(enc_path));
            let mime = xml_escape(item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"));
            let size = item.enclosure_size.unwrap_or(0);
            xml.push_str(&format!(
                " <enclosure url=\"{enc_url}\" length=\"{size}\" type=\"{mime}\"/>\n"
            ));
        }

        // iTunes and Podcasting 2.0 item-level tags (podcasts only).
        if collection.is_podcast {
            // itunes:title repeats the title explicitly for iTunes.
            xml.push_str(&format!(" <itunes:title>{title}</itunes:title>\n"));
            // itunes:duration as HH:MM:SS, or MM:SS when shorter than an hour.
            if let Some(secs) = item.duration_secs {
                let h = secs / 3600;
                let m = (secs % 3600) / 60;
                let s = secs % 60;
                if h > 0 {
                    xml.push_str(&format!(
                        " <itunes:duration>{h:02}:{m:02}:{s:02}</itunes:duration>\n"
                    ));
                } else {
                    xml.push_str(&format!(
                        " <itunes:duration>{m:02}:{s:02}</itunes:duration>\n"
                    ));
                }
            }
            // itunes:explicit per episode, inherited from the channel.
            xml.push_str(&format!(
                " <itunes:explicit>{explicit}</itunes:explicit>\n"
            ));
            // Episode artwork.
            if let Some(cas) = item.episode_image_cas.as_deref() {
                let img_url = format!("{base}/cas/{}", xml_escape(cas));
                xml.push_str(&format!(" <itunes:image href=\"{img_url}\"/>\n"));
            }
            // podcast:transcript — SRT generated from the transcription segments.
            if item.has_transcript {
                let transcript_url = format!("{item_url}/transcript.srt");
                xml.push_str(&format!(
                    " <podcast:transcript url=\"{transcript_url}\" type=\"application/srt\"/>\n"
                ));
            }
            // podcast:chapters — JSON generated from the chapter edges.
            if !item.chapters.is_empty() {
                let chapters_url = format!("{item_url}/chapters.json");
                xml.push_str(&format!(
                    " <podcast:chapters url=\"{chapters_url}\" type=\"application/json+chapters\"/>\n"
                ));
            }
        }
        xml.push_str(" </item>\n");
    }
    xml.push_str("</channel>\n");
    xml.push_str("</rss>\n");
    xml
}
/// Build an Atom 1.0 document for `collection` and return it as a string.
///
/// * `collection` — supplies the feed title and subtitle.
/// * `items` — feed entries in output order; the first one supplies the
///   feed-level `<updated>` (the current time is used for an empty feed).
/// * `base_url` — unescaped public base URL; the feed id and all item and
///   enclosure URLs are built relative to it.
///
/// Every interpolated value — including URLs and MIME types — is XML-escaped,
/// since they originate from user-editable metadata and would otherwise be
/// able to break or inject markup.
fn build_atom_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
    let feed_title = xml_escape(
        collection
            .rss_config
            .title
            .as_deref()
            .or(collection.title.as_deref())
            .unwrap_or("Untitled Feed"),
    );
    let feed_desc = xml_escape(collection.rss_config.description.as_deref().unwrap_or(""));
    // Escape the base URL once; every URL below is derived from it.
    let base = xml_escape(base_url);
    let feed_url = format!("{base}/feed.xml");
    // Items arrive newest first, so the first item dates the whole feed.
    let updated = items
        .first()
        .map(|i| i.publish_at.unwrap_or(i.created_at))
        .unwrap_or_else(Utc::now);

    let mut xml = String::with_capacity(4096);
    xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    xml.push_str("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n");
    xml.push_str(&format!(" <title>{feed_title}</title>\n"));
    xml.push_str(&format!(" <subtitle>{feed_desc}</subtitle>\n"));
    xml.push_str(&format!(
        " <link href=\"{feed_url}\" rel=\"self\" type=\"application/atom+xml\"/>\n"
    ));
    xml.push_str(&format!(" <link href=\"{base}\" rel=\"alternate\"/>\n"));
    xml.push_str(&format!(" <id>{base}</id>\n"));
    xml.push_str(&format!(
        " <updated>{}</updated>\n",
        updated.to_rfc3339()
    ));

    for item in items {
        xml.push_str(" <entry>\n");
        let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel"));
        xml.push_str(&format!(" <title>{title}</title>\n"));
        // Built from the escaped base plus hex digits, so already XML-safe.
        let item_url = format!("{base}/{}", short_id(item.id));
        xml.push_str(&format!(
            " <link href=\"{item_url}\" rel=\"alternate\"/>\n"
        ));
        xml.push_str(&format!(" <id>urn:uuid:{}</id>\n", item.id));
        // The same timestamp serves as both <updated> and <published>.
        let pub_date = item.publish_at.unwrap_or(item.created_at).to_rfc3339();
        xml.push_str(&format!(" <updated>{pub_date}</updated>\n"));
        xml.push_str(&format!(" <published>{pub_date}</published>\n"));
        if let Some(content) = item.content.as_deref() {
            let summary = xml_escape(&truncate_description(content, 500));
            xml.push_str(&format!(" <summary>{summary}</summary>\n"));
        }
        if let Some(enc_path) = item.enclosure_url.as_deref() {
            let enc_url = format!("{base}{}", xml_escape(enc_path));
            let mime = xml_escape(item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"));
            let size = item.enclosure_size.unwrap_or(0);
            xml.push_str(&format!(
                " <link rel=\"enclosure\" href=\"{enc_url}\" type=\"{mime}\" length=\"{size}\"/>\n"
            ));
        }
        xml.push_str(" </entry>\n");
    }
    xml.push_str("</feed>\n");
    xml
}
// =============================================================================
// Hjelpefunksjoner
// =============================================================================
/// Escape the five XML special characters (`&`, `<`, `>`, `"`, `'`) so the
/// text can be placed inside an element or an attribute value.
fn xml_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Short id for use in URLs: the first 8 characters of the UUID's string form.
fn short_id(id: Uuid) -> String {
    let mut text = id.to_string();
    text.truncate(8);
    text
}
/// Truncate a description to at most `max_chars` characters, preferring to cut
/// at a word boundary.
///
/// The limit counts `char`s, not bytes, so multi-byte text is never split in
/// the middle of a character. Text within the limit is returned unchanged.
/// Longer text is cut at the last space inside the limit; when there is no
/// usable space (none at all, or only a leading one, which would leave
/// nothing), the text is cut hard at the limit instead. No ellipsis is added.
fn truncate_description(s: &str, max_chars: usize) -> String {
    // Byte offset of the first character beyond the limit. `None` means the
    // text has at most `max_chars` characters and fits as it is.
    let Some((byte_end, _)) = s.char_indices().nth(max_chars) else {
        return s.to_string();
    };
    let head = &s[..byte_end];
    match head.rfind(' ') {
        // A space at offset 0 would truncate to an empty string; cut hard instead.
        Some(pos) if pos > 0 => head[..pos].to_string(),
        _ => head.to_string(),
    }
}