Fase 11 (produksjon) validert — LiveKit, pruning og podcast-RSS: - rss.rs + synops-rss: Les filstørrelse fra både 'size_bytes' (intentions) og 'size' (publishing) med COALESCE — forhindrer manglende enclosure- størrelse i podcast-feeds avhengig av opplastingsmetode. - pruning.rs + synops-prune: Samme COALESCE-fix for konsistent size-tracking. - rss.rs + synops-rss: Fiks truncate_description til å bruke char-indeksering istedenfor byte-indeksering — forhindrer panic på norsk tekst (å, ø, æ). LiveKit kjører i Docker (healthy), token-generering via join_communication, pruning-loop aktiv, RSS-endepunkt returnerer korrekt 404 for ukjent slug. Alle 61 maskinrommet-tester bestått.
576 lines
17 KiB
Rust
576 lines
17 KiB
Rust
// synops-rss — RSS/Atom-feed generering.
//
// Genererer RSS 2.0 eller Atom 1.0 feed for samlinger med `rss`-trait.
// Erstatter RSS-logikken i maskinrommet/src/rss.rs som et frittstående
// CLI-verktøy i tråd med unix-filosofien.
//
// Miljøvariabler:
//   DATABASE_URL — PostgreSQL-tilkobling (påkrevd)
//
// Ref: docs/retninger/unix_filosofi.md, docs/concepts/publisering.md
|
|
|
|
use chrono::{DateTime, Utc};
|
|
use clap::Parser;
|
|
use serde::Deserialize;
|
|
use std::process;
|
|
use uuid::Uuid;
|
|
|
|
// =============================================================================
|
|
// CLI
|
|
// =============================================================================
|
|
|
|
/// Generer RSS/Atom-feed for en samling.
|
|
#[derive(Parser)]
|
|
#[command(name = "synops-rss", about = "RSS/Atom-feed generering for samlinger")]
|
|
struct Cli {
|
|
/// Samlings-ID (UUID)
|
|
#[arg(long)]
|
|
collection_id: Option<Uuid>,
|
|
|
|
/// Samlingens slug (alternativ til --collection-id)
|
|
#[arg(long)]
|
|
slug: Option<String>,
|
|
|
|
/// Overstyr feed-format: rss eller atom
|
|
#[arg(long)]
|
|
format: Option<String>,
|
|
|
|
/// Maks antall elementer i feeden
|
|
#[arg(long)]
|
|
max_items: Option<i64>,
|
|
}
|
|
|
|
// =============================================================================
|
|
// Konfigurasjon fra trait-metadata
|
|
// =============================================================================
|
|
|
|
#[derive(Deserialize, Default)]
|
|
struct RssTraitConfig {
|
|
format: Option<String>,
|
|
title: Option<String>,
|
|
description: Option<String>,
|
|
max_items: Option<i64>,
|
|
language: Option<String>,
|
|
}
|
|
|
|
#[derive(Deserialize, Default)]
|
|
struct PublishingTraitConfig {
|
|
slug: Option<String>,
|
|
custom_domain: Option<String>,
|
|
}
|
|
|
|
// =============================================================================
|
|
// Datamodeller
|
|
// =============================================================================
|
|
|
|
struct CollectionInfo {
|
|
id: Uuid,
|
|
title: Option<String>,
|
|
slug: String,
|
|
rss_config: RssTraitConfig,
|
|
publishing_config: PublishingTraitConfig,
|
|
is_podcast: bool,
|
|
}
|
|
|
|
struct FeedItem {
|
|
id: Uuid,
|
|
title: Option<String>,
|
|
content: Option<String>,
|
|
created_at: DateTime<Utc>,
|
|
publish_at: Option<DateTime<Utc>>,
|
|
enclosure_url: Option<String>,
|
|
enclosure_mime: Option<String>,
|
|
enclosure_size: Option<i64>,
|
|
}
|
|
|
|
// =============================================================================
|
|
// Main
|
|
// =============================================================================
|
|
|
|
#[tokio::main]
|
|
async fn main() {
|
|
synops_common::logging::init("synops_rss");
|
|
|
|
let cli = Cli::parse();
|
|
|
|
if cli.collection_id.is_none() && cli.slug.is_none() {
|
|
eprintln!("Feil: Enten --collection-id eller --slug må oppgis");
|
|
process::exit(1);
|
|
}
|
|
|
|
let db = match synops_common::db::connect().await {
|
|
Ok(pool) => pool,
|
|
Err(e) => {
|
|
eprintln!("Feil: Kunne ikke koble til database: {e}");
|
|
process::exit(1);
|
|
}
|
|
};
|
|
|
|
let collection = match find_collection(&db, cli.collection_id, cli.slug.as_deref()).await {
|
|
Ok(Some(c)) => c,
|
|
Ok(None) => {
|
|
eprintln!("Feil: Fant ingen samling med rss-trait");
|
|
process::exit(1);
|
|
}
|
|
Err(e) => {
|
|
eprintln!("Feil: Database-feil ved oppslag: {e}");
|
|
process::exit(1);
|
|
}
|
|
};
|
|
|
|
let max_items = cli
|
|
.max_items
|
|
.or(collection.rss_config.max_items)
|
|
.unwrap_or(50);
|
|
|
|
let items = match fetch_feed_items(&db, collection.id, max_items, collection.is_podcast).await {
|
|
Ok(items) => items,
|
|
Err(e) => {
|
|
eprintln!("Feil: Kunne ikke hente feed-elementer: {e}");
|
|
process::exit(1);
|
|
}
|
|
};
|
|
|
|
let base_url = collection
|
|
.publishing_config
|
|
.custom_domain
|
|
.as_deref()
|
|
.map(|d| format!("https://{d}"))
|
|
.unwrap_or_else(|| format!("https://synops.no/pub/{}", collection.slug));
|
|
|
|
let format = cli
|
|
.format
|
|
.as_deref()
|
|
.or(collection.rss_config.format.as_deref())
|
|
.unwrap_or("rss");
|
|
|
|
let xml = match format {
|
|
"atom" => build_atom_feed(&collection, &items, &base_url),
|
|
_ => build_rss_feed(&collection, &items, &base_url),
|
|
};
|
|
|
|
tracing::info!(
|
|
collection_id = %collection.id,
|
|
slug = %collection.slug,
|
|
format = format,
|
|
items = items.len(),
|
|
"Feed generert"
|
|
);
|
|
|
|
print!("{xml}");
|
|
}
|
|
|
|
// =============================================================================
|
|
// Database-spørringer
|
|
// =============================================================================
|
|
|
|
/// Finn samling med rss-trait, enten via UUID eller slug.
|
|
async fn find_collection(
|
|
db: &sqlx::PgPool,
|
|
collection_id: Option<Uuid>,
|
|
slug: Option<&str>,
|
|
) -> Result<Option<CollectionInfo>, sqlx::Error> {
|
|
let row: Option<(Uuid, Option<String>, serde_json::Value)> = if let Some(id) = collection_id {
|
|
sqlx::query_as(
|
|
r#"
|
|
SELECT id, title, metadata
|
|
FROM nodes
|
|
WHERE id = $1
|
|
AND node_kind = 'collection'
|
|
AND metadata->'traits' ? 'rss'
|
|
LIMIT 1
|
|
"#,
|
|
)
|
|
.bind(id)
|
|
.fetch_optional(db)
|
|
.await?
|
|
} else if let Some(slug) = slug {
|
|
sqlx::query_as(
|
|
r#"
|
|
SELECT id, title, metadata
|
|
FROM nodes
|
|
WHERE node_kind = 'collection'
|
|
AND metadata->'traits'->'publishing'->>'slug' = $1
|
|
AND metadata->'traits' ? 'rss'
|
|
LIMIT 1
|
|
"#,
|
|
)
|
|
.bind(slug)
|
|
.fetch_optional(db)
|
|
.await?
|
|
} else {
|
|
return Ok(None);
|
|
};
|
|
|
|
let Some((id, title, metadata)) = row else {
|
|
return Ok(None);
|
|
};
|
|
|
|
let traits = metadata
|
|
.get("traits")
|
|
.cloned()
|
|
.unwrap_or(serde_json::Value::Null);
|
|
|
|
let rss_config: RssTraitConfig = traits
|
|
.get("rss")
|
|
.cloned()
|
|
.map(|v| serde_json::from_value(v).unwrap_or_default())
|
|
.unwrap_or_default();
|
|
|
|
let publishing_config: PublishingTraitConfig = traits
|
|
.get("publishing")
|
|
.cloned()
|
|
.map(|v| serde_json::from_value(v).unwrap_or_default())
|
|
.unwrap_or_default();
|
|
|
|
let slug = publishing_config
|
|
.slug
|
|
.clone()
|
|
.unwrap_or_else(|| id.to_string());
|
|
|
|
let is_podcast = traits.get("podcast").is_some();
|
|
|
|
Ok(Some(CollectionInfo {
|
|
id,
|
|
title,
|
|
slug,
|
|
rss_config,
|
|
publishing_config,
|
|
is_podcast,
|
|
}))
|
|
}
|
|
|
|
/// Hent publiserte elementer (belongs_to-edges til samlingen).
|
|
/// For podcast-samlinger: inkluder enclosure-data via has_media-edges.
|
|
async fn fetch_feed_items(
|
|
db: &sqlx::PgPool,
|
|
collection_id: Uuid,
|
|
max_items: i64,
|
|
is_podcast: bool,
|
|
) -> Result<Vec<FeedItem>, sqlx::Error> {
|
|
if is_podcast {
|
|
let rows: Vec<(
|
|
Uuid,
|
|
Option<String>,
|
|
Option<String>,
|
|
DateTime<Utc>,
|
|
Option<serde_json::Value>,
|
|
Option<String>,
|
|
Option<String>,
|
|
Option<i64>,
|
|
)> = sqlx::query_as(
|
|
r#"
|
|
SELECT
|
|
n.id,
|
|
n.title,
|
|
n.content,
|
|
n.created_at,
|
|
e.metadata,
|
|
m.metadata->>'cas_hash' AS cas_hash,
|
|
m.metadata->>'mime' AS mime,
|
|
COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size
|
|
FROM edges e
|
|
JOIN nodes n ON n.id = e.source_id
|
|
LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media'
|
|
LEFT JOIN nodes m ON m.id = me.target_id AND m.node_kind = 'media'
|
|
WHERE e.target_id = $1
|
|
AND e.edge_type = 'belongs_to'
|
|
ORDER BY COALESCE(
|
|
(e.metadata->>'publish_at')::timestamptz,
|
|
n.created_at
|
|
) DESC
|
|
LIMIT $2
|
|
"#,
|
|
)
|
|
.bind(collection_id)
|
|
.bind(max_items)
|
|
.fetch_all(db)
|
|
.await?;
|
|
|
|
Ok(rows
|
|
.into_iter()
|
|
.map(
|
|
|(id, title, content, created_at, edge_meta, cas_hash, mime, size)| {
|
|
let publish_at = edge_meta
|
|
.as_ref()
|
|
.and_then(|m| m.get("publish_at"))
|
|
.and_then(|v| v.as_str())
|
|
.and_then(|s| s.parse::<DateTime<Utc>>().ok());
|
|
|
|
FeedItem {
|
|
id,
|
|
title,
|
|
content,
|
|
created_at,
|
|
publish_at,
|
|
enclosure_url: cas_hash.map(|h| format!("/cas/{h}")),
|
|
enclosure_mime: mime,
|
|
enclosure_size: size,
|
|
}
|
|
},
|
|
)
|
|
.collect())
|
|
} else {
|
|
let rows: Vec<(
|
|
Uuid,
|
|
Option<String>,
|
|
Option<String>,
|
|
DateTime<Utc>,
|
|
Option<serde_json::Value>,
|
|
)> = sqlx::query_as(
|
|
r#"
|
|
SELECT
|
|
n.id,
|
|
n.title,
|
|
n.content,
|
|
n.created_at,
|
|
e.metadata
|
|
FROM edges e
|
|
JOIN nodes n ON n.id = e.source_id
|
|
WHERE e.target_id = $1
|
|
AND e.edge_type = 'belongs_to'
|
|
ORDER BY COALESCE(
|
|
(e.metadata->>'publish_at')::timestamptz,
|
|
n.created_at
|
|
) DESC
|
|
LIMIT $2
|
|
"#,
|
|
)
|
|
.bind(collection_id)
|
|
.bind(max_items)
|
|
.fetch_all(db)
|
|
.await?;
|
|
|
|
Ok(rows
|
|
.into_iter()
|
|
.map(|(id, title, content, created_at, edge_meta)| {
|
|
let publish_at = edge_meta
|
|
.as_ref()
|
|
.and_then(|m| m.get("publish_at"))
|
|
.and_then(|v| v.as_str())
|
|
.and_then(|s| s.parse::<DateTime<Utc>>().ok());
|
|
|
|
FeedItem {
|
|
id,
|
|
title,
|
|
content,
|
|
created_at,
|
|
publish_at,
|
|
enclosure_url: None,
|
|
enclosure_mime: None,
|
|
enclosure_size: None,
|
|
}
|
|
})
|
|
.collect())
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// XML-generering
|
|
// =============================================================================
|
|
|
|
/// Bygg RSS 2.0 XML-streng.
|
|
fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
|
|
let channel_title = xml_escape(
|
|
collection
|
|
.rss_config
|
|
.title
|
|
.as_deref()
|
|
.or(collection.title.as_deref())
|
|
.unwrap_or("Untitled Feed"),
|
|
);
|
|
let channel_desc = xml_escape(
|
|
collection
|
|
.rss_config
|
|
.description
|
|
.as_deref()
|
|
.unwrap_or(""),
|
|
);
|
|
let language = collection
|
|
.rss_config
|
|
.language
|
|
.as_deref()
|
|
.unwrap_or("no");
|
|
let feed_url = format!("{base_url}/feed.xml");
|
|
|
|
let mut xml = String::with_capacity(4096);
|
|
xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
|
|
|
|
if collection.is_podcast {
|
|
xml.push_str("<rss version=\"2.0\" xmlns:itunes=\"http://www.itunes.com/dtds/podcast-1.0.dtd\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
|
|
} else {
|
|
xml.push_str("<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
|
|
}
|
|
|
|
xml.push_str("<channel>\n");
|
|
xml.push_str(&format!(" <title>{channel_title}</title>\n"));
|
|
xml.push_str(&format!(" <link>{base_url}</link>\n"));
|
|
xml.push_str(&format!(" <description>{channel_desc}</description>\n"));
|
|
xml.push_str(&format!(" <language>{language}</language>\n"));
|
|
xml.push_str(&format!(
|
|
" <atom:link href=\"{feed_url}\" rel=\"self\" type=\"application/rss+xml\"/>\n"
|
|
));
|
|
|
|
if let Some(item) = items.first() {
|
|
let date = item.publish_at.unwrap_or(item.created_at);
|
|
xml.push_str(&format!(
|
|
" <lastBuildDate>{}</lastBuildDate>\n",
|
|
date.to_rfc2822()
|
|
));
|
|
}
|
|
|
|
for item in items {
|
|
xml.push_str(" <item>\n");
|
|
let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel"));
|
|
xml.push_str(&format!(" <title>{title}</title>\n"));
|
|
|
|
let item_url = format!("{base_url}/{}", short_id(item.id));
|
|
xml.push_str(&format!(" <link>{item_url}</link>\n"));
|
|
|
|
xml.push_str(&format!(
|
|
" <guid isPermaLink=\"false\">{}</guid>\n",
|
|
item.id
|
|
));
|
|
|
|
let pub_date = item.publish_at.unwrap_or(item.created_at);
|
|
xml.push_str(&format!(
|
|
" <pubDate>{}</pubDate>\n",
|
|
pub_date.to_rfc2822()
|
|
));
|
|
|
|
if let Some(ref content) = item.content {
|
|
let desc = xml_escape(&truncate_description(content, 500));
|
|
xml.push_str(&format!(" <description>{desc}</description>\n"));
|
|
}
|
|
|
|
if let Some(ref enc_path) = item.enclosure_url {
|
|
let enc_url = format!("{base_url}{enc_path}");
|
|
let mime = item.enclosure_mime.as_deref().unwrap_or("audio/mpeg");
|
|
let size = item.enclosure_size.unwrap_or(0);
|
|
xml.push_str(&format!(
|
|
" <enclosure url=\"{enc_url}\" length=\"{size}\" type=\"{mime}\"/>\n"
|
|
));
|
|
}
|
|
|
|
xml.push_str(" </item>\n");
|
|
}
|
|
|
|
xml.push_str("</channel>\n");
|
|
xml.push_str("</rss>\n");
|
|
xml
|
|
}
|
|
|
|
/// Bygg Atom 1.0 XML-streng.
|
|
fn build_atom_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
|
|
let feed_title = xml_escape(
|
|
collection
|
|
.rss_config
|
|
.title
|
|
.as_deref()
|
|
.or(collection.title.as_deref())
|
|
.unwrap_or("Untitled Feed"),
|
|
);
|
|
let feed_desc = xml_escape(
|
|
collection
|
|
.rss_config
|
|
.description
|
|
.as_deref()
|
|
.unwrap_or(""),
|
|
);
|
|
let feed_url = format!("{base_url}/feed.xml");
|
|
|
|
let updated = items
|
|
.first()
|
|
.map(|i| i.publish_at.unwrap_or(i.created_at))
|
|
.unwrap_or_else(Utc::now);
|
|
|
|
let mut xml = String::with_capacity(4096);
|
|
xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
|
|
xml.push_str("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n");
|
|
xml.push_str(&format!(" <title>{feed_title}</title>\n"));
|
|
xml.push_str(&format!(" <subtitle>{feed_desc}</subtitle>\n"));
|
|
xml.push_str(&format!(
|
|
" <link href=\"{feed_url}\" rel=\"self\" type=\"application/atom+xml\"/>\n"
|
|
));
|
|
xml.push_str(&format!(
|
|
" <link href=\"{base_url}\" rel=\"alternate\"/>\n"
|
|
));
|
|
xml.push_str(&format!(" <id>{base_url}</id>\n"));
|
|
xml.push_str(&format!(
|
|
" <updated>{}</updated>\n",
|
|
updated.to_rfc3339()
|
|
));
|
|
|
|
for item in items {
|
|
xml.push_str(" <entry>\n");
|
|
let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel"));
|
|
xml.push_str(&format!(" <title>{title}</title>\n"));
|
|
|
|
let item_url = format!("{base_url}/{}", short_id(item.id));
|
|
xml.push_str(&format!(
|
|
" <link href=\"{item_url}\" rel=\"alternate\"/>\n"
|
|
));
|
|
|
|
xml.push_str(&format!(" <id>urn:uuid:{}</id>\n", item.id));
|
|
|
|
let pub_date = item.publish_at.unwrap_or(item.created_at);
|
|
xml.push_str(&format!(
|
|
" <updated>{}</updated>\n",
|
|
pub_date.to_rfc3339()
|
|
));
|
|
xml.push_str(&format!(
|
|
" <published>{}</published>\n",
|
|
pub_date.to_rfc3339()
|
|
));
|
|
|
|
if let Some(ref content) = item.content {
|
|
let summary = xml_escape(&truncate_description(content, 500));
|
|
xml.push_str(&format!(" <summary>{summary}</summary>\n"));
|
|
}
|
|
|
|
if let Some(ref enc_path) = item.enclosure_url {
|
|
let enc_url = format!("{base_url}{enc_path}");
|
|
let mime = item.enclosure_mime.as_deref().unwrap_or("audio/mpeg");
|
|
let size = item.enclosure_size.unwrap_or(0);
|
|
xml.push_str(&format!(
|
|
" <link rel=\"enclosure\" href=\"{enc_url}\" type=\"{mime}\" length=\"{size}\"/>\n"
|
|
));
|
|
}
|
|
|
|
xml.push_str(" </entry>\n");
|
|
}
|
|
|
|
xml.push_str("</feed>\n");
|
|
xml
|
|
}
|
|
|
|
// =============================================================================
|
|
// Hjelpefunksjoner
|
|
// =============================================================================
|
|
|
|
/// Escape text for use in XML element content and attribute values.
///
/// Replaces the five characters with predefined XML entities:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&apos;`.
/// All other characters are copied unchanged.
fn xml_escape(s: &str) -> String {
    // A single pass means `&` is handled exactly once, so entities produced
    // for the other characters are never escaped a second time.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
|
|
|
/// Kort ID fra UUID (første 8 tegn) — for URL-er.
|
|
fn short_id(id: Uuid) -> String {
|
|
id.to_string()[..8].to_string()
|
|
}
|
|
|
|
/// Truncate a description to at most `max_chars` characters, preferring a word boundary.
///
/// Lengths are counted in `char`s, not bytes, so multi-byte text (å, ø, æ)
/// never causes a slice in the middle of a character.
///
/// * When `s` has `max_chars` characters or fewer it is returned unchanged.
/// * Otherwise the first `max_chars` characters are cut back to the last
///   whitespace, trailing whitespace is dropped, and `…` is appended.
/// * When that would leave nothing (no whitespace, or only leading
///   whitespace), the hard cut at `max_chars` characters is used instead.
fn truncate_description(s: &str, max_chars: usize) -> String {
    // Byte offset of the first character that does not fit. `None` means the
    // whole string fits, which avoids counting all characters up front.
    let Some((byte_end, _)) = s.char_indices().nth(max_chars) else {
        return s.to_string();
    };

    let head = &s[..byte_end];
    let kept = head
        .rfind(char::is_whitespace)
        .map(|pos| head[..pos].trim_end())
        .filter(|cut| !cut.is_empty())
        .unwrap_or(head);

    format!("{kept}…")
}
|