diff --git a/docs/features/podcast_hosting.md b/docs/features/podcast_hosting.md index e0b7469..f350b9d 100644 --- a/docs/features/podcast_hosting.md +++ b/docs/features/podcast_hosting.md @@ -21,23 +21,18 @@ med riktige tags. Vi har 80% allerede. ## Hva vi mangler -### 1. Podcast-spesifikke RSS-tags +### ~~1. Podcast-spesifikke RSS-tags~~ ✓ -Utvid synops-rss med iTunes og Podcasting 2.0 namespace: +Implementert i synops-rss og maskinrommet/src/rss.rs. Begge genererer nå: -```xml -Sidelinja - -false - -no - - -``` +**Channel-level:** `itunes:author`, `itunes:category`, `itunes:explicit`, +`itunes:image` (fra og_image-edge), `itunes:type`, `podcast:locked`. -Metadata fra samlingens podcast-trait: +**Item-level:** `itunes:title`, `itunes:duration`, `itunes:explicit`, +`itunes:image` (episode-bilde), `podcast:transcript` (SRT fra +transcription_segments), `podcast:chapters` (JSON fra chapter-edges). + +Metadata leses fra samlingens podcast-trait: ```jsonc { @@ -53,6 +48,10 @@ Metadata fra samlingens podcast-trait: } ``` +**Merk:** Transcript- og chapters-URL-ene (`/{short_id}/transcript.srt`, +`/{short_id}/chapters.json`) krever at offentlige endepunkt legges til i +maskinrommet for å servere disse. De genereres i feeden, men serveres ikke ennå. + ### 2. Nedlastingsstatistikk Caddy logger allerede alle requests. `synops-stats` parser diff --git a/maskinrommet/src/rss.rs b/maskinrommet/src/rss.rs index 516b0a2..049abd1 100644 --- a/maskinrommet/src/rss.rs +++ b/maskinrommet/src/rss.rs @@ -2,10 +2,12 @@ //! //! Genererer RSS 2.0 eller Atom 1.0 feed for samlinger med `rss`-trait. //! Feeden er offentlig — ingen autentisering kreves. -//! Podcast-samlinger (med `podcast`-trait) inkluderer -tags. +//! Podcast-samlinger (med `podcast`-trait) inkluderer -tags, +//! iTunes-tags og Podcasting 2.0-tags (transcript, chapters). //! //! Ref: docs/concepts/publisering.md (RSS/Atom-seksjonen) //! docs/primitiver/traits.md (rss-trait) +//! 
docs/features/podcast_hosting.md (iTunes/Podcasting 2.0) use axum::{ extract::{Path, State}, @@ -32,6 +34,17 @@ struct RssTraitConfig { language: Option, } +#[derive(Deserialize, Default)] +#[allow(dead_code)] +struct PodcastTraitConfig { + itunes_author: Option, + itunes_category: Option, + explicit: Option, + language: Option, + #[serde(default)] + redirect_feed: Option, +} + #[derive(Deserialize, Default)] struct PublishingTraitConfig { #[allow(dead_code)] @@ -49,6 +62,8 @@ struct CollectionInfo { rss_config: RssTraitConfig, publishing_config: PublishingTraitConfig, is_podcast: bool, + podcast_config: PodcastTraitConfig, + artwork_cas_hash: Option, } struct FeedItem { @@ -61,6 +76,16 @@ struct FeedItem { enclosure_url: Option, enclosure_mime: Option, enclosure_size: Option, + has_transcript: bool, + chapters: Vec, + duration_secs: Option, + episode_image_cas: Option, +} + +#[allow(dead_code)] +struct Chapter { + at: String, + title: Option, } // ============================================================================= @@ -164,17 +189,40 @@ async fn find_collection_by_slug( let is_podcast = traits.get("podcast").is_some(); + let podcast_config: PodcastTraitConfig = traits + .get("podcast") + .cloned() + .map(|v| serde_json::from_value(v).unwrap_or_default()) + .unwrap_or_default(); + + // Hent samlingens artwork via og_image-edge + let artwork_cas_hash: Option = sqlx::query_scalar( + r#" + SELECT m.metadata->>'cas_hash' + FROM edges e + JOIN nodes m ON m.id = e.target_id + WHERE e.source_id = $1 + AND e.edge_type = 'og_image' + LIMIT 1 + "#, + ) + .bind(id) + .fetch_optional(db) + .await?; + Ok(Some(CollectionInfo { id, title, rss_config, publishing_config, is_podcast, + podcast_config, + artwork_cas_hash, })) } /// Hent publiserte elementer (belongs_to-edges til samlingen). -/// For podcast-samlinger: inkluder enclosure-data via has_media-edges. +/// For podcast-samlinger: inkluder enclosure-data, transkripsjoner, kapitler og varighet. 
async fn fetch_feed_items( db: &PgPool, collection_id: Uuid, @@ -182,8 +230,19 @@ async fn fetch_feed_items( is_podcast: bool, ) -> Result, sqlx::Error> { if is_podcast { - // Podcast: join med has_media for enclosure-data - let rows: Vec<(Uuid, Option, Option, DateTime, Option, Option, Option, Option)> = sqlx::query_as( + let rows: Vec<( + Uuid, + Option, + Option, + DateTime, + Option, + Option, + Option, + Option, + Option, + Option, + Option, + )> = sqlx::query_as( r#" SELECT n.id, @@ -193,7 +252,17 @@ async fn fetch_feed_items( e.metadata, m.metadata->>'cas_hash' AS cas_hash, m.metadata->>'mime' AS mime, - COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size + COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size, + EXISTS( + SELECT 1 FROM transcription_segments ts WHERE ts.node_id = n.id LIMIT 1 + ) AS has_transcript, + (m.metadata->>'duration_secs')::bigint AS duration_secs, + (SELECT img.metadata->>'cas_hash' + FROM edges ie + JOIN nodes img ON img.id = ie.target_id + WHERE ie.source_id = n.id AND ie.edge_type = 'og_image' + LIMIT 1 + ) AS episode_image_cas FROM edges e JOIN nodes n ON n.id = e.source_id LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media' @@ -212,26 +281,65 @@ async fn fetch_feed_items( .fetch_all(db) .await?; + // Samle node-IDer for å hente kapitler i én spørring + let node_ids: Vec = rows.iter().map(|r| r.0).collect(); + + let chapter_rows: Vec<(Uuid, String, Option)> = if !node_ids.is_empty() { + sqlx::query_as( + r#" + SELECT + e.source_id, + COALESCE(e.metadata->>'at', '00:00:00') AS at, + e.metadata->>'title' AS title + FROM edges e + WHERE e.source_id = ANY($1) + AND e.edge_type = 'chapter' + ORDER BY e.source_id, e.metadata->>'at' + "#, + ) + .bind(&node_ids) + .fetch_all(db) + .await? 
+ } else { + vec![] + }; + + // Grupper kapitler per node + let mut chapters_map: std::collections::HashMap> = + std::collections::HashMap::new(); + for (node_id, at, title) in chapter_rows { + chapters_map + .entry(node_id) + .or_default() + .push(Chapter { at, title }); + } + Ok(rows .into_iter() - .map(|(id, title, content, created_at, edge_meta, cas_hash, mime, size)| { - let publish_at = edge_meta - .as_ref() - .and_then(|m| m.get("publish_at")) - .and_then(|v| v.as_str()) - .and_then(|s| s.parse::>().ok()); + .map( + |(id, title, content, created_at, edge_meta, cas_hash, mime, size, has_transcript, duration_secs, episode_image_cas)| { + let publish_at = edge_meta + .as_ref() + .and_then(|m| m.get("publish_at")) + .and_then(|v| v.as_str()) + .and_then(|s| s.parse::>().ok()); - FeedItem { - id, - title, - content, - created_at, - publish_at, - enclosure_url: cas_hash.map(|h| format!("/cas/{h}")), - enclosure_mime: mime, - enclosure_size: size, - } - }) + FeedItem { + id, + title, + content, + created_at, + publish_at, + enclosure_url: cas_hash.map(|h| format!("/cas/{h}")), + enclosure_mime: mime, + enclosure_size: size, + has_transcript: has_transcript.unwrap_or(false), + chapters: chapters_map.remove(&id).unwrap_or_default(), + duration_secs, + episode_image_cas, + } + }, + ) .collect()) } else { // Vanlig feed: kun noder, ingen enclosures @@ -277,6 +385,10 @@ async fn fetch_feed_items( enclosure_url: None, enclosure_mime: None, enclosure_size: None, + has_transcript: false, + chapters: vec![], + duration_secs: None, + episode_image_cas: None, } }) .collect()) @@ -304,19 +416,20 @@ fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &st .as_deref() .unwrap_or(""), ); + // Podcast-trait language overstyrer rss-trait language let language = collection - .rss_config + .podcast_config .language .as_deref() + .or(collection.rss_config.language.as_deref()) .unwrap_or("no"); let feed_url = format!("{base_url}/feed.xml"); let mut xml = 
String::with_capacity(4096); xml.push_str("\n"); - // iTunes namespace for podcast-feeds if collection.is_podcast { - xml.push_str("\n"); + xml.push_str("\n"); } else { xml.push_str("\n"); } @@ -335,6 +448,43 @@ fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &st xml.push_str(&format!(" {}\n", date.to_rfc2822())); } + // iTunes og Podcasting 2.0 channel-level tags + if collection.is_podcast { + let pc = &collection.podcast_config; + + if let Some(ref author) = pc.itunes_author { + xml.push_str(&format!( + " {}\n", + xml_escape(author) + )); + } + + if let Some(ref category) = pc.itunes_category { + xml.push_str(&format!( + " \n", + xml_escape(category) + )); + } + + let explicit = pc.explicit.unwrap_or(false); + xml.push_str(&format!( + " {}\n", + if explicit { "true" } else { "false" } + )); + + if let Some(ref cas_hash) = collection.artwork_cas_hash { + let artwork_url = format!("{base_url}/cas/{cas_hash}"); + xml.push_str(&format!( + " \n" + )); + } + + xml.push_str(" episodic\n"); + + // Podcasting 2.0: locked + xml.push_str(" no\n"); + } + for item in items { xml.push_str(" \n"); let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel")); @@ -367,6 +517,57 @@ fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &st )); } + // iTunes og Podcasting 2.0 item-level tags (kun for podcast) + if collection.is_podcast { + xml.push_str(&format!(" {title}\n")); + + if let Some(secs) = item.duration_secs { + let h = secs / 3600; + let m = (secs % 3600) / 60; + let s = secs % 60; + if h > 0 { + xml.push_str(&format!( + " {h:02}:{m:02}:{s:02}\n" + )); + } else { + xml.push_str(&format!( + " {m:02}:{s:02}\n" + )); + } + } + + let explicit = collection.podcast_config.explicit.unwrap_or(false); + xml.push_str(&format!( + " {}\n", + if explicit { "true" } else { "false" } + )); + + if let Some(ref cas) = item.episode_image_cas { + let img_url = format!("{base_url}/cas/{cas}"); + xml.push_str(&format!( + " \n" + 
)); + } + + // podcast:transcript — SRT fra transkripsjons-segmenter + if item.has_transcript { + let transcript_url = + format!("{base_url}/{}/transcript.srt", short_id(item.id)); + xml.push_str(&format!( + " \n" + )); + } + + // podcast:chapters — JSON fra chapter-edges + if !item.chapters.is_empty() { + let chapters_url = + format!("{base_url}/{}/chapters.json", short_id(item.id)); + xml.push_str(&format!( + " \n" + )); + } + } + xml.push_str(" \n"); } diff --git a/tasks.md b/tasks.md index e7ab368..654409b 100644 --- a/tasks.md +++ b/tasks.md @@ -422,8 +422,7 @@ Ingen castopod, ingen ekstern tjeneste. Import fra eksisterende podcast med prøveimport-flyt. ### RSS og metadata -- [~] 30.1 iTunes/Podcasting 2.0 RSS-tags: utvid synops-rss med `` og `` namespace. Tags fra samlingens podcast-trait metadata (author, category, explicit, language). Podcast:transcript og podcast:chapters fra eksisterende edges. - > Påbegynt: 2026-03-18T23:05 +- [x] 30.1 iTunes/Podcasting 2.0 RSS-tags: utvid synops-rss med `` og `` namespace. Tags fra samlingens podcast-trait metadata (author, category, explicit, language). Podcast:transcript og podcast:chapters fra eksisterende edges. - [ ] 30.2 Podcast-trait metadata: utvid podcast-trait med iTunes-felt (itunes_category, itunes_author, explicit, language, redirect_feed). Admin-UI for å redigere. 
### Statistikk diff --git a/tools/synops-rss/src/main.rs b/tools/synops-rss/src/main.rs index 92afb88..35bd81c 100644 --- a/tools/synops-rss/src/main.rs +++ b/tools/synops-rss/src/main.rs @@ -53,6 +53,17 @@ struct RssTraitConfig { language: Option, } +#[derive(Deserialize, Default)] +#[allow(dead_code)] +struct PodcastTraitConfig { + itunes_author: Option, + itunes_category: Option, + explicit: Option, + language: Option, + #[serde(default)] + redirect_feed: Option, +} + #[derive(Deserialize, Default)] struct PublishingTraitConfig { slug: Option, @@ -70,6 +81,9 @@ struct CollectionInfo { rss_config: RssTraitConfig, publishing_config: PublishingTraitConfig, is_podcast: bool, + podcast_config: PodcastTraitConfig, + /// CAS-hash for samlingens artwork (fra og_image-edge) + artwork_cas_hash: Option, } struct FeedItem { @@ -81,6 +95,20 @@ struct FeedItem { enclosure_url: Option, enclosure_mime: Option, enclosure_size: Option, + /// Episoden har transkripsjons-segmenter + has_transcript: bool, + /// Kapittelmarkører (tidspunkt + tittel) + chapters: Vec, + /// Varighet i sekunder (fra media-metadata) + duration_secs: Option, + /// Episodens artwork CAS-hash (fra og_image-edge) + episode_image_cas: Option, +} + +#[allow(dead_code)] +struct Chapter { + at: String, + title: Option, } // ============================================================================= @@ -230,6 +258,27 @@ async fn find_collection( let is_podcast = traits.get("podcast").is_some(); + let podcast_config: PodcastTraitConfig = traits + .get("podcast") + .cloned() + .map(|v| serde_json::from_value(v).unwrap_or_default()) + .unwrap_or_default(); + + // Hent samlingens artwork via og_image-edge + let artwork_cas_hash: Option = sqlx::query_scalar( + r#" + SELECT m.metadata->>'cas_hash' + FROM edges e + JOIN nodes m ON m.id = e.target_id + WHERE e.source_id = $1 + AND e.edge_type = 'og_image' + LIMIT 1 + "#, + ) + .bind(id) + .fetch_optional(db) + .await?; + Ok(Some(CollectionInfo { id, title, @@ -237,11 
+286,13 @@ async fn find_collection( rss_config, publishing_config, is_podcast, + podcast_config, + artwork_cas_hash, })) } /// Hent publiserte elementer (belongs_to-edges til samlingen). -/// For podcast-samlinger: inkluder enclosure-data via has_media-edges. +/// For podcast-samlinger: inkluder enclosure-data, transkripsjoner, kapitler og varighet. async fn fetch_feed_items( db: &sqlx::PgPool, collection_id: Uuid, @@ -249,6 +300,7 @@ async fn fetch_feed_items( is_podcast: bool, ) -> Result, sqlx::Error> { if is_podcast { + // Podcast: join med has_media for enclosure, sjekk transkripsjoner, varighet og episode-bilde let rows: Vec<( Uuid, Option, @@ -258,6 +310,9 @@ async fn fetch_feed_items( Option, Option, Option, + Option, + Option, + Option, )> = sqlx::query_as( r#" SELECT @@ -268,7 +323,17 @@ async fn fetch_feed_items( e.metadata, m.metadata->>'cas_hash' AS cas_hash, m.metadata->>'mime' AS mime, - COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size + COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size, + EXISTS( + SELECT 1 FROM transcription_segments ts WHERE ts.node_id = n.id LIMIT 1 + ) AS has_transcript, + (m.metadata->>'duration_secs')::bigint AS duration_secs, + (SELECT img.metadata->>'cas_hash' + FROM edges ie + JOIN nodes img ON img.id = ie.target_id + WHERE ie.source_id = n.id AND ie.edge_type = 'og_image' + LIMIT 1 + ) AS episode_image_cas FROM edges e JOIN nodes n ON n.id = e.source_id LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media' @@ -287,10 +352,43 @@ async fn fetch_feed_items( .fetch_all(db) .await?; + // Samle node-IDer for å hente kapitler i én spørring + let node_ids: Vec = rows.iter().map(|r| r.0).collect(); + + let chapter_rows: Vec<(Uuid, String, Option)> = if !node_ids.is_empty() { + sqlx::query_as( + r#" + SELECT + e.source_id, + COALESCE(e.metadata->>'at', '00:00:00') AS at, + e.metadata->>'title' AS title + FROM edges e + WHERE e.source_id = 
ANY($1) + AND e.edge_type = 'chapter' + ORDER BY e.source_id, e.metadata->>'at' + "#, + ) + .bind(&node_ids) + .fetch_all(db) + .await? + } else { + vec![] + }; + + // Grupper kapitler per node + let mut chapters_map: std::collections::HashMap> = + std::collections::HashMap::new(); + for (node_id, at, title) in chapter_rows { + chapters_map + .entry(node_id) + .or_default() + .push(Chapter { at, title }); + } + Ok(rows .into_iter() .map( - |(id, title, content, created_at, edge_meta, cas_hash, mime, size)| { + |(id, title, content, created_at, edge_meta, cas_hash, mime, size, has_transcript, duration_secs, episode_image_cas)| { let publish_at = edge_meta .as_ref() .and_then(|m| m.get("publish_at")) @@ -306,6 +404,10 @@ async fn fetch_feed_items( enclosure_url: cas_hash.map(|h| format!("/cas/{h}")), enclosure_mime: mime, enclosure_size: size, + has_transcript: has_transcript.unwrap_or(false), + chapters: chapters_map.remove(&id).unwrap_or_default(), + duration_secs, + episode_image_cas, } }, ) @@ -359,6 +461,10 @@ async fn fetch_feed_items( enclosure_url: None, enclosure_mime: None, enclosure_size: None, + has_transcript: false, + chapters: vec![], + duration_secs: None, + episode_image_cas: None, } }) .collect()) @@ -386,10 +492,12 @@ fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &st .as_deref() .unwrap_or(""), ); + // Podcast-trait language overstyrer rss-trait language let language = collection - .rss_config + .podcast_config .language .as_deref() + .or(collection.rss_config.language.as_deref()) .unwrap_or("no"); let feed_url = format!("{base_url}/feed.xml"); @@ -397,7 +505,7 @@ fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &st xml.push_str("\n"); if collection.is_podcast { - xml.push_str("\n"); + xml.push_str("\n"); } else { xml.push_str("\n"); } @@ -419,6 +527,46 @@ fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &st )); } + // iTunes og Podcasting 2.0 channel-level tags + 
if collection.is_podcast { + let pc = &collection.podcast_config; + + if let Some(ref author) = pc.itunes_author { + xml.push_str(&format!( + " {}\n", + xml_escape(author) + )); + } + + if let Some(ref category) = pc.itunes_category { + xml.push_str(&format!( + " \n", + xml_escape(category) + )); + } + + let explicit = pc.explicit.unwrap_or(false); + xml.push_str(&format!( + " {}\n", + if explicit { "true" } else { "false" } + )); + + // Artwork: fra og_image-edge eller samlingens podcast-bilde + if let Some(ref cas_hash) = collection.artwork_cas_hash { + let artwork_url = format!("{base_url}/cas/{cas_hash}"); + xml.push_str(&format!( + " \n" + )); + } + + xml.push_str(&format!( + " episodic\n" + )); + + // Podcasting 2.0: locked + xml.push_str(" no\n"); + } + for item in items { xml.push_str(" \n"); let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel")); @@ -452,6 +600,61 @@ fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &st )); } + // iTunes og Podcasting 2.0 item-level tags (kun for podcast) + if collection.is_podcast { + // itunes:title (samme som title, men eksplisitt for iTunes) + xml.push_str(&format!(" {title}\n")); + + // itunes:duration + if let Some(secs) = item.duration_secs { + let h = secs / 3600; + let m = (secs % 3600) / 60; + let s = secs % 60; + if h > 0 { + xml.push_str(&format!( + " {h:02}:{m:02}:{s:02}\n" + )); + } else { + xml.push_str(&format!( + " {m:02}:{s:02}\n" + )); + } + } + + // itunes:explicit (per episode, arver fra kanal) + let explicit = collection.podcast_config.explicit.unwrap_or(false); + xml.push_str(&format!( + " {}\n", + if explicit { "true" } else { "false" } + )); + + // Episode-bilde + if let Some(ref cas) = item.episode_image_cas { + let img_url = format!("{base_url}/cas/{cas}"); + xml.push_str(&format!( + " \n" + )); + } + + // podcast:transcript — SRT fra transkripsjons-segmenter + if item.has_transcript { + let transcript_url = + format!("{base_url}/{}/transcript.srt", 
short_id(item.id)); + xml.push_str(&format!( + " \n" + )); + } + + // podcast:chapters — JSON fra chapter-edges + if !item.chapters.is_empty() { + let chapters_url = + format!("{base_url}/{}/chapters.json", short_id(item.id)); + xml.push_str(&format!( + " \n" + )); + } + } + xml.push_str(" \n"); }