// synops/tools/synops-rss/src/main.rs

// synops-rss — RSS/Atom-feed generering.
//
// Genererer RSS 2.0 eller Atom 1.0 feed for samlinger med `rss`-trait.
// Erstatter RSS-logikken i maskinrommet/src/rss.rs som et frittstående
// CLI-verktøy i tråd med unix-filosofien.
//
// Miljøvariabler:
//   DATABASE_URL  — PostgreSQL-tilkobling (påkrevd)
//
// Ref: docs/retninger/unix_filosofi.md, docs/concepts/publisering.md

use chrono::{DateTime, Utc};
use clap::Parser;
use serde::Deserialize;
use std::process;
use uuid::Uuid;

// =============================================================================
// CLI
// =============================================================================

// Command-line arguments, parsed with clap's derive API.
//
// NOTE: the `///` doc comments in this struct are intentionally left as they
// are — clap's derive macro uses field doc comments as `--help` text, so they
// are effectively user-facing strings.
//
// `main` exits with an error when neither `collection_id` nor `slug` is given.
/// Generer RSS/Atom-feed for en samling.
#[derive(Parser)]
#[command(name = "synops-rss", about = "RSS/Atom-feed generering for samlinger")]
struct Cli {
    // Takes precedence over `slug` when both are supplied (`find_collection`
    // checks the UUID first).
    /// Samlings-ID (UUID)
    #[arg(long)]
    collection_id: Option<Uuid>,

    // Matched against `metadata.traits.publishing.slug` of the collection node.
    /// Samlingens slug (alternativ til --collection-id)
    #[arg(long)]
    slug: Option<String>,

    // Overrides the rss trait's `format`. Only the exact value "atom" selects
    // Atom; every other value produces RSS.
    /// Overstyr feed-format: rss eller atom
    #[arg(long)]
    format: Option<String>,

    // Overrides the rss trait's `max_items`; when neither is set, `main` uses 50.
    /// Maks antall elementer i feeden
    #[arg(long)]
    max_items: Option<i64>,
}

// =============================================================================
// Konfigurasjon fra trait-metadata
// =============================================================================

/// Settings deserialized from the collection's `metadata.traits.rss` JSON value.
///
/// Every field is optional; missing values are filled in by CLI flags or by
/// the defaults applied in `main` and the feed builders.
#[derive(Deserialize, Default)]
struct RssTraitConfig {
    /// Feed format; `"atom"` selects Atom, anything else results in RSS.
    format: Option<String>,
    /// Feed title; preferred over the collection node's own title.
    title: Option<String>,
    /// Feed description (RSS `<description>` / Atom `<subtitle>`); empty when absent.
    description: Option<String>,
    /// Item limit used when `--max-items` is not passed.
    max_items: Option<i64>,
    /// Feed language; the podcast trait's `language` wins over this one, and
    /// `"no"` is used when neither is set.
    language: Option<String>,
}

/// Settings deserialized from the collection's `metadata.traits.podcast` JSON value.
#[derive(Deserialize, Default)]
// `redirect_feed` is deserialized but not read by the code visible in this file.
#[allow(dead_code)]
struct PodcastTraitConfig {
    /// Emitted as the channel-level `<itunes:author>`.
    itunes_author: Option<String>,
    /// Emitted as the `text` attribute of the channel-level `<itunes:category>`.
    itunes_category: Option<String>,
    /// Drives `<itunes:explicit>` on the channel and on every item; `false` when absent.
    explicit: Option<bool>,
    /// Feed language; takes precedence over the rss trait's `language`.
    language: Option<String>,
    /// Not used by the feed builders in this file.
    #[serde(default)]
    redirect_feed: Option<String>,
}

/// Settings deserialized from the collection's `metadata.traits.publishing` JSON value.
#[derive(Deserialize, Default)]
struct PublishingTraitConfig {
    /// Public slug; used for `--slug` lookups and in the default base URL.
    /// `find_collection` falls back to the collection UUID when it is absent.
    slug: Option<String>,
    /// When set, the feed's base URL becomes `https://{custom_domain}`.
    custom_domain: Option<String>,
}

// =============================================================================
// Datamodeller
// =============================================================================

/// Everything the feed builders need to know about the collection itself.
struct CollectionInfo {
    /// Node ID of the collection.
    id: Uuid,
    /// Title of the collection node; used when the rss trait has no title.
    title: Option<String>,
    /// Publishing slug, or the stringified UUID when no slug is configured.
    slug: String,
    /// Parsed `rss` trait (defaults when missing or malformed).
    rss_config: RssTraitConfig,
    /// Parsed `publishing` trait (defaults when missing or malformed).
    publishing_config: PublishingTraitConfig,
    /// True when the collection's traits contain a `podcast` key.
    is_podcast: bool,
    /// Parsed `podcast` trait (defaults when missing or malformed).
    podcast_config: PodcastTraitConfig,
    /// CAS hash of the collection's artwork (from its `og_image` edge)
    artwork_cas_hash: Option<String>,
}

/// One feed entry. The podcast-only fields are left empty for non-podcast collections.
struct FeedItem {
    /// Node ID of the item; also the basis for its short URL and GUID.
    id: Uuid,
    /// Item title; the builders substitute "Uten tittel" when absent.
    title: Option<String>,
    /// Item body; truncated to 500 characters for the description/summary.
    content: Option<String>,
    /// Creation time of the node; used as the date when `publish_at` is absent.
    created_at: DateTime<Utc>,
    /// Publication time parsed from the `belongs_to` edge metadata, if present and valid.
    publish_at: Option<DateTime<Utc>>,
    /// Path of the enclosure relative to the base URL (`/cas/{hash}`).
    enclosure_url: Option<String>,
    /// MIME type of the enclosure; builders default to "audio/mpeg".
    enclosure_mime: Option<String>,
    /// Size of the enclosure in bytes; builders default to 0.
    enclosure_size: Option<i64>,
    /// The episode has transcription segments
    has_transcript: bool,
    /// Chapter markers (timestamp + title)
    chapters: Vec<Chapter>,
    /// Duration in seconds (from the media metadata)
    duration_secs: Option<i64>,
    /// CAS hash of the episode's artwork (from its `og_image` edge)
    episode_image_cas: Option<String>,
}

/// A chapter marker read from a `chapter` edge.
// The fields are stored but never read in this file: the RSS builder only
// checks whether an item has any chapters at all.
#[allow(dead_code)]
struct Chapter {
    /// Timestamp string from the edge metadata (`'00:00:00'` when the edge has none).
    at: String,
    /// Optional chapter title from the edge metadata.
    title: Option<String>,
}

// =============================================================================
// Main
// =============================================================================

/// Entry point: resolve the collection, load its items and print the feed XML
/// to stdout.
///
/// Every failure is reported on stderr and ends the process with exit code 1.
#[tokio::main]
async fn main() {
    synops_common::logging::init("synops_rss");

    let args = Cli::parse();

    // The collection must be identified one way or the other.
    if let (None, None) = (&args.collection_id, &args.slug) {
        eprintln!("Feil: Enten --collection-id eller --slug må oppgis");
        process::exit(1);
    }

    let db = synops_common::db::connect().await.unwrap_or_else(|e| {
        eprintln!("Feil: Kunne ikke koble til database: {e}");
        process::exit(1)
    });

    let lookup = find_collection(&db, args.collection_id, args.slug.as_deref()).await;
    let collection = match lookup {
        Err(e) => {
            eprintln!("Feil: Database-feil ved oppslag: {e}");
            process::exit(1);
        }
        Ok(None) => {
            eprintln!("Feil: Fant ingen samling med rss-trait");
            process::exit(1);
        }
        Ok(Some(found)) => found,
    };

    // Precedence: CLI flag, then trait configuration, then the built-in default.
    let max_items = match (args.max_items, collection.rss_config.max_items) {
        (Some(limit), _) | (None, Some(limit)) => limit,
        (None, None) => 50,
    };

    let items = fetch_feed_items(&db, collection.id, max_items, collection.is_podcast)
        .await
        .unwrap_or_else(|e| {
            eprintln!("Feil: Kunne ikke hente feed-elementer: {e}");
            process::exit(1)
        });

    // A custom domain replaces the shared publishing host entirely.
    let base_url = match collection.publishing_config.custom_domain.as_deref() {
        Some(domain) => format!("https://{domain}"),
        None => format!("https://synops.no/pub/{}", collection.slug),
    };

    // Same precedence as for `max_items`: CLI flag, trait configuration, default.
    let format: &str = match (&args.format, &collection.rss_config.format) {
        (Some(chosen), _) | (None, Some(chosen)) => chosen.as_str(),
        (None, None) => "rss",
    };

    // Only the exact value "atom" selects Atom; anything else yields RSS.
    let xml = if format == "atom" {
        build_atom_feed(&collection, &items, &base_url)
    } else {
        build_rss_feed(&collection, &items, &base_url)
    };

    tracing::info!(
        collection_id = %collection.id,
        slug = %collection.slug,
        format = format,
        items = items.len(),
        "Feed generert"
    );

    print!("{xml}");
}

// =============================================================================
// Database-spørringer
// =============================================================================

/// Find a collection node that carries the `rss` trait, by UUID or by slug.
///
/// * `collection_id` – looked up first; when present, `slug` is ignored.
/// * `slug` – matched against `metadata.traits.publishing.slug`.
///
/// Returns `Ok(None)` when neither identifier is given or no matching
/// collection exists. Trait configuration that is missing or cannot be
/// deserialized falls back to the respective `Default` (best effort).
///
/// # Errors
/// Returns the underlying `sqlx::Error` when a query fails.
async fn find_collection(
    db: &sqlx::PgPool,
    collection_id: Option<Uuid>,
    slug: Option<&str>,
) -> Result<Option<CollectionInfo>, sqlx::Error> {
    let row: Option<(Uuid, Option<String>, serde_json::Value)> = if let Some(id) = collection_id {
        sqlx::query_as(
            r#"
            SELECT id, title, metadata
            FROM nodes
            WHERE id = $1
              AND node_kind = 'collection'
              AND metadata->'traits' ? 'rss'
            LIMIT 1
            "#,
        )
        .bind(id)
        .fetch_optional(db)
        .await?
    } else if let Some(slug) = slug {
        sqlx::query_as(
            r#"
            SELECT id, title, metadata
            FROM nodes
            WHERE node_kind = 'collection'
              AND metadata->'traits'->'publishing'->>'slug' = $1
              AND metadata->'traits' ? 'rss'
            LIMIT 1
            "#,
        )
        .bind(slug)
        .fetch_optional(db)
        .await?
    } else {
        return Ok(None);
    };

    let Some((id, title, metadata)) = row else {
        return Ok(None);
    };

    let traits = metadata
        .get("traits")
        .cloned()
        .unwrap_or(serde_json::Value::Null);

    let rss_config: RssTraitConfig = traits
        .get("rss")
        .cloned()
        .map(|v| serde_json::from_value(v).unwrap_or_default())
        .unwrap_or_default();

    let publishing_config: PublishingTraitConfig = traits
        .get("publishing")
        .cloned()
        .map(|v| serde_json::from_value(v).unwrap_or_default())
        .unwrap_or_default();

    // Without a configured slug the collection is addressed by its UUID.
    let slug = publishing_config
        .slug
        .clone()
        .unwrap_or_else(|| id.to_string());

    // Presence of the key is enough; its value may still fail to parse below.
    let is_podcast = traits.get("podcast").is_some();

    let podcast_config: PodcastTraitConfig = traits
        .get("podcast")
        .cloned()
        .map(|v| serde_json::from_value(v).unwrap_or_default())
        .unwrap_or_default();

    // Fetch the collection's artwork via its og_image edge.
    //
    // `metadata->>'cas_hash'` is NULL when the target node has no such key, so
    // the column must be decoded as `Option<String>`. Decoding it as a plain
    // `String` would turn a missing hash into a decode error and fail the
    // whole lookup. Outer `Option`: was there a row; inner: was the value set.
    let artwork_row: Option<Option<String>> = sqlx::query_scalar(
        r#"
        SELECT m.metadata->>'cas_hash'
        FROM edges e
        JOIN nodes m ON m.id = e.target_id
        WHERE e.source_id = $1
          AND e.edge_type = 'og_image'
        LIMIT 1
        "#,
    )
    .bind(id)
    .fetch_optional(db)
    .await?;
    let artwork_cas_hash: Option<String> = artwork_row.flatten();

    Ok(Some(CollectionInfo {
        id,
        title,
        slug,
        rss_config,
        publishing_config,
        is_podcast,
        podcast_config,
        artwork_cas_hash,
    }))
}

/// Hent publiserte elementer (belongs_to-edges til samlingen).
/// For podcast-samlinger: inkluder enclosure-data, transkripsjoner, kapitler og varighet.
async fn fetch_feed_items(
    db: &sqlx::PgPool,
    collection_id: Uuid,
    max_items: i64,
    is_podcast: bool,
) -> Result<Vec<FeedItem>, sqlx::Error> {
    if is_podcast {
        // Podcast: join med has_media for enclosure, sjekk transkripsjoner, varighet og episode-bilde
        let rows: Vec<(
            Uuid,
            Option<String>,
            Option<String>,
            DateTime<Utc>,
            Option<serde_json::Value>,
            Option<String>,
            Option<String>,
            Option<i64>,
            Option<bool>,
            Option<i64>,
            Option<String>,
        )> = sqlx::query_as(
            r#"
            SELECT
                n.id,
                n.title,
                n.content,
                n.created_at,
                e.metadata,
                m.metadata->>'cas_hash' AS cas_hash,
                m.metadata->>'mime' AS mime,
                COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size,
                EXISTS(
                    SELECT 1 FROM transcription_segments ts WHERE ts.node_id = n.id LIMIT 1
                ) AS has_transcript,
                (m.metadata->>'duration_secs')::bigint AS duration_secs,
                (SELECT img.metadata->>'cas_hash'
                 FROM edges ie
                 JOIN nodes img ON img.id = ie.target_id
                 WHERE ie.source_id = n.id AND ie.edge_type = 'og_image'
                 LIMIT 1
                ) AS episode_image_cas
            FROM edges e
            JOIN nodes n ON n.id = e.source_id
            LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media'
            LEFT JOIN nodes m ON m.id = me.target_id AND m.node_kind = 'media'
            WHERE e.target_id = $1
              AND e.edge_type = 'belongs_to'
            ORDER BY COALESCE(
                (e.metadata->>'publish_at')::timestamptz,
                n.created_at
            ) DESC
            LIMIT $2
            "#,
        )
        .bind(collection_id)
        .bind(max_items)
        .fetch_all(db)
        .await?;

        // Samle node-IDer for å hente kapitler i én spørring
        let node_ids: Vec<Uuid> = rows.iter().map(|r| r.0).collect();

        let chapter_rows: Vec<(Uuid, String, Option<String>)> = if !node_ids.is_empty() {
            sqlx::query_as(
                r#"
                SELECT
                    e.source_id,
                    COALESCE(e.metadata->>'at', '00:00:00') AS at,
                    e.metadata->>'title' AS title
                FROM edges e
                WHERE e.source_id = ANY($1)
                  AND e.edge_type = 'chapter'
                ORDER BY e.source_id, e.metadata->>'at'
                "#,
            )
            .bind(&node_ids)
            .fetch_all(db)
            .await?
        } else {
            vec![]
        };

        // Grupper kapitler per node
        let mut chapters_map: std::collections::HashMap<Uuid, Vec<Chapter>> =
            std::collections::HashMap::new();
        for (node_id, at, title) in chapter_rows {
            chapters_map
                .entry(node_id)
                .or_default()
                .push(Chapter { at, title });
        }

        Ok(rows
            .into_iter()
            .map(
                |(id, title, content, created_at, edge_meta, cas_hash, mime, size, has_transcript, duration_secs, episode_image_cas)| {
                    let publish_at = edge_meta
                        .as_ref()
                        .and_then(|m| m.get("publish_at"))
                        .and_then(|v| v.as_str())
                        .and_then(|s| s.parse::<DateTime<Utc>>().ok());

                    FeedItem {
                        id,
                        title,
                        content,
                        created_at,
                        publish_at,
                        enclosure_url: cas_hash.map(|h| format!("/cas/{h}")),
                        enclosure_mime: mime,
                        enclosure_size: size,
                        has_transcript: has_transcript.unwrap_or(false),
                        chapters: chapters_map.remove(&id).unwrap_or_default(),
                        duration_secs,
                        episode_image_cas,
                    }
                },
            )
            .collect())
    } else {
        let rows: Vec<(
            Uuid,
            Option<String>,
            Option<String>,
            DateTime<Utc>,
            Option<serde_json::Value>,
        )> = sqlx::query_as(
            r#"
            SELECT
                n.id,
                n.title,
                n.content,
                n.created_at,
                e.metadata
            FROM edges e
            JOIN nodes n ON n.id = e.source_id
            WHERE e.target_id = $1
              AND e.edge_type = 'belongs_to'
            ORDER BY COALESCE(
                (e.metadata->>'publish_at')::timestamptz,
                n.created_at
            ) DESC
            LIMIT $2
            "#,
        )
        .bind(collection_id)
        .bind(max_items)
        .fetch_all(db)
        .await?;

        Ok(rows
            .into_iter()
            .map(|(id, title, content, created_at, edge_meta)| {
                let publish_at = edge_meta
                    .as_ref()
                    .and_then(|m| m.get("publish_at"))
                    .and_then(|v| v.as_str())
                    .and_then(|s| s.parse::<DateTime<Utc>>().ok());

                FeedItem {
                    id,
                    title,
                    content,
                    created_at,
                    publish_at,
                    enclosure_url: None,
                    enclosure_mime: None,
                    enclosure_size: None,
                    has_transcript: false,
                    chapters: vec![],
                    duration_secs: None,
                    episode_image_cas: None,
                }
            })
            .collect())
    }
}

// =============================================================================
// XML-generering
// =============================================================================

/// Build an RSS 2.0 document for `collection` and return it as a string.
///
/// * `collection` – feed-level settings (title, description, language,
///   podcast options, artwork).
/// * `items` – entries in output order; the first one supplies `<lastBuildDate>`.
/// * `base_url` – site root without a trailing slash; the feed, item and CAS
///   URLs are all built on top of it.
///
/// For podcast collections the iTunes and Podcasting 2.0 namespaces are
/// declared and the corresponding channel- and item-level tags are emitted.
///
/// Every value that originates from configuration or the database is
/// XML-escaped before interpolation — including URLs, the language code and
/// the enclosure MIME type — so a stray `&`, `<` or `"` cannot break the
/// document.
fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
    let channel_title = xml_escape(
        collection
            .rss_config
            .title
            .as_deref()
            .or(collection.title.as_deref())
            .unwrap_or("Untitled Feed"),
    );
    let channel_desc = xml_escape(
        collection
            .rss_config
            .description
            .as_deref()
            .unwrap_or(""),
    );
    // The podcast trait's language overrides the rss trait's language.
    let language = xml_escape(
        collection
            .podcast_config
            .language
            .as_deref()
            .or(collection.rss_config.language.as_deref())
            .unwrap_or("no"),
    );
    // Escape the root once; every URL below is assembled from this escaped form.
    let base_url = xml_escape(base_url);
    let feed_url = format!("{base_url}/feed.xml");
    // Episodes inherit the channel's explicit flag, so compute it once.
    let explicit = if collection.podcast_config.explicit.unwrap_or(false) {
        "true"
    } else {
        "false"
    };

    let mut xml = String::with_capacity(4096);
    xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");

    if collection.is_podcast {
        xml.push_str("<rss version=\"2.0\" xmlns:itunes=\"http://www.itunes.com/dtds/podcast-1.0.dtd\" xmlns:podcast=\"https://podcastindex.org/namespace/1.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
    } else {
        xml.push_str("<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
    }

    xml.push_str("<channel>\n");
    xml.push_str(&format!("  <title>{channel_title}</title>\n"));
    xml.push_str(&format!("  <link>{base_url}</link>\n"));
    xml.push_str(&format!("  <description>{channel_desc}</description>\n"));
    xml.push_str(&format!("  <language>{language}</language>\n"));
    xml.push_str(&format!(
        "  <atom:link href=\"{feed_url}\" rel=\"self\" type=\"application/rss+xml\"/>\n"
    ));

    // Items arrive newest first, so the first one dates the whole feed.
    if let Some(newest) = items.first() {
        let date = newest.publish_at.unwrap_or(newest.created_at);
        xml.push_str(&format!(
            "  <lastBuildDate>{}</lastBuildDate>\n",
            date.to_rfc2822()
        ));
    }

    // iTunes and Podcasting 2.0 channel-level tags.
    if collection.is_podcast {
        let pc = &collection.podcast_config;

        if let Some(author) = pc.itunes_author.as_deref() {
            xml.push_str(&format!(
                "  <itunes:author>{}</itunes:author>\n",
                xml_escape(author)
            ));
        }

        if let Some(category) = pc.itunes_category.as_deref() {
            xml.push_str(&format!(
                "  <itunes:category text=\"{}\"/>\n",
                xml_escape(category)
            ));
        }

        xml.push_str(&format!(
            "  <itunes:explicit>{explicit}</itunes:explicit>\n"
        ));

        // Artwork comes from the collection's og_image edge.
        if let Some(cas_hash) = collection.artwork_cas_hash.as_deref() {
            xml.push_str(&format!(
                "  <itunes:image href=\"{base_url}/cas/{}\"/>\n",
                xml_escape(cas_hash)
            ));
        }

        xml.push_str("  <itunes:type>episodic</itunes:type>\n");

        // Podcasting 2.0: locked
        xml.push_str("  <podcast:locked>no</podcast:locked>\n");
    }

    for item in items {
        // First eight characters of the UUID; hex digits only, no escaping needed.
        let item_slug = short_id(item.id);

        xml.push_str("  <item>\n");
        let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel"));
        xml.push_str(&format!("    <title>{title}</title>\n"));

        xml.push_str(&format!("    <link>{base_url}/{item_slug}</link>\n"));

        xml.push_str(&format!(
            "    <guid isPermaLink=\"false\">{}</guid>\n",
            item.id
        ));

        let pub_date = item.publish_at.unwrap_or(item.created_at);
        xml.push_str(&format!(
            "    <pubDate>{}</pubDate>\n",
            pub_date.to_rfc2822()
        ));

        // Truncate first, then escape, so an entity is never cut in half.
        if let Some(content) = item.content.as_deref() {
            let desc = xml_escape(&truncate_description(content, 500));
            xml.push_str(&format!("    <description>{desc}</description>\n"));
        }

        if let Some(enc_path) = item.enclosure_url.as_deref() {
            let enc_url = format!("{base_url}{}", xml_escape(enc_path));
            let mime = xml_escape(item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"));
            let size = item.enclosure_size.unwrap_or(0);
            xml.push_str(&format!(
                "    <enclosure url=\"{enc_url}\" length=\"{size}\" type=\"{mime}\"/>\n"
            ));
        }

        // iTunes and Podcasting 2.0 item-level tags (podcasts only).
        if collection.is_podcast {
            // itunes:title repeats the title explicitly for iTunes.
            xml.push_str(&format!("    <itunes:title>{title}</itunes:title>\n"));

            // itunes:duration as HH:MM:SS, or MM:SS below one hour. A negative
            // value can only come from corrupt metadata; omit the tag then
            // rather than emit something like "-1:-30".
            if let Some(secs) = item.duration_secs.filter(|secs| *secs >= 0) {
                let h = secs / 3600;
                let m = (secs % 3600) / 60;
                let s = secs % 60;
                if h > 0 {
                    xml.push_str(&format!(
                        "    <itunes:duration>{h:02}:{m:02}:{s:02}</itunes:duration>\n"
                    ));
                } else {
                    xml.push_str(&format!(
                        "    <itunes:duration>{m:02}:{s:02}</itunes:duration>\n"
                    ));
                }
            }

            // itunes:explicit per episode, inherited from the channel.
            xml.push_str(&format!(
                "    <itunes:explicit>{explicit}</itunes:explicit>\n"
            ));

            // Episode image.
            if let Some(cas) = item.episode_image_cas.as_deref() {
                xml.push_str(&format!(
                    "    <itunes:image href=\"{base_url}/cas/{}\"/>\n",
                    xml_escape(cas)
                ));
            }

            // podcast:transcript — SRT built from the transcription segments.
            if item.has_transcript {
                xml.push_str(&format!(
                    "    <podcast:transcript url=\"{base_url}/{item_slug}/transcript.srt\" type=\"application/srt\"/>\n"
                ));
            }

            // podcast:chapters — JSON built from the chapter edges.
            if !item.chapters.is_empty() {
                xml.push_str(&format!(
                    "    <podcast:chapters url=\"{base_url}/{item_slug}/chapters.json\" type=\"application/json+chapters\"/>\n"
                ));
            }
        }

        xml.push_str("  </item>\n");
    }

    xml.push_str("</channel>\n");
    xml.push_str("</rss>\n");
    xml
}

/// Build an Atom 1.0 document for `collection` and return it as a string.
///
/// * `collection` – supplies the feed title and subtitle.
/// * `items` – entries in output order; the first one supplies the feed's
///   `<updated>` value (the current time is used for an empty feed).
/// * `base_url` – site root without a trailing slash; also used as the feed `<id>`.
///
/// Podcast-specific tags are not emitted here; enclosures are expressed as
/// `<link rel="enclosure">`.
///
/// URLs and the enclosure MIME type are XML-escaped before interpolation, so
/// a stray `&`, `<` or `"` in configuration or metadata cannot break the
/// document.
fn build_atom_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
    let feed_title = xml_escape(
        collection
            .rss_config
            .title
            .as_deref()
            .or(collection.title.as_deref())
            .unwrap_or("Untitled Feed"),
    );
    let feed_desc = xml_escape(
        collection
            .rss_config
            .description
            .as_deref()
            .unwrap_or(""),
    );
    // Escape the root once; every URL below is assembled from this escaped form.
    let base_url = xml_escape(base_url);
    let feed_url = format!("{base_url}/feed.xml");

    // Items arrive newest first, so the first one dates the whole feed.
    let updated = items
        .first()
        .map(|i| i.publish_at.unwrap_or(i.created_at))
        .unwrap_or_else(Utc::now);

    let mut xml = String::with_capacity(4096);
    xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    xml.push_str("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n");
    xml.push_str(&format!("  <title>{feed_title}</title>\n"));
    xml.push_str(&format!("  <subtitle>{feed_desc}</subtitle>\n"));
    xml.push_str(&format!(
        "  <link href=\"{feed_url}\" rel=\"self\" type=\"application/atom+xml\"/>\n"
    ));
    xml.push_str(&format!(
        "  <link href=\"{base_url}\" rel=\"alternate\"/>\n"
    ));
    xml.push_str(&format!("  <id>{base_url}</id>\n"));
    xml.push_str(&format!(
        "  <updated>{}</updated>\n",
        updated.to_rfc3339()
    ));

    for item in items {
        xml.push_str("  <entry>\n");
        let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel"));
        xml.push_str(&format!("    <title>{title}</title>\n"));

        // The short ID consists of hex digits only, so it needs no escaping.
        let item_url = format!("{base_url}/{}", short_id(item.id));
        xml.push_str(&format!(
            "    <link href=\"{item_url}\" rel=\"alternate\"/>\n"
        ));

        xml.push_str(&format!("    <id>urn:uuid:{}</id>\n", item.id));

        // No separate modification time is tracked, so both elements share one date.
        let pub_date = item.publish_at.unwrap_or(item.created_at).to_rfc3339();
        xml.push_str(&format!("    <updated>{pub_date}</updated>\n"));
        xml.push_str(&format!("    <published>{pub_date}</published>\n"));

        // Truncate first, then escape, so an entity is never cut in half.
        if let Some(content) = item.content.as_deref() {
            let summary = xml_escape(&truncate_description(content, 500));
            xml.push_str(&format!("    <summary>{summary}</summary>\n"));
        }

        if let Some(enc_path) = item.enclosure_url.as_deref() {
            let enc_url = format!("{base_url}{}", xml_escape(enc_path));
            let mime = xml_escape(item.enclosure_mime.as_deref().unwrap_or("audio/mpeg"));
            let size = item.enclosure_size.unwrap_or(0);
            xml.push_str(&format!(
                "    <link rel=\"enclosure\" href=\"{enc_url}\" type=\"{mime}\" length=\"{size}\"/>\n"
            ));
        }

        xml.push_str("  </entry>\n");
    }

    xml.push_str("</feed>\n");
    xml
}

// =============================================================================
// Hjelpefunksjoner
// =============================================================================

/// Escape `s` for use in XML element text and attribute values.
///
/// The five predefined entities (`&`, `<`, `>`, `"`, `'`) are replaced by
/// their entity references. Characters that XML 1.0 does not allow at all —
/// C0 control characters other than TAB, LF and CR, plus U+FFFE and U+FFFF —
/// are dropped, because they cannot be represented even as character
/// references and would make the whole feed not well-formed.
///
/// The input is processed in a single pass with one output allocation.
fn xml_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            // Outside the XML 1.0 `Char` production: silently removed.
            '\u{0}'..='\u{8}'
            | '\u{B}'
            | '\u{C}'
            | '\u{E}'..='\u{1F}'
            | '\u{FFFE}'
            | '\u{FFFF}' => {}
            other => escaped.push(other),
        }
    }
    escaped
}

/// Short ID for use in URLs: the first eight characters of the UUID's
/// string form.
fn short_id(id: Uuid) -> String {
    let mut text = id.to_string();
    // Cut the string down in place instead of slicing and copying it.
    text.truncate(8);
    text
}

/// Shorten a description to at most `max_chars` characters (counted as
/// `char`s, not bytes), preferring to cut at the last space.
///
/// Text that already fits is returned unchanged. Otherwise the first
/// `max_chars` characters are taken, cut back to just before their last
/// space when there is one, and an ellipsis (`…`) is appended.
fn truncate_description(s: &str, max_chars: usize) -> String {
    // Byte offset just past the first `max_chars` characters; `None` means
    // the text has no character at that index and therefore already fits.
    let Some((cutoff, _)) = s.char_indices().nth(max_chars) else {
        return s.to_owned();
    };

    let head = &s[..cutoff];
    let kept = head.rfind(' ').map_or(head, |space| &head[..space]);
    format!("{kept}…")
}