synops/maskinrommet/src/rss.rs
vegard c5239d2923 Feed-redirect: 301 for podcast som flyttes til ny host (oppgave 30.8)
Når redirect_feed er satt i podcast-trait, returnerer maskinrommet
HTTP 301 Moved Permanently med Location-header i stedet for å serve
feeden. iTunes new-feed-url-taggen bevares også i RSS-en for klienter
som ikke følger 301.

Admin-UI: erstatter det enkle tekstfeltet med tre tilstander:
- Inaktiv: knapp "Flytt podcast til annen plattform..."
- Bekreftelse: advarsel + URL-felt + rød "Aktiver redirect"-knapp
- Aktiv: gul statusindikator med deaktiver-knapp

Backend: sjekker redirect_feed tidlig i generate_feed() og returnerer
301 før noe annet arbeid gjøres (DB-oppslag for episodes osv).
2026-03-19 00:31:39 +00:00

723 lines
24 KiB
Rust

//! RSS/Atom-feed: GET /pub/{slug}/feed.xml
//!
//! Genererer RSS 2.0 eller Atom 1.0 feed for samlinger med `rss`-trait.
//! Feeden er offentlig — ingen autentisering kreves.
//! Podcast-samlinger (med `podcast`-trait) inkluderer <enclosure>-tags,
//! iTunes-tags og Podcasting 2.0-tags (transcript, chapters).
//!
//! Ref: docs/concepts/publisering.md (RSS/Atom-seksjonen)
//! docs/primitiver/traits.md (rss-trait)
//! docs/features/podcast_hosting.md (iTunes/Podcasting 2.0)
use axum::{
extract::{Path, State},
http::{header, StatusCode},
response::Response,
};
use chrono::{DateTime, Utc};
use serde::Deserialize;
use sqlx::PgPool;
use uuid::Uuid;
use crate::AppState;
// =============================================================================
// Konfigurasjon fra trait-metadata
// =============================================================================
/// Per-collection configuration stored under `metadata.traits.rss`.
///
/// All fields are optional; a missing or malformed trait section falls
/// back to `Default` (see `find_collection_by_slug`).
#[derive(Deserialize, Default)]
struct RssTraitConfig {
    format: Option<String>,      // "rss" (default) or "atom"
    title: Option<String>,       // overrides the node title as channel title
    description: Option<String>, // channel <description>/<subtitle>
    max_items: Option<i64>,      // defaults to 50 in generate_feed
    language: Option<String>,    // may be overridden by the podcast-trait language
}
/// Per-collection configuration stored under `metadata.traits.podcast`.
#[derive(Deserialize, Default)]
struct PodcastTraitConfig {
    itunes_author: Option<String>,
    itunes_category: Option<String>,
    itunes_subcategory: Option<String>, // only emitted when itunes_category is also set
    explicit: Option<bool>,             // rendered as false when absent
    language: Option<String>,           // takes precedence over the rss-trait language
    // When set and non-empty: generate_feed answers 301 to this URL, and the
    // RSS keeps an <itunes:new-feed-url> tag for clients that ignore the 301.
    // NOTE(review): #[serde(default)] is redundant on an Option field (serde
    // already defaults a missing Option to None) — harmless, kept as-is.
    #[serde(default)]
    redirect_feed: Option<String>,
}
/// Per-collection configuration stored under `metadata.traits.publishing`.
#[derive(Deserialize, Default)]
struct PublishingTraitConfig {
    // The slug is matched in SQL, never read from the parsed struct.
    #[allow(dead_code)]
    slug: Option<String>,
    custom_domain: Option<String>, // when set, feed links use https://{domain}
}
// =============================================================================
// Database-modeller
// =============================================================================
/// A collection node plus its parsed trait configs, as assembled by
/// `find_collection_by_slug`.
struct CollectionInfo {
    id: Uuid,
    title: Option<String>,              // node title; channel-title fallback
    rss_config: RssTraitConfig,
    publishing_config: PublishingTraitConfig,
    is_podcast: bool,                   // true when the podcast trait key is present
    podcast_config: PodcastTraitConfig, // Default when the collection is not a podcast
    artwork_cas_hash: Option<String>,   // CAS hash of the og_image artwork, if any
}
/// One feed entry, produced by `fetch_feed_items` and rendered by the feed
/// builders. The podcast-only fields stay `None`/empty for plain feeds.
struct FeedItem {
    id: Uuid,
    title: Option<String>,
    content: Option<String>,           // truncated to 500 chars for <description>/<summary>
    created_at: DateTime<Utc>,
    publish_at: Option<DateTime<Utc>>, // from the belongs_to edge metadata; created_at is the fallback
    // Podcast fields
    enclosure_url: Option<String>,     // relative CAS path ("/cas/{hash}"); base_url is prefixed at render time
    enclosure_mime: Option<String>,
    enclosure_size: Option<i64>,       // bytes; rendered as 0 when unknown
    has_transcript: bool,
    chapters: Vec<Chapter>,
    duration_secs: Option<i64>,
    episode_image_cas: Option<String>,
}
/// A chapter marker loaded from a `chapter` edge. The fields are only carried
/// here, never read in this file — presumably consumed by the chapters.json
/// endpoint; hence the dead_code allowance. TODO confirm against that handler.
#[allow(dead_code)]
struct Chapter {
    at: String,            // timestamp string; the SQL defaults it to '00:00:00'
    title: Option<String>,
}
// =============================================================================
// Handler
// =============================================================================
/// GET /pub/{slug}/feed.xml — offentlig RSS/Atom-feed.
pub async fn generate_feed(
State(state): State<AppState>,
Path(slug): Path<String>,
) -> Result<Response, StatusCode> {
let collection = find_collection_by_slug(&state.db, &slug)
.await
.map_err(|e| {
tracing::error!(slug = %slug, error = %e, "Feil ved oppslag av samling");
StatusCode::INTERNAL_SERVER_ERROR
})?
.ok_or(StatusCode::NOT_FOUND)?;
// 301 Moved Permanently når redirect_feed er satt (podcast flyttet til ny host)
if let Some(ref redirect_url) = collection.podcast_config.redirect_feed {
if !redirect_url.is_empty() {
tracing::info!(slug = %slug, redirect = %redirect_url, "Feed-redirect aktiv");
return Ok(Response::builder()
.status(StatusCode::MOVED_PERMANENTLY)
.header(header::LOCATION, redirect_url.as_str())
.header(header::CACHE_CONTROL, "public, max-age=3600")
.body("Moved Permanently".into())
.unwrap());
}
}
let max_items = collection.rss_config.max_items.unwrap_or(50);
let items = fetch_feed_items(&state.db, collection.id, max_items, collection.is_podcast)
.await
.map_err(|e| {
tracing::error!(slug = %slug, error = %e, "Feil ved henting av feed-elementer");
StatusCode::INTERNAL_SERVER_ERROR
})?;
let base_url = collection
.publishing_config
.custom_domain
.as_deref()
.map(|d| format!("https://{d}"))
.unwrap_or_else(|| format!("https://synops.no/pub/{slug}"));
let format = collection
.rss_config
.format
.as_deref()
.unwrap_or("rss");
let xml = match format {
"atom" => build_atom_feed(&collection, &items, &base_url),
_ => build_rss_feed(&collection, &items, &base_url),
};
let content_type = match format {
"atom" => "application/atom+xml; charset=utf-8",
_ => "application/rss+xml; charset=utf-8",
};
Ok(Response::builder()
.header(header::CONTENT_TYPE, content_type)
.header(header::CACHE_CONTROL, "public, max-age=300")
.body(xml.into())
.unwrap())
}
// =============================================================================
// Database-spørringer
// =============================================================================
/// Finn samling med rss-trait basert på publishing-slug.
async fn find_collection_by_slug(
db: &PgPool,
slug: &str,
) -> Result<Option<CollectionInfo>, sqlx::Error> {
let row: Option<(Uuid, Option<String>, serde_json::Value)> = sqlx::query_as(
r#"
SELECT id, title, metadata
FROM nodes
WHERE node_kind = 'collection'
AND metadata->'traits'->'publishing'->>'slug' = $1
AND metadata->'traits' ? 'rss'
LIMIT 1
"#,
)
.bind(slug)
.fetch_optional(db)
.await?;
let Some((id, title, metadata)) = row else {
return Ok(None);
};
let traits = metadata
.get("traits")
.cloned()
.unwrap_or(serde_json::Value::Null);
let rss_config: RssTraitConfig = traits
.get("rss")
.cloned()
.map(|v| serde_json::from_value(v).unwrap_or_default())
.unwrap_or_default();
let publishing_config: PublishingTraitConfig = traits
.get("publishing")
.cloned()
.map(|v| serde_json::from_value(v).unwrap_or_default())
.unwrap_or_default();
let is_podcast = traits.get("podcast").is_some();
let podcast_config: PodcastTraitConfig = traits
.get("podcast")
.cloned()
.map(|v| serde_json::from_value(v).unwrap_or_default())
.unwrap_or_default();
// Hent samlingens artwork via og_image-edge
let artwork_cas_hash: Option<String> = sqlx::query_scalar(
r#"
SELECT m.metadata->>'cas_hash'
FROM edges e
JOIN nodes m ON m.id = e.target_id
WHERE e.source_id = $1
AND e.edge_type = 'og_image'
LIMIT 1
"#,
)
.bind(id)
.fetch_optional(db)
.await?;
Ok(Some(CollectionInfo {
id,
title,
rss_config,
publishing_config,
is_podcast,
podcast_config,
artwork_cas_hash,
}))
}
/// Fetch published items (nodes with a `belongs_to` edge to the collection),
/// newest first by `publish_at` (edge metadata) falling back to `created_at`,
/// capped at `max_items`.
///
/// For podcast collections the query additionally joins the episode's media
/// node (enclosure hash/MIME/size/duration), checks for transcription
/// segments, loads chapter edges in a second batched query, and picks up a
/// per-episode image via an og_image edge.
async fn fetch_feed_items(
    db: &PgPool,
    collection_id: Uuid,
    max_items: i64,
    is_podcast: bool,
) -> Result<Vec<FeedItem>, sqlx::Error> {
    if is_podcast {
        // Tuple layout mirrors the SELECT column order below.
        let rows: Vec<(
            Uuid,                      // n.id
            Option<String>,            // n.title
            Option<String>,            // n.content
            DateTime<Utc>,             // n.created_at
            Option<serde_json::Value>, // e.metadata (may carry publish_at)
            Option<String>,            // media cas_hash
            Option<String>,            // media mime
            Option<i64>,               // media size in bytes
            Option<bool>,              // has_transcript
            Option<i64>,               // duration_secs
            Option<String>,            // episode image cas_hash
        )> = sqlx::query_as(
            r#"
            SELECT
                n.id,
                n.title,
                n.content,
                n.created_at,
                e.metadata,
                m.metadata->>'cas_hash' AS cas_hash,
                m.metadata->>'mime' AS mime,
                COALESCE((m.metadata->>'size_bytes')::bigint, (m.metadata->>'size')::bigint) AS size,
                EXISTS(
                    SELECT 1 FROM transcription_segments ts WHERE ts.node_id = n.id LIMIT 1
                ) AS has_transcript,
                (m.metadata->>'duration_secs')::bigint AS duration_secs,
                (SELECT img.metadata->>'cas_hash'
                 FROM edges ie
                 JOIN nodes img ON img.id = ie.target_id
                 WHERE ie.source_id = n.id AND ie.edge_type = 'og_image'
                 LIMIT 1
                ) AS episode_image_cas
            FROM edges e
            JOIN nodes n ON n.id = e.source_id
            LEFT JOIN edges me ON me.source_id = n.id AND me.edge_type = 'has_media'
            LEFT JOIN nodes m ON m.id = me.target_id AND m.node_kind = 'media'
            WHERE e.target_id = $1
              AND e.edge_type = 'belongs_to'
            ORDER BY COALESCE(
                (e.metadata->>'publish_at')::timestamptz,
                n.created_at
            ) DESC
            LIMIT $2
            "#,
        )
        .bind(collection_id)
        .bind(max_items)
        .fetch_all(db)
        .await?;
        // NOTE(review): the has_media LEFT JOIN has no LIMIT — an episode
        // with several has_media edges would produce one row per media node.
        // Confirm whether multiple has_media edges can occur in practice.
        // Collect node IDs so all chapters are fetched in a single query.
        let node_ids: Vec<Uuid> = rows.iter().map(|r| r.0).collect();
        let chapter_rows: Vec<(Uuid, String, Option<String>)> = if !node_ids.is_empty() {
            sqlx::query_as(
                r#"
                SELECT
                    e.source_id,
                    COALESCE(e.metadata->>'at', '00:00:00') AS at,
                    e.metadata->>'title' AS title
                FROM edges e
                WHERE e.source_id = ANY($1)
                  AND e.edge_type = 'chapter'
                ORDER BY e.source_id, e.metadata->>'at'
                "#,
            )
            .bind(&node_ids)
            .fetch_all(db)
            .await?
        } else {
            vec![]
        };
        // Group chapters per node for O(1) lookup while mapping rows.
        let mut chapters_map: std::collections::HashMap<Uuid, Vec<Chapter>> =
            std::collections::HashMap::new();
        for (node_id, at, title) in chapter_rows {
            chapters_map
                .entry(node_id)
                .or_default()
                .push(Chapter { at, title });
        }
        Ok(rows
            .into_iter()
            .map(
                |(id, title, content, created_at, edge_meta, cas_hash, mime, size, has_transcript, duration_secs, episode_image_cas)| {
                    // publish_at lives on the belongs_to edge as a string;
                    // unparsable values silently fall back to created_at later.
                    let publish_at = edge_meta
                        .as_ref()
                        .and_then(|m| m.get("publish_at"))
                        .and_then(|v| v.as_str())
                        .and_then(|s| s.parse::<DateTime<Utc>>().ok());
                    FeedItem {
                        id,
                        title,
                        content,
                        created_at,
                        publish_at,
                        // Relative CAS path; base_url is prefixed at render time.
                        enclosure_url: cas_hash.map(|h| format!("/cas/{h}")),
                        enclosure_mime: mime,
                        enclosure_size: size,
                        has_transcript: has_transcript.unwrap_or(false),
                        // remove() moves ownership of the chapter list into the item.
                        chapters: chapters_map.remove(&id).unwrap_or_default(),
                        duration_secs,
                        episode_image_cas,
                    }
                },
            )
            .collect())
    } else {
        // Plain feed: nodes only, no enclosures or podcast extras.
        let rows: Vec<(Uuid, Option<String>, Option<String>, DateTime<Utc>, Option<serde_json::Value>)> = sqlx::query_as(
            r#"
            SELECT
                n.id,
                n.title,
                n.content,
                n.created_at,
                e.metadata
            FROM edges e
            JOIN nodes n ON n.id = e.source_id
            WHERE e.target_id = $1
              AND e.edge_type = 'belongs_to'
            ORDER BY COALESCE(
                (e.metadata->>'publish_at')::timestamptz,
                n.created_at
            ) DESC
            LIMIT $2
            "#,
        )
        .bind(collection_id)
        .bind(max_items)
        .fetch_all(db)
        .await?;
        Ok(rows
            .into_iter()
            .map(|(id, title, content, created_at, edge_meta)| {
                let publish_at = edge_meta
                    .as_ref()
                    .and_then(|m| m.get("publish_at"))
                    .and_then(|v| v.as_str())
                    .and_then(|s| s.parse::<DateTime<Utc>>().ok());
                FeedItem {
                    id,
                    title,
                    content,
                    created_at,
                    publish_at,
                    enclosure_url: None,
                    enclosure_mime: None,
                    enclosure_size: None,
                    has_transcript: false,
                    chapters: vec![],
                    duration_secs: None,
                    episode_image_cas: None,
                }
            })
            .collect())
    }
}
// =============================================================================
// XML-generering
// =============================================================================
/// Build an RSS 2.0 feed as an XML string.
///
/// Adds the iTunes + Podcasting 2.0 namespaces and the corresponding
/// channel-/item-level tags when the collection carries the podcast trait.
/// User-controlled text goes through `xml_escape`; generated URLs are
/// inserted unescaped (they are built from base_url + hashes/short IDs).
fn build_rss_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
    // Channel title: rss-trait override, else the node title, else a placeholder.
    let channel_title = xml_escape(
        collection
            .rss_config
            .title
            .as_deref()
            .or(collection.title.as_deref())
            .unwrap_or("Untitled Feed"),
    );
    let channel_desc = xml_escape(
        collection
            .rss_config
            .description
            .as_deref()
            .unwrap_or(""),
    );
    // Podcast-trait language overrides rss-trait language; default Norwegian.
    let language = collection
        .podcast_config
        .language
        .as_deref()
        .or(collection.rss_config.language.as_deref())
        .unwrap_or("no");
    let feed_url = format!("{base_url}/feed.xml");
    let mut xml = String::with_capacity(4096);
    xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    if collection.is_podcast {
        xml.push_str("<rss version=\"2.0\" xmlns:itunes=\"http://www.itunes.com/dtds/podcast-1.0.dtd\" xmlns:podcast=\"https://podcastindex.org/namespace/1.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
    } else {
        xml.push_str("<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
    }
    xml.push_str("<channel>\n");
    xml.push_str(&format!(" <title>{channel_title}</title>\n"));
    xml.push_str(&format!(" <link>{base_url}</link>\n"));
    xml.push_str(&format!(" <description>{channel_desc}</description>\n"));
    xml.push_str(&format!(" <language>{language}</language>\n"));
    xml.push_str(&format!(
        " <atom:link href=\"{feed_url}\" rel=\"self\" type=\"application/rss+xml\"/>\n"
    ));
    // Items arrive newest-first, so the first item dates the whole channel.
    if let Some(item) = items.first() {
        let date = item.publish_at.unwrap_or(item.created_at);
        xml.push_str(&format!(" <lastBuildDate>{}</lastBuildDate>\n", date.to_rfc2822()));
    }
    // iTunes and Podcasting 2.0 channel-level tags
    if collection.is_podcast {
        let pc = &collection.podcast_config;
        if let Some(ref author) = pc.itunes_author {
            xml.push_str(&format!(
                " <itunes:author>{}</itunes:author>\n",
                xml_escape(author)
            ));
        }
        // Subcategory only makes sense nested inside a category element.
        if let Some(ref category) = pc.itunes_category {
            if let Some(ref subcategory) = pc.itunes_subcategory {
                xml.push_str(&format!(
                    " <itunes:category text=\"{}\">\n <itunes:category text=\"{}\"/>\n </itunes:category>\n",
                    xml_escape(category),
                    xml_escape(subcategory)
                ));
            } else {
                xml.push_str(&format!(
                    " <itunes:category text=\"{}\"/>\n",
                    xml_escape(category)
                ));
            }
        }
        let explicit = pc.explicit.unwrap_or(false);
        xml.push_str(&format!(
            " <itunes:explicit>{}</itunes:explicit>\n",
            if explicit { "true" } else { "false" }
        ));
        if let Some(ref cas_hash) = collection.artwork_cas_hash {
            let artwork_url = format!("{base_url}/cas/{cas_hash}");
            xml.push_str(&format!(
                " <itunes:image href=\"{artwork_url}\"/>\n"
            ));
        }
        xml.push_str(" <itunes:type>episodic</itunes:type>\n");
        // itunes:new-feed-url for feed migration — kept in the XML even though
        // generate_feed already 301s, for clients that do not follow redirects.
        if let Some(ref redirect_url) = pc.redirect_feed {
            if !redirect_url.is_empty() {
                xml.push_str(&format!(
                    " <itunes:new-feed-url>{}</itunes:new-feed-url>\n",
                    xml_escape(redirect_url)
                ));
            }
        }
        // Podcasting 2.0: locked — "no" signals other platforms may import it.
        xml.push_str(" <podcast:locked>no</podcast:locked>\n");
    }
    for item in items {
        xml.push_str(" <item>\n");
        let title = xml_escape(item.title.as_deref().unwrap_or("Uten tittel"));
        xml.push_str(&format!(" <title>{title}</title>\n"));
        let item_url = format!("{base_url}/{}", short_id(item.id));
        xml.push_str(&format!(" <link>{item_url}</link>\n"));
        // The GUID is stable — based on the node ID, not the URL.
        xml.push_str(&format!(
            " <guid isPermaLink=\"false\">{}</guid>\n",
            item.id
        ));
        let pub_date = item.publish_at.unwrap_or(item.created_at);
        xml.push_str(&format!(" <pubDate>{}</pubDate>\n", pub_date.to_rfc2822()));
        if let Some(ref content) = item.content {
            let desc = xml_escape(&truncate_description(content, 500));
            xml.push_str(&format!(" <description>{desc}</description>\n"));
        }
        // Podcast enclosure: CAS path joined onto base_url; size 0 when unknown.
        if let Some(ref enc_path) = item.enclosure_url {
            let enc_url = format!("{base_url}{enc_path}");
            let mime = item.enclosure_mime.as_deref().unwrap_or("audio/mpeg");
            let size = item.enclosure_size.unwrap_or(0);
            xml.push_str(&format!(
                " <enclosure url=\"{enc_url}\" length=\"{size}\" type=\"{mime}\"/>\n"
            ));
        }
        // iTunes and Podcasting 2.0 item-level tags (podcast collections only)
        if collection.is_podcast {
            xml.push_str(&format!(" <itunes:title>{title}</itunes:title>\n"));
            // Duration as HH:MM:SS, or MM:SS when under an hour.
            if let Some(secs) = item.duration_secs {
                let h = secs / 3600;
                let m = (secs % 3600) / 60;
                let s = secs % 60;
                if h > 0 {
                    xml.push_str(&format!(
                        " <itunes:duration>{h:02}:{m:02}:{s:02}</itunes:duration>\n"
                    ));
                } else {
                    xml.push_str(&format!(
                        " <itunes:duration>{m:02}:{s:02}</itunes:duration>\n"
                    ));
                }
            }
            // Item-level explicit mirrors the channel-level flag.
            let explicit = collection.podcast_config.explicit.unwrap_or(false);
            xml.push_str(&format!(
                " <itunes:explicit>{}</itunes:explicit>\n",
                if explicit { "true" } else { "false" }
            ));
            if let Some(ref cas) = item.episode_image_cas {
                let img_url = format!("{base_url}/cas/{cas}");
                xml.push_str(&format!(
                    " <itunes:image href=\"{img_url}\"/>\n"
                ));
            }
            // podcast:transcript — SRT generated from transcription segments
            if item.has_transcript {
                let transcript_url =
                    format!("{base_url}/{}/transcript.srt", short_id(item.id));
                xml.push_str(&format!(
                    " <podcast:transcript url=\"{transcript_url}\" type=\"application/srt\"/>\n"
                ));
            }
            // podcast:chapters — JSON generated from chapter edges
            if !item.chapters.is_empty() {
                let chapters_url =
                    format!("{base_url}/{}/chapters.json", short_id(item.id));
                xml.push_str(&format!(
                    " <podcast:chapters url=\"{chapters_url}\" type=\"application/json+chapters\"/>\n"
                ));
            }
        }
        xml.push_str(" </item>\n");
    }
    xml.push_str("</channel>\n");
    xml.push_str("</rss>\n");
    xml
}
/// Build an Atom 1.0 feed as an XML string.
///
/// No iTunes/podcast tags here; enclosures become rel="enclosure" links.
fn build_atom_feed(collection: &CollectionInfo, items: &[FeedItem], base_url: &str) -> String {
    // Feed title: rss-trait override, else node title, else a placeholder.
    let title_source = collection
        .rss_config
        .title
        .as_deref()
        .or(collection.title.as_deref());
    let feed_title = xml_escape(title_source.unwrap_or("Untitled Feed"));
    let feed_desc = xml_escape(collection.rss_config.description.as_deref().unwrap_or(""));
    let self_url = format!("{base_url}/feed.xml");
    // Items arrive newest-first; an empty feed is stamped "now".
    let updated = items
        .first()
        .map_or_else(Utc::now, |i| i.publish_at.unwrap_or(i.created_at));

    let mut out = String::with_capacity(4096);
    out.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    out.push_str("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n");
    out.push_str(&format!(" <title>{feed_title}</title>\n"));
    out.push_str(&format!(" <subtitle>{feed_desc}</subtitle>\n"));
    out.push_str(&format!(
        " <link href=\"{self_url}\" rel=\"self\" type=\"application/atom+xml\"/>\n"
    ));
    out.push_str(&format!(" <link href=\"{base_url}\" rel=\"alternate\"/>\n"));
    out.push_str(&format!(" <id>{base_url}</id>\n"));
    out.push_str(&format!(" <updated>{}</updated>\n", updated.to_rfc3339()));

    for entry in items {
        let entry_title = xml_escape(entry.title.as_deref().unwrap_or("Uten tittel"));
        let entry_url = format!("{base_url}/{}", short_id(entry.id));
        // Same timestamp serves both <updated> and <published>.
        let stamp = entry.publish_at.unwrap_or(entry.created_at).to_rfc3339();

        out.push_str(" <entry>\n");
        out.push_str(&format!(" <title>{entry_title}</title>\n"));
        out.push_str(&format!(" <link href=\"{entry_url}\" rel=\"alternate\"/>\n"));
        out.push_str(&format!(" <id>urn:uuid:{}</id>\n", entry.id));
        out.push_str(&format!(" <updated>{stamp}</updated>\n"));
        out.push_str(&format!(" <published>{stamp}</published>\n"));
        if let Some(body) = entry.content.as_deref() {
            let summary = xml_escape(&truncate_description(body, 500));
            out.push_str(&format!(" <summary>{summary}</summary>\n"));
        }
        // Podcast enclosure rendered as a link with rel="enclosure".
        if let Some(path) = entry.enclosure_url.as_deref() {
            let href = format!("{base_url}{path}");
            let mime = entry.enclosure_mime.as_deref().unwrap_or("audio/mpeg");
            let length = entry.enclosure_size.unwrap_or(0);
            out.push_str(&format!(
                " <link rel=\"enclosure\" href=\"{href}\" type=\"{mime}\" length=\"{length}\"/>\n"
            ));
        }
        out.push_str(" </entry>\n");
    }
    out.push_str("</feed>\n");
    out
}
// =============================================================================
// Hjelpefunksjoner
// =============================================================================
/// XML-escape text for use in element content and attribute values.
///
/// Escapes the five XML special characters (`&`, `<`, `>`, `"`, `'`).
/// Built in a single pass instead of five chained `replace` calls, which
/// each allocated a full intermediate `String`. Output is identical: the
/// original escaped `&` first, so later-inserted ampersands (from `&lt;`
/// etc.) were never double-escaped — exactly what a per-char pass yields.
fn xml_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            other => out.push(other),
        }
    }
    out
}
/// Short ID from a UUID (first 8 characters of its hyphenated form) — for URLs.
fn short_id(id: Uuid) -> String {
    // The hyphenated UUID string is pure ASCII, so the first 8 chars are
    // the first 8 hex digits.
    id.to_string().chars().take(8).collect()
}
/// Truncate a description to at most `max_chars` characters (chars, not
/// bytes), preferring to cut at the last word boundary inside the limit.
///
/// Returns `s` unchanged when it already fits. When the prefix contains no
/// space, the hard character cut is kept. No ellipsis is appended.
/// (Replaces the `format!("{}", x)` anti-pattern with `to_string()`.)
fn truncate_description(s: &str, max_chars: usize) -> String {
    if s.chars().count() <= max_chars {
        return s.to_string();
    }
    // Byte offset of the first char *past* the limit — always a valid char
    // boundary, so the slice below cannot panic on multi-byte text.
    let byte_end = s
        .char_indices()
        .nth(max_chars)
        .map(|(i, _)| i)
        .unwrap_or(s.len());
    let head = &s[..byte_end];
    match head.rfind(' ') {
        Some(pos) => head[..pos].to_string(),
        None => head.to_string(),
    }
}