// synops-highlight — AI-kuratert highlight reel fra podcast-transkripsjon. // // Analyserer fullstendig transkripsjon etter innspilling og finner // 5-10 klippverdige øyeblikk: humor, emosjonelle topper, sterke meninger, // punchlines og narrative høydepunkter. // // Input: --media-node-id (episodenode med transkripsjon) // Output: JSON med foreslåtte klipp (tidsstempler, teksting, hashtags, score) // Med --write: oppretter klipp-noder og edges i PG. // // Miljøvariabler: // DATABASE_URL — PostgreSQL-tilkobling (påkrevd) // AI_GATEWAY_URL — LiteLLM gateway (default: http://localhost:4000) // LITELLM_MASTER_KEY — API-nøkkel for LiteLLM // AI_HIGHLIGHT_MODEL — Modellalias (default: synops/high) // // Ref: docs/proposals/auto_highlight_reel.md // docs/retninger/unix_filosofi.md use clap::Parser; use serde::{Deserialize, Serialize}; use std::process; use uuid::Uuid; /// AI-kuratert highlight reel fra podcast-transkripsjon. #[derive(Parser)] #[command(name = "synops-highlight", about = "Generer highlight reel fra transkripsjon")] struct Cli { /// Media-node-ID (episode med transkripsjon) #[arg(long)] media_node_id: Uuid, /// Bruker-ID som utløste analysen #[arg(long)] requested_by: Option, /// Podcast-samling (for belongs_to-edge) #[arg(long)] collection_id: Option, /// Skriv klipp-noder og edges til database #[arg(long)] write: bool, } // --- Database-rader --- #[derive(sqlx::FromRow)] struct MediaNode { title: Option, #[allow(dead_code)] content: Option, created_by: Option, #[allow(dead_code)] metadata: serde_json::Value, } #[derive(sqlx::FromRow)] struct TranscriptSegment { #[allow(dead_code)] seq: i32, start_ms: i32, end_ms: i32, content: String, } // --- LLM request/response (OpenAI-kompatibel) --- #[derive(Serialize)] struct ChatRequest { model: String, messages: Vec, temperature: f32, response_format: ResponseFormat, } #[derive(Serialize)] struct ResponseFormat { r#type: String, } #[derive(Serialize)] struct ChatMessage { role: String, content: String, } #[derive(Deserialize)] struct ChatResponse { choices: Vec, #[serde(default)] usage: Option, #[serde(default)] model: Option, } #[derive(Deserialize, Clone)] struct UsageInfo { #[serde(default)] prompt_tokens: i64, #[serde(default)] completion_tokens: i64, } #[derive(Deserialize)] struct Choice { message: MessageContent, } #[derive(Deserialize)] struct MessageContent { content: Option, } // --- AI-analysens output --- #[derive(Deserialize, Debug)] struct HighlightResponse { #[serde(default)] highlights: Vec, } #[derive(Deserialize, Serialize, Debug, Clone)] struct Highlight { /// Starttid i millisekunder start_ms: i64, /// Sluttid i millisekunder end_ms: i64, /// Klippverdighets-score 0.0-1.0 score: f64, /// Kategori: humor, emotion, opinion, punchline, narrative reason: String, /// Foreslått teksting for sosiale medier suggested_caption: String, /// Det sterkeste sitatet (thumbnail-tekst) quote: String, /// Foreslåtte hashtags #[serde(default)] hashtags: Vec, } const SYSTEM_PROMPT: &str = r#"Du er en podcast-produsent som identifiserer de beste øyeblikkene i en podcast-episode. Analyser transkripsjonen og finn 5-10 klippverdige øyeblikk (15-45 sekunder hver). For hvert høydepunkt, vurder: - **humor**: Morsomme øyeblikk, vitser, latter-øyeblikk - **emotion**: Emosjonelle topper, sårbare øyeblikk, sterke reaksjoner - **opinion**: Kontroversielle eller sterke meninger som engasjerer - **punchline**: Slagkraftige formuleringer, one-liners, quotable moments - **narrative**: Narrative vendepunkter, overraskende avsløringer Returner KUN et JSON-objekt med denne strukturen: { "highlights": [ { "start_ms": 12000, "end_ms": 45000, "score": 0.92, "reason": "punchline", "suggested_caption": "Kort, engasjerende tekst for sosiale medier (maks 280 tegn)", "quote": "Det sterkeste sitatet fra klippet", "hashtags": ["podcast", "tema1", "tema2"] } ] } Regler: - Hvert klipp bør være 15-45 sekunder (15000-45000ms) - Sorter etter score (høyeste først) - Bruk start_ms/end_ms fra transkripsjonen — ikke dikt opp tidsstempler - suggested_caption skal være catchy og fungere på TikTok/Instagram/Twitter - quote skal være et direkte sitat som fungerer som thumbnail-tekst - hashtags: 3-5 relevante, norske hashtags per klipp - Returner tom highlights-liste hvis innholdet ikke har klippverdige øyeblikk - Foretrekk øyeblikk som fungerer alene uten kontekst"#; #[tokio::main] async fn main() { synops_common::logging::init("synops_highlight"); let cli = Cli::parse(); if cli.write && cli.requested_by.is_none() { eprintln!("Feil: --requested-by er påkrevd sammen med --write"); process::exit(1); } if let Err(e) = run(cli).await { eprintln!("Feil: {e}"); process::exit(1); } } async fn run(cli: Cli) -> Result<(), String> { let db = synops_common::db::connect().await?; let media_node_id = cli.media_node_id; // 1. Hent medianode let media = sqlx::query_as::<_, MediaNode>( "SELECT title, content, created_by, metadata FROM nodes WHERE id = $1", ) .bind(media_node_id) .fetch_optional(&db) .await .map_err(|e| format!("PG-feil ved henting av node: {e}"))? .ok_or_else(|| format!("Node {media_node_id} finnes ikke"))?; // 2. Hent transkripsjonssegmenter let segments = sqlx::query_as::<_, TranscriptSegment>( r#"SELECT seq, start_ms, end_ms, content FROM transcription_segments WHERE node_id = $1 ORDER BY seq ASC"#, ) .bind(media_node_id) .fetch_all(&db) .await .map_err(|e| format!("PG-feil ved henting av segmenter: {e}"))?; if segments.is_empty() { let result = serde_json::json!({ "status": "skipped", "reason": "no_transcription", "media_node_id": media_node_id.to_string() }); println!("{}", serde_json::to_string_pretty(&result).unwrap()); return Ok(()); } tracing::info!( media_node_id = %media_node_id, segments = segments.len(), "Hentet transkripsjon, sender til AI for highlight-analyse" ); // 3. Bygg transkripsjon med tidsstempler for AI let transcript = build_timestamped_transcript(&segments); let episode_title = media.title.unwrap_or_else(|| "Ukjent episode".to_string()); let user_content = format!( "Episode: {episode_title}\n\nTranskripsjon med tidsstempler (ms):\n\n{transcript}" ); // 4. Kall LLM let (highlights_resp, llm_usage, llm_model) = call_llm(&user_content).await?; let highlights = highlights_resp.highlights; tracing::info!( media_node_id = %media_node_id, highlights_found = highlights.len(), "AI-analyse fullført" ); if highlights.is_empty() { let result = serde_json::json!({ "status": "completed", "media_node_id": media_node_id.to_string(), "highlights_found": 0, "clips_created": 0, "reason": "no_highlights_found" }); println!("{}", serde_json::to_string_pretty(&result).unwrap()); return Ok(()); } // 5. Validér og filtrer highlights let last_segment_end = segments.last().map(|s| s.end_ms as i64).unwrap_or(0); let valid_highlights: Vec<&Highlight> = highlights .iter() .filter(|h| { h.start_ms >= 0 && h.end_ms > h.start_ms && h.end_ms <= last_segment_end && (h.end_ms - h.start_ms) >= 10_000 // minst 10 sek && (h.end_ms - h.start_ms) <= 60_000 // maks 60 sek && h.score >= 0.0 && h.score <= 1.0 }) .collect(); tracing::info!( valid = valid_highlights.len(), total = highlights.len(), "Highlights validert" ); let tokens_in = llm_usage.as_ref().map(|u| u.prompt_tokens).unwrap_or(0); let tokens_out = llm_usage.as_ref().map(|u| u.completion_tokens).unwrap_or(0); let model_id = llm_model.unwrap_or_else(|| "unknown".to_string()); // 6. Skriv til database eller bare output let result = if cli.write { let requested_by = cli.requested_by.unwrap(); let created_by = media.created_by.unwrap_or(media_node_id); let collection_id = cli.collection_id; let clips_created = write_highlights( &db, media_node_id, &valid_highlights, created_by, collection_id, ) .await?; // Logg AI-ressursforbruk log_resource_usage( &db, media_node_id, media.created_by, &model_id, tokens_in, tokens_out, requested_by, ) .await; serde_json::json!({ "status": "completed", "media_node_id": media_node_id.to_string(), "highlights_found": highlights.len(), "highlights_valid": valid_highlights.len(), "clips_created": clips_created, "model": model_id, "tokens_in": tokens_in, "tokens_out": tokens_out, }) } else { let highlight_data: Vec<&Highlight> = valid_highlights.iter().copied().collect(); serde_json::json!({ "status": "completed", "media_node_id": media_node_id.to_string(), "highlights_found": highlights.len(), "highlights_valid": valid_highlights.len(), "highlights": highlight_data, "model": model_id, "tokens_in": tokens_in, "tokens_out": tokens_out, }) }; println!( "{}", serde_json::to_string_pretty(&result) .map_err(|e| format!("JSON-serialisering feilet: {e}"))? ); Ok(()) } /// Bygg en tidsstemplet transkripsjon for AI-analyse. /// Format: [12000-15000ms] Tekst her... fn build_timestamped_transcript(segments: &[TranscriptSegment]) -> String { let mut lines = Vec::with_capacity(segments.len()); for seg in segments { lines.push(format!("[{}-{}ms] {}", seg.start_ms, seg.end_ms, seg.content)); } lines.join("\n") } /// Kall LiteLLM for highlight-analyse. async fn call_llm( user_content: &str, ) -> Result<(HighlightResponse, Option, Option), String> { let gateway_url = std::env::var("AI_GATEWAY_URL").unwrap_or_else(|_| "http://localhost:4000".to_string()); let api_key = std::env::var("LITELLM_MASTER_KEY").unwrap_or_default(); let model = std::env::var("AI_HIGHLIGHT_MODEL").unwrap_or_else(|_| "synops/high".to_string()); let request = ChatRequest { model, messages: vec![ ChatMessage { role: "system".to_string(), content: SYSTEM_PROMPT.to_string(), }, ChatMessage { role: "user".to_string(), content: user_content.to_string(), }, ], temperature: 0.4, response_format: ResponseFormat { r#type: "json_object".to_string(), }, }; let client = reqwest::Client::new(); let url = format!("{gateway_url}/v1/chat/completions"); let resp = client .post(&url) .header("Authorization", format!("Bearer {api_key}")) .header("Content-Type", "application/json") .json(&request) .timeout(std::time::Duration::from_secs(120)) .send() .await .map_err(|e| format!("LiteLLM-kall feilet: {e}"))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); return Err(format!("LiteLLM returnerte {status}: {body}")); } let chat_resp: ChatResponse = resp .json() .await .map_err(|e| format!("Kunne ikke parse LiteLLM-respons: {e}"))?; let content = chat_resp .choices .first() .and_then(|c| c.message.content.as_deref()) .ok_or("LiteLLM returnerte ingen content")?; let highlights: HighlightResponse = serde_json::from_str(content) .map_err(|e| format!("Kunne ikke parse LLM JSON: {e}. Rå output: {content}"))?; Ok((highlights, chat_resp.usage, chat_resp.model)) } /// Opprett klipp-noder og edges i PG for godkjente highlights. /// Returnerer antall klipp opprettet. async fn write_highlights( db: &sqlx::PgPool, media_node_id: Uuid, highlights: &[&Highlight], created_by: Uuid, collection_id: Option, ) -> Result { let mut clips_created = 0u32; for highlight in highlights { let clip_id = Uuid::now_v7(); let clip_metadata = serde_json::json!({ "ai_generated": true, "clip": { "start_ms": highlight.start_ms, "end_ms": highlight.end_ms, "score": highlight.score, "reason": highlight.reason, "suggested_caption": highlight.suggested_caption, "quote": highlight.quote, "hashtags": highlight.hashtags, "source_type": "highlight_reel", } }); // Opprett klipp-node sqlx::query( r#"INSERT INTO nodes (id, node_kind, title, content, visibility, metadata, created_by) VALUES ($1, 'content', $2, $3, 'hidden', $4, $5) ON CONFLICT (id) DO NOTHING"#, ) .bind(clip_id) .bind(&highlight.suggested_caption) .bind(&highlight.quote) .bind(&clip_metadata) .bind(created_by) .execute(db) .await .map_err(|e| format!("PG insert clip-node feilet: {e}"))?; // Edge: klipp → episode (derived_from) let edge_id = Uuid::now_v7(); sqlx::query( r#"INSERT INTO edges (id, source_id, target_id, edge_type, metadata, system, created_by) VALUES ($1, $2, $3, 'derived_from', '{"origin": "highlight_reel"}'::jsonb, true, $4) ON CONFLICT (source_id, target_id, edge_type) DO NOTHING"#, ) .bind(edge_id) .bind(clip_id) .bind(media_node_id) .bind(created_by) .execute(db) .await .map_err(|e| format!("PG insert derived_from-edge feilet: {e}"))?; // Edge: klipp → samling (belongs_to) hvis collection_id er gitt if let Some(coll_id) = collection_id { let edge_id = Uuid::now_v7(); sqlx::query( r#"INSERT INTO edges (id, source_id, target_id, edge_type, metadata, system, created_by) VALUES ($1, $2, $3, 'belongs_to', '{"origin": "highlight_reel"}'::jsonb, true, $4) ON CONFLICT (source_id, target_id, edge_type) DO NOTHING"#, ) .bind(edge_id) .bind(clip_id) .bind(coll_id) .bind(created_by) .execute(db) .await .map_err(|e| format!("PG insert belongs_to-edge feilet: {e}"))?; } clips_created += 1; tracing::info!( clip_id = %clip_id, start_ms = highlight.start_ms, end_ms = highlight.end_ms, score = highlight.score, reason = %highlight.reason, "Highlight-klipp opprettet" ); } tracing::info!( media_node_id = %media_node_id, clips_created = clips_created, "Highlights skrevet til database" ); Ok(clips_created) } /// Logg AI-ressursforbruk til resource_usage_log. async fn log_resource_usage( db: &sqlx::PgPool, node_id: Uuid, _created_by: Option, model_id: &str, tokens_in: i64, tokens_out: i64, requested_by: Uuid, ) { let collection_id: Option = sqlx::query_scalar( "SELECT e.target_id FROM edges e JOIN nodes n ON n.id = e.target_id WHERE e.source_id = $1 AND e.edge_type = 'belongs_to' AND n.node_kind = 'collection' LIMIT 1", ) .bind(node_id) .fetch_optional(db) .await .ok() .flatten(); if let Err(e) = sqlx::query( "INSERT INTO resource_usage_log (target_node_id, triggered_by, collection_id, resource_type, detail) VALUES ($1, $2, $3, $4, $5)", ) .bind(node_id) .bind(Some(requested_by)) .bind(collection_id) .bind("ai") .bind(serde_json::json!({ "model_level": "standard", "model_id": model_id, "tokens_in": tokens_in, "tokens_out": tokens_out, "job_type": "highlight_extract" })) .execute(db) .await { tracing::warn!(error = %e, "Kunne ikke logge AI-ressursforbruk"); } }