// synops-transcribe — Whisper-transkribering via faster-whisper API. // // Input: CAS-hash til lydfil, modellnavn, valgfri initial prompt. // Output: JSON med segmenter til stdout. // Med --write: skriver segmenter til PG og oppdaterer node metadata. // // Miljøvariabler: // DATABASE_URL — PostgreSQL-tilkobling (påkrevd med --write) // CAS_ROOT — Rot for content-addressable store (default: /srv/synops/media/cas) // WHISPER_URL — faster-whisper API URL (default: http://localhost:8000) // // Ref: docs/retninger/unix_filosofi.md, docs/concepts/podcastfabrikken.md use chrono::Utc; use clap::Parser; use serde::Serialize; use std::process; use uuid::Uuid; /// Transkriber lydfil fra CAS via faster-whisper. #[derive(Parser)] #[command(name = "synops-transcribe", about = "Whisper-transkribering av lydfil fra CAS")] struct Cli { /// SHA-256 CAS-hash til lydfilen #[arg(long)] cas_hash: String, /// Whisper-modell (tiny, base, small, medium, large-v3) #[arg(long, default_value = "medium")] model: String, /// Initial prompt for bedre egennavn-gjenkjenning #[arg(long)] initial_prompt: Option, /// Språkkode (ISO 639-1) #[arg(long, default_value = "no")] language: String, /// MIME-type for lydfilen (brukes som filnavn-hint til Whisper) #[arg(long, default_value = "audio/mpeg")] mime: String, /// Node-ID å oppdatere i databasen (påkrevd med --write) #[arg(long)] node_id: Option, /// Bruker-ID som utløste transkripsjonen (for ressurslogging) #[arg(long)] requested_by: Option, /// Skriv resultater til database (uten dette flagget: kun stdout) #[arg(long)] write: bool, } #[derive(Debug, Serialize)] struct Segment { seq: i32, start_ms: i32, end_ms: i32, content: String, } #[derive(Serialize)] struct TranscribeResult { segments: Vec, transcript: String, duration_ms: i32, segment_count: usize, model: String, language: String, transcribed_at: String, } #[tokio::main] async fn main() { synops_common::logging::init("synops_transcribe"); let cli = Cli::parse(); if cli.write && cli.node_id.is_none() { eprintln!("Feil: --node-id er påkrevd sammen med --write"); process::exit(1); } if let Err(e) = run(cli).await { eprintln!("Feil: {e}"); process::exit(1); } } async fn run(cli: Cli) -> Result<(), String> { let cas_root = synops_common::cas::root(); let whisper_url = std::env::var("WHISPER_URL").unwrap_or_else(|_| "http://localhost:8000".into()); // 1. Les lydfil fra CAS let file_path = synops_common::cas::path(&cas_root, &cli.cas_hash); let file_data = tokio::fs::read(&file_path) .await .map_err(|e| format!("Kunne ikke lese CAS-fil {}: {e}", cli.cas_hash))?; tracing::info!( cas_hash = %cli.cas_hash, size = file_data.len(), model = %cli.model, "Sender lydfil til Whisper" ); // 2. Send til faster-whisper API (SRT-format) let file_ext = synops_common::cas::mime_to_extension(&cli.mime); let file_name = format!("audio.{file_ext}"); let file_part = reqwest::multipart::Part::bytes(file_data) .file_name(file_name) .mime_str(&cli.mime) .map_err(|e| format!("Kunne ikke bygge multipart: {e}"))?; let mut form = reqwest::multipart::Form::new() .part("file", file_part) .text("model", cli.model.clone()) .text("language", cli.language.clone()) .text("response_format", "srt"); if let Some(ref prompt) = cli.initial_prompt { form = form.text("initial_prompt", prompt.clone()); } let url = format!("{whisper_url}/v1/audio/transcriptions"); let response = reqwest::Client::new() .post(&url) .multipart(form) .timeout(std::time::Duration::from_secs(600)) .send() .await .map_err(|e| format!("HTTP-feil mot Whisper: {e}"))?; if !response.status().is_success() { let status = response.status(); let body = response.text().await.unwrap_or_default(); return Err(format!("Whisper returnerte {status}: {body}")); } let srt_text = response .text() .await .map_err(|e| format!("Kunne ikke lese Whisper-respons: {e}"))?; // 3. Parse SRT til segmenter let segments = parse_srt(&srt_text)?; tracing::info!(segments = segments.len(), "SRT parset"); if segments.is_empty() { return Err("Whisper returnerte tom SRT — ingen segmenter".into()); } // 4. Bygg resultat let transcript: String = segments.iter().map(|s| s.content.trim()).collect::>().join(" "); let duration_ms = segments.last().map(|s| s.end_ms).unwrap_or(0); let transcribed_at = Utc::now(); let result = TranscribeResult { segment_count: segments.len(), transcript, duration_ms, model: cli.model.clone(), language: cli.language.clone(), transcribed_at: transcribed_at.to_rfc3339(), segments, }; // 5. Skriv JSON til stdout println!( "{}", serde_json::to_string_pretty(&result).map_err(|e| format!("JSON-serialisering feilet: {e}"))? ); // 6. Skriv til database hvis --write if cli.write { let node_id = cli.node_id.unwrap(); // Allerede validert let db = synops_common::db::connect().await?; // Slett gamle segmenter for denne noden (idempotent) sqlx::query("DELETE FROM transcription_segments WHERE node_id = $1") .bind(node_id) .execute(&db) .await .map_err(|e| format!("Kunne ikke slette gamle segmenter: {e}"))?; // Sett inn nye segmenter insert_segments(&db, node_id, transcribed_at, &result.segments).await?; // Oppdater node metadata update_node(&db, node_id, &result.transcript, transcribed_at, result.segment_count, duration_ms).await?; // Logg ressursforbruk log_resource_usage(&db, node_id, cli.requested_by, &cli.model, duration_ms, &cli.language).await; tracing::info!(node_id = %node_id, "Database oppdatert"); } Ok(()) } // --- SRT-parsing --- fn parse_srt(srt: &str) -> Result, String> { let mut segments = Vec::new(); let mut lines = srt.lines().peekable(); while lines.peek().is_some() { // Hopp over tomme linjer while lines.peek().map_or(false, |l| l.trim().is_empty()) { lines.next(); } // Sekvensnummer let seq_line = match lines.next() { Some(l) if !l.trim().is_empty() => l.trim().to_string(), _ => break, }; let seq: i32 = seq_line .parse() .map_err(|_| format!("Ugyldig SRT-sekvensnummer: '{seq_line}'"))?; // Tidslinje: 00:00:00,000 --> 00:00:05,230 let time_line = lines .next() .ok_or_else(|| format!("Mangler tidslinje etter sekvens {seq}"))?; let (start_ms, end_ms) = parse_srt_time_line(time_line) .map_err(|e| format!("Ugyldig tidslinje for sekvens {seq}: {e}"))?; // Tekstlinjer (frem til tom linje eller slutt) let mut text_parts = Vec::new(); while lines.peek().map_or(false, |l| !l.trim().is_empty()) { text_parts.push(lines.next().unwrap().to_string()); } let content = text_parts.join("\n"); if !content.is_empty() { segments.push(Segment { seq, start_ms, end_ms, content, }); } } Ok(segments) } fn parse_srt_time_line(line: &str) -> Result<(i32, i32), String> { let parts: Vec<&str> = line.split("-->").collect(); if parts.len() != 2 { return Err(format!("Forventet 'start --> end', fikk: '{line}'")); } let start = parse_srt_timestamp(parts[0].trim())?; let end = parse_srt_timestamp(parts[1].trim())?; Ok((start, end)) } fn parse_srt_timestamp(ts: &str) -> Result { let ts = ts.replace(',', "."); let parts: Vec<&str> = ts.split(':').collect(); if parts.len() != 3 { return Err(format!("Ugyldig tidsstempel: '{ts}'")); } let hours: f64 = parts[0].parse().map_err(|_| format!("Ugyldig timer: '{}'", parts[0]))?; let minutes: f64 = parts[1].parse().map_err(|_| format!("Ugyldig minutter: '{}'", parts[1]))?; let seconds: f64 = parts[2].parse().map_err(|_| format!("Ugyldig sekunder: '{}'", parts[2]))?; Ok(((hours * 3_600.0 + minutes * 60.0 + seconds) * 1000.0) as i32) } // --- Database-operasjoner (kun med --write) --- async fn insert_segments( db: &sqlx::PgPool, node_id: Uuid, transcribed_at: chrono::DateTime, segments: &[Segment], ) -> Result<(), String> { let mut tx = db.begin().await.map_err(|e| format!("Transaksjon feilet: {e}"))?; for seg in segments { sqlx::query( "INSERT INTO transcription_segments (node_id, transcribed_at, seq, start_ms, end_ms, content) VALUES ($1, $2, $3, $4, $5, $6)", ) .bind(node_id) .bind(transcribed_at) .bind(seg.seq) .bind(seg.start_ms) .bind(seg.end_ms) .bind(&seg.content) .execute(&mut *tx) .await .map_err(|e| format!("Kunne ikke sette inn segment {}: {e}", seg.seq))?; } tx.commit().await.map_err(|e| format!("Commit feilet: {e}"))?; tracing::info!(node_id = %node_id, segments = segments.len(), "Segmenter skrevet til DB"); Ok(()) } async fn update_node( db: &sqlx::PgPool, node_id: Uuid, transcript: &str, transcribed_at: chrono::DateTime, segment_count: usize, duration_ms: i32, ) -> Result<(), String> { // Hent eksisterende metadata for å merge let existing_metadata: Option = sqlx::query_scalar("SELECT metadata FROM nodes WHERE id = $1") .bind(node_id) .fetch_optional(db) .await .map_err(|e| format!("Kunne ikke hente node: {e}"))?; let mut metadata = existing_metadata.ok_or_else(|| format!("Node {node_id} finnes ikke"))?; metadata["transcription"] = serde_json::json!({ "duration_ms": duration_ms, "segment_count": segment_count, "transcribed_at": transcribed_at.to_rfc3339(), }); sqlx::query("UPDATE nodes SET content = $2, metadata = $3 WHERE id = $1") .bind(node_id) .bind(transcript) .bind(&metadata) .execute(db) .await .map_err(|e| format!("Kunne ikke oppdatere node: {e}"))?; tracing::info!(node_id = %node_id, transcript_len = transcript.len(), "Node oppdatert med transkripsjon"); Ok(()) } async fn log_resource_usage( db: &sqlx::PgPool, node_id: Uuid, requested_by: Option, model: &str, duration_ms: i32, language: &str, ) { // Finn collection via belongs_to-edge let collection_id: Option = sqlx::query_scalar( "SELECT e.target_id FROM edges e JOIN nodes n ON n.id = e.target_id WHERE e.source_id = $1 AND e.edge_type = 'belongs_to' AND n.node_kind = 'collection' LIMIT 1", ) .bind(node_id) .fetch_optional(db) .await .ok() .flatten(); let detail = serde_json::json!({ "model": model, "duration_seconds": (duration_ms as f64) / 1000.0, "language": language, "mode": "batch", }); if let Err(e) = sqlx::query( "INSERT INTO resource_usage_log (target_node_id, triggered_by, collection_id, resource_type, detail) VALUES ($1, $2, $3, $4, $5)", ) .bind(node_id) .bind(requested_by) .bind(collection_id) .bind("whisper") .bind(&detail) .execute(db) .await { tracing::warn!(error = %e, "Kunne ikke logge ressursforbruk"); } } #[cfg(test)] mod tests { use super::*; #[test] fn test_parse_srt_timestamp() { assert_eq!(parse_srt_timestamp("00:00:00,000").unwrap(), 0); assert_eq!(parse_srt_timestamp("00:01:23,456").unwrap(), 83456); assert_eq!(parse_srt_timestamp("01:00:00,000").unwrap(), 3_600_000); assert_eq!(parse_srt_timestamp("00:00:05,230").unwrap(), 5230); } #[test] fn test_parse_srt() { let srt = "\ 1 00:00:00,000 --> 00:00:05,230 Hei og velkommen til Sidelinja. 2 00:00:05,230 --> 00:00:10,500 I dag snakker vi om fotball. "; let segments = parse_srt(srt).unwrap(); assert_eq!(segments.len(), 2); assert_eq!(segments[0].seq, 1); assert_eq!(segments[0].start_ms, 0); assert_eq!(segments[0].end_ms, 5230); assert_eq!(segments[0].content, "Hei og velkommen til Sidelinja."); assert_eq!(segments[1].seq, 2); assert_eq!(segments[1].start_ms, 5230); assert_eq!(segments[1].end_ms, 10500); } #[test] fn test_parse_srt_time_line() { let (start, end) = parse_srt_time_line("00:01:23,456 --> 00:01:30,789").unwrap(); assert_eq!(start, 83456); assert_eq!(end, 90789); } #[test] fn test_parse_srt_empty() { let segments = parse_srt("").unwrap(); assert!(segments.is_empty()); } #[test] fn test_cas_path() { let p = synops_common::cas::path("/srv/synops/media/cas", "b94d27b9934d3e08"); assert_eq!( p, std::path::PathBuf::from("/srv/synops/media/cas/b9/4d/b94d27b9934d3e08") ); } }