// synops-tts — Tekst-til-tale via ElevenLabs API. // // Input: tekst og stemme-ID. Output: CAS-hash for generert lydfil. // Med --write: oppretter media-node og has_media-edge i PG. // // Miljøvariabler: // ELEVENLABS_API_KEY — API-nøkkel (påkrevd) // ELEVENLABS_MODEL — Modell (default: eleven_multilingual_v2) // ELEVENLABS_DEFAULT_VOICE — Default stemme (default: Rachel) // CAS_ROOT — Rot for content-addressable store (default: /srv/synops/media/cas) // DATABASE_URL — PostgreSQL-tilkobling (påkrevd med --write) // // Erstatter: maskinrommet/src/tts.rs // Ref: docs/retninger/unix_filosofi.md, docs/proposals/ghost_host_tts.md use clap::Parser; use std::process; use uuid::Uuid; /// Tekst-til-tale via ElevenLabs — lagrer lyd i CAS. #[derive(Parser)] #[command(name = "synops-tts", about = "Generer tale fra tekst via ElevenLabs, lagre i CAS")] struct Cli { /// Teksten som skal leses opp #[arg(long)] text: String, /// ElevenLabs voice_id (overstyrer default) #[arg(long)] voice: Option, /// Språkkode (brukes ikke av API direkte, men logges) #[arg(long, default_value = "no")] language: String, /// Source-node som teksten tilhører (for has_media-edge) #[arg(long)] source_node_id: Option, /// Bruker-ID som utløste genereringen #[arg(long)] requested_by: Option, /// Skriv media-node og edges til database (uten: kun CAS + stdout) #[arg(long)] write: bool, } /// Maks tekst-lengde (ElevenLabs grense er 5000 tegn per kall). const MAX_TEXT_LENGTH: usize = 5000; #[tokio::main] async fn main() { synops_common::logging::init("synops_tts"); let cli = Cli::parse(); if cli.write && cli.requested_by.is_none() { eprintln!("Feil: --requested-by er påkrevd sammen med --write"); process::exit(1); } if let Err(e) = run(cli).await { eprintln!("Feil: {e}"); process::exit(1); } } async fn run(cli: Cli) -> Result<(), String> { // Valider tekst if cli.text.is_empty() { return Err("Tom tekst — ingenting å generere".into()); } if cli.text.len() > MAX_TEXT_LENGTH { return Err(format!( "Tekst for lang: {} tegn (maks {})", cli.text.len(), MAX_TEXT_LENGTH )); } // Bestem stemme let voice_id = cli.voice.unwrap_or_else(|| { std::env::var("ELEVENLABS_DEFAULT_VOICE") .unwrap_or_else(|_| "21m00Tcm4TlvDq8ikWAM".into()) }); tracing::info!( text_len = cli.text.len(), voice_id = %voice_id, language = %cli.language, "Starter TTS-generering" ); // 1. Kall ElevenLabs API let audio_bytes = call_elevenlabs(&cli.text, &voice_id).await?; tracing::info!(audio_size = audio_bytes.len(), "Mottok lyd fra ElevenLabs"); // 2. Lagre i CAS let cas_root = synops_common::cas::root(); let cas_hash = synops_common::cas::store(&cas_root, &audio_bytes).await?; tracing::info!(cas_hash = %cas_hash, "Lyd lagret i CAS"); // 3. Skriv til database hvis --write, og hent media_node_id let media_node_id = if cli.write { let requested_by = cli.requested_by.unwrap(); // Allerede validert Some(write_to_db(&cas_hash, &cli.text, &voice_id, &cli.language, audio_bytes.len(), cli.source_node_id, requested_by).await?) } else { None }; // 4. Bygg resultat-JSON let mut result = serde_json::json!({ "cas_hash": cas_hash, "size_bytes": audio_bytes.len(), "voice_id": voice_id, "language": cli.language, "characters": cli.text.len(), "mime": "audio/mpeg", }); if let Some(id) = media_node_id { result["media_node_id"] = serde_json::Value::String(id.to_string()); } // 5. Output JSON til stdout println!( "{}", serde_json::to_string_pretty(&result) .map_err(|e| format!("JSON-serialisering feilet: {e}"))? ); Ok(()) } /// Kall ElevenLabs text-to-speech API. Returnerer rå MP3-bytes. async fn call_elevenlabs(text: &str, voice_id: &str) -> Result, String> { let api_key = std::env::var("ELEVENLABS_API_KEY") .map_err(|_| "ELEVENLABS_API_KEY er ikke satt".to_string())?; let model_id = std::env::var("ELEVENLABS_MODEL") .unwrap_or_else(|_| "eleven_multilingual_v2".into()); let url = format!("https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"); let payload = serde_json::json!({ "text": text, "model_id": model_id, "voice_settings": { "stability": 0.5, "similarity_boost": 0.75, "style": 0.0, "use_speaker_boost": true } }); let client = reqwest::Client::new(); let resp = client .post(&url) .header("xi-api-key", &api_key) .header("Accept", "audio/mpeg") .json(&payload) .timeout(std::time::Duration::from_secs(30)) .send() .await .map_err(|e| format!("ElevenLabs HTTP-feil: {e}"))?; if !resp.status().is_success() { let status = resp.status(); let body = resp.text().await.unwrap_or_default(); return Err(format!("ElevenLabs returnerte {status}: {body}")); } resp.bytes() .await .map(|b| b.to_vec()) .map_err(|e| format!("Kunne ikke lese ElevenLabs-respons: {e}")) } /// Opprett media-node og has_media-edge i PostgreSQL. /// Returnerer media_node_id. PG NOTIFY-triggere sender sanntidsoppdateringer. async fn write_to_db( cas_hash: &str, text: &str, voice_id: &str, language: &str, size_bytes: usize, source_node_id: Option, requested_by: Uuid, ) -> Result { let db = synops_common::db::connect().await?; let media_node_id = Uuid::now_v7(); let title = format!("TTS: {}", truncate(text, 60)); let metadata = serde_json::json!({ "cas_hash": cas_hash, "mime": "audio/mpeg", "size_bytes": size_bytes, "tts": { "provider": "elevenlabs", "voice_id": voice_id, "language": language, "characters": text.len(), } }); sqlx::query( "INSERT INTO nodes (id, node_kind, title, content, visibility, metadata, created_by) VALUES ($1, 'content', $2, '', 'hidden'::visibility, $3, $4)", ) .bind(media_node_id) .bind(&title) .bind(&metadata) .bind(requested_by) .execute(&db) .await .map_err(|e| format!("PG insert media-node feilet: {e}"))?; tracing::info!(media_node_id = %media_node_id, "Media-node opprettet"); // has_media-edge fra source til media-node if let Some(source_id) = source_node_id { let edge_id = Uuid::now_v7(); let edge_meta = serde_json::json!({ "media_type": "tts_audio", "generated_at": chrono::Utc::now().to_rfc3339() }); sqlx::query( "INSERT INTO edges (id, source_id, target_id, edge_type, metadata, system, created_by) VALUES ($1, $2, $3, 'has_media', $4, false, $5)", ) .bind(edge_id) .bind(source_id) .bind(media_node_id) .bind(&edge_meta) .bind(requested_by) .execute(&db) .await .map_err(|e| format!("PG insert has_media-edge feilet: {e}"))?; tracing::info!(source_id = %source_id, "has_media-edge opprettet"); } // Logg ressursforbruk let collection_id: Option = sqlx::query_scalar( "SELECT e.target_id FROM edges e JOIN nodes n ON n.id = e.target_id WHERE e.source_id = $1 AND e.edge_type = 'belongs_to' AND n.node_kind = 'collection' LIMIT 1", ) .bind(media_node_id) .fetch_optional(&db) .await .ok() .flatten(); if let Err(e) = sqlx::query( "INSERT INTO resource_usage_log (target_node_id, triggered_by, collection_id, resource_type, detail) VALUES ($1, $2, $3, $4, $5)", ) .bind(media_node_id) .bind(Some(requested_by)) .bind(collection_id) .bind("tts") .bind(serde_json::json!({ "provider": "elevenlabs", "characters": text.len(), "voice_id": voice_id, })) .execute(&db) .await { tracing::warn!(error = %e, "Kunne ikke logge TTS-ressursforbruk"); } Ok(media_node_id) } /// Forkorter en streng til maks `max_len` tegn med "..." suffix. fn truncate(s: &str, max_len: usize) -> String { if s.len() <= max_len { s.to_string() } else { let end = s .char_indices() .nth(max_len - 3) .map(|(i, _)| i) .unwrap_or(s.len()); format!("{}...", &s[..end]) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_truncate_short() { assert_eq!(truncate("hei", 10), "hei"); } #[test] fn test_truncate_long() { let long = "a".repeat(100); let result = truncate(&long, 20); assert!(result.len() <= 20); assert!(result.ends_with("...")); } #[test] fn test_truncate_unicode() { let text = "Ødeleggende ødemark ødelagt"; let result = truncate(text, 15); assert!(result.ends_with("...")); } }