Implementer synops-tts CLI-verktøy (oppgave 21.5)

Nytt CLI-verktøy som erstatter maskinrommet/src/tts.rs.
Kaller ElevenLabs API, lagrer generert lyd i CAS, og returnerer
CAS-hash som JSON. Med --write opprettes media-node og has_media-edge
i PostgreSQL, inkludert ressurslogging.

Følger samme mønster som synops-transcribe: clap-args, tracing til
stderr, JSON til stdout, atomisk CAS-lagring via temp+rename.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
vegard 2026-03-18 09:29:12 +00:00
parent 8034181e63
commit 1dcecc2a04
5 changed files with 3306 additions and 2 deletions

View file

@ -245,8 +245,7 @@ kaller dem direkte. Samme verktøy, to brukere.
- [x] 21.2 `synops-audio`: FFmpeg-prosessering. Input: `--cas-hash <hash> --edl <json>`. Output: ny CAS-hash. Erstatter `audio.rs`. Inkluder parametervalidering (fase 17.2–17.3).
- [x] 21.3 `synops-render`: Tera HTML-rendering. Input: `--node-id <uuid> --theme <tema>`. Output: CAS-hash for rendret HTML. Erstatter `publishing.rs`.
- [x] 21.4 `synops-rss`: RSS/Atom-generering. Input: `--collection-id <uuid>`. Output: XML til stdout. Erstatter `rss.rs`.
- [~] 21.5 `synops-tts`: Tekst-til-tale. Input: `--text <tekst> --voice <stemme>`. Output: CAS-hash for lydfil. Erstatter `tts.rs`.
> Påbegynt: 2026-03-18T09:25
- [x] 21.5 `synops-tts`: Tekst-til-tale. Input: `--text <tekst> --voice <stemme>`. Output: CAS-hash for lydfil. Erstatter `tts.rs`.
- [ ] 21.6 `synops-summarize`: AI-oppsummering. Input: `--communication-id <uuid>`. Output: sammendrag som tekst. Erstatter `summarize.rs`.
- [ ] 21.7 `synops-suggest-edges`: AI-foreslåtte edges. Input: `--node-id <uuid>`. Output: JSON med forslag (target, edge_type, confidence). Erstatter `ai_edges.rs`.
- [ ] 21.8 `synops-respond`: Claude chat-svar. Input: `--communication-id <uuid> --message-id <uuid>`. Output: svartekst. Erstatter `agent.rs` sin prosessering (auth/ratelimit forblir i maskinrommet).

View file

@ -11,6 +11,7 @@ eller maskinrommet-API. Ligger i PATH via symlink eller direkte kall.
| `synops-audio` | FFmpeg lydprosessering med EDL (cut, normalize, EQ, m.m.) | Ferdig |
| `synops-render` | Tera HTML-rendering til CAS (artikler, forsider) | Ferdig |
| `synops-rss` | RSS/Atom-feed generering for samlinger | Ferdig |
| `synops-tts` | Tekst-til-tale via ElevenLabs, lagrer lyd i CAS | Ferdig |
## Konvensjoner
- Navnekonvensjon: `synops-<verb>` (f.eks. `synops-context`)

2914
tools/synops-tts/Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,22 @@
[package]
name = "synops-tts"
version = "0.1.0"
edition = "2024"
[[bin]]
name = "synops-tts"
path = "src/main.rs"
[dependencies]
clap = { version = "4", features = ["derive"] }
tokio = { version = "1", features = ["full"] }
sqlx = { version = "0.8", features = ["runtime-tokio", "tls-rustls", "postgres", "uuid", "chrono", "json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
uuid = { version = "1", features = ["v7", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
sha2 = "0.10"
hex = "0.4"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View file

@ -0,0 +1,368 @@
// synops-tts — Tekst-til-tale via ElevenLabs API.
//
// Input: tekst og stemme-ID. Output: CAS-hash for generert lydfil.
// Med --write: oppretter media-node og has_media-edge i PG.
//
// Miljøvariabler:
// ELEVENLABS_API_KEY — API-nøkkel (påkrevd)
// ELEVENLABS_MODEL — Modell (default: eleven_multilingual_v2)
// ELEVENLABS_DEFAULT_VOICE — Default stemme-ID (default: 21m00Tcm4TlvDq8ikWAM, «Rachel»)
// CAS_ROOT — Rot for content-addressable store (default: /srv/synops/media/cas)
// DATABASE_URL — PostgreSQL-tilkobling (påkrevd med --write)
//
// Erstatter: maskinrommet/src/tts.rs
// Ref: docs/retninger/unix_filosofi.md, docs/proposals/ghost_host_tts.md
use clap::Parser;
use sha2::{Digest, Sha256};
use std::path::PathBuf;
use std::process;
use uuid::Uuid;
// Command-line arguments for synops-tts, parsed with clap's derive API.
// NOTE: the `///` doc comments below are picked up by clap as the per-argument
// `--help` text, so they are user-facing runtime strings and are intentionally
// left untranslated. English explanations are given in `//` comments instead.
/// Tekst-til-tale via ElevenLabs — lagrer lyd i CAS.
#[derive(Parser)]
#[command(name = "synops-tts", about = "Generer tale fra tekst via ElevenLabs, lagre i CAS")]
struct Cli {
// The text to synthesize (required).
/// Teksten som skal leses opp
#[arg(long)]
text: String,
// ElevenLabs voice_id; when omitted, `run` falls back to a default voice.
/// ElevenLabs voice_id (overstyrer default)
#[arg(long)]
voice: Option<String>,
// Language code; defaults to "no". It is logged and echoed in the output,
// not sent to the API.
/// Språkkode (brukes ikke av API direkte, men logges)
#[arg(long, default_value = "no")]
language: String,
// Node the text belongs to; used as the source of the has_media edge.
/// Source-node som teksten tilhører (for has_media-edge)
#[arg(long)]
source_node_id: Option<Uuid>,
// User who triggered the generation; `main` requires it together with --write.
/// Bruker-ID som utløste genereringen
#[arg(long)]
requested_by: Option<Uuid>,
// When set, a media node and edges are written to the database; otherwise
// only CAS storage and stdout output happen.
/// Skriv media-node og edges til database (uten: kun CAS + stdout)
#[arg(long)]
write: bool,
}
/// Maximum text length accepted per invocation (the ElevenLabs limit is
/// 5000 characters per call).
const MAX_TEXT_LENGTH: usize = 5000;
/// Entry point: sets up logging, parses the command line, checks the
/// `--write` / `--requested-by` combination and delegates to [`run`].
///
/// Exits with status 1 and a message on stderr when validation or `run` fails.
#[tokio::main]
async fn main() {
    // Honour the standard env-filter variable if present; otherwise default to
    // info level for this crate.
    let env_filter = match tracing_subscriber::EnvFilter::try_from_default_env() {
        Ok(from_env) => from_env,
        Err(_) => "synops_tts=info".parse().unwrap(),
    };

    // Log output is written to stderr.
    tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_target(false)
        .with_writer(std::io::stderr)
        .init();

    let args = Cli::parse();

    // Writing to the database needs to know who triggered the generation.
    let missing_requester = args.write && args.requested_by.is_none();
    if missing_requester {
        eprintln!("Feil: --requested-by er påkrevd sammen med --write");
        process::exit(1);
    }

    match run(args).await {
        Ok(()) => {}
        Err(err) => {
            eprintln!("Feil: {err}");
            process::exit(1);
        }
    }
}
/// Runs the TTS pipeline: validates the input, calls ElevenLabs, stores the
/// audio in CAS, optionally writes to the database, and prints the result as
/// pretty-printed JSON on stdout.
///
/// # Errors
///
/// Returns a human-readable message when the text is empty or longer than
/// [`MAX_TEXT_LENGTH`] characters, when `--write` is given without
/// `--requested-by`, or when any of the API, CAS, database or serialization
/// steps fail.
async fn run(cli: Cli) -> Result<(), String> {
    // Validate the text.
    if cli.text.is_empty() {
        return Err("Tom tekst — ingenting å generere".into());
    }
    // Count characters, not UTF-8 bytes: the limit is expressed in characters,
    // and `str::len()` would count every æ/ø/å twice.
    let char_count = cli.text.chars().count();
    if char_count > MAX_TEXT_LENGTH {
        return Err(format!(
            "Tekst for lang: {} tegn (maks {})",
            char_count, MAX_TEXT_LENGTH
        ));
    }

    // Resolve the requesting user before doing any work, so a missing
    // --requested-by is a regular error instead of a panic further down.
    let requested_by = match (cli.write, cli.requested_by) {
        (true, None) => {
            return Err("--requested-by er påkrevd sammen med --write".into());
        }
        (true, Some(user_id)) => Some(user_id),
        (false, _) => None,
    };

    // Pick the voice: CLI argument, then environment, then the built-in default.
    let voice_id = cli.voice.unwrap_or_else(|| {
        std::env::var("ELEVENLABS_DEFAULT_VOICE")
            .unwrap_or_else(|_| "21m00Tcm4TlvDq8ikWAM".into())
    });

    tracing::info!(
        text_len = char_count,
        voice_id = %voice_id,
        language = %cli.language,
        "Starter TTS-generering"
    );

    // 1. Call the ElevenLabs API.
    let audio_bytes = call_elevenlabs(&cli.text, &voice_id).await?;
    tracing::info!(audio_size = audio_bytes.len(), "Mottok lyd fra ElevenLabs");

    // 2. Store the audio in CAS.
    let cas_root = std::env::var("CAS_ROOT").unwrap_or_else(|_| "/srv/synops/media/cas".into());
    let cas_hash = store_in_cas(&cas_root, &audio_bytes).await?;
    tracing::info!(cas_hash = %cas_hash, "Lyd lagret i CAS");

    // 3. Build the result JSON.
    let result = serde_json::json!({
        "cas_hash": cas_hash,
        "size_bytes": audio_bytes.len(),
        "voice_id": voice_id,
        "language": cli.language,
        "characters": char_count,
        "mime": "audio/mpeg",
    });

    // 4. Write to the database when --write was given.
    if let Some(requested_by) = requested_by {
        write_to_db(
            &cas_hash,
            &cli.text,
            &voice_id,
            &cli.language,
            audio_bytes.len(),
            cli.source_node_id,
            requested_by,
        )
        .await?;
    }

    // 5. Emit the JSON on stdout.
    println!(
        "{}",
        serde_json::to_string_pretty(&result)
            .map_err(|e| format!("JSON-serialisering feilet: {e}"))?
    );
    Ok(())
}
/// Sends `text` to the ElevenLabs text-to-speech endpoint for `voice_id` and
/// returns the raw response body (requested as `audio/mpeg`).
///
/// Reads `ELEVENLABS_API_KEY` (required) and `ELEVENLABS_MODEL` (optional,
/// falls back to `eleven_multilingual_v2`) from the environment.
///
/// # Errors
///
/// Returns a message when the API key is missing, the HTTP request fails, the
/// API answers with a non-success status, or the body cannot be read.
async fn call_elevenlabs(text: &str, voice_id: &str) -> Result<Vec<u8>, String> {
    let Ok(api_key) = std::env::var("ELEVENLABS_API_KEY") else {
        return Err("ELEVENLABS_API_KEY er ikke satt".to_string());
    };
    let model_id = match std::env::var("ELEVENLABS_MODEL") {
        Ok(configured) => configured,
        Err(_) => "eleven_multilingual_v2".to_string(),
    };

    let endpoint = format!("https://api.elevenlabs.io/v1/text-to-speech/{voice_id}");

    let voice_settings = serde_json::json!({
        "stability": 0.5,
        "similarity_boost": 0.75,
        "style": 0.0,
        "use_speaker_boost": true
    });
    let request_body = serde_json::json!({
        "text": text,
        "model_id": model_id,
        "voice_settings": voice_settings
    });

    let request = reqwest::Client::new()
        .post(&endpoint)
        .header("xi-api-key", &api_key)
        .header("Accept", "audio/mpeg")
        .json(&request_body)
        .timeout(std::time::Duration::from_secs(30));

    let response = match request.send().await {
        Ok(response) => response,
        Err(err) => return Err(format!("ElevenLabs HTTP-feil: {err}")),
    };

    let status = response.status();
    if status.is_success() {
        return match response.bytes().await {
            Ok(audio) => Ok(audio.to_vec()),
            Err(err) => Err(format!("Kunne ikke lese ElevenLabs-respons: {err}")),
        };
    }

    // Include the response body in the error; an unreadable body becomes "".
    let detail = response.text().await.unwrap_or_default();
    Err(format!("ElevenLabs returnerte {status}: {detail}"))
}
/// Stores `data` in the content-addressable store under `cas_root` and returns
/// its SHA-256 hash as lowercase hex.
///
/// The file lands at `<cas_root>/<hash[0..2]>/<hash[2..4]>/<hash>`. If that
/// path already exists the data is not written again. Otherwise the bytes are
/// written to a temp file under `<cas_root>/tmp` and renamed into place.
///
/// # Errors
///
/// Returns a message when a directory cannot be created or the temp file
/// cannot be written or moved. The temp file is removed on failure.
async fn store_in_cas(cas_root: &str, data: &[u8]) -> Result<String, String> {
    let hash = hex::encode(Sha256::digest(data));

    let root = PathBuf::from(cas_root);
    let dir = root.join(&hash[..2]).join(&hash[2..4]);
    let final_path = dir.join(&hash);

    // Already stored? Return right away.
    if final_path.exists() {
        tracing::info!("CAS-deduplisering: filen finnes allerede");
        return Ok(hash);
    }

    tokio::fs::create_dir_all(&dir)
        .await
        .map_err(|e| format!("Kunne ikke opprette CAS-dir {}: {e}", dir.display()))?;

    let tmp_dir = root.join("tmp");
    tokio::fs::create_dir_all(&tmp_dir)
        .await
        .map_err(|e| format!("Kunne ikke opprette CAS tmp-dir: {e}"))?;

    // Include the process id in the temp name so two concurrent invocations
    // that produce the same hash do not write to (or rename away) each other's
    // temp file.
    let tmp_path = tmp_dir.join(format!("{hash}.{}.tmp", std::process::id()));

    if let Err(e) = tokio::fs::write(&tmp_path, data).await {
        // Best-effort cleanup of a partially written temp file.
        let _ = tokio::fs::remove_file(&tmp_path).await;
        return Err(format!("Kunne ikke skrive CAS temp-fil: {e}"));
    }

    if let Err(e) = tokio::fs::rename(&tmp_path, &final_path).await {
        // Do not leave the temp file behind when the move fails.
        let _ = tokio::fs::remove_file(&tmp_path).await;
        return Err(format!("Kunne ikke flytte CAS-fil: {e}"));
    }

    Ok(hash)
}
/// Creates the media node and, when `source_node_id` is given, a `has_media`
/// edge from the source node to it in PostgreSQL, then logs resource usage.
///
/// The node and edge inserts run in one transaction, so a failed edge insert
/// does not leave an orphaned media node behind. Resource-usage logging is
/// best-effort: a failure there is logged as a warning and does not fail the
/// call.
///
/// # Errors
///
/// Returns a message when `DATABASE_URL` is unset, the connection fails, the
/// transaction cannot be started or committed, or either insert fails.
async fn write_to_db(
    cas_hash: &str,
    text: &str,
    voice_id: &str,
    language: &str,
    size_bytes: usize,
    source_node_id: Option<Uuid>,
    requested_by: Uuid,
) -> Result<(), String> {
    let db_url = std::env::var("DATABASE_URL")
        .map_err(|_| "DATABASE_URL må settes med --write".to_string())?;
    let db = sqlx::postgres::PgPoolOptions::new()
        .max_connections(2)
        .connect(&db_url)
        .await
        .map_err(|e| format!("Kunne ikke koble til database: {e}"))?;

    let media_node_id = Uuid::now_v7();
    let title = format!("TTS: {}", truncate(text, 60));
    // Characters, not UTF-8 bytes: `str::len()` would over-count æ/ø/å.
    let characters = text.chars().count();
    let metadata = serde_json::json!({
        "cas_hash": cas_hash,
        "mime": "audio/mpeg",
        "size_bytes": size_bytes,
        "tts": {
            "provider": "elevenlabs",
            "voice_id": voice_id,
            "language": language,
            "characters": characters,
        }
    });

    // Node + edge are committed together; an early return via `?` drops the
    // transaction, which rolls it back.
    let mut tx = db
        .begin()
        .await
        .map_err(|e| format!("Kunne ikke starte database-transaksjon: {e}"))?;

    sqlx::query(
        "INSERT INTO nodes (id, node_kind, title, content, visibility, metadata, created_by)
         VALUES ($1, 'content', $2, '', 'hidden'::visibility, $3, $4)",
    )
    .bind(media_node_id)
    .bind(&title)
    .bind(&metadata)
    .bind(requested_by)
    .execute(&mut *tx)
    .await
    .map_err(|e| format!("PG insert media-node feilet: {e}"))?;

    // has_media edge from the source node to the media node.
    if let Some(source_id) = source_node_id {
        let edge_id = Uuid::now_v7();
        let edge_meta = serde_json::json!({
            "media_type": "tts_audio",
            "generated_at": chrono::Utc::now().to_rfc3339()
        });
        sqlx::query(
            "INSERT INTO edges (id, source_id, target_id, edge_type, metadata, system, created_by)
             VALUES ($1, $2, $3, 'has_media', $4, false, $5)",
        )
        .bind(edge_id)
        .bind(source_id)
        .bind(media_node_id)
        .bind(&edge_meta)
        .bind(requested_by)
        .execute(&mut *tx)
        .await
        .map_err(|e| format!("PG insert has_media-edge feilet: {e}"))?;
    }

    tx.commit()
        .await
        .map_err(|e| format!("Kunne ikke fullføre database-transaksjon: {e}"))?;

    // Only report success once the rows are actually committed.
    tracing::info!(media_node_id = %media_node_id, "Media-node opprettet");
    if let Some(source_id) = source_node_id {
        tracing::info!(source_id = %source_id, "has_media-edge opprettet");
    }

    // Log resource usage (best-effort; lookup errors are treated as "no collection").
    // NOTE(review): this looks up `belongs_to` edges starting at the media node
    // that was just created above, and this function never inserts such an edge,
    // so the result is presumably always NULL. Possibly the lookup was meant to
    // start from `source_node_id` — confirm against the schema/triggers.
    let collection_id: Option<Uuid> = sqlx::query_scalar(
        "SELECT e.target_id FROM edges e
         JOIN nodes n ON n.id = e.target_id
         WHERE e.source_id = $1 AND e.edge_type = 'belongs_to' AND n.node_kind = 'collection'
         LIMIT 1",
    )
    .bind(media_node_id)
    .fetch_optional(&db)
    .await
    .ok()
    .flatten();

    if let Err(e) = sqlx::query(
        "INSERT INTO resource_usage_log (target_node_id, triggered_by, collection_id, resource_type, detail)
         VALUES ($1, $2, $3, $4, $5)",
    )
    .bind(media_node_id)
    .bind(Some(requested_by))
    .bind(collection_id)
    .bind("tts")
    .bind(serde_json::json!({
        "provider": "elevenlabs",
        "characters": characters,
        "voice_id": voice_id,
    }))
    .execute(&db)
    .await
    {
        tracing::warn!(error = %e, "Kunne ikke logge TTS-ressursforbruk");
    }

    Ok(())
}
/// Shortens `s` to at most `max_len` characters, ending with a `"..."` suffix
/// when anything was cut off.
///
/// Lengths are measured in characters (Unicode scalar values), not bytes, so
/// a string with multi-byte characters that already fits is returned
/// unchanged. When `max_len` is smaller than the suffix itself (fewer than 3),
/// the first `max_len` characters are returned without a suffix.
fn truncate(s: &str, max_len: usize) -> String {
    const ELLIPSIS_LEN: usize = 3;

    // If there is no character at index `max_len`, the string has at most
    // `max_len` characters and fits as-is.
    if s.char_indices().nth(max_len).is_none() {
        return s.to_string();
    }

    // Not enough room for the suffix: hard-cut instead of underflowing.
    if max_len < ELLIPSIS_LEN {
        return s.chars().take(max_len).collect();
    }

    // Byte offset of the first character that has to go, leaving room for "...".
    let keep = max_len - ELLIPSIS_LEN;
    let end = s.char_indices().nth(keep).map_or(s.len(), |(i, _)| i);
    format!("{}...", &s[..end])
}
/// Unit tests for [`truncate`].
#[cfg(test)]
mod tests {
    use super::*;

    /// A string shorter than the limit is returned unchanged.
    #[test]
    fn test_truncate_short() {
        assert_eq!(truncate("hei", 10), "hei");
    }

    /// A long ASCII string is cut to exactly the limit, including the suffix.
    #[test]
    fn test_truncate_long() {
        let long = "a".repeat(100);
        let result = truncate(&long, 20);
        assert!(result.len() <= 20);
        assert_eq!(result.chars().count(), 20);
        assert!(result.ends_with("..."));
    }

    /// Multi-byte characters are cut on a character boundary and the result
    /// respects the limit measured in characters.
    #[test]
    fn test_truncate_unicode() {
        let text = "Ødeleggende ødemark ødelagt";
        let result = truncate(text, 15);
        assert!(result.ends_with("..."));
        assert!(result.starts_with("Ødeleggende"));
        assert_eq!(result.chars().count(), 15);
    }
}