Sentralisert logging av alle ressurskrevende operasjoner til resource_usage_log-tabellen (opprettet i migrasjon 009). Ny kode: - resource_usage.rs: hjelpemodul med log() og find_collection_for_node() - bandwidth.rs: Caddy JSON-logg-parser med nattlig batch-jobb (kl 03:00) Logging lagt til i handlere: - AI: summarize, ai_edges (token-telling via LiteLLM usage-felt), agent (placeholder — claude CLI gir ikke token-info) - Whisper: duration_seconds, model, language, mode - TTS: refaktorert til sentralisert modul, lagt til collection_id - CAS: logger nye filer ved upload (ikke dedup) - LiveKit: logger join-hendelser (faktisk deltaker-minutter krever webhook-integrasjon i fremtiden) Caddy-config: JSON access logging aktivert for sidelinja.org og synops.no i /srv/synops/config/caddy/Caddyfile (utenfor repo). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
317 lines
9.2 KiB
Rust
317 lines
9.2 KiB
Rust
// TTS-pipeline — tekst til lyd via ElevenLabs.
|
|
//
|
|
// Jobbtype: "tts_generate"
|
|
// Payload: {
|
|
// "text": "<tekst som skal leses opp>",
|
|
// "voice_id": "<elevenlabs voice_id>", (valgfritt, bruker default)
|
|
// "language": "no", (valgfritt)
|
|
// "source_node_id": "<uuid>", (noden teksten tilhører)
|
|
// "requested_by": "<uuid>"
|
|
// }
|
|
//
|
|
// Flyten:
|
|
// 1. Hent tekst og voice-preferanse fra payload
|
|
// 2. Sjekk mottaker-preferanse i source-nodens metadata (voice_preference)
|
|
// 3. Kall ElevenLabs API
|
|
// 4. Lagre lyd i CAS
|
|
// 5. Opprett media-node med has_media-edge til kilde
|
|
// 6. Logg ressursforbruk
|
|
//
|
|
// Ref: docs/features/ressursforbruk.md, docs/proposals/ghost_host_tts.md
|
|
|
|
use sqlx::PgPool;
|
|
use uuid::Uuid;
|
|
|
|
use crate::cas::CasStore;
|
|
use crate::jobs::JobRow;
|
|
use crate::resource_usage;
|
|
use crate::stdb::StdbClient;
|
|
|
|
/// Maximum text length for TTS (the ElevenLabs limit is 5000 characters per call).
|
|
const MAX_TEXT_LENGTH: usize = 5000;
|
|
|
|
/// Håndterer tts_generate-jobb.
|
|
pub async fn handle_tts_job(
|
|
job: &JobRow,
|
|
db: &PgPool,
|
|
stdb: &StdbClient,
|
|
cas: &CasStore,
|
|
) -> Result<serde_json::Value, String> {
|
|
let text = job.payload["text"]
|
|
.as_str()
|
|
.ok_or("Mangler 'text' i payload")?
|
|
.to_string();
|
|
|
|
if text.is_empty() {
|
|
return Err("Tom tekst — ingenting å generere".to_string());
|
|
}
|
|
if text.len() > MAX_TEXT_LENGTH {
|
|
return Err(format!(
|
|
"Tekst for lang: {} tegn (maks {})",
|
|
text.len(),
|
|
MAX_TEXT_LENGTH
|
|
));
|
|
}
|
|
|
|
let source_node_id: Option<Uuid> = job.payload["source_node_id"]
|
|
.as_str()
|
|
.and_then(|s| s.parse().ok());
|
|
|
|
let requested_by: Uuid = job.payload["requested_by"]
|
|
.as_str()
|
|
.and_then(|s| s.parse().ok())
|
|
.ok_or("Mangler gyldig 'requested_by' i payload")?;
|
|
|
|
// Bestem voice_id: payload > source-node metadata > env default
|
|
let voice_id = resolve_voice_id(job, db, source_node_id).await?;
|
|
|
|
let language = job.payload["language"]
|
|
.as_str()
|
|
.unwrap_or("no")
|
|
.to_string();
|
|
|
|
tracing::info!(
|
|
text_len = text.len(),
|
|
voice_id = %voice_id,
|
|
language = %language,
|
|
"Starter TTS-generering"
|
|
);
|
|
|
|
// 1. Kall ElevenLabs API
|
|
let audio_bytes = call_elevenlabs(&text, &voice_id).await?;
|
|
|
|
tracing::info!(
|
|
audio_size = audio_bytes.len(),
|
|
"Mottok lyd fra ElevenLabs"
|
|
);
|
|
|
|
// 2. Lagre i CAS
|
|
let cas_result = cas
|
|
.store(&audio_bytes)
|
|
.await
|
|
.map_err(|e| format!("CAS-lagring feilet: {e}"))?;
|
|
|
|
tracing::info!(
|
|
cas_hash = %cas_result.hash,
|
|
size = cas_result.size,
|
|
dedup = cas_result.already_existed,
|
|
"Lyd lagret i CAS"
|
|
);
|
|
|
|
// 3. Opprett media-node for lydfilen
|
|
let media_node_id = Uuid::now_v7();
|
|
let title = format!("TTS: {}", truncate(&text, 60));
|
|
let metadata = serde_json::json!({
|
|
"cas_hash": cas_result.hash,
|
|
"mime": "audio/mpeg",
|
|
"size_bytes": cas_result.size,
|
|
"tts": {
|
|
"provider": "elevenlabs",
|
|
"voice_id": voice_id,
|
|
"language": language,
|
|
"characters": text.len(),
|
|
}
|
|
});
|
|
let metadata_str = metadata.to_string();
|
|
|
|
// STDB først (sanntid)
|
|
stdb.create_node(
|
|
&media_node_id.to_string(),
|
|
"content",
|
|
&title,
|
|
"",
|
|
"hidden",
|
|
&metadata_str,
|
|
&requested_by.to_string(),
|
|
)
|
|
.await
|
|
.map_err(|e| format!("STDB create_node feilet: {e}"))?;
|
|
|
|
// PG (persistering)
|
|
sqlx::query(
|
|
r#"
|
|
INSERT INTO nodes (id, node_kind, title, content, visibility, metadata, created_by)
|
|
VALUES ($1, 'content', $2, '', 'hidden'::visibility, $3, $4)
|
|
"#,
|
|
)
|
|
.bind(media_node_id)
|
|
.bind(&title)
|
|
.bind(&metadata)
|
|
.bind(requested_by)
|
|
.execute(db)
|
|
.await
|
|
.map_err(|e| format!("PG insert media-node feilet: {e}"))?;
|
|
|
|
// 4. Opprett has_media-edge fra kilde-node til TTS-noden (hvis kilde finnes)
|
|
if let Some(source_id) = source_node_id {
|
|
let edge_id = Uuid::now_v7();
|
|
let edge_meta = serde_json::json!({
|
|
"media_type": "tts_audio",
|
|
"generated_at": chrono::Utc::now().to_rfc3339()
|
|
});
|
|
let empty_meta = edge_meta.to_string();
|
|
|
|
stdb.create_edge(
|
|
&edge_id.to_string(),
|
|
&source_id.to_string(),
|
|
&media_node_id.to_string(),
|
|
"has_media",
|
|
&empty_meta,
|
|
false,
|
|
&requested_by.to_string(),
|
|
)
|
|
.await
|
|
.map_err(|e| format!("STDB create_edge (has_media) feilet: {e}"))?;
|
|
|
|
sqlx::query(
|
|
r#"
|
|
INSERT INTO edges (id, source_id, target_id, edge_type, metadata, system, created_by)
|
|
VALUES ($1, $2, $3, 'has_media', $4, false, $5)
|
|
"#,
|
|
)
|
|
.bind(edge_id)
|
|
.bind(source_id)
|
|
.bind(media_node_id)
|
|
.bind(&edge_meta)
|
|
.bind(requested_by)
|
|
.execute(db)
|
|
.await
|
|
.map_err(|e| format!("PG insert has_media-edge feilet: {e}"))?;
|
|
}
|
|
|
|
// 5. Logg ressursforbruk
|
|
let collection_id = resource_usage::find_collection_for_node(db, media_node_id).await;
|
|
if let Err(e) = resource_usage::log(
|
|
db,
|
|
media_node_id,
|
|
Some(requested_by),
|
|
collection_id,
|
|
"tts",
|
|
serde_json::json!({
|
|
"provider": "elevenlabs",
|
|
"characters": text.len(),
|
|
"voice_id": voice_id
|
|
}),
|
|
)
|
|
.await
|
|
{
|
|
tracing::warn!(error = %e, "Kunne ikke logge TTS-ressursforbruk");
|
|
}
|
|
|
|
Ok(serde_json::json!({
|
|
"status": "completed",
|
|
"media_node_id": media_node_id.to_string(),
|
|
"cas_hash": cas_result.hash,
|
|
"characters": text.len(),
|
|
"audio_size_bytes": cas_result.size,
|
|
"voice_id": voice_id,
|
|
}))
|
|
}
|
|
|
|
/// Bestem voice_id: payload-verdi > source-node metadata.voice_preference > env default.
|
|
/// "Mottaker-preferanse i metadata" betyr at en node kan ha
|
|
/// metadata.voice_preference.voice_id som overstyrer default.
|
|
async fn resolve_voice_id(
|
|
job: &JobRow,
|
|
db: &PgPool,
|
|
source_node_id: Option<Uuid>,
|
|
) -> Result<String, String> {
|
|
// 1. Eksplisitt i payload — alltid høyest prioritet
|
|
if let Some(vid) = job.payload["voice_id"].as_str() {
|
|
if !vid.is_empty() {
|
|
return Ok(vid.to_string());
|
|
}
|
|
}
|
|
|
|
// 2. Sjekk source-nodens metadata.voice_preference.voice_id
|
|
if let Some(node_id) = source_node_id {
|
|
let meta: Option<serde_json::Value> = sqlx::query_scalar(
|
|
"SELECT metadata FROM nodes WHERE id = $1",
|
|
)
|
|
.bind(node_id)
|
|
.fetch_optional(db)
|
|
.await
|
|
.map_err(|e| format!("PG-feil ved henting av voice_preference: {e}"))?
|
|
.flatten();
|
|
|
|
if let Some(meta) = meta {
|
|
if let Some(vid) = meta
|
|
.get("voice_preference")
|
|
.and_then(|vp| vp.get("voice_id"))
|
|
.and_then(|v| v.as_str())
|
|
{
|
|
if !vid.is_empty() {
|
|
tracing::info!(
|
|
node_id = %node_id,
|
|
voice_id = %vid,
|
|
"Bruker mottaker-preferanse fra node-metadata"
|
|
);
|
|
return Ok(vid.to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// 3. Miljøvariabel-default
|
|
let default = std::env::var("ELEVENLABS_DEFAULT_VOICE")
|
|
.unwrap_or_else(|_| "21m00Tcm4TlvDq8ikWAM".to_string()); // Rachel (ElevenLabs premade)
|
|
|
|
Ok(default)
|
|
}
|
|
|
|
/// Kall ElevenLabs text-to-speech API.
|
|
/// Returnerer rå MP3-bytes.
|
|
async fn call_elevenlabs(text: &str, voice_id: &str) -> Result<Vec<u8>, String> {
|
|
let api_key = std::env::var("ELEVENLABS_API_KEY")
|
|
.map_err(|_| "ELEVENLABS_API_KEY er ikke satt".to_string())?;
|
|
|
|
let model_id = std::env::var("ELEVENLABS_MODEL")
|
|
.unwrap_or_else(|_| "eleven_multilingual_v2".to_string());
|
|
|
|
let url = format!(
|
|
"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
|
);
|
|
|
|
let payload = serde_json::json!({
|
|
"text": text,
|
|
"model_id": model_id,
|
|
"voice_settings": {
|
|
"stability": 0.5,
|
|
"similarity_boost": 0.75,
|
|
"style": 0.0,
|
|
"use_speaker_boost": true
|
|
}
|
|
});
|
|
|
|
let client = reqwest::Client::new();
|
|
let resp = client
|
|
.post(&url)
|
|
.header("xi-api-key", &api_key)
|
|
.header("Accept", "audio/mpeg")
|
|
.json(&payload)
|
|
.timeout(std::time::Duration::from_secs(30))
|
|
.send()
|
|
.await
|
|
.map_err(|e| format!("ElevenLabs HTTP-feil: {e}"))?;
|
|
|
|
if !resp.status().is_success() {
|
|
let status = resp.status();
|
|
let body = resp.text().await.unwrap_or_default();
|
|
return Err(format!("ElevenLabs returnerte {status}: {body}"));
|
|
}
|
|
|
|
resp.bytes()
|
|
.await
|
|
.map(|b| b.to_vec())
|
|
.map_err(|e| format!("Kunne ikke lese ElevenLabs-respons: {e}"))
|
|
}
|
|
|
|
/// Shortens a string to at most `max_len` characters.
///
/// Strings that already fit are returned unchanged. Longer strings are cut to
/// `max_len - 3` characters and suffixed with `"..."`. When `max_len` is below
/// 3 there is no room for the ellipsis, so the string is simply cut to
/// `max_len` characters.
///
/// Lengths are counted in characters (Unicode scalar values), not bytes, and
/// the cut always lands on a character boundary.
fn truncate(s: &str, max_len: usize) -> String {
    // If there is no character at index `max_len`, the string has at most
    // `max_len` characters and needs no truncation. This only walks the first
    // `max_len + 1` characters instead of the whole string.
    if s.char_indices().nth(max_len).is_none() {
        return s.to_string();
    }

    // Too short a budget for "...": hard-cut instead of underflowing `max_len - 3`.
    if max_len < 3 {
        return s.chars().take(max_len).collect();
    }

    let end = s
        .char_indices()
        .nth(max_len - 3)
        .map_or(s.len(), |(byte_idx, _)| byte_idx);
    format!("{}...", &s[..end])
}
|