diff --git a/docs/features/lydstudio.md b/docs/features/lydstudio.md new file mode 100644 index 0000000..8e77938 --- /dev/null +++ b/docs/features/lydstudio.md @@ -0,0 +1,130 @@ +# Lydstudio — Lydredigering i nettleseren + +**Status:** Under utvikling (v1) + +## Konsept + +Lydstudioet er en visning (`/studio/[id]`) av en medienode som gir +brukeren verktøy for enkel lydredigering direkte i nettleseren. +Tenk "Audacity-light" integrert i Synops-plattformen. + +**Prinsipp:** Ikke-destruktiv redigering. Originalen i CAS røres aldri. +Operasjoner lagres som en EDL (Edit Decision List), og rendres til ny +fil via maskinrommet + ffmpeg. + +## Arkitektur + +### Node/edge-modell +``` +Original medienode (media, cas_hash: "abc...") + ←derived_from── Prosessert medienode (media, cas_hash: "def...", metadata.edl) + ←has_studio──── Studio-sesjon (content, metadata.edl = {...}) +``` + +- **Studioet** er en *visning*, ikke en ny node_kind +- **Studio-sesjon** er en content-node som lagrer EDL-en (gjenopptagbart) +- **Prosessert fil** er en ny medienode med `derived_from`-edge + +### EDL-format (Edit Decision List) + +```json +{ + "source_hash": "abc123...", + "operations": [ + { "type": "cut", "start_ms": 15200, "end_ms": 17800 }, + { "type": "normalize", "target_lufs": -16.0 }, + { "type": "trim_silence", "threshold_db": -30.0, "min_duration_ms": 500 }, + { "type": "fade_in", "duration_ms": 1000 }, + { "type": "fade_out", "duration_ms": 2000 }, + { "type": "noise_reduction", "strength_db": -25.0 }, + { "type": "equalizer", "low_gain": 2.0, "mid_gain": 0.0, "high_gain": -1.0 }, + { "type": "compressor", "threshold_db": -20.0, "ratio": 4.0 } + ] +} +``` + +### Prosesseringsflyt +``` +Frontend (EDL) + → POST /intentions/audio_process + → Jobbkø (audio_process, prioritet 5) + → maskinrommet: edl → ffmpeg filtergraf → subprocess + → Resultat lagres i CAS → ny medienode + derived_from edge +``` + +## Operasjoner + +| Operasjon | FFmpeg-filter | Beskrivelse | 
+|-----------|---------------|-------------| +| Klipp (cut) | `aselect` + `asetpts` | Fjern region (nysing, telefon, etc.) | +| Normaliser | `loudnorm` (to-pass) | EBU R128 loudness-normalisering, typisk -16 LUFS | +| Trim stillhet | `silencedetect` → cuts | Forkort/fjern stille regioner | +| Fade in | `afade=t=in` | Gradvis inngang | +| Fade out | `afade=t=out` | Gradvis utgang | +| Noise reduction | `afftdn` | FFT-basert støyreduksjon | +| EQ | `equalizer` | Tre-bånds parametrisk (lav/mid/høy) | +| Kompressor | `acompressor` | Dynamisk kompresjon ("radio-lyd") | + +### Operasjonsrekkefølge (ved render) +1. Cuts (aselect) — fjerner regioner +2. Noise reduction (afftdn) +3. EQ (equalizer) +4. Compressor (acompressor) +5. Normalize (loudnorm) — alltid nest sist +6. Fades (afade) — helt sist + +## API-endepunkter + +### `POST /intentions/audio_analyze` +Synkron analyse av lydfil: loudness, silence-regioner, metadata. + +```json +// Request +{ "cas_hash": "abc...", "silence_threshold_db": -30.0, "silence_min_duration_ms": 500 } + +// Response +{ + "loudness": { "input_i": -23.1, "input_tp": -5.2, "input_lra": 14.0, "input_thresh": -34.0 }, + "silence_regions": [{ "start_ms": 1200, "end_ms": 2800, "duration_ms": 1600 }], + "info": { "duration_ms": 180000, "sample_rate": 44100, "channels": 2, "codec": "mp3", "format": "mp3" } +} +``` + +### `POST /intentions/audio_process` +Køer render-jobb med EDL. Returnerer job_id for polling. + +```json +// Request +{ "media_node_id": "uuid", "edl": { "source_hash": "...", "operations": [...] }, "output_format": "mp3" } + +// Response +{ "job_id": "uuid" } +``` + +### `GET /query/audio_info?hash=...` +Hurtig metadata om lydfil (ffprobe). 
+ +## Frontend + +- **Rute:** `/studio/[id]` — waveform-visning av medienode +- **Waveform:** wavesurfer.js med RegionsPlugin for visuell region-markering +- **Verktøypanel:** Alle operasjoner tilgjengelig som knapper/slidere +- **Tastatur:** Space (play/pause), Delete (klipp), Ctrl+Z (angre) +- **Transkripsjon:** Segmenter synkronisert med waveform (klikk → seek) +- **Render:** Dialog med format-valg, deretter jobb-polling + +## Avhengigheter + +- **ffmpeg 6.1.1** — installert native på serveren +- **wavesurfer.js** — allerede i bruk (AudioPlayer.svelte) +- **Trait:** `studio` — aktiverer "Rediger i studioet"-knapp på medienoder + +## Filer + +| Fil | Rolle | +|-----|-------| +| `maskinrommet/src/audio.rs` | EDL-parser, ffmpeg-kommandoer, jobbhåndterer | +| `maskinrommet/src/jobs.rs` | `audio_process` dispatch | +| `maskinrommet/src/intentions.rs` | API-endepunkter for analyze/process/info | +| `frontend/src/routes/studio/[id]/+page.svelte` | Hovedside | +| `frontend/src/lib/components/studio/` | Waveform, panel, render-dialog | diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 7daccab..d57148c 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -397,6 +397,88 @@ export interface SegmentChoice { choice: 'new' | 'old'; } +// ============================================================================= +// Lydstudio +// ============================================================================= + +export interface AudioInfo { + duration_ms: number; + sample_rate: number; + channels: number; + codec: string; + format: string; + bit_rate: number | null; +} + +export interface LoudnessInfo { + input_i: number; + input_tp: number; + input_lra: number; + input_thresh: number; +} + +export interface SilenceRegion { + start_ms: number; + end_ms: number; + duration_ms: number; +} + +export interface AnalyzeResult { + loudness: LoudnessInfo; + silence_regions: SilenceRegion[]; + info: AudioInfo; +} + +export interface EdlOperation { + 
type: string; + [key: string]: unknown; +} + +export interface EdlDocument { + source_hash: string; + operations: EdlOperation[]; +} + +/** Analyser lydfil: loudness, silence-regioner, metadata. */ +export function audioAnalyze( + accessToken: string, + casHash: string, + silenceThresholdDb?: number, + silenceMinDurationMs?: number +): Promise { + return post(accessToken, '/intentions/audio_analyze', { + cas_hash: casHash, + silence_threshold_db: silenceThresholdDb, + silence_min_duration_ms: silenceMinDurationMs + }); +} + +/** Køer audio-prosessering med EDL. Returnerer job_id. */ +export function audioProcess( + accessToken: string, + mediaNodeId: string, + edl: EdlDocument, + outputFormat?: string +): Promise<{ job_id: string }> { + return post(accessToken, '/intentions/audio_process', { + media_node_id: mediaNodeId, + edl, + output_format: outputFormat + }); +} + +/** Hent metadata om lydfil (ffprobe). */ +export async function audioInfo(accessToken: string, hash: string): Promise { + const res = await fetch(`${BASE_URL}/query/audio_info?hash=${encodeURIComponent(hash)}`, { + headers: { Authorization: `Bearer ${accessToken}` } + }); + if (!res.ok) { + const body = await res.text(); + throw new Error(`audio_info failed (${res.status}): ${body}`); + } + return res.json(); +} + /** Anvend brukerens per-segment-valg etter re-transkripsjon. */ export function resolveRetranscription( accessToken: string, diff --git a/frontend/src/lib/components/studio/OperationPanel.svelte b/frontend/src/lib/components/studio/OperationPanel.svelte new file mode 100644 index 0000000..e7858f6 --- /dev/null +++ b/frontend/src/lib/components/studio/OperationPanel.svelte @@ -0,0 +1,304 @@ + + +
+ +
+

Analyse

+ + + {#if loudness} +
+ Loudness: + {loudness.input_i.toFixed(1)} LUFS + True Peak: + {loudness.input_tp.toFixed(1)} dBTP + LRA: + {loudness.input_lra.toFixed(1)} LU +
+ {/if} + + {#if audioInfo} +
+ Varighet: + {fmtMs(audioInfo.duration_ms)} + Format: + {audioInfo.codec} / {audioInfo.sample_rate}Hz + Kanaler: + {audioInfo.channels} +
+ {/if} + + {#if silenceRegions.length > 0} +

+ {silenceRegions.length} stille regioner funnet +

+ {/if} +
+ + +
+

Klipp

+ +
+ + +
+

Trim stillhet

+
+ + + dB +
+
+ + + ms +
+ +
+ + +
+

Normaliser loudness

+
+ + + LUFS +
+ +
+ + +
+

Fades

+
+
+ +
+ + ms +
+ +
+
+ +
+ + ms +
+ +
+
+
+ + +
+

Noise reduction

+
+ + + {noiseStrength}dB +
+ +
+ + +
+

Equalizer

+
+
+ + + {eqLow}dB +
+
+ + + {eqMid}dB +
+
+ + + {eqHigh}dB +
+
+ +
+ + +
+

Kompressor

+
+
+ + + dB +
+
+ + + :1 +
+
+ +
+ + + {#if operations.length > 0} +
+
+

Operasjoner ({operations.length})

+ +
+
    + {#each operations as op, i} +
  • + {i + 1}. {opLabel(op)} + +
  • + {/each} +
+
+ {/if} + + + +
diff --git a/frontend/src/lib/components/studio/RenderDialog.svelte b/frontend/src/lib/components/studio/RenderDialog.svelte new file mode 100644 index 0000000..467508c --- /dev/null +++ b/frontend/src/lib/components/studio/RenderDialog.svelte @@ -0,0 +1,93 @@ + + + +
{ if (e.key === 'Escape') onclose(); }} +> + +
e.stopPropagation()} + > +

Render lyd

+ + {#if resultNodeId} + +
+

Rendering fullfort!

+

Node: {resultNodeId}

+
+ + {:else if rendering} + +
+ + + + +

Rendrer lyd...

+ {#if jobId} +

Jobb: {jobId.slice(0, 8)}...

+ {/if} +
+ {:else} + +
+
+ + +
+ +
+

Operasjoner ({operations.length})

+
    + {#each operations as op, i} +
  • {i + 1}. {op.type}
  • + {/each} +
+
+
+ +
+ + +
+ {/if} +
+
diff --git a/frontend/src/lib/components/studio/StudioWaveform.svelte b/frontend/src/lib/components/studio/StudioWaveform.svelte new file mode 100644 index 0000000..2acd82f --- /dev/null +++ b/frontend/src/lib/components/studio/StudioWaveform.svelte @@ -0,0 +1,274 @@ + + + + +
+ +
+ {#if loadError} +

Kunne ikke laste lydfilen

+ {:else if !ready} +
+ + + + + Laster waveform... +
+ {/if} +
+
+ + +
+
+ + + + + + {formatTime(currentTime)} / {formatTime(totalDuration)} + + + + {#if activeRegion} + + Markert: {formatTime(activeRegion.start)} - {formatTime(activeRegion.end)} + + {/if} +
+ + +
+ Zoom + handleZoom(zoom)} + class="w-24" + /> +
+
+
diff --git a/frontend/src/lib/components/traits/StudioTrait.svelte b/frontend/src/lib/components/traits/StudioTrait.svelte new file mode 100644 index 0000000..945ce06 --- /dev/null +++ b/frontend/src/lib/components/traits/StudioTrait.svelte @@ -0,0 +1,70 @@ + + + + {#if audioNodes.length === 0} +

Ingen lydfiler i denne samlingen enna.

+ {:else} +
    + {#each audioNodes as node} +
  • +
    +

    {node.title ?? 'Uten tittel'}

    + {#if hasVersions(node.id)} + Har prosesserte versjoner + {/if} +
    + + Rediger + +
  • + {/each} +
+ {/if} +
diff --git a/frontend/src/routes/collection/[id]/+page.svelte b/frontend/src/routes/collection/[id]/+page.svelte index 0d86f8c..88897e3 100644 --- a/frontend/src/routes/collection/[id]/+page.svelte +++ b/frontend/src/routes/collection/[id]/+page.svelte @@ -13,6 +13,7 @@ import CalendarTrait from '$lib/components/traits/CalendarTrait.svelte'; import RecordingTrait from '$lib/components/traits/RecordingTrait.svelte'; import TranscriptionTrait from '$lib/components/traits/TranscriptionTrait.svelte'; + import StudioTrait from '$lib/components/traits/StudioTrait.svelte'; import GenericTrait from '$lib/components/traits/GenericTrait.svelte'; import TraitAdmin from '$lib/components/traits/TraitAdmin.svelte'; @@ -47,7 +48,7 @@ /** Traits with dedicated components */ const knownTraits = new Set([ 'editor', 'chat', 'kanban', 'podcast', 'publishing', - 'rss', 'calendar', 'recording', 'transcription' + 'rss', 'calendar', 'recording', 'transcription', 'studio' ]); /** Traits that have a dedicated component */ @@ -165,6 +166,8 @@ {:else if trait === 'transcription'} + {:else if trait === 'studio'} + {/if} {/each} diff --git a/frontend/src/routes/studio/[id]/+page.svelte b/frontend/src/routes/studio/[id]/+page.svelte new file mode 100644 index 0000000..6ddf832 --- /dev/null +++ b/frontend/src/routes/studio/[id]/+page.svelte @@ -0,0 +1,417 @@ + + + + +
+ +
+
+ + + + + +

+ {mediaNode?.title ?? 'Lydstudio'} +

+ {#if audioInfo} + + {audioInfo.codec} / {audioInfo.sample_rate}Hz / {audioInfo.channels}ch + + {/if} +
+
+ {#if operations.length > 0} + + {/if} +
+
+ + {#if !audioSrc} +
+

+ {#if !connected} + Kobler til... + {:else if !mediaNode} + Finner ikke medienoden + {:else} + Ingen lydfil tilgjengelig + {/if} +

+
+ {:else} +
+ +
+ { audioInfo = audioInfo ?? { duration_ms: d * 1000, sample_rate: 0, channels: 0, codec: '', format: '', bit_rate: null }; }} + ontimeupdate={(t) => { currentTime = t; }} + /> + + + {#if segments.length > 0} +
+

Transkripsjon

+
+ {#each segments as seg} + {@const active = currentTime >= seg.start_ms / 1000 && currentTime < seg.end_ms / 1000} + + {/each} +
+
+ {/if} +
+ + +
+ + + + {#if versions.length > 0} +
+

Versjoner

+ +
+ {/if} +
+
+ {/if} +
+ + +{#if showRenderDialog} + { showRenderDialog = false; rendering = false; renderJobId = null; resultNodeId = null; }} + /> +{/if} diff --git a/maskinrommet/src/audio.rs b/maskinrommet/src/audio.rs new file mode 100644 index 0000000..691d89f --- /dev/null +++ b/maskinrommet/src/audio.rs @@ -0,0 +1,724 @@
+//! Audio studio — audio processing via an FFmpeg subprocess.
+//!
+//! Non-destructive editing: the original in CAS is never touched.
+//! An EDL (Edit Decision List) describes the operations. On render,
+//! ffmpeg is run and the result is stored as a new CAS entry.
+//!
+//! NOTE(review): several generic type arguments in this file (`Vec`,
+//! `Option`, `Result`) appear without their `<...>` part — this looks like
+//! extraction damage to the patch text, not the real source. Confirm
+//! against the repository before relying on the signatures as written.
+
+use serde::{Deserialize, Serialize};
+use sqlx::PgPool;
+use uuid::Uuid;
+
+use crate::cas::CasStore;
+use crate::jobs::JobRow;
+use crate::stdb::StdbClient;
+
+// ─── EDL data structures ──────────────────────────────────────────
+
+/// An edit decision list: the CAS hash of the source file and the
+/// operations to apply to it.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EdlDocument {
+    pub source_hash: String,
+    pub operations: Vec,
+}
+
+/// One editing operation.
+///
+/// Serialized as an internally tagged object: the variant name goes into a
+/// `"type"` field in snake_case (e.g. `{"type": "fade_in", "duration_ms": 1000}`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum EdlOperation {
+    /// Remove the region between `start_ms` and `end_ms`.
+    Cut {
+        start_ms: i64,
+        end_ms: i64,
+    },
+    /// Loudness-normalize to `target_lufs`.
+    Normalize {
+        target_lufs: f64,
+    },
+    /// Shorten silent regions found with the given threshold and minimum length.
+    TrimSilence {
+        threshold_db: f32,
+        min_duration_ms: u32,
+    },
+    /// Fade in over `duration_ms`.
+    FadeIn {
+        duration_ms: u32,
+    },
+    /// Fade out over `duration_ms`.
+    FadeOut {
+        duration_ms: u32,
+    },
+    /// Noise reduction with the given strength.
+    NoiseReduction {
+        strength_db: f32,
+    },
+    /// Three-band equalizer gains.
+    Equalizer {
+        low_gain: f32,
+        mid_gain: f32,
+        high_gain: f32,
+    },
+    /// Dynamic range compression.
+    Compressor {
+        threshold_db: f32,
+        ratio: f32,
+    },
+}
+
+// ─── Analysis results ─────────────────────────────────────────────
+
+/// Loudness measurements; the field names mirror the keys that are read
+/// from ffmpeg's loudnorm JSON output elsewhere in this file.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct LoudnessInfo {
+    pub input_i: f64,
+    pub input_tp: f64,
+    pub input_lra: f64,
+    pub input_thresh: f64,
+}
+
+/// A detected silent region, in milliseconds.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct SilenceRegion {
+    pub start_ms: i64,
+    pub end_ms: i64,
+    pub duration_ms: i64,
+}
+
+/// Basic metadata about an audio file, as filled in by `get_audio_info`.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct AudioInfo {
+    pub duration_ms: i64,
+    pub sample_rate: u32,
+    pub channels: u32,
+    pub codec: String,
+    pub format: String,
+    pub bit_rate: Option,
+}
+
+/// Combined result of loudness analysis, silence detection and metadata lookup.
+#[derive(Debug, Serialize)]
+pub struct AnalyzeResult {
+    pub loudness: LoudnessInfo,
+    pub silence_regions: Vec,
+    pub info: AudioInfo,
+}
+
+// ─── FFmpeg commands ──────────────────────────────────────────────
+
+/// Fetch metadata about an audio file via ffprobe.
+///
+/// Resolves `hash` to a path with `cas.path_for`, runs `ffprobe` with JSON
+/// output for format and streams, and reads the first stream whose
+/// `codec_type` is `"audio"`.
+///
+/// Missing or unparsable fields fall back to defaults instead of failing:
+/// duration 0, sample rate 44100, 2 channels, `"unknown"` codec/format and
+/// no bit rate.
+///
+/// # Errors
+///
+/// Returns an error string when the file does not exist at the CAS path,
+/// ffprobe cannot be started or exits unsuccessfully, its output is not
+/// valid JSON, or no audio stream is present.
+pub async fn get_audio_info(cas: &CasStore, hash: &str) -> Result {
+    let path = cas.path_for(hash);
+    if !path.exists() {
+        return Err(format!("Filen finnes ikke i CAS: {hash}"));
+    }
+
+    let output = tokio::process::Command::new("ffprobe")
+        .args([
+            "-v", "quiet",
+            "-print_format", "json",
+            "-show_format",
+            "-show_streams",
+        ])
+        .arg(&path)
+        .output()
+        .await
+        .map_err(|e| format!("Kunne ikke kjøre ffprobe: {e}"))?;
+
+    if !output.status.success() {
+        // NOTE(review): with `-v quiet` ffprobe presumably writes nothing to
+        // stderr, so this message may carry no detail — consider `-v error`.
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Err(format!("ffprobe feilet: {stderr}"));
+    }
+
+    let json: serde_json::Value = serde_json::from_slice(&output.stdout)
+        .map_err(|e| format!("Kunne ikke parse ffprobe-output: {e}"))?;
+
+    // Find the first audio stream
+    let stream = json["streams"]
+        .as_array()
+        .and_then(|streams| streams.iter().find(|s| s["codec_type"] == "audio"))
+        .ok_or("Ingen audio-stream funnet")?;
+
+    let format = &json["format"];
+
+    // The duration is read as a string holding seconds and parsed here.
+    let duration_secs: f64 = format["duration"]
+        .as_str()
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(0.0);
+
+    Ok(AudioInfo {
+        duration_ms: (duration_secs * 1000.0) as i64,
+        sample_rate: stream["sample_rate"]
+            .as_str()
+            .and_then(|s| s.parse().ok())
+            .unwrap_or(44100),
+        channels: stream["channels"].as_u64().unwrap_or(2) as u32,
+        codec: stream["codec_name"]
+            .as_str()
+            .unwrap_or("unknown")
+            .to_string(),
+        format: format["format_name"]
+            .as_str()
+            .unwrap_or("unknown")
+            .to_string(),
+        bit_rate: format["bit_rate"]
+            .as_str()
+            .and_then(|s| s.parse().ok()),
+    })
+}
+
+/// Analyse loudness (EBU R128) via ffmpeg loudnorm.
+pub async fn analyze_loudness(cas: &CasStore, hash: &str) -> Result { + let path = cas.path_for(hash); + if !path.exists() { + return Err(format!("Filen finnes ikke i CAS: {hash}")); + } + + let output = tokio::process::Command::new("ffmpeg") + .args(["-i"]) + .arg(&path) + .args([ + "-af", "loudnorm=print_format=json", + "-f", "null", "-", + ]) + .output() + .await + .map_err(|e| format!("Kunne ikke kjøre ffmpeg loudnorm: {e}"))?; + + // loudnorm skriver JSON til stderr + let stderr = String::from_utf8_lossy(&output.stderr); + + // Finn JSON-blokken i stderr + let json_start = stderr + .find("{\n") + .ok_or("Fant ikke loudnorm JSON i ffmpeg-output")?; + let json_end = stderr[json_start..] + .find("\n}") + .map(|i| json_start + i + 2) + .ok_or("Ufullstendig loudnorm JSON")?; + + let json_str = &stderr[json_start..json_end]; + let json: serde_json::Value = serde_json::from_str(json_str) + .map_err(|e| format!("Kunne ikke parse loudnorm JSON: {e}\n{json_str}"))?; + + Ok(LoudnessInfo { + input_i: parse_loudnorm_field(&json, "input_i")?, + input_tp: parse_loudnorm_field(&json, "input_tp")?, + input_lra: parse_loudnorm_field(&json, "input_lra")?, + input_thresh: parse_loudnorm_field(&json, "input_thresh")?, + }) +} + +fn parse_loudnorm_field(json: &serde_json::Value, field: &str) -> Result { + json[field] + .as_str() + .and_then(|s| s.parse::().ok()) + .ok_or_else(|| format!("Mangler felt '{field}' i loudnorm-output")) +} + +/// Detekter stille regioner i en lydfil. 
+///
+/// Runs ffmpeg's `silencedetect` filter with the given noise threshold (dB)
+/// and minimum duration, and pairs each `silence_start` with the following
+/// `silence_end` reported on stderr. A start without a matching end is
+/// dropped. Negative start times are clamped to 0.
+///
+/// # Errors
+///
+/// Returns an error string when the file does not exist at the CAS path or
+/// ffmpeg cannot be started or exits unsuccessfully.
+pub async fn detect_silence(
+    cas: &CasStore,
+    hash: &str,
+    threshold_db: f32,
+    min_duration_ms: u32,
+) -> Result<Vec<SilenceRegion>, String> {
+    let path = cas.path_for(hash);
+    if !path.exists() {
+        return Err(format!("Filen finnes ikke i CAS: {hash}"));
+    }
+
+    let min_duration_secs = f64::from(min_duration_ms) / 1000.0;
+    let filter = format!("silencedetect=noise={threshold_db}dB:d={min_duration_secs}");
+
+    let output = tokio::process::Command::new("ffmpeg")
+        .args(["-i"])
+        .arg(&path)
+        .args(["-af", &filter, "-f", "null", "-"])
+        .output()
+        .await
+        .map_err(|e| format!("Kunne ikke kjøre ffmpeg silencedetect: {e}"))?;
+
+    let stderr = String::from_utf8_lossy(&output.stderr);
+
+    // A failed run must not be reported as "no silence found".
+    if !output.status.success() {
+        return Err(format!("ffmpeg silencedetect feilet: {stderr}"));
+    }
+
+    // Parse the first whitespace-delimited token after `key` as seconds.
+    let seconds_after = |line: &str, key: &str| -> Option<f64> {
+        let (_, rest) = line.split_once(key)?;
+        rest.split_whitespace().next()?.parse().ok()
+    };
+
+    let mut regions = Vec::new();
+    let mut current_start: Option<f64> = None;
+
+    for line in stderr.lines() {
+        if let Some(secs) = seconds_after(line, "silence_start: ") {
+            // silencedetect can report a slightly negative start at the very
+            // beginning of a file; clamp so regions never start before 0.
+            current_start = Some(secs.max(0.0));
+        }
+        if let Some(end_secs) = seconds_after(line, "silence_end: ") {
+            if let Some(start_secs) = current_start.take() {
+                regions.push(SilenceRegion {
+                    start_ms: (start_secs * 1000.0) as i64,
+                    end_ms: (end_secs * 1000.0) as i64,
+                    duration_ms: ((end_secs - start_secs) * 1000.0) as i64,
+                });
+            }
+        }
+    }
+
+    Ok(regions)
+}
+
+// ─── EDL → FFmpeg filter graph ────────────────────────────────────
+
+/// Build the ffmpeg filter chain from EDL operations.
+/// Returns the filter string to pass to ffmpeg's `-af` option.
+///
+/// Order of operations:
+/// 1. Cuts (aselect) — removes regions
+/// 2. Trim silence — converted to cuts
+/// 3. Noise reduction (afftdn)
+/// 4. EQ (equalizer)
+/// 5. Compressor (acompressor)
+/// 6. Normalize (loudnorm) — always last before fades
+/// 7. 
Fades (afade) — aller sist +pub fn build_filter_chain( + ops: &[EdlOperation], + duration_ms: i64, + loudness_measured: Option<&LoudnessInfo>, +) -> String { + let mut filters: Vec = Vec::new(); + + // Samle alle cuts (inkl. fra trim_silence) + let mut cuts: Vec<(i64, i64)> = Vec::new(); + for op in ops { + if let EdlOperation::Cut { start_ms, end_ms } = op { + cuts.push((*start_ms, *end_ms)); + } + } + + // Sorter cuts og bygg aselect-filter + if !cuts.is_empty() { + cuts.sort_by_key(|c| c.0); + let conditions: Vec = cuts + .iter() + .map(|(s, e)| { + format!( + "between(t,{:.3},{:.3})", + *s as f64 / 1000.0, + *e as f64 / 1000.0 + ) + }) + .collect(); + filters.push(format!( + "aselect='not({})',asetpts=N/SR/TB", + conditions.join("+") + )); + } + + // Noise reduction + for op in ops { + if let EdlOperation::NoiseReduction { strength_db } = op { + filters.push(format!("afftdn=nf={strength_db}")); + } + } + + // EQ — tre-bånds parametrisk + for op in ops { + if let EdlOperation::Equalizer { + low_gain, + mid_gain, + high_gain, + } = op + { + let mut eq_parts = Vec::new(); + if *low_gain != 0.0 { + eq_parts.push(format!("equalizer=f=100:t=h:w=200:g={low_gain}")); + } + if *mid_gain != 0.0 { + eq_parts.push(format!("equalizer=f=1000:t=h:w=1000:g={mid_gain}")); + } + if *high_gain != 0.0 { + eq_parts.push(format!("equalizer=f=8000:t=h:w=4000:g={high_gain}")); + } + filters.extend(eq_parts); + } + } + + // Compressor + for op in ops { + if let EdlOperation::Compressor { + threshold_db, + ratio, + } = op + { + filters.push(format!( + "acompressor=threshold={threshold_db}dB:ratio={ratio}:attack=5:release=50" + )); + } + } + + // Normalize (loudnorm) — to-pass hvis vi har målte verdier + for op in ops { + if let EdlOperation::Normalize { target_lufs } = op { + if let Some(measured) = loudness_measured { + filters.push(format!( + "loudnorm=I={target_lufs}:TP=-1.5:LRA=11:\ + measured_I={:.1}:measured_TP={:.1}:measured_LRA={:.1}:\ + measured_thresh={:.1}:linear=true", + 
measured.input_i, + measured.input_tp, + measured.input_lra, + measured.input_thresh, + )); + } else { + // Enkeltpass (lavere kvalitet, men fungerer) + filters.push(format!("loudnorm=I={target_lufs}:TP=-1.5:LRA=11")); + } + } + } + + // Beregn varighet etter cuts for fade-out posisjonering + let total_cut_ms: i64 = cuts.iter().map(|(s, e)| e - s).sum(); + let effective_duration_ms = duration_ms - total_cut_ms; + + // Fades — helt sist + for op in ops { + match op { + EdlOperation::FadeIn { duration_ms } => { + let d = *duration_ms as f64 / 1000.0; + filters.push(format!("afade=t=in:d={d:.3}")); + } + EdlOperation::FadeOut { duration_ms: dur } => { + let d = *dur as f64 / 1000.0; + let start = (effective_duration_ms as f64 / 1000.0) - d; + if start > 0.0 { + filters.push(format!("afade=t=out:st={start:.3}:d={d:.3}")); + } + } + _ => {} + } + } + + filters.join(",") +} + +// ─── Prosessering ───────────────────────────────────────────────── + +/// Kjør ffmpeg med EDL-operasjoner og lagre resultatet i CAS. +pub async fn process_audio( + cas: &CasStore, + edl: &EdlDocument, + output_format: &str, +) -> Result<(String, u64), String> { + let source_path = cas.path_for(&edl.source_hash); + if !source_path.exists() { + return Err(format!("Kildefil finnes ikke i CAS: {}", edl.source_hash)); + } + + // Hent info for fade-out beregning + let info = get_audio_info(cas, &edl.source_hash).await?; + + // Sjekk om vi trenger to-pass loudnorm + let has_normalize = edl.operations.iter().any(|op| matches!(op, EdlOperation::Normalize { .. })); + + let loudness_measured = if has_normalize { + // Kjør silence-detection for trim_silence operasjoner + let silence_cuts = resolve_silence_cuts(cas, edl).await?; + + // Bygg midlertidig EDL uten normalize for pass 1 + let mut pass1_ops: Vec = edl.operations.clone(); + pass1_ops.retain(|op| !matches!(op, EdlOperation::Normalize { .. 
})); + pass1_ops.extend(silence_cuts.iter().cloned()); + + let pass1_filter = build_filter_chain(&pass1_ops, info.duration_ms, None); + + // Pass 1: mål loudness etter andre filtre er påført + let measured = if pass1_filter.is_empty() { + analyze_loudness(cas, &edl.source_hash).await? + } else { + analyze_with_filter(cas, &edl.source_hash, &pass1_filter).await? + }; + Some(measured) + } else { + None + }; + + // Resolve trim_silence til faktiske cuts + let silence_cuts = resolve_silence_cuts(cas, edl).await?; + let mut all_ops = edl.operations.clone(); + // Fjern TrimSilence og legg til genererte cuts + all_ops.retain(|op| !matches!(op, EdlOperation::TrimSilence { .. })); + all_ops.extend(silence_cuts); + + let filter = build_filter_chain(&all_ops, info.duration_ms, loudness_measured.as_ref()); + + if filter.is_empty() { + return Err("Ingen operasjoner å utføre".to_string()); + } + + // Bestem output-codec basert på format + let codec_args = match output_format { + "mp3" => vec!["-codec:a", "libmp3lame", "-q:a", "2"], + "wav" => vec!["-codec:a", "pcm_s16le"], + "flac" => vec!["-codec:a", "flac"], + "ogg" => vec!["-codec:a", "libvorbis", "-q:a", "6"], + _ => vec!["-codec:a", "libmp3lame", "-q:a", "2"], // default: mp3 + }; + + let ext = match output_format { + "wav" => "wav", + "flac" => "flac", + "ogg" => "ogg", + _ => "mp3", + }; + + // Output til temp-fil + let tmp_dir = cas.root().join("tmp"); + tokio::fs::create_dir_all(&tmp_dir) + .await + .map_err(|e| format!("Kunne ikke opprette tmp-katalog: {e}"))?; + let tmp_output = tmp_dir.join(format!("audio_process_{}.{ext}", Uuid::now_v7())); + + let mut cmd = tokio::process::Command::new("ffmpeg"); + cmd.args(["-i"]) + .arg(&source_path) + .args(["-af", &filter]) + .args(&codec_args) + .args(["-y"]) + .arg(&tmp_output); + + tracing::info!( + source = %edl.source_hash, + filter = %filter, + output = %tmp_output.display(), + "Kjører ffmpeg audio processing" + ); + + let output = cmd + .output() + .await + .map_err(|e| 
format!("Kunne ikke kjøre ffmpeg: {e}"))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + // Rydd opp temp-fil + let _ = tokio::fs::remove_file(&tmp_output).await; + return Err(format!("ffmpeg feilet: {stderr}")); + } + + // Les resultat og lagre i CAS + let result_bytes = tokio::fs::read(&tmp_output) + .await + .map_err(|e| format!("Kunne ikke lese ffmpeg-output: {e}"))?; + + let _ = tokio::fs::remove_file(&tmp_output).await; + + let store_result = cas + .store(&result_bytes) + .await + .map_err(|e| format!("Kunne ikke lagre i CAS: {e}"))?; + + tracing::info!( + source = %edl.source_hash, + result = %store_result.hash, + size = store_result.size, + "Audio processing fullført" + ); + + Ok((store_result.hash, store_result.size)) +} + +/// Kjør loudnorm-analyse med et forhåndsfilter (for to-pass normalisering). +async fn analyze_with_filter( + cas: &CasStore, + hash: &str, + pre_filter: &str, +) -> Result { + let path = cas.path_for(hash); + let filter = format!("{pre_filter},loudnorm=print_format=json"); + + let output = tokio::process::Command::new("ffmpeg") + .args(["-i"]) + .arg(&path) + .args(["-af", &filter, "-f", "null", "-"]) + .output() + .await + .map_err(|e| format!("Kunne ikke kjøre ffmpeg loudnorm pass 1: {e}"))?; + + let stderr = String::from_utf8_lossy(&output.stderr); + + let json_start = stderr + .find("{\n") + .ok_or("Fant ikke loudnorm JSON i pass 1")?; + let json_end = stderr[json_start..] 
+ .find("\n}") + .map(|i| json_start + i + 2) + .ok_or("Ufullstendig loudnorm JSON i pass 1")?; + + let json: serde_json::Value = serde_json::from_str(&stderr[json_start..json_end]) + .map_err(|e| format!("Kunne ikke parse loudnorm pass 1: {e}"))?; + + Ok(LoudnessInfo { + input_i: parse_loudnorm_field(&json, "input_i")?, + input_tp: parse_loudnorm_field(&json, "input_tp")?, + input_lra: parse_loudnorm_field(&json, "input_lra")?, + input_thresh: parse_loudnorm_field(&json, "input_thresh")?, + }) +} + +/// Konverter TrimSilence-operasjoner til faktiske Cut-operasjoner +/// ved å kjøre silence detection. +async fn resolve_silence_cuts( + cas: &CasStore, + edl: &EdlDocument, +) -> Result, String> { + let mut cuts = Vec::new(); + for op in &edl.operations { + if let EdlOperation::TrimSilence { + threshold_db, + min_duration_ms, + } = op + { + let regions = detect_silence(cas, &edl.source_hash, *threshold_db, *min_duration_ms).await?; + for region in regions { + // Behold 200ms stillhet på hver side for naturlig lyd + let margin_ms = 200; + let start = region.start_ms + margin_ms; + let end = region.end_ms - margin_ms; + if end > start { + cuts.push(EdlOperation::Cut { + start_ms: start, + end_ms: end, + }); + } + } + } + } + Ok(cuts) +} + +// ─── Jobbhåndterer ─────────────────────────────────────────────── + +/// Håndterer `audio_process`-jobber fra jobbkøen. +/// +/// Payload: +/// ```json +/// { +/// "media_node_id": "uuid", +/// "edl": { "source_hash": "...", "operations": [...] 
}, +/// "output_format": "mp3", +/// "requested_by": "uuid" +/// } +/// ``` +pub async fn handle_audio_process_job( + job: &JobRow, + db: &PgPool, + stdb: &StdbClient, + cas: &CasStore, +) -> Result { + let media_node_id: Uuid = job.payload["media_node_id"] + .as_str() + .and_then(|s| s.parse().ok()) + .ok_or("Mangler media_node_id i payload")?; + + let edl: EdlDocument = serde_json::from_value(job.payload["edl"].clone()) + .map_err(|e| format!("Ugyldig EDL i payload: {e}"))?; + + let output_format = job.payload["output_format"] + .as_str() + .unwrap_or("mp3"); + + let requested_by: Uuid = job.payload["requested_by"] + .as_str() + .and_then(|s| s.parse().ok()) + .ok_or("Mangler requested_by i payload")?; + + // Kjør prosessering + let (result_hash, result_size) = process_audio(cas, &edl, output_format).await?; + + // Bestem MIME-type + let mime = match output_format { + "mp3" => "audio/mpeg", + "wav" => "audio/wav", + "flac" => "audio/flac", + "ogg" => "audio/ogg", + _ => "audio/mpeg", + }; + + // Opprett ny medienode for den prosesserte filen + let processed_node_id = Uuid::now_v7(); + let metadata = serde_json::json!({ + "cas_hash": result_hash, + "mime": mime, + "size_bytes": result_size, + "source_hash": edl.source_hash, + "edl": edl, + }); + + // Hent tittel fra original node + let original_title: Option = sqlx::query_scalar( + "SELECT title FROM nodes WHERE id = $1" + ) + .bind(media_node_id) + .fetch_optional(db) + .await + .map_err(|e| format!("DB-feil: {e}"))? 
+ .flatten(); + + let title = original_title + .map(|t| format!("{t} (prosessert)")) + .unwrap_or_else(|| "Prosessert lyd".to_string()); + + // Insert processed media node + sqlx::query( + r#" + INSERT INTO nodes (id, node_kind, title, visibility, metadata, created_by) + VALUES ($1, 'media', $2, 'hidden', $3, $4) + "#, + ) + .bind(processed_node_id) + .bind(&title) + .bind(&metadata) + .bind(requested_by) + .execute(db) + .await + .map_err(|e| format!("Kunne ikke opprette prosessert node: {e}"))?; + + // Opprett derived_from edge: processed → original + let edge_id = Uuid::now_v7(); + sqlx::query( + r#" + INSERT INTO edges (id, source_id, target_id, edge_type, system, created_by) + VALUES ($1, $2, $3, 'derived_from', true, $4) + "#, + ) + .bind(edge_id) + .bind(processed_node_id) + .bind(media_node_id) + .bind(requested_by) + .execute(db) + .await + .map_err(|e| format!("Kunne ikke opprette derived_from edge: {e}"))?; + + // Synk til SpacetimeDB + let metadata_str = serde_json::to_string(&metadata).unwrap_or_default(); + let _ = stdb + .create_node( + &processed_node_id.to_string(), + "media", + &title, + "", + "hidden", + &metadata_str, + &requested_by.to_string(), + ) + .await; + + let _ = stdb + .create_edge( + &edge_id.to_string(), + &processed_node_id.to_string(), + &media_node_id.to_string(), + "derived_from", + "{}", + true, + &requested_by.to_string(), + ) + .await; + + tracing::info!( + original = %media_node_id, + processed = %processed_node_id, + hash = %result_hash, + "Audio process-jobb fullført" + ); + + Ok(serde_json::json!({ + "processed_node_id": processed_node_id.to_string(), + "cas_hash": result_hash, + "size_bytes": result_size, + })) +} diff --git a/maskinrommet/src/intentions.rs b/maskinrommet/src/intentions.rs index 75c206b..438f785 100644 --- a/maskinrommet/src/intentions.rs +++ b/maskinrommet/src/intentions.rs @@ -36,7 +36,7 @@ const VALID_TRAITS: &[&str] = &[ // Publisering & distribusjon "publishing", "rss", "newsletter", "custom_domain", 
"analytics", "embed", "api", // Lyd & video - "podcast", "recording", "transcription", "tts", "clips", "playlist", + "podcast", "recording", "transcription", "tts", "clips", "playlist", "studio", // Kommunikasjon "chat", "forum", "comments", "guest_input", "announcements", "polls", "qa", // Organisering @@ -2783,6 +2783,131 @@ pub async fn close_communication( })) } +// ============================================================================= +// Audio studio (Lydstudio) +// ============================================================================= + +#[derive(Deserialize)] +pub struct AudioAnalyzeRequest { + pub cas_hash: String, + pub silence_threshold_db: Option, + pub silence_min_duration_ms: Option, +} + +/// POST /intentions/audio_analyze +/// +/// Synchronous analysis of an audio file: loudness (LUFS), silence regions, and metadata; a hash missing from CAS is rejected via bad_request, and the silence threshold / minimum duration fall back to -30.0 / 500 when omitted. +/// Used by the studio to show the current state before editing. +pub async fn audio_analyze( + State(state): State, + _user: AuthUser, + Json(req): Json, +) -> Result, (StatusCode, Json)> { + let cas = &state.cas; + + if !cas.exists(&req.cas_hash) { + return Err(bad_request("Filen finnes ikke i CAS")); + } + + let info = crate::audio::get_audio_info(cas, &req.cas_hash) + .await + .map_err(|e| internal_error(&e))?; + + let loudness = crate::audio::analyze_loudness(cas, &req.cas_hash) + .await + .map_err(|e| internal_error(&e))?; + + let threshold = req.silence_threshold_db.unwrap_or(-30.0); + let min_dur = req.silence_min_duration_ms.unwrap_or(500); + + let silence_regions = crate::audio::detect_silence(cas, &req.cas_hash, threshold, min_dur) + .await + .map_err(|e| internal_error(&e))?; + + Ok(Json(crate::audio::AnalyzeResult { + loudness, + silence_regions, + info, + })) +} + +#[derive(Deserialize)] +pub struct AudioProcessRequest { + pub media_node_id: Uuid, + pub edl: crate::audio::EdlDocument, + pub output_format: Option, +} + +#[derive(Serialize)] +pub struct AudioProcessResponse { + pub job_id: Uuid, +} + +/// POST
/intentions/audio_process +/// +/// Queues an audio-processing job and responds with its job_id; output_format falls back to "mp3" when omitted. The result becomes a new media node +/// with a derived_from edge to the original. +pub async fn audio_process( + State(state): State, + user: AuthUser, + Json(req): Json, +) -> Result, (StatusCode, Json)> { + // Check that the media node exists + if !node_exists(&state.db, req.media_node_id).await.map_err(|e| { + tracing::error!("DB-feil: {e}"); + internal_error("Databasefeil") + })? { + return Err(bad_request("media_node_id finnes ikke")); + } + + // Check that the source file exists in CAS. NOTE(review): nothing in this handler ties edl.source_hash to media_node_id or validates output_format; confirm the job handler covers that. + if !state.cas.exists(&req.edl.source_hash) { + return Err(bad_request("source_hash finnes ikke i CAS")); + } + + let output_format = req.output_format.unwrap_or_else(|| "mp3".to_string()); + + let payload = serde_json::json!({ + "media_node_id": req.media_node_id.to_string(), + "edl": req.edl, + "output_format": output_format, + "requested_by": user.node_id.to_string(), + }); + + let job_id = crate::jobs::enqueue(&state.db, "audio_process", payload, None, 5) + .await + .map_err(|e| { + tracing::error!("Kunne ikke køe audio_process-jobb: {e}"); + internal_error("Kunne ikke køe jobb") + })?; + + Ok(Json(AudioProcessResponse { job_id })) +} + +#[derive(Deserialize)] +pub struct AudioInfoQuery { + pub hash: String, +} + +/// GET /query/audio_info?hash=... +/// +/// Fetches metadata about an audio file (duration, sample rate, channels, codec); a hash missing from CAS is rejected via bad_request.
+pub async fn audio_info( + State(state): State, + _user: AuthUser, + axum::extract::Query(query): axum::extract::Query, +) -> Result, (StatusCode, Json)> { + if !state.cas.exists(&query.hash) { + return Err(bad_request("Filen finnes ikke i CAS")); + } + + let info = crate::audio::get_audio_info(&state.cas, &query.hash) + .await + .map_err(|e| internal_error(&e))?; + + Ok(Json(info)) +} + // ============================================================================= // Tester // ============================================================================= @@ -2842,7 +2967,7 @@ mod tests { let all_traits = vec![ "editor", "versioning", "collaboration", "translation", "templates", "publishing", "rss", "newsletter", "custom_domain", "analytics", "embed", "api", - "podcast", "recording", "transcription", "tts", "clips", "playlist", + "podcast", "recording", "transcription", "tts", "clips", "playlist", "studio", "chat", "forum", "comments", "guest_input", "announcements", "polls", "qa", "kanban", "calendar", "timeline", "table", "gallery", "bookmarks", "tags", "knowledge_graph", "wiki", "glossary", "faq", "bibliography", diff --git a/maskinrommet/src/jobs.rs b/maskinrommet/src/jobs.rs index c553279..937f97a 100644 --- a/maskinrommet/src/jobs.rs +++ b/maskinrommet/src/jobs.rs @@ -10,6 +10,7 @@ use uuid::Uuid; use crate::agent; use crate::ai_edges; +use crate::audio; use crate::cas::CasStore; use crate::stdb::StdbClient; use crate::summarize; @@ -167,6 +168,9 @@ async fn dispatch( "tts_generate" => { tts::handle_tts_job(job, db, stdb, cas).await } + "audio_process" => { + audio::handle_audio_process_job(job, db, stdb, cas).await + } other => Err(format!("Ukjent jobbtype: {other}")), } }