synops/maskinrommet/src/audio.rs
vegard b4c4bb8a0f Lydstudio: lydredigering via FFmpeg i nettleseren
Ikke-destruktiv redigering via EDL (Edit Decision List):
- Backend: audio.rs med FFmpeg-subprocess for klipp, normalisering,
  silence trim, fades, noise reduction, EQ, kompressor
- Frontend: /studio/[id] med wavesurfer.js RegionsPlugin,
  verktøypanel, sesjonslagring, og render-dialog
- Studio-trait for samlinger, versjonshistorikk via derived_from-edges
- API: audio_analyze (synkron), audio_process (jobbkø), audio_info

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 00:45:53 +00:00

724 lines
22 KiB
Rust

//! Lydstudio — lydbehandling via FFmpeg subprocess.
//!
//! Ikke-destruktiv redigering: originalen i CAS røres aldri.
//! En EDL (Edit Decision List) beskriver operasjonene. Ved render
//! kjøres ffmpeg og resultatet lagres som ny CAS-entry.
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use uuid::Uuid;
use crate::cas::CasStore;
use crate::jobs::JobRow;
use crate::stdb::StdbClient;
// ─── EDL-datastrukturer ───────────────────────────────────────────
/// A complete edit session: one immutable source plus an ordered list
/// of operations. The source in CAS is never modified; rendering
/// produces a new CAS entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdlDocument {
/// CAS hash of the original source audio file.
pub source_hash: String,
/// Edit operations to apply at render time; see [`EdlOperation`].
pub operations: Vec<EdlOperation>,
}
/// A single non-destructive edit operation.
///
/// Serialized as JSON with a `"type"` tag in snake_case, e.g.
/// `{"type": "fade_in", "duration_ms": 500}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EdlOperation {
/// Remove the region between `start_ms` and `end_ms`.
Cut {
start_ms: i64,
end_ms: i64,
},
/// Loudness-normalize (ffmpeg loudnorm) toward `target_lufs`.
Normalize {
target_lufs: f64,
},
/// Cut detected silent regions (below `threshold_db` for at least
/// `min_duration_ms`); resolved to `Cut` ops before rendering.
TrimSilence {
threshold_db: f32,
min_duration_ms: u32,
},
/// Fade in over `duration_ms` at the start of the result.
FadeIn {
duration_ms: u32,
},
/// Fade out over `duration_ms` at the end of the result.
FadeOut {
duration_ms: u32,
},
/// Noise reduction via ffmpeg `afftdn`; `strength_db` is passed as
/// the noise-floor parameter (`nf`).
NoiseReduction {
strength_db: f32,
},
/// Three-band EQ; gains are applied at 100 Hz / 1 kHz / 8 kHz
/// (see `build_filter_chain`); a 0.0 gain skips that band.
Equalizer {
low_gain: f32,
mid_gain: f32,
high_gain: f32,
},
/// Dynamic-range compression via ffmpeg `acompressor`.
Compressor {
threshold_db: f32,
ratio: f32,
},
}
// ─── Analyse-resultat ─────────────────────────────────────────────
/// Loudness measurement as reported by ffmpeg's loudnorm filter
/// (field names mirror loudnorm's JSON output).
#[derive(Debug, Serialize, Deserialize)]
pub struct LoudnessInfo {
/// Measured integrated loudness (`input_i`, LUFS).
pub input_i: f64,
/// Measured true peak (`input_tp`).
pub input_tp: f64,
/// Measured loudness range (`input_lra`).
pub input_lra: f64,
/// Measurement threshold (`input_thresh`).
pub input_thresh: f64,
}
/// A silent region found by ffmpeg's silencedetect filter.
#[derive(Debug, Serialize, Deserialize)]
pub struct SilenceRegion {
/// Start of the silent region, in milliseconds.
pub start_ms: i64,
/// End of the silent region, in milliseconds.
pub end_ms: i64,
/// Length of the region (`end_ms - start_ms`), in milliseconds.
pub duration_ms: i64,
}
/// Stream/container metadata extracted with ffprobe.
#[derive(Debug, Serialize, Deserialize)]
pub struct AudioInfo {
/// Total duration in milliseconds (0 when ffprobe reports none).
pub duration_ms: i64,
/// Sample rate in Hz (falls back to 44100 when missing).
pub sample_rate: u32,
/// Channel count (falls back to 2 when missing).
pub channels: u32,
/// Codec name from ffprobe ("unknown" when absent).
pub codec: String,
/// Container format name from ffprobe ("unknown" when absent).
pub format: String,
/// Bit rate in bits/s, when the container reports one.
pub bit_rate: Option<u64>,
}
/// Combined result of a synchronous analysis: loudness measurement,
/// detected silence regions, and stream metadata.
#[derive(Debug, Serialize)]
pub struct AnalyzeResult {
pub loudness: LoudnessInfo,
pub silence_regions: Vec<SilenceRegion>,
pub info: AudioInfo,
}
// ─── FFmpeg-kommandoer ────────────────────────────────────────────
/// Probe an audio file in CAS and return its stream/container metadata.
///
/// Shells out to `ffprobe` with JSON output and picks the first stream
/// whose `codec_type` is `"audio"`. Missing fields fall back to
/// defaults: 44100 Hz, 2 channels, "unknown" codec/format, 0 duration.
pub async fn get_audio_info(cas: &CasStore, hash: &str) -> Result<AudioInfo, String> {
    let path = cas.path_for(hash);
    if !path.exists() {
        return Err(format!("Filen finnes ikke i CAS: {hash}"));
    }
    let probe = tokio::process::Command::new("ffprobe")
        .args([
            "-v", "quiet",
            "-print_format", "json",
            "-show_format",
            "-show_streams",
        ])
        .arg(&path)
        .output()
        .await
        .map_err(|e| format!("Kunne ikke kjøre ffprobe: {e}"))?;
    if !probe.status.success() {
        let stderr = String::from_utf8_lossy(&probe.stderr);
        return Err(format!("ffprobe feilet: {stderr}"));
    }
    let doc: serde_json::Value = serde_json::from_slice(&probe.stdout)
        .map_err(|e| format!("Kunne ikke parse ffprobe-output: {e}"))?;
    // Pick the first audio stream.
    let audio = doc["streams"]
        .as_array()
        .and_then(|streams| streams.iter().find(|s| s["codec_type"] == "audio"))
        .ok_or("Ingen audio-stream funnet")?;
    let container = &doc["format"];
    // ffprobe reports duration as a decimal string of seconds.
    let seconds = container["duration"]
        .as_str()
        .and_then(|s| s.parse::<f64>().ok())
        .unwrap_or(0.0);
    // Shared helper: string field with "unknown" fallback.
    let text = |v: &serde_json::Value| v.as_str().unwrap_or("unknown").to_string();
    Ok(AudioInfo {
        duration_ms: (seconds * 1000.0) as i64,
        sample_rate: audio["sample_rate"]
            .as_str()
            .and_then(|s| s.parse().ok())
            .unwrap_or(44100),
        channels: audio["channels"].as_u64().unwrap_or(2) as u32,
        codec: text(&audio["codec_name"]),
        format: text(&container["format_name"]),
        bit_rate: container["bit_rate"]
            .as_str()
            .and_then(|s| s.parse().ok()),
    })
}
/// Analyze loudness (EBU R128) via ffmpeg's loudnorm filter.
///
/// Runs ffmpeg with `-f null -` so no output file is produced; loudnorm
/// prints its measurement as a JSON block on stderr, which is extracted
/// and parsed here.
pub async fn analyze_loudness(cas: &CasStore, hash: &str) -> Result<LoudnessInfo, String> {
    let path = cas.path_for(hash);
    if !path.exists() {
        return Err(format!("Filen finnes ikke i CAS: {hash}"));
    }
    let output = tokio::process::Command::new("ffmpeg")
        .args(["-i"])
        .arg(&path)
        .args([
            "-af", "loudnorm=print_format=json",
            "-f", "null", "-",
        ])
        .output()
        .await
        .map_err(|e| format!("Kunne ikke kjøre ffmpeg loudnorm: {e}"))?;
    // loudnorm writes its JSON to stderr (not stdout).
    let stderr = String::from_utf8_lossy(&output.stderr);
    // Fix: check the exit status first, so a failed run (e.g. an
    // undecodable file) reports the real ffmpeg error instead of a
    // misleading "JSON not found".
    if !output.status.success() {
        return Err(format!("ffmpeg loudnorm feilet: {stderr}"));
    }
    // Locate the JSON block inside the stderr log.
    let json_start = stderr
        .find("{\n")
        .ok_or("Fant ikke loudnorm JSON i ffmpeg-output")?;
    let json_end = stderr[json_start..]
        .find("\n}")
        .map(|i| json_start + i + 2)
        .ok_or("Ufullstendig loudnorm JSON")?;
    let json_str = &stderr[json_start..json_end];
    let json: serde_json::Value = serde_json::from_str(json_str)
        .map_err(|e| format!("Kunne ikke parse loudnorm JSON: {e}\n{json_str}"))?;
    Ok(LoudnessInfo {
        input_i: parse_loudnorm_field(&json, "input_i")?,
        input_tp: parse_loudnorm_field(&json, "input_tp")?,
        input_lra: parse_loudnorm_field(&json, "input_lra")?,
        input_thresh: parse_loudnorm_field(&json, "input_thresh")?,
    })
}
/// Read one numeric field from loudnorm's JSON output. loudnorm emits
/// numbers as strings (e.g. "-23.0"), so the value is parsed from its
/// string form; a missing or unparsable field is an error.
fn parse_loudnorm_field(json: &serde_json::Value, field: &str) -> Result<f64, String> {
    match json[field].as_str().map(str::parse::<f64>) {
        Some(Ok(value)) => Ok(value),
        _ => Err(format!("Mangler felt '{field}' i loudnorm-output")),
    }
}
/// Detect silent regions in an audio file via ffmpeg silencedetect.
///
/// A region counts as silence when it stays below `threshold_db` for at
/// least `min_duration_ms`. The filter logs `silence_start:` /
/// `silence_end:` pairs to stderr, which are parsed into regions here;
/// an unmatched start (still silent at EOF) yields no region.
pub async fn detect_silence(
    cas: &CasStore,
    hash: &str,
    threshold_db: f32,
    min_duration_ms: u32,
) -> Result<Vec<SilenceRegion>, String> {
    let path = cas.path_for(hash);
    if !path.exists() {
        return Err(format!("Filen finnes ikke i CAS: {hash}"));
    }
    // silencedetect takes the minimum duration in seconds.
    let filter = format!(
        "silencedetect=noise={threshold_db}dB:d={}",
        min_duration_ms as f64 / 1000.0
    );
    let result = tokio::process::Command::new("ffmpeg")
        .args(["-i"])
        .arg(&path)
        .args(["-af", &filter, "-f", "null", "-"])
        .output()
        .await
        .map_err(|e| format!("Kunne ikke kjøre ffmpeg silencedetect: {e}"))?;
    let log = String::from_utf8_lossy(&result.stderr);
    // First whitespace-delimited token after the marker is the timestamp.
    let first_secs =
        |rest: &str| rest.split_whitespace().next().and_then(|t| t.parse::<f64>().ok());
    let mut regions = Vec::new();
    let mut pending_start: Option<f64> = None;
    for line in log.lines() {
        if let Some((_, rest)) = line.split_once("silence_start: ") {
            if let Some(secs) = first_secs(rest) {
                pending_start = Some(secs);
            }
        }
        if let Some((_, rest)) = line.split_once("silence_end: ") {
            if let Some(end_secs) = first_secs(rest) {
                // Only emit a region when an unconsumed start exists.
                if let Some(start_secs) = pending_start.take() {
                    regions.push(SilenceRegion {
                        start_ms: (start_secs * 1000.0) as i64,
                        end_ms: (end_secs * 1000.0) as i64,
                        duration_ms: ((end_secs - start_secs) * 1000.0) as i64,
                    });
                }
            }
        }
    }
    Ok(regions)
}
// ─── EDL → FFmpeg filtergraf ──────────────────────────────────────
/// Build an ffmpeg audio filtergraph from EDL operations.
///
/// Returns the comma-joined filter string for `-af` (empty when no
/// operation produces a filter). NOTE: the old doc claimed a
/// `(filter_string, needs_two_pass)` tuple was returned — only the
/// string is. `TrimSilence` variants are NOT handled here; the caller
/// must resolve them to `Cut` ops first (see `resolve_silence_cuts`).
///
/// Operation ordering is fixed, regardless of their order in the EDL:
/// 1. Cuts (aselect) — removes regions
/// 2. Noise reduction (afftdn)
/// 3. EQ (equalizer)
/// 4. Compressor (acompressor)
/// 5. Normalize (loudnorm) — always last before fades
/// 6. Fades (afade) — very last
pub fn build_filter_chain(
ops: &[EdlOperation],
duration_ms: i64,
loudness_measured: Option<&LoudnessInfo>,
) -> String {
let mut filters: Vec<String> = Vec::new();
// Collect all explicit cut regions.
let mut cuts: Vec<(i64, i64)> = Vec::new();
for op in ops {
if let EdlOperation::Cut { start_ms, end_ms } = op {
cuts.push((*start_ms, *end_ms));
}
}
// Sort cuts by start and build ONE aselect filter: samples falling in
// any cut region are dropped ('+' acts as OR between conditions), and
// asetpts=N/SR/TB re-stamps timestamps so the output is gapless.
if !cuts.is_empty() {
cuts.sort_by_key(|c| c.0);
let conditions: Vec<String> = cuts
.iter()
.map(|(s, e)| {
format!(
"between(t,{:.3},{:.3})",
*s as f64 / 1000.0,
*e as f64 / 1000.0
)
})
.collect();
filters.push(format!(
"aselect='not({})',asetpts=N/SR/TB",
conditions.join("+")
));
}
// Noise reduction: strength_db feeds afftdn's noise-floor parameter.
for op in ops {
if let EdlOperation::NoiseReduction { strength_db } = op {
filters.push(format!("afftdn=nf={strength_db}"));
}
}
// EQ — three bands at 100 Hz / 1 kHz / 8 kHz; a band with 0.0 gain
// is omitted entirely rather than emitted as a no-op filter.
for op in ops {
if let EdlOperation::Equalizer {
low_gain,
mid_gain,
high_gain,
} = op
{
let mut eq_parts = Vec::new();
if *low_gain != 0.0 {
eq_parts.push(format!("equalizer=f=100:t=h:w=200:g={low_gain}"));
}
if *mid_gain != 0.0 {
eq_parts.push(format!("equalizer=f=1000:t=h:w=1000:g={mid_gain}"));
}
if *high_gain != 0.0 {
eq_parts.push(format!("equalizer=f=8000:t=h:w=4000:g={high_gain}"));
}
filters.extend(eq_parts);
}
}
// Compressor with fixed 5 ms attack / 50 ms release.
for op in ops {
if let EdlOperation::Compressor {
threshold_db,
ratio,
} = op
{
filters.push(format!(
"acompressor=threshold={threshold_db}dB:ratio={ratio}:attack=5:release=50"
));
}
}
// Normalize (loudnorm) — two-pass linear mode when measured values
// are supplied, otherwise single-pass (dynamic; lower quality).
for op in ops {
if let EdlOperation::Normalize { target_lufs } = op {
if let Some(measured) = loudness_measured {
filters.push(format!(
"loudnorm=I={target_lufs}:TP=-1.5:LRA=11:\
measured_I={:.1}:measured_TP={:.1}:measured_LRA={:.1}:\
measured_thresh={:.1}:linear=true",
measured.input_i,
measured.input_tp,
measured.input_lra,
measured.input_thresh,
));
} else {
// Single pass (lower quality, but works without measurement).
filters.push(format!("loudnorm=I={target_lufs}:TP=-1.5:LRA=11"));
}
}
}
// Duration remaining after cuts — needed to place the fade-out at the
// end of the *output*, not the original file.
let total_cut_ms: i64 = cuts.iter().map(|(s, e)| e - s).sum();
let effective_duration_ms = duration_ms - total_cut_ms;
// Fades — applied last. A fade-out that would start before t=0
// (fade longer than the remaining audio) is silently skipped.
for op in ops {
match op {
EdlOperation::FadeIn { duration_ms } => {
let d = *duration_ms as f64 / 1000.0;
filters.push(format!("afade=t=in:d={d:.3}"));
}
EdlOperation::FadeOut { duration_ms: dur } => {
let d = *dur as f64 / 1000.0;
let start = (effective_duration_ms as f64 / 1000.0) - d;
if start > 0.0 {
filters.push(format!("afade=t=out:st={start:.3}:d={d:.3}"));
}
}
_ => {}
}
}
filters.join(",")
}
// ─── Prosessering ─────────────────────────────────────────────────
/// Run ffmpeg with the EDL operations and store the result in CAS.
///
/// Returns `(cas_hash, size_bytes)` of the rendered file. The source
/// CAS entry is never modified (non-destructive editing).
///
/// When the EDL contains a `Normalize` op, a first pass measures
/// loudness so the render can use loudnorm in linear (two-pass) mode.
/// The measurement applies exactly the filters that precede loudnorm
/// in the final chain — cuts, noise reduction, EQ, compressor — and
/// deliberately excludes fades, since loudnorm runs before the fades.
pub async fn process_audio(
    cas: &CasStore,
    edl: &EdlDocument,
    output_format: &str,
) -> Result<(String, u64), String> {
    let source_path = cas.path_for(&edl.source_hash);
    if !source_path.exists() {
        return Err(format!("Kildefil finnes ikke i CAS: {}", edl.source_hash));
    }
    // Needed for fade-out positioning.
    let info = get_audio_info(cas, &edl.source_hash).await?;
    // Fix: resolve TrimSilence to concrete cuts ONCE. This runs ffmpeg
    // silencedetect; the old code invoked it twice when normalizing.
    let silence_cuts = resolve_silence_cuts(cas, edl).await?;
    let has_normalize = edl
        .operations
        .iter()
        .any(|op| matches!(op, EdlOperation::Normalize { .. }));
    let loudness_measured = if has_normalize {
        // Pass 1: measure loudness after the filters that will precede
        // loudnorm in pass 2. Fix: also strip FadeIn/FadeOut (loudnorm
        // runs BEFORE fades in the final chain, so measuring with fades
        // applied would skew the measured values) and TrimSilence
        // (already represented by the resolved cuts).
        let mut pass1_ops: Vec<EdlOperation> = edl.operations.clone();
        pass1_ops.retain(|op| {
            !matches!(
                op,
                EdlOperation::Normalize { .. }
                    | EdlOperation::TrimSilence { .. }
                    | EdlOperation::FadeIn { .. }
                    | EdlOperation::FadeOut { .. }
            )
        });
        pass1_ops.extend(silence_cuts.iter().cloned());
        let pass1_filter = build_filter_chain(&pass1_ops, info.duration_ms, None);
        let measured = if pass1_filter.is_empty() {
            analyze_loudness(cas, &edl.source_hash).await?
        } else {
            analyze_with_filter(cas, &edl.source_hash, &pass1_filter).await?
        };
        Some(measured)
    } else {
        None
    };
    // Final op list: TrimSilence swapped for its generated cuts.
    let mut all_ops = edl.operations.clone();
    all_ops.retain(|op| !matches!(op, EdlOperation::TrimSilence { .. }));
    all_ops.extend(silence_cuts);
    let filter = build_filter_chain(&all_ops, info.duration_ms, loudness_measured.as_ref());
    if filter.is_empty() {
        return Err("Ingen operasjoner å utføre".to_string());
    }
    // Output codec per requested format (default: mp3).
    let codec_args = match output_format {
        "mp3" => vec!["-codec:a", "libmp3lame", "-q:a", "2"],
        "wav" => vec!["-codec:a", "pcm_s16le"],
        "flac" => vec!["-codec:a", "flac"],
        "ogg" => vec!["-codec:a", "libvorbis", "-q:a", "6"],
        _ => vec!["-codec:a", "libmp3lame", "-q:a", "2"], // default: mp3
    };
    let ext = match output_format {
        "wav" => "wav",
        "flac" => "flac",
        "ogg" => "ogg",
        _ => "mp3",
    };
    // Render to a temp file; only the finished result enters CAS.
    let tmp_dir = cas.root().join("tmp");
    tokio::fs::create_dir_all(&tmp_dir)
        .await
        .map_err(|e| format!("Kunne ikke opprette tmp-katalog: {e}"))?;
    let tmp_output = tmp_dir.join(format!("audio_process_{}.{ext}", Uuid::now_v7()));
    let mut cmd = tokio::process::Command::new("ffmpeg");
    cmd.args(["-i"])
        .arg(&source_path)
        .args(["-af", &filter])
        .args(&codec_args)
        .args(["-y"])
        .arg(&tmp_output);
    tracing::info!(
        source = %edl.source_hash,
        filter = %filter,
        output = %tmp_output.display(),
        "Kjører ffmpeg audio processing"
    );
    let output = cmd
        .output()
        .await
        .map_err(|e| format!("Kunne ikke kjøre ffmpeg: {e}"))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        // Clean up the temp file before bailing.
        let _ = tokio::fs::remove_file(&tmp_output).await;
        return Err(format!("ffmpeg feilet: {stderr}"));
    }
    // Read the render and store it as a new CAS entry.
    let result_bytes = tokio::fs::read(&tmp_output)
        .await
        .map_err(|e| format!("Kunne ikke lese ffmpeg-output: {e}"))?;
    let _ = tokio::fs::remove_file(&tmp_output).await;
    let store_result = cas
        .store(&result_bytes)
        .await
        .map_err(|e| format!("Kunne ikke lagre i CAS: {e}"))?;
    tracing::info!(
        source = %edl.source_hash,
        result = %store_result.hash,
        size = store_result.size,
        "Audio processing fullført"
    );
    Ok((store_result.hash, store_result.size))
}
/// Run a loudnorm analysis with a pre-filter applied (pass 1 of
/// two-pass normalization).
///
/// `pre_filter` is prepended so the measurement reflects the audio as
/// it will actually enter the loudnorm filter in the final render.
async fn analyze_with_filter(
    cas: &CasStore,
    hash: &str,
    pre_filter: &str,
) -> Result<LoudnessInfo, String> {
    let path = cas.path_for(hash);
    // Consistency fix: siblings (get_audio_info, analyze_loudness,
    // detect_silence) all verify the CAS entry exists first.
    if !path.exists() {
        return Err(format!("Filen finnes ikke i CAS: {hash}"));
    }
    let filter = format!("{pre_filter},loudnorm=print_format=json");
    let output = tokio::process::Command::new("ffmpeg")
        .args(["-i"])
        .arg(&path)
        .args(["-af", &filter, "-f", "null", "-"])
        .output()
        .await
        .map_err(|e| format!("Kunne ikke kjøre ffmpeg loudnorm pass 1: {e}"))?;
    // loudnorm writes its JSON to stderr.
    let stderr = String::from_utf8_lossy(&output.stderr);
    // Fix: surface the real ffmpeg error (e.g. a malformed pre_filter)
    // instead of a misleading "JSON not found".
    if !output.status.success() {
        return Err(format!("ffmpeg loudnorm pass 1 feilet: {stderr}"));
    }
    let json_start = stderr
        .find("{\n")
        .ok_or("Fant ikke loudnorm JSON i pass 1")?;
    let json_end = stderr[json_start..]
        .find("\n}")
        .map(|i| json_start + i + 2)
        .ok_or("Ufullstendig loudnorm JSON i pass 1")?;
    let json: serde_json::Value = serde_json::from_str(&stderr[json_start..json_end])
        .map_err(|e| format!("Kunne ikke parse loudnorm pass 1: {e}"))?;
    Ok(LoudnessInfo {
        input_i: parse_loudnorm_field(&json, "input_i")?,
        input_tp: parse_loudnorm_field(&json, "input_tp")?,
        input_lra: parse_loudnorm_field(&json, "input_lra")?,
        input_thresh: parse_loudnorm_field(&json, "input_thresh")?,
    })
}
/// Convert TrimSilence operations into concrete Cut operations by
/// running silence detection on the source file.
///
/// Each detected region is shrunk by a margin on both sides so a
/// little silence survives and the edit sounds natural; regions too
/// short to survive the margins produce no cut.
async fn resolve_silence_cuts(
    cas: &CasStore,
    edl: &EdlDocument,
) -> Result<Vec<EdlOperation>, String> {
    // Keep 200 ms of silence on each side of every cut.
    const MARGIN_MS: i64 = 200;
    let mut generated = Vec::new();
    for operation in &edl.operations {
        let EdlOperation::TrimSilence {
            threshold_db,
            min_duration_ms,
        } = operation
        else {
            continue;
        };
        let regions =
            detect_silence(cas, &edl.source_hash, *threshold_db, *min_duration_ms).await?;
        for region in regions {
            let (start, end) = (region.start_ms + MARGIN_MS, region.end_ms - MARGIN_MS);
            if end > start {
                generated.push(EdlOperation::Cut {
                    start_ms: start,
                    end_ms: end,
                });
            }
        }
    }
    Ok(generated)
}
// ─── Jobbhåndterer ───────────────────────────────────────────────
/// Handles `audio_process` jobs from the job queue.
///
/// Payload:
/// ```json
/// {
/// "media_node_id": "uuid",
/// "edl": { "source_hash": "...", "operations": [...] },
/// "output_format": "mp3",
/// "requested_by": "uuid"
/// }
/// ```
///
/// On success this renders the audio, stores it as a new hidden media
/// node in Postgres, links it to the original via a `derived_from`
/// edge, and mirrors both into SpacetimeDB (best-effort).
pub async fn handle_audio_process_job(
job: &JobRow,
db: &PgPool,
stdb: &StdbClient,
cas: &CasStore,
) -> Result<serde_json::Value, String> {
// Required payload fields — missing/invalid values fail the job.
let media_node_id: Uuid = job.payload["media_node_id"]
.as_str()
.and_then(|s| s.parse().ok())
.ok_or("Mangler media_node_id i payload")?;
let edl: EdlDocument = serde_json::from_value(job.payload["edl"].clone())
.map_err(|e| format!("Ugyldig EDL i payload: {e}"))?;
// output_format is optional; defaults to mp3.
let output_format = job.payload["output_format"]
.as_str()
.unwrap_or("mp3");
let requested_by: Uuid = job.payload["requested_by"]
.as_str()
.and_then(|s| s.parse().ok())
.ok_or("Mangler requested_by i payload")?;
// Run the ffmpeg render.
let (result_hash, result_size) = process_audio(cas, &edl, output_format).await?;
// Map output format to MIME type (default: mpeg).
let mime = match output_format {
"mp3" => "audio/mpeg",
"wav" => "audio/wav",
"flac" => "audio/flac",
"ogg" => "audio/ogg",
_ => "audio/mpeg",
};
// New media node for the processed file; the EDL and source hash are
// stored in metadata for provenance / version history.
let processed_node_id = Uuid::now_v7();
let metadata = serde_json::json!({
"cas_hash": result_hash,
"mime": mime,
"size_bytes": result_size,
"source_hash": edl.source_hash,
"edl": edl,
});
// Reuse the original node's title, suffixed "(prosessert)".
let original_title: Option<String> = sqlx::query_scalar(
"SELECT title FROM nodes WHERE id = $1"
)
.bind(media_node_id)
.fetch_optional(db)
.await
.map_err(|e| format!("DB-feil: {e}"))?
.flatten();
let title = original_title
.map(|t| format!("{t} (prosessert)"))
.unwrap_or_else(|| "Prosessert lyd".to_string());
// Insert the processed media node (visibility 'hidden').
sqlx::query(
r#"
INSERT INTO nodes (id, node_kind, title, visibility, metadata, created_by)
VALUES ($1, 'media', $2, 'hidden', $3, $4)
"#,
)
.bind(processed_node_id)
.bind(&title)
.bind(&metadata)
.bind(requested_by)
.execute(db)
.await
.map_err(|e| format!("Kunne ikke opprette prosessert node: {e}"))?;
// Create the derived_from edge: processed → original (system edge).
let edge_id = Uuid::now_v7();
sqlx::query(
r#"
INSERT INTO edges (id, source_id, target_id, edge_type, system, created_by)
VALUES ($1, $2, $3, 'derived_from', true, $4)
"#,
)
.bind(edge_id)
.bind(processed_node_id)
.bind(media_node_id)
.bind(requested_by)
.execute(db)
.await
.map_err(|e| format!("Kunne ikke opprette derived_from edge: {e}"))?;
// Mirror node + edge to SpacetimeDB. Errors are deliberately ignored
// (`let _`) — the sync is best-effort and must not fail the job.
let metadata_str = serde_json::to_string(&metadata).unwrap_or_default();
let _ = stdb
.create_node(
&processed_node_id.to_string(),
"media",
&title,
"",
"hidden",
&metadata_str,
&requested_by.to_string(),
)
.await;
let _ = stdb
.create_edge(
&edge_id.to_string(),
&processed_node_id.to_string(),
&media_node_id.to_string(),
"derived_from",
"{}",
true,
&requested_by.to_string(),
)
.await;
tracing::info!(
original = %media_node_id,
processed = %processed_node_id,
hash = %result_hash,
"Audio process-jobb fullført"
);
Ok(serde_json::json!({
"processed_node_id": processed_node_id.to_string(),
"cas_hash": result_hash,
"size_bytes": result_size,
}))
}