From b4ee80a97bd9afb038daeb760c461c93138c50fc Mon Sep 17 00:00:00 2001 From: vegard Date: Tue, 17 Mar 2026 18:41:09 +0100 Subject: [PATCH] =?UTF-8?q?Fullf=C3=B8r=20oppgave=207.7:=20Re-transkripsjo?= =?UTF-8?q?nsflyt=20med=20side-om-side-sammenligning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ny funksjonalitet for å kjøre re-transkripsjon på eksisterende media-noder og sammenligne gammel vs ny versjon per segment. Manuelt redigerte segmenter fra forrige versjon blir uthevet, og brukeren velger per segment hvilken versjon som skal beholdes. Backend (Rust): - POST /intentions/retranscribe — trigger ny Whisper-jobb for media-node - GET /query/transcription_versions — list alle versjoner for en node - GET /query/segments_version — hent segmenter for spesifikk versjon - POST /intentions/resolve_retranscription — anvend per-segment-valg Frontend (Svelte): - RetranscriptionCompare.svelte — side-om-side visning med per-segment-valg - TranscriptionView: re-transkriber-knapp, auto-detect nye versjoner, polling - API-klient: nye funksjoner for alle re-transkripsjonsendepunkter Co-Authored-By: Claude Opus 4.6 --- frontend/src/lib/api.ts | 79 +++++ .../src/lib/components/AudioPlayer.svelte | 4 +- .../components/RetranscriptionCompare.svelte | 273 ++++++++++++++++++ .../lib/components/TranscriptionView.svelte | 151 +++++++++- maskinrommet/src/intentions.rs | 259 +++++++++++++++++ maskinrommet/src/main.rs | 4 + maskinrommet/src/queries.rs | 170 +++++++++++ tasks.md | 3 +- 8 files changed, 932 insertions(+), 11 deletions(-) create mode 100644 frontend/src/lib/components/RetranscriptionCompare.svelte diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index b4f11ac..6a4d652 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -175,3 +175,82 @@ export function updateSegment( content }); } + +// ============================================================================= +// Re-transkripsjon +// 
=============================================================================
+
+export interface TranscriptionVersion {
+	transcribed_at: string; // version identifier; the backend formats it as an RFC 3339 timestamp
+	segment_count: number; // number of segments stored for this version
+	edited_count: number; // how many of those segments are flagged as edited
+}
+
+export interface TranscriptionVersionsResponse {
+	versions: TranscriptionVersion[];
+}
+
+/** Fetch all transcription versions of a node (GET /query/transcription_versions); throws an Error carrying status and response body on a non-OK reply. */
+export async function fetchTranscriptionVersions(
+	accessToken: string,
+	nodeId: string
+): Promise<TranscriptionVersionsResponse> {
+	const res = await fetch(
+		`${BASE_URL}/query/transcription_versions?node_id=${encodeURIComponent(nodeId)}`,
+		{ headers: { Authorization: `Bearer ${accessToken}` } }
+	);
+	if (!res.ok) {
+		const body = await res.text();
+		throw new Error(`transcription_versions failed (${res.status}): ${body}`);
+	}
+	return res.json();
+}
+
+/** Fetch the segments of one specific transcription version, identified by its `transcribed_at` value; throws an Error carrying status and response body on a non-OK reply. */
+export async function fetchSegmentsVersion(
+	accessToken: string,
+	nodeId: string,
+	transcribedAt: string
+): Promise { // NOTE(review): the type argument of this Promise is missing in this patch text; presumably the response type of the existing segments query in this module — restore it from the repository
+	const params = new URLSearchParams({
+		node_id: nodeId,
+		transcribed_at: transcribedAt
+	});
+	const res = await fetch(`${BASE_URL}/query/segments_version?${params}`, {
+		headers: { Authorization: `Bearer ${accessToken}` }
+	});
+	if (!res.ok) {
+		const body = await res.text();
+		throw new Error(`segments_version failed (${res.status}): ${body}`);
+	}
+	return res.json();
+}
+
+/** Trigger a re-transcription of a media node (POST /intentions/retranscribe); resolves with the id of the queued job. */
+export function retranscribe(
+	accessToken: string,
+	nodeId: string
+): Promise<{ job_id: string }> {
+	return post(accessToken, '/intentions/retranscribe', { node_id: nodeId });
+}
+
+export interface SegmentChoice {
+	seq: number; // sequence number of the segment in the new transcription
+	choice: 'new' | 'old'; // 'new' keeps the new text, 'old' restores the previous version's text
+}
+
+/** Apply the user's per-segment choices after re-transcription. 
*/ +export function resolveRetranscription( + accessToken: string, + nodeId: string, + newVersion: string, + oldVersion: string, + choices: SegmentChoice[] +): Promise<{ resolved: boolean; kept_old: number; kept_new: number }> { + return post(accessToken, '/intentions/resolve_retranscription', { + node_id: nodeId, + new_version: newVersion, + old_version: oldVersion, + choices + }); +} diff --git a/frontend/src/lib/components/AudioPlayer.svelte b/frontend/src/lib/components/AudioPlayer.svelte index fa86e60..ccac56b 100644 --- a/frontend/src/lib/components/AudioPlayer.svelte +++ b/frontend/src/lib/components/AudioPlayer.svelte @@ -158,8 +158,8 @@ {#if showTranscript} {#if hasSegments} diff --git a/frontend/src/lib/components/RetranscriptionCompare.svelte b/frontend/src/lib/components/RetranscriptionCompare.svelte new file mode 100644 index 0000000..9c9fbdb --- /dev/null +++ b/frontend/src/lib/components/RetranscriptionCompare.svelte @@ -0,0 +1,273 @@ + + +{#if loading} +

Laster sammenligning...

+{:else if error} +

{error}

+{:else} +
+ +
+

Sammenlign transkripsjoner

+
+ + +
+
+ + +
+ {#each newSegments as newSeg (newSeg.seq)} + {@const oldSeg = oldSegments.find((o) => o.seq === newSeg.seq)} + {@const choice = choices[newSeg.seq] ?? 'new'} + {@const different = isDifferent(newSeg.seq)} + {@const isActive = + currentTime * 1000 >= newSeg.start_ms && currentTime * 1000 < newSeg.end_ms} + +
+ +
+ + + {#if different && oldSeg} + + {:else if !oldSeg} + Nytt segment + {:else} + Uendret + {/if} +
+ + {#if different && oldSeg} + +
+ + + + + +
+ {:else} + +

+ {newSeg.content} +

+ {/if} +
+ {/each} +
+ + +
+ + {stats.kept_new} nye, {stats.kept_old} gamle + +
+ + +
+
+
+{/if} diff --git a/frontend/src/lib/components/TranscriptionView.svelte b/frontend/src/lib/components/TranscriptionView.svelte index 510f5ea..73ee5ef 100644 --- a/frontend/src/lib/components/TranscriptionView.svelte +++ b/frontend/src/lib/components/TranscriptionView.svelte @@ -1,5 +1,13 @@ {#if loading} @@ -110,7 +180,72 @@

{error}

{:else if segments.length === 0}

Ingen segmenter

+{:else if showCompare && versions.length >= 2} + + {:else} + +
+ + {segments.length} segmenter + +
+ {#if polling} + + + + + + Transkriberer... + + {:else if versions.length >= 2} + + {/if} + +
+
+ +
{#each segments as seg (seg.id)} {@const isActive = activeSegmentId === seg.id} @@ -170,7 +305,9 @@ > {seg.content} {#if seg.edited} - redigert + redigert {/if} {/if} diff --git a/maskinrommet/src/intentions.rs b/maskinrommet/src/intentions.rs index 0732563..8f8bf12 100644 --- a/maskinrommet/src/intentions.rs +++ b/maskinrommet/src/intentions.rs @@ -1450,3 +1450,262 @@ pub async fn update_segment( edited: true, })) } + +// ============================================================================= +// POST /intentions/retranscribe — trigger re-transkripsjon for eksisterende media-node +// ============================================================================= + +#[derive(Deserialize)] +pub struct RetranscribeRequest { + /// Media-node-ID å re-transkribere. + pub node_id: Uuid, +} + +#[derive(Serialize)] +pub struct RetranscribeResponse { + pub job_id: Uuid, +} + +/// POST /intentions/retranscribe +/// +/// Trigger en ny transkripsjons-jobb for en eksisterende media-node. +/// Henter CAS-hash og MIME fra nodens metadata. Krever skrivetilgang. 
+///
+/// Returns the `forbidden(...)` error when the caller may not modify the node
+/// and a `bad_request(...)` error when the node is missing, is not a media
+/// node, or has no `cas_hash` in its metadata. A failed collection lookup is
+/// logged and the job is still enqueued, just without a collection.
+pub async fn retranscribe(
+    State(state): State, // NOTE(review): the type argument of `State` is missing in this patch text and cannot be derived from it; restore it from the repository
+    user: AuthUser,
+    Json(req): Json<RetranscribeRequest>,
+) -> Result<Json<RetranscribeResponse>, (StatusCode, Json<ErrorResponse>)> {
+    // Verify write access before doing anything else.
+    let can_modify = user_can_modify_node(&state.db, user.node_id, req.node_id)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Tilgangssjekk feilet");
+            internal_error("Databasefeil ved tilgangssjekk")
+        })?;
+
+    if !can_modify {
+        return Err(forbidden("Ikke tilgang til å re-transkribere denne noden"));
+    }
+
+    // Load the node metadata; it carries the CAS hash and MIME type of the media.
+    let node: Option<(serde_json::Value,)> = sqlx::query_as(
+        "SELECT metadata FROM nodes WHERE id = $1 AND node_kind = 'media'",
+    )
+    .bind(req.node_id)
+    .fetch_optional(&state.db)
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Feil ved henting av node");
+        internal_error("Databasefeil")
+    })?;
+
+    let Some((metadata,)) = node else {
+        return Err(bad_request("Noden finnes ikke eller er ikke en media-node"));
+    };
+
+    let cas_hash = metadata["cas_hash"]
+        .as_str()
+        .ok_or_else(|| bad_request("Noden mangler cas_hash i metadata"))?;
+    // A missing MIME type is not fatal; fall back to "audio/mpeg".
+    let mime = metadata["mime"]
+        .as_str()
+        .unwrap_or("audio/mpeg");
+
+    // Find the collection through the owner chain. This stays best-effort, but a
+    // failed lookup is now logged instead of being discarded silently.
+    let collection_id = match find_collection_for_node(&state.db, req.node_id).await {
+        Ok(found) => found,
+        Err(e) => {
+            tracing::warn!(
+                error = ?e,
+                node_id = %req.node_id,
+                "Kunne ikke finne collection for noden; fortsetter uten"
+            );
+            None
+        }
+    };
+
+    let payload = serde_json::json!({
+        "media_node_id": req.node_id,
+        "cas_hash": cas_hash,
+        "mime": mime,
+        "language": "no",
+    });
+
+    let job_id = crate::jobs::enqueue(&state.db, "whisper_transcribe", payload, collection_id, 5)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Kunne ikke opprette re-transkripsjons-jobb");
+            internal_error("Kunne ikke starte re-transkripsjon")
+        })?;
+
+    tracing::info!(
+        job_id = %job_id,
+        node_id = %req.node_id,
+        user = %user.node_id,
+        "Re-transkripsjons-jobb opprettet"
+    );
+
+    Ok(Json(RetranscribeResponse { job_id }))
+}
+
+// =============================================================================
+// POST /intentions/resolve_retranscription — anvend brukerens 
segment-valg
+// =============================================================================
+
+#[derive(Deserialize)]
+pub struct SegmentChoice {
+    /// Sequence number of the segment in the new transcription.
+    pub seq: i32,
+    /// "new" = keep the new version, "old" = keep the old version.
+    pub choice: String,
+}
+
+#[derive(Deserialize)]
+pub struct ResolveRetranscriptionRequest {
+    /// Id of the media node.
+    pub node_id: Uuid,
+    /// `transcribed_at` of the new version (parsed as a timestamp by the handler).
+    pub new_version: String,
+    /// `transcribed_at` of the old version (parsed as a timestamp by the handler).
+    pub old_version: String,
+    /// Per-segment choices.
+    pub choices: Vec, // NOTE(review): the element type argument is missing in this patch text — presumably `SegmentChoice`; confirm against the repository
+}
+
+#[derive(Serialize)]
+pub struct ResolveRetranscriptionResponse {
+    pub resolved: bool,
+    pub kept_old: i32,
+    pub kept_new: i32,
+}
+
+/// POST /intentions/resolve_retranscription
+///
+/// Applies the user's per-segment choices after a re-transcription.
+/// For segments where the user picks "old", the content is copied from
+/// the old version into the new one. Old versions are deleted afterwards.
+///
+/// Nothing is written unless the request is consistent: `old_version` must be
+/// strictly older than `new_version`, every choice must be "new" or "old", and
+/// at least one segment must exist for `new_version`. All reads and writes run
+/// in a single transaction, which is dropped without commit on any error.
+/// `kept_old` counts segments whose text was restored from the old version;
+/// every other submitted choice is counted in `kept_new`.
+pub async fn resolve_retranscription(
+    State(state): State, // NOTE(review): the type argument of `State` is missing in this patch text and cannot be derived from it; restore it from the repository
+    user: AuthUser,
+    Json(req): Json<ResolveRetranscriptionRequest>,
+) -> Result<Json<ResolveRetranscriptionResponse>, (StatusCode, Json<ErrorResponse>)> {
+    // Verify write access before touching any segment.
+    let can_modify = user_can_modify_node(&state.db, user.node_id, req.node_id)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Tilgangssjekk feilet");
+            internal_error("Databasefeil ved tilgangssjekk")
+        })?;
+
+    if !can_modify {
+        return Err(forbidden("Ikke tilgang til å endre segmenter"));
+    }
+
+    // NOTE(review): `chrono::Utc` is assumed for the timestamp type argument,
+    // which is missing in this patch text — confirm against the repository.
+    let new_ts: chrono::DateTime<chrono::Utc> = req.new_version.parse()
+        .map_err(|_| bad_request("Ugyldig new_version-tidsstempel"))?;
+    let old_ts: chrono::DateTime<chrono::Utc> = req.old_version.parse()
+        .map_err(|_| bad_request("Ugyldig old_version-tidsstempel"))?;
+
+    // The cleanup below deletes everything older than `new_ts`; with swapped or
+    // equal versions the "old" version would survive it (or be the new one).
+    if old_ts >= new_ts {
+        return Err(bad_request("new_version må være nyere enn old_version"));
+    }
+
+    // Reject unknown choice values instead of silently treating them as "new".
+    if req.choices.iter().any(|c| c.choice != "new" && c.choice != "old") {
+        return Err(bad_request("Ugyldig valg: forventet \"new\" eller \"old\""));
+    }
+
+    let mut tx = state.db.begin().await.map_err(|e| {
+        tracing::error!(error = %e, "Transaksjon feilet");
+        internal_error("Databasefeil")
+    })?;
+
+    // Without any segment at `new_ts` the DELETE below would remove every stored
+    // version and the node content would be overwritten with an empty string.
+    let new_count = sqlx::query_scalar::<_, i64>(
+        "SELECT COUNT(*) FROM transcription_segments WHERE node_id = $1 AND transcribed_at = $2",
+    )
+    .bind(req.node_id)
+    .bind(new_ts)
+    .fetch_one(&mut *tx)
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Feil ved henting av nye segmenter");
+        internal_error("Databasefeil")
+    })?;
+
+    if new_count == 0 {
+        return Err(bad_request("Fant ingen segmenter for new_version"));
+    }
+
+    // Load the old segments inside the same transaction, indexed by seq.
+    let old_segments: Vec<(i32, String, bool)> = sqlx::query_as(
+        "SELECT seq, content, edited FROM transcription_segments WHERE node_id = $1 AND transcribed_at = $2 ORDER BY seq",
+    )
+    .bind(req.node_id)
+    .bind(old_ts)
+    .fetch_all(&mut *tx)
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Feil ved henting av gamle segmenter");
+        internal_error("Databasefeil")
+    })?;
+
+    let old_by_seq: std::collections::HashMap<i32, (String, bool)> = old_segments
+        .into_iter()
+        .map(|(seq, content, edited)| (seq, (content, edited)))
+        .collect();
+
+    let mut kept_old = 0i32;
+    let mut kept_new = 0i32;
+
+    for choice in &req.choices {
+        // Only an "old" choice with a matching old segment changes anything;
+        // every other segment keeps the freshly transcribed text.
+        let old_segment = if choice.choice == "old" {
+            old_by_seq.get(&choice.seq)
+        } else {
+            None
+        };
+        let Some((old_content, old_edited)) = old_segment else {
+            kept_new += 1;
+            continue;
+        };
+
+        // Copy the old text into the new segment and carry over its edited flag.
+        sqlx::query(
+            "UPDATE transcription_segments SET content = $1, edited = $2 WHERE node_id = $3 AND transcribed_at = $4 AND seq = $5",
+        )
+        .bind(old_content)
+        .bind(*old_edited)
+        .bind(req.node_id)
+        .bind(new_ts)
+        .bind(choice.seq)
+        .execute(&mut *tx)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Feil ved oppdatering av segment");
+            internal_error("Databasefeil ved oppdatering")
+        })?;
+        kept_old += 1;
+    }
+
+    // Delete every older version (not only the selected one) as cleanup.
+    sqlx::query(
+        "DELETE FROM transcription_segments WHERE node_id = $1 AND transcribed_at < $2",
+    )
+    .bind(req.node_id)
+    .bind(new_ts)
+    .execute(&mut *tx)
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Feil ved sletting av gamle segmenter");
+        internal_error("Databasefeil ved opprydding")
+    })?;
+
+    // Rebuild the node content from the final segments of the new version.
+    let final_segments: Vec<(String,)> = sqlx::query_as(
+        "SELECT content FROM transcription_segments WHERE node_id = $1 AND transcribed_at = $2 ORDER BY seq",
+    )
+    .bind(req.node_id)
+    .bind(new_ts)
+    .fetch_all(&mut *tx)
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Feil ved henting av endelige segmenter");
+        internal_error("Databasefeil")
+    })?;
+
+    let transcript_text: String = final_segments
+        .iter()
+        .map(|(c,)| c.trim())
+        .collect::<Vec<_>>()
+        .join(" ");
+
+    sqlx::query("UPDATE nodes SET content = $1 WHERE id = $2")
+        .bind(&transcript_text)
+        .bind(req.node_id)
+        .execute(&mut *tx)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Feil ved oppdatering av node-innhold");
+            internal_error("Databasefeil")
+        })?;
+
+    tx.commit().await.map_err(|e| {
+        tracing::error!(error = %e, "Commit feilet");
+        internal_error("Databasefeil ved commit")
+    })?;
+
+    tracing::info!(
+        node_id = %req.node_id,
+        kept_old = kept_old,
+        kept_new = kept_new,
+        "Re-transkripsjon løst"
+    );
+
+    Ok(Json(ResolveRetranscriptionResponse {
+        resolved: true,
+        kept_old,
+        kept_new,
+    }))
+}
diff --git a/maskinrommet/src/main.rs b/maskinrommet/src/main.rs
index 4453219..215cf41 100644
--- a/maskinrommet/src/main.rs
+++ b/maskinrommet/src/main.rs
@@ -141,6 +141,10 @@ async fn main() {
         .route("/query/nodes", get(queries::query_nodes))
         .route("/query/segments", 
get(queries::query_segments))
         .route("/intentions/update_segment", post(intentions::update_segment))
+        .route("/intentions/retranscribe", post(intentions::retranscribe))
+        .route("/intentions/resolve_retranscription", post(intentions::resolve_retranscription))
+        .route("/query/transcription_versions", get(queries::query_transcription_versions))
+        .route("/query/segments_version", get(queries::query_segments_version))
         .layer(TraceLayer::new_for_http())
         .with_state(state);
 
diff --git a/maskinrommet/src/queries.rs b/maskinrommet/src/queries.rs
index 3f3fa89..169a47d 100644
--- a/maskinrommet/src/queries.rs
+++ b/maskinrommet/src/queries.rs
@@ -120,6 +120,176 @@ async fn run_query_segments(
     })
 }
 
+// =============================================================================
+// GET /query/transcription_versions — all transcription versions of a node
+// =============================================================================
+
+#[derive(Deserialize)]
+pub struct QueryVersionsRequest {
+    pub node_id: Uuid, // node whose transcription versions are listed
+}
+
+#[derive(Serialize)]
+pub struct TranscriptionVersion {
+    pub transcribed_at: String, // version timestamp, rendered with `to_rfc3339()` by the handler
+    pub segment_count: i64, // COUNT(*) of the segments in this version
+    pub edited_count: i64, // number of those segments with `edited` set
+}
+
+#[derive(Serialize)]
+pub struct QueryVersionsResponse {
+    pub versions: Vec, // NOTE(review): the element type argument is missing in this patch text — presumably `TranscriptionVersion`; confirm against the repository
+}
+
+/// GET /query/transcription_versions?node_id=...
+///
+/// Lists all transcription versions of a node, newest first.
+pub async fn query_transcription_versions(
+    State(state): State,
+    user: AuthUser,
+    axum::extract::Query(params): axum::extract::Query,
+) -> Result, (StatusCode, Json)> {
+    // Access check: look the node up inside a transaction on which set_rls_context was called for the caller (helper defined elsewhere — presumably it restricts visible rows to this user; confirm)
+    let mut tx = state.db.begin().await.map_err(|e| {
+        tracing::error!(error = %e, "Transaksjon feilet");
+        internal_error("Databasefeil")
+    })?;
+    set_rls_context(&mut tx, user.node_id).await.map_err(|e| {
+        tracing::error!(error = %e, "RLS-kontekst feilet");
+        internal_error("Databasefeil")
+    })?;
+
+    let exists = sqlx::query_scalar::<_, bool>(
+        "SELECT EXISTS(SELECT 1 FROM nodes WHERE id = $1)",
+    )
+    .bind(params.node_id)
+    .fetch_one(&mut *tx)
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Tilgangssjekk feilet");
+        internal_error("Databasefeil")
+    })?;
+
+    tx.commit().await.map_err(|e| {
+        tracing::error!(error = %e, "Commit feilet");
+        internal_error("Databasefeil")
+    })?;
+
+    if !exists { // node not found by the lookup above: answer with an empty list rather than an error
+        return Ok(Json(QueryVersionsResponse { versions: vec![] }));
+    }
+
+    let rows: Vec<(chrono::DateTime, i64, i64)> = sqlx::query_as(
+        r#"
+        SELECT transcribed_at, COUNT(*) as segment_count,
+               COUNT(*) FILTER (WHERE edited) as edited_count
+        FROM transcription_segments
+        WHERE node_id = $1
+        GROUP BY transcribed_at
+        ORDER BY transcribed_at DESC
+        "#,
+    )
+    .bind(params.node_id)
+    .fetch_all(&state.db) // NOTE(review): runs on the pool, outside the transaction used for the access check above — confirm this is intended
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Feil ved henting av versjoner");
+        internal_error("Databasefeil")
+    })?;
+
+    let versions = rows
+        .into_iter()
+        .map(|(ts, count, edited)| TranscriptionVersion {
+            transcribed_at: ts.to_rfc3339(),
+            segment_count: count,
+            edited_count: edited,
+        })
+        .collect();
+
+    Ok(Json(QueryVersionsResponse { versions }))
+}
+
+// =============================================================================
+// GET /query/segments_version — segments of one specific version
+// =============================================================================
+
+#[derive(Deserialize)]
+pub struct QuerySegmentsVersionRequest {
+    pub node_id: Uuid,
+    pub transcribed_at: String,
+}
+
+/// GET /query/segments_version?node_id=...&transcribed_at=...
+///
+/// Fetches the segments of one specific transcription version, ordered by `seq`.
+pub async fn query_segments_version(
+    State(state): State,
+    user: AuthUser,
+    axum::extract::Query(params): axum::extract::Query,
+) -> Result, (StatusCode, Json)> {
+    // Access check: look the node up inside a transaction on which set_rls_context was called for the caller (helper defined elsewhere — presumably it restricts visible rows to this user; confirm)
+    let mut tx = state.db.begin().await.map_err(|e| {
+        tracing::error!(error = %e, "Transaksjon feilet");
+        internal_error("Databasefeil")
+    })?;
+    set_rls_context(&mut tx, user.node_id).await.map_err(|e| {
+        tracing::error!(error = %e, "RLS-kontekst feilet");
+        internal_error("Databasefeil")
+    })?;
+
+    let exists = sqlx::query_scalar::<_, bool>(
+        "SELECT EXISTS(SELECT 1 FROM nodes WHERE id = $1)",
+    )
+    .bind(params.node_id)
+    .fetch_one(&mut *tx)
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Tilgangssjekk feilet");
+        internal_error("Databasefeil")
+    })?;
+
+    tx.commit().await.map_err(|e| {
+        tracing::error!(error = %e, "Commit feilet");
+        internal_error("Databasefeil")
+    })?;
+
+    if !exists { // node not found by the lookup above: answer with an empty segment list rather than an error
+        return Ok(Json(QuerySegmentsResponse {
+            segments: vec![],
+            transcribed_at: None,
+        }));
+    }
+
+    let ts: chrono::DateTime = params.transcribed_at.parse() // NOTE(review): parsed only after the access check, so a malformed timestamp is reported only when the node lookup succeeded
+        .map_err(|_| {
+            (StatusCode::BAD_REQUEST, Json(ErrorResponse {
+                error: "Ugyldig transcribed_at-tidsstempel".to_string(),
+            }))
+        })?;
+
+    let segments = sqlx::query_as::<_, SegmentResult>(
+        r#"
+        SELECT id, seq, start_ms, end_ms, content, edited
+        FROM transcription_segments
+        WHERE node_id = $1 AND transcribed_at = $2
+        ORDER BY seq
+        "#,
+    )
+    .bind(params.node_id)
+    .bind(ts)
+    .fetch_all(&state.db) // NOTE(review): runs on the pool, outside the transaction used for the access check above — confirm this is intended
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Feil ved henting av segmenter");
+        internal_error("Databasefeil")
+    })?;
+
+    Ok(Json(QuerySegmentsResponse {
+        segments,
+        transcribed_at: Some(ts.to_rfc3339()),
+    }))
+}
+
 // =============================================================================
 // RLS-kontekst
 // 
============================================================================= diff --git a/tasks.md b/tasks.md index be63778..5ac2ae7 100644 --- a/tasks.md +++ b/tasks.md @@ -99,8 +99,7 @@ Uavhengige faser kan fortsatt plukkes. - [x] 7.4 Lyd-avspilling: spiller av original lyd fra CAS-node. Waveform-visning. - [x] 7.5 Segmenttabell-migrasjon: opprett `transcription_segments`-tabell i PG. Oppdater `transcribe.rs` til SRT-format → parse → skriv segmenter. Miljøvariabler: `WHISPER_MODEL` (default "medium"), `WHISPER_INITIAL_PROMPT`. Ref: `docs/concepts/podcastfabrikken.md` § 3. - [x] 7.6 Transkripsjonsvisning i frontend: segmenter med tidsstempler, avspillingsknapp per segment (hopper til riktig sted i lydfilen), redigerbare tekstfelt (setter `edited = true`). Universell komponent for podcast, møter, voice memos. -- [~] 7.7 Re-transkripsjonsflyt: ved ny transkripsjon, vis side-om-side med forrige versjon. Highlight manuelt redigerte segmenter fra forrige versjon. Bruker velger per segment. - > Påbegynt: 2026-03-17T18:32 +- [x] 7.7 Re-transkripsjonsflyt: ved ny transkripsjon, vis side-om-side med forrige versjon. Highlight manuelt redigerte segmenter fra forrige versjon. Bruker velger per segment. - [ ] 7.8 SRT-eksport: generer nedlastbar SRT-fil fra `transcription_segments`-tabellen. ## Fase 8: Aliaser