diff --git a/migrations/005_transcription_segments.sql b/migrations/005_transcription_segments.sql
new file mode 100644
index 0000000..0aa0a75
--- /dev/null
+++ b/migrations/005_transcription_segments.sql
@@ -0,0 +1,33 @@
+-- Migration 005: Transcription segments
+-- Ref: docs/concepts/podcastfabrikken.md § 3
+--
+-- Master copy of all transcriptions. SRT and plain text are derived
+-- export formats that are generated from this table.
+-- Universal for all audio: podcasts, meetings, voice memos.
+
+CREATE TABLE transcription_segments (
+    id              BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+    node_id         UUID NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
+    transcribed_at  TIMESTAMPTZ NOT NULL,  -- groups segments from the same run
+    seq             INT NOT NULL,
+    start_ms        INT NOT NULL,
+    end_ms          INT NOT NULL,
+    content         TEXT NOT NULL,
+    -- NOT NULL keeps the flag two-valued; inserts that omit it still get false.
+    edited          BOOLEAN NOT NULL DEFAULT false,
+
+    UNIQUE (node_id, transcribed_at, seq),
+    -- Reject negative positions and inverted time ranges at write time.
+    CONSTRAINT transcription_segments_seq_check CHECK (seq >= 0),
+    CONSTRAINT transcription_segments_time_range_check
+        CHECK (start_ms >= 0 AND end_ms >= start_ms)
+);
+
+-- Main lookup index: fetch the segments of a node, in order.
+-- NOTE(review): same column list as the UNIQUE constraint above, whose backing
+-- index already serves this lookup; kept so the index name stays available.
+CREATE INDEX idx_segments_node ON transcription_segments (node_id, transcribed_at, seq);
+
+-- Full-text search across transcriptions (Norwegian)
+CREATE INDEX idx_segments_fts ON transcription_segments
+    USING gin(to_tsvector('norwegian', content));