// synops-prune — Opprydding av gamle CAS-filer basert på TTL og diskbruk. // // Tre-fase pruning: // Fase 1 (disk ≥ 85%): Slett generert innhold (TTS, thumbnails — regenererbart) // Fase 2 (alltid): TTL-basert pruning per modalitet (lyd 30d, bilde 30d, video 14d) // Fase 3 (disk ≥ 95%): Kritisk — alt uten publishing-edge slettes (tekst beholdes) // // Signaler som forlenger levetid: // - Publishing-edge → behold for alltid (unntatt kritisk) // - last_accessed_at innenfor TTL → forleng // - Utranskribert lyd → behold i normal modus // // Input: // --dry-run Forhåndsvisning uten sletting (default) // --write Faktisk slett filer // --cas-root PATH CAS-katalog (default: /srv/synops/media/cas) // --audio-ttl N TTL for lyd i dager (default: 30) // --image-ttl N TTL for bilder i dager (default: 30) // --video-ttl N TTL for video i dager (default: 14) // // Miljøvariabler: // DATABASE_URL — PostgreSQL-tilkobling (påkrevd) // CAS_ROOT — Alternativ til --cas-root // // Erstatter: maskinrommet/src/pruning.rs // Ref: docs/retninger/unix_filosofi.md use chrono::{DateTime, Utc}; use clap::Parser; use serde::Serialize; use std::path::{Path, PathBuf}; use std::process; use uuid::Uuid; /// Opprydding av gamle CAS-filer basert på TTL og diskbruk. 
#[derive(Parser)] #[command(name = "synops-prune", about = "Rydd opp gamle CAS-filer (TTL + disk-nødventil)")] struct Cli { /// Forhåndsvisning — vis hva som ville blitt slettet uten å slette #[arg(long, default_value_t = false)] dry_run: bool, /// Faktisk slett filer (uten dette flagget er dry-run implisitt) #[arg(long, default_value_t = false)] write: bool, /// CAS-rotkatalog #[arg(long, env = "CAS_ROOT", default_value = "/srv/synops/media/cas")] cas_root: PathBuf, /// TTL for lydfiler i dager #[arg(long, default_value_t = 30)] audio_ttl: i64, /// TTL for bilder i dager #[arg(long, default_value_t = 30)] image_ttl: i64, /// TTL for video i dager #[arg(long, default_value_t = 14)] video_ttl: i64, } impl Cli { /// Effektiv dry-run: sant hvis --dry-run er satt ELLER --write ikke er satt. fn is_dry_run(&self) -> bool { self.dry_run || !self.write } } // --- Datastrukturer --- #[derive(Debug, sqlx::FromRow)] struct PruneCandidate { id: Uuid, cas_hash: String, mime_category: String, size_bytes: i64, created_at: DateTime, last_accessed_at: Option>, has_publishing_edge: bool, is_generated: bool, has_transcription: bool, } #[derive(Debug, Default, Serialize)] struct PhaseResult { checked: usize, would_delete: usize, deleted: usize, bytes_freed: u64, } #[derive(Debug, Serialize)] struct PruneOutput { status: &'static str, dry_run: bool, candidates_checked: usize, files_deleted: usize, bytes_freed: u64, disk_pct_before: f64, disk_pct_after: f64, emergency_level: &'static str, phases: PrunePhases, } #[derive(Debug, Serialize)] struct PrunePhases { generated: PhaseResult, ttl: PhaseResult, critical: PhaseResult, } // --- CAS-operasjoner --- /// Sjekk diskbruk-prosent for partisjonen CAS-katalogen ligger på. 
async fn disk_usage_percent(cas_root: &Path) -> Result<f64, String> {
    // FIX: return type was a bare `Result` with no generic parameters.
    // `df --output=pcent` prints a header line followed by e.g. " 42%".
    let output = tokio::process::Command::new("df")
        .args(["--output=pcent", cas_root.to_str().unwrap_or("/")])
        .output()
        .await
        .map_err(|e| format!("df-kommando feilet: {e}"))?;
    if !output.status.success() {
        return Err("df-kommando returnerte feil".into());
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    // The second line holds the percentage. If parsing fails, fall back to 0.0
    // ("plenty of space") rather than aborting the whole run.
    let pct: f64 = stdout
        .lines()
        .nth(1)
        .and_then(|line| line.trim().trim_end_matches('%').parse().ok())
        .unwrap_or(0.0);
    Ok(pct)
}

/// Compute the file path for a CAS hash (2-character prefix directory).
fn cas_path_for(cas_root: &Path, hash: &str) -> PathBuf {
    // Guard against hashes shorter than two characters (hex hashes are ASCII,
    // so byte slicing is safe here).
    let prefix = &hash[..2.min(hash.len())];
    cas_root.join(prefix).join(hash)
}

/// Delete a CAS file. Returns bytes freed (0 if the file did not exist).
async fn cas_delete(cas_root: &Path, hash: &str) -> Result<u64, String> {
    // FIX: return type was a bare `Result` with no generic parameters.
    let path = cas_path_for(cas_root, hash);
    match tokio::fs::metadata(&path).await {
        Ok(meta) => {
            let size = meta.len();
            tokio::fs::remove_file(&path)
                .await
                .map_err(|e| format!("Kunne ikke slette {}: {e}", path.display()))?;
            Ok(size)
        }
        // Already gone — nothing freed, not an error (delete is idempotent).
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(0),
        Err(e) => Err(format!("Kunne ikke lese metadata for {}: {e}", path.display())),
    }
}

/// Check whether a CAS file exists and return its size.
async fn cas_file_size(cas_root: &Path, hash: &str) -> u64 { let path = cas_path_for(cas_root, hash); tokio::fs::metadata(&path) .await .map(|m| m.len()) .unwrap_or(0) } // --- Logg --- async fn log_prune(db: &sqlx::PgPool, node_id: Uuid, hash: &str, bytes: u64, reason: &str) { let detail = serde_json::json!({ "hash": hash, "size_bytes": bytes, "operation": "delete", "reason": reason, }); let _ = sqlx::query( "INSERT INTO resource_usage_log (target_node_id, resource_type, detail) VALUES ($1, 'cas', $2)", ) .bind(node_id) .bind(&detail) .execute(db) .await .map_err(|e| tracing::warn!(error = %e, "Kunne ikke logge pruning-hendelse")); } // --- Fasene --- /// Fase 1: Slett generert innhold (TTS, thumbnails) — kan regenereres. async fn phase_generated( db: &sqlx::PgPool, cas_root: &Path, dry_run: bool, ) -> Result { let rows: Vec<(String, Uuid)> = sqlx::query_as( r#" SELECT metadata->>'cas_hash' AS cas_hash, id FROM nodes WHERE node_kind = 'media' AND metadata->>'cas_hash' IS NOT NULL AND ( metadata ? 'tts' OR metadata->>'generated' = 'true' ) "#, ) .fetch_all(db) .await .map_err(|e| format!("Spørring for generert innhold feilet: {e}"))?; let mut result = PhaseResult { checked: rows.len(), ..Default::default() }; for (hash, node_id) in &rows { let size = cas_file_size(cas_root, hash).await; if size == 0 { continue; // Allerede borte } result.would_delete += 1; if dry_run { result.bytes_freed += size; tracing::info!(hash = %hash, node_id = %node_id, size, "Fase 1: ville slettet generert fil"); } else { match cas_delete(cas_root, hash).await { Ok(bytes) if bytes > 0 => { result.deleted += 1; result.bytes_freed += bytes; log_prune(db, *node_id, hash, bytes, "generated_cleanup").await; } Ok(_) => {} Err(e) => tracing::warn!(hash = %hash, error = %e, "Kunne ikke slette generert fil"), } } } Ok(result) } /// Fase 2: TTL-basert pruning per modalitet. 
async fn phase_ttl( db: &sqlx::PgPool, cas_root: &Path, audio_ttl: i64, image_ttl: i64, video_ttl: i64, emergency_level: &str, dry_run: bool, ) -> Result { let candidates: Vec = sqlx::query_as( r#" SELECT n.id, n.metadata->>'cas_hash' AS cas_hash, CASE WHEN n.metadata->>'mime' LIKE 'audio/%' THEN 'audio' WHEN n.metadata->>'mime' LIKE 'image/%' THEN 'image' WHEN n.metadata->>'mime' LIKE 'video/%' THEN 'video' ELSE 'other' END AS mime_category, COALESCE((n.metadata->>'size_bytes')::bigint, 0) AS size_bytes, n.created_at, n.last_accessed_at, EXISTS( SELECT 1 FROM edges e WHERE (e.source_id = n.id OR e.target_id = n.id) AND e.edge_type IN ('belongs_to', 'has_media') AND EXISTS( SELECT 1 FROM edges pub WHERE pub.edge_type = 'publishing' AND (pub.source_id = e.target_id OR pub.source_id = e.source_id) ) ) AS has_publishing_edge, COALESCE(n.metadata ? 'tts' OR n.metadata->>'generated' = 'true', false) AS is_generated, EXISTS( SELECT 1 FROM transcription_segments ts WHERE ts.node_id = n.id ) AS has_transcription FROM nodes n WHERE n.node_kind = 'media' AND n.metadata->>'cas_hash' IS NOT NULL ORDER BY n.created_at ASC "#, ) .fetch_all(db) .await .map_err(|e| format!("TTL-kandidatspørring feilet: {e}"))?; let now = Utc::now(); let mut result = PhaseResult { checked: candidates.len(), ..Default::default() }; for c in &candidates { // Tekst/transkripsjon slettes aldri if c.mime_category == "other" { continue; } // Publishing-edge = behold for alltid (unntatt ved kritisk) if c.has_publishing_edge && emergency_level != "critical" { continue; } // Bestem TTL basert på modalitet og nødsituasjon let ttl_days = match emergency_level { "critical" => 0, "aggressive" => match c.mime_category.as_str() { "audio" => audio_ttl / 3, "image" => image_ttl / 3, "video" => 1, _ => i64::MAX, }, _ => match c.mime_category.as_str() { "audio" => audio_ttl, "image" => image_ttl, "video" => video_ttl, _ => i64::MAX, }, }; if ttl_days == i64::MAX { continue; } // Beregn effektiv alder — siste tilgang 
forlenger levetiden let reference_time = c.last_accessed_at.unwrap_or(c.created_at); let age_days = (now - reference_time).num_days(); if age_days < ttl_days { continue; } // Lyd uten transkripsjon: behold lengre (trenger transkribering først) if c.mime_category == "audio" && !c.has_transcription && emergency_level == "normal" { tracing::debug!(node_id = %c.id, "Beholder utranskribert lyd"); continue; } // Allerede håndtert generert innhold i fase 1 if c.is_generated { continue; } let size = if c.size_bytes > 0 { c.size_bytes as u64 } else { cas_file_size(cas_root, &c.cas_hash).await }; if size == 0 { continue; } result.would_delete += 1; if dry_run { result.bytes_freed += size; tracing::info!( hash = %c.cas_hash, node_id = %c.id, mime = %c.mime_category, age_days, ttl_days, size, "Fase 2: ville slettet TTL-utløpt fil" ); } else { match cas_delete(cas_root, &c.cas_hash).await { Ok(bytes) if bytes > 0 => { result.deleted += 1; result.bytes_freed += bytes; log_prune(db, c.id, &c.cas_hash, bytes, "ttl_expired").await; } Ok(_) => {} Err(e) => tracing::warn!(hash = %c.cas_hash, error = %e, "Kunne ikke slette CAS-fil"), } } } Ok(result) } /// Fase 3: Kritisk — slett ALT uten publishing-edge (unntatt tekst). 
async fn phase_critical( db: &sqlx::PgPool, cas_root: &Path, dry_run: bool, ) -> Result { let rows: Vec<(String, Uuid, i64)> = sqlx::query_as( r#" SELECT n.metadata->>'cas_hash' AS cas_hash, n.id, COALESCE((n.metadata->>'size_bytes')::bigint, 0) AS size_bytes FROM nodes n WHERE n.node_kind = 'media' AND n.metadata->>'cas_hash' IS NOT NULL AND NOT EXISTS( SELECT 1 FROM edges e WHERE (e.source_id = n.id OR e.target_id = n.id) AND e.edge_type IN ('belongs_to', 'has_media') AND EXISTS( SELECT 1 FROM edges pub WHERE pub.edge_type = 'publishing' AND (pub.source_id = e.target_id OR pub.source_id = e.source_id) ) ) "#, ) .fetch_all(db) .await .map_err(|e| format!("Kritisk pruning-spørring feilet: {e}"))?; let mut result = PhaseResult { checked: rows.len(), ..Default::default() }; for (hash, node_id, size_bytes) in &rows { let size = if *size_bytes > 0 { *size_bytes as u64 } else { cas_file_size(cas_root, hash).await }; if size == 0 { continue; } result.would_delete += 1; if dry_run { result.bytes_freed += size; tracing::info!(hash = %hash, node_id = %node_id, size, "Fase 3: ville slettet (kritisk)"); } else { match cas_delete(cas_root, hash).await { Ok(bytes) if bytes > 0 => { result.deleted += 1; result.bytes_freed += bytes; log_prune(db, *node_id, hash, bytes, "critical_emergency").await; } Ok(_) => {} Err(e) => tracing::warn!(hash = %hash, error = %e, "Kritisk: kunne ikke slette CAS-fil"), } } } Ok(result) } // --- Main --- #[tokio::main] async fn main() { let cli = Cli::parse(); synops_common::logging::init("synops_prune"); let dry_run = cli.is_dry_run(); if let Err(e) = run(cli, dry_run).await { tracing::error!(error = %e, "synops-prune feilet"); eprintln!("{e}"); process::exit(1); } } async fn run(cli: Cli, dry_run: bool) -> Result<(), String> { let db = synops_common::db::connect().await?; // Sjekk at CAS-katalogen eksisterer if !cli.cas_root.exists() { return Err(format!("CAS-katalog finnes ikke: {}", cli.cas_root.display())); } let disk_pct_before = 
disk_usage_percent(&cli.cas_root).await?; let emergency_level = if disk_pct_before >= 95.0 { "critical" } else if disk_pct_before >= 90.0 { "aggressive" } else if disk_pct_before >= 85.0 { "warning" } else { "normal" }; tracing::info!( disk_pct = disk_pct_before, level = emergency_level, dry_run, "Pruning startet" ); // Fase 1: Slett generert innhold ved disk ≥ 85% let generated = if disk_pct_before >= 85.0 { phase_generated(&db, &cli.cas_root, dry_run).await? } else { PhaseResult::default() }; if generated.would_delete > 0 || generated.deleted > 0 { tracing::info!( would_delete = generated.would_delete, deleted = generated.deleted, bytes = generated.bytes_freed, "Fase 1: Generert innhold" ); } // Fase 2: TTL-basert pruning (alltid) let ttl = phase_ttl( &db, &cli.cas_root, cli.audio_ttl, cli.image_ttl, cli.video_ttl, emergency_level, dry_run, ) .await?; if ttl.would_delete > 0 || ttl.deleted > 0 { tracing::info!( checked = ttl.checked, would_delete = ttl.would_delete, deleted = ttl.deleted, bytes = ttl.bytes_freed, "Fase 2: TTL-basert pruning" ); } // Fase 3: Kritisk pruning ved disk ≥ 95% let critical = if disk_pct_before >= 95.0 { phase_critical(&db, &cli.cas_root, dry_run).await? 
} else { PhaseResult::default() }; if critical.would_delete > 0 || critical.deleted > 0 { tracing::warn!( would_delete = critical.would_delete, deleted = critical.deleted, bytes = critical.bytes_freed, "Fase 3: KRITISK pruning" ); } // Diskbruk etter pruning let disk_pct_after = if !dry_run { disk_usage_percent(&cli.cas_root).await.unwrap_or(disk_pct_before) } else { disk_pct_before // Ingen endring ved dry-run }; let total_deleted = generated.deleted + ttl.deleted + critical.deleted; let total_would_delete = generated.would_delete + ttl.would_delete + critical.would_delete; let total_bytes = generated.bytes_freed + ttl.bytes_freed + critical.bytes_freed; let output = PruneOutput { status: if dry_run { "dry_run" } else { "completed" }, dry_run, candidates_checked: generated.checked + ttl.checked + critical.checked, files_deleted: if dry_run { total_would_delete } else { total_deleted }, bytes_freed: total_bytes, disk_pct_before, disk_pct_after, emergency_level, phases: PrunePhases { generated, ttl, critical, }, }; println!( "{}", serde_json::to_string_pretty(&output).map_err(|e| format!("JSON-serialisering feilet: {e}"))? ); Ok(()) }