Implementer synops-prune CLI-verktøy (oppgave 21.9)

Erstatter maskinrommet/src/pruning.rs med selvstendig CLI-verktøy
som følger unix-filosofien. Tre-fase pruning:

- Fase 1 (disk ≥85%): Slett regenererbart innhold (TTS, thumbnails)
- Fase 2 (alltid): TTL-basert pruning per modalitet (lyd 30d, bilde 30d, video 14d)
- Fase 3 (disk ≥95%): Kritisk — alt uten publishing-edge slettes

Dry-run er default (krever --write for faktisk sletting).
JSON-output til stdout, logging til stderr.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
vegard 2026-03-18 09:58:03 +00:00
parent f34dee810b
commit 45efeb080a
5 changed files with 3053 additions and 2 deletions

View file

@ -249,8 +249,7 @@ kaller dem direkte. Samme verktøy, to brukere.
- [x] 21.6 `synops-summarize`: AI-oppsummering. Input: `--communication-id <uuid>`. Output: sammendrag som tekst. Erstatter `summarize.rs`.
- [x] 21.7 `synops-suggest-edges`: AI-foreslåtte edges. Input: `--node-id <uuid>`. Output: JSON med forslag (target, edge_type, confidence). Erstatter `ai_edges.rs`.
- [x] 21.8 `synops-respond`: Claude chat-svar. Input: `--communication-id <uuid> --message-id <uuid>`. Output: svartekst. Erstatter `agent.rs` sin prosessering (auth/ratelimit forblir i maskinrommet).
- [~] 21.9 `synops-prune`: Opprydding av gamle noder. Input: `--dry-run` for forhåndsvisning. Erstatter `pruning.rs`.
> Påbegynt: 2026-03-18T09:53
- [x] 21.9 `synops-prune`: Opprydding av gamle noder. Input: `--dry-run` for forhåndsvisning. Erstatter `pruning.rs`.
### Oppslag (Claude-verktøy)

View file

@ -15,6 +15,7 @@ eller maskinrommet-API. Ligger i PATH via symlink eller direkte kall.
| `synops-summarize` | AI-oppsummering av kommunikasjonsnode via LiteLLM | Ferdig |
| `synops-suggest-edges` | AI-foreslåtte edges (topics/mentions) for en node via LiteLLM | Ferdig |
| `synops-respond` | Claude chat-svar i kommunikasjonsnoder | Ferdig |
| `synops-prune` | Opprydding av gamle CAS-filer (TTL + disk-nødventil) | Ferdig |
## Konvensjoner
- Navnekonvensjon: `synops-<verb>` (f.eks. `synops-context`)

2433
tools/synops-prune/Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,19 @@
[package]
name = "synops-prune"
version = "0.1.0"
edition = "2024"
[[bin]]
name = "synops-prune"
path = "src/main.rs"
[dependencies]
clap = { version = "4", features = ["derive", "env"] }
tokio = { version = "1", features = ["full"] }
sqlx = { version = "0.8", features = ["runtime-tokio", "tls-rustls", "postgres", "uuid", "chrono", "json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
uuid = { version = "1", features = ["v7", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View file

@ -0,0 +1,599 @@
// synops-prune — Opprydding av gamle CAS-filer basert på TTL og diskbruk.
//
// Tre-fase pruning:
// Fase 1 (disk ≥ 85%): Slett generert innhold (TTS, thumbnails — regenererbart)
// Fase 2 (alltid): TTL-basert pruning per modalitet (lyd 30d, bilde 30d, video 14d)
// Fase 3 (disk ≥ 95%): Kritisk — alt uten publishing-edge slettes (tekst beholdes)
//
// Signaler som forlenger levetid:
// - Publishing-edge → behold for alltid (unntatt kritisk)
// - last_accessed_at innenfor TTL → forleng
// - Utranskribert lyd → behold i normal modus
//
// Input:
// --dry-run Forhåndsvisning uten sletting (default)
// --write Faktisk slett filer
// --cas-root PATH CAS-katalog (default: /srv/synops/media/cas)
// --audio-ttl N TTL for lyd i dager (default: 30)
// --image-ttl N TTL for bilder i dager (default: 30)
// --video-ttl N TTL for video i dager (default: 14)
//
// Miljøvariabler:
// DATABASE_URL — PostgreSQL-tilkobling (påkrevd)
// CAS_ROOT — Alternativ til --cas-root
//
// Erstatter: maskinrommet/src/pruning.rs
// Ref: docs/retninger/unix_filosofi.md
use chrono::{DateTime, Utc};
use clap::Parser;
use serde::Serialize;
use std::path::{Path, PathBuf};
use std::process;
use uuid::Uuid;
/// Opprydding av gamle CAS-filer basert på TTL og diskbruk.
// NOTE: with clap's derive API the `///` doc comments on the struct and its
// fields become the user-facing `--help` text, so they are deliberately kept
// verbatim (Norwegian). Review commentary uses `//` comments, which clap ignores.
#[derive(Parser)]
#[command(name = "synops-prune", about = "Rydd opp gamle CAS-filer (TTL + disk-nødventil)")]
struct Cli {
    // Deletion only happens when --write is given WITHOUT --dry-run;
    // see `Cli::is_dry_run` for the combination logic.
    /// Forhåndsvisning — vis hva som ville blitt slettet uten å slette
    #[arg(long, default_value_t = false)]
    dry_run: bool,
    /// Faktisk slett filer (uten dette flagget er dry-run implisitt)
    #[arg(long, default_value_t = false)]
    write: bool,
    // Also settable via the CAS_ROOT environment variable (clap `env` feature).
    /// CAS-rotkatalog
    #[arg(long, env = "CAS_ROOT", default_value = "/srv/synops/media/cas")]
    cas_root: PathBuf,
    // TTLs are in whole days; phase 2 divides audio/image TTL by 3 in
    // "aggressive" mode, so values < 3 effectively round down to 0 there.
    /// TTL for lydfiler i dager
    #[arg(long, default_value_t = 30)]
    audio_ttl: i64,
    /// TTL for bilder i dager
    #[arg(long, default_value_t = 30)]
    image_ttl: i64,
    /// TTL for video i dager
    #[arg(long, default_value_t = 14)]
    video_ttl: i64,
}
impl Cli {
    /// Effective dry-run flag: deletion is only armed when `--write` is
    /// present and `--dry-run` is absent; every other combination previews.
    fn is_dry_run(&self) -> bool {
        // De Morgan form of `dry_run || !write`: NOT (write AND NOT dry_run).
        !(self.write && !self.dry_run)
    }
}
// --- Datastrukturer ---
/// One media node under consideration in phase 2.
///
/// Field names must match the column aliases of the SELECT in `phase_ttl`
/// — `sqlx::FromRow` maps columns to fields by name.
#[derive(Debug, sqlx::FromRow)]
struct PruneCandidate {
    // Node id, used for logging and the resource_usage_log entry.
    id: Uuid,
    // Content hash; resolves to a file via `cas_path_for`.
    cas_hash: String,
    // 'audio' | 'image' | 'video' | 'other', derived from the mime metadata.
    mime_category: String,
    // Size recorded in metadata; 0 means unknown (fall back to a stat call).
    size_bytes: i64,
    created_at: DateTime<Utc>,
    // When set, restarts the TTL clock (see phase_ttl's reference_time).
    last_accessed_at: Option<DateTime<Utc>>,
    // Publishing edge => keep forever, except at "critical" emergency level.
    has_publishing_edge: bool,
    // TTS/thumbnail content; phase 1 is responsible for these.
    is_generated: bool,
    // Untranscribed audio is kept in "normal" mode.
    has_transcription: bool,
}
/// Counters for a single pruning phase; serialized into the JSON output.
#[derive(Debug, Default, Serialize)]
struct PhaseResult {
    // Candidate rows examined by the phase's query.
    checked: usize,
    // Files that qualified for deletion (counted in both dry-run and write mode).
    would_delete: usize,
    // Files actually removed (write mode only; stays 0 in dry-run).
    deleted: usize,
    // Bytes reclaimed — or, in dry-run, bytes that would be reclaimed.
    bytes_freed: u64,
}
/// Top-level JSON document printed to stdout at the end of a run.
#[derive(Debug, Serialize)]
struct PruneOutput {
    // "dry_run" or "completed".
    status: &'static str,
    dry_run: bool,
    // Sum of `checked` across all three phases.
    candidates_checked: usize,
    // In dry-run mode this carries the would-delete total instead.
    files_deleted: usize,
    bytes_freed: u64,
    // Disk usage (%) measured before and after pruning; equal in dry-run.
    disk_pct_before: f64,
    disk_pct_after: f64,
    // "normal" | "warning" | "aggressive" | "critical".
    emergency_level: &'static str,
    phases: PrunePhases,
}
/// Per-phase breakdown nested inside `PruneOutput`.
#[derive(Debug, Serialize)]
struct PrunePhases {
    // Phase 1: regenerable generated content (TTS, thumbnails).
    generated: PhaseResult,
    // Phase 2: TTL-expired media.
    ttl: PhaseResult,
    // Phase 3: critical emergency deletion of unpublished content.
    critical: PhaseResult,
}
// --- CAS-operasjoner ---
/// Disk-usage percentage of the partition the CAS directory lives on.
///
/// Shells out to `df --output=pcent` (GNU coreutils) and parses the second
/// output line — the first line is the "Use%" header.
///
/// # Errors
/// Returns `Err` when `df` cannot be spawned, exits non-zero, or when its
/// output cannot be parsed as a percentage. (Previously an unparsable output
/// silently became `0.0`, which would mask environment problems — e.g. a
/// non-GNU `df` without `--output` — and quietly disable every emergency
/// pruning phase.)
async fn disk_usage_percent(cas_root: &Path) -> Result<f64, String> {
    let output = tokio::process::Command::new("df")
        .args(["--output=pcent", cas_root.to_str().unwrap_or("/")])
        .output()
        .await
        .map_err(|e| format!("df-kommando feilet: {e}"))?;
    if !output.status.success() {
        return Err("df-kommando returnerte feil".into());
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    // Line 0 is the "Use%" header; line 1 carries the value, e.g. " 42%".
    stdout
        .lines()
        .nth(1)
        .and_then(|line| line.trim().trim_end_matches('%').parse().ok())
        .ok_or_else(|| format!("Kunne ikke tolke df-output: {stdout:?}"))
}
/// Compute the on-disk path for a CAS hash: `<root>/<2-char prefix>/<hash>`.
///
/// Uses `str::get(..2)` instead of byte-slicing: for the usual ASCII hex
/// hashes the result is identical, but a hash whose second byte is not a
/// UTF-8 char boundary no longer panics — it falls back to using the whole
/// hash as the prefix directory. Hashes shorter than two bytes likewise use
/// the whole hash, matching the original `2.min(hash.len())` behavior.
fn cas_path_for(cas_root: &Path, hash: &str) -> PathBuf {
    let prefix = hash.get(..2).unwrap_or(hash);
    cas_root.join(prefix).join(hash)
}
/// Delete one CAS file. Returns the number of bytes freed; a file that is
/// already gone counts as 0 bytes and is not an error.
async fn cas_delete(cas_root: &Path, hash: &str) -> Result<u64, String> {
    let path = cas_path_for(cas_root, hash);
    // Stat first so we can report how much space the deletion reclaims.
    let meta = match tokio::fs::metadata(&path).await {
        Ok(m) => m,
        // Already absent — treat as a no-op rather than a failure.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
        Err(e) => return Err(format!("Kunne ikke lese metadata for {}: {e}", path.display())),
    };
    let size = meta.len();
    if let Err(e) = tokio::fs::remove_file(&path).await {
        return Err(format!("Kunne ikke slette {}: {e}", path.display()));
    }
    Ok(size)
}
/// Size of a CAS file in bytes; 0 when the file is missing or unreadable.
async fn cas_file_size(cas_root: &Path, hash: &str) -> u64 {
    match tokio::fs::metadata(&cas_path_for(cas_root, hash)).await {
        Ok(meta) => meta.len(),
        Err(_) => 0,
    }
}
// --- Logg ---
async fn log_prune(db: &sqlx::PgPool, node_id: Uuid, hash: &str, bytes: u64, reason: &str) {
let detail = serde_json::json!({
"hash": hash,
"size_bytes": bytes,
"operation": "delete",
"reason": reason,
});
let _ = sqlx::query(
"INSERT INTO resource_usage_log (target_node_id, resource_type, detail) VALUES ($1, 'cas', $2)",
)
.bind(node_id)
.bind(&detail)
.execute(db)
.await
.map_err(|e| tracing::warn!(error = %e, "Kunne ikke logge pruning-hendelse"));
}
// --- Fasene ---
/// Phase 1: delete generated content (TTS, thumbnails) — it can be regenerated.
///
/// Candidates are media nodes whose metadata has a `tts` key or
/// `generated = "true"`. Only invoked by `run` when disk usage >= 85%.
/// In dry-run mode counters are updated without touching disk; in write
/// mode each file is deleted and the event logged to `resource_usage_log`.
async fn phase_generated(
    db: &sqlx::PgPool,
    cas_root: &Path,
    dry_run: bool,
) -> Result<PhaseResult, String> {
    let rows: Vec<(String, Uuid)> = sqlx::query_as(
        r#"
        SELECT metadata->>'cas_hash' AS cas_hash, id
        FROM nodes
        WHERE node_kind = 'media'
          AND metadata->>'cas_hash' IS NOT NULL
          AND (
              metadata ? 'tts'
              OR metadata->>'generated' = 'true'
          )
        "#,
    )
    .fetch_all(db)
    .await
    .map_err(|e| format!("Spørring for generert innhold feilet: {e}"))?;
    let mut result = PhaseResult {
        checked: rows.len(),
        ..Default::default()
    };
    for (hash, node_id) in &rows {
        let size = cas_file_size(cas_root, hash).await;
        if size == 0 {
            continue; // Already gone (or unreadable) — nothing to free.
        }
        result.would_delete += 1;
        if dry_run {
            result.bytes_freed += size;
            tracing::info!(hash = %hash, node_id = %node_id, size, "Fase 1: ville slettet generert fil");
        } else {
            match cas_delete(cas_root, hash).await {
                Ok(bytes) if bytes > 0 => {
                    result.deleted += 1;
                    result.bytes_freed += bytes;
                    log_prune(db, *node_id, hash, bytes, "generated_cleanup").await;
                }
                // File vanished between the stat above and the delete — no bytes freed.
                Ok(_) => {}
                // A single failed delete is logged but does not abort the phase.
                Err(e) => tracing::warn!(hash = %hash, error = %e, "Kunne ikke slette generert fil"),
            }
        }
    }
    Ok(result)
}
/// Phase 2: TTL-based pruning per modality (runs on every invocation).
///
/// TTLs tighten with the emergency level: "aggressive" divides audio/image
/// TTL by 3 and gives video 1 day; "critical" sets every TTL to 0.
/// Signals that extend a file's life:
/// - a publishing edge keeps it forever (except at "critical")
/// - `last_accessed_at`, when present, restarts the TTL clock
/// - untranscribed audio is kept while the level is "normal"
/// - mime category "other" (text etc.) is never deleted here
async fn phase_ttl(
    db: &sqlx::PgPool,
    cas_root: &Path,
    audio_ttl: i64,
    image_ttl: i64,
    video_ttl: i64,
    emergency_level: &str,
    dry_run: bool,
) -> Result<PhaseResult, String> {
    // One row per media node with a CAS hash; the EXISTS subqueries compute
    // the keep-alive signals in SQL so the loop below only applies policy.
    // Column aliases must match the `PruneCandidate` field names.
    let candidates: Vec<PruneCandidate> = sqlx::query_as(
        r#"
        SELECT
            n.id,
            n.metadata->>'cas_hash' AS cas_hash,
            CASE
                WHEN n.metadata->>'mime' LIKE 'audio/%' THEN 'audio'
                WHEN n.metadata->>'mime' LIKE 'image/%' THEN 'image'
                WHEN n.metadata->>'mime' LIKE 'video/%' THEN 'video'
                ELSE 'other'
            END AS mime_category,
            COALESCE((n.metadata->>'size_bytes')::bigint, 0) AS size_bytes,
            n.created_at,
            n.last_accessed_at,
            EXISTS(
                SELECT 1 FROM edges e
                WHERE (e.source_id = n.id OR e.target_id = n.id)
                  AND e.edge_type IN ('belongs_to', 'has_media')
                  AND EXISTS(
                      SELECT 1 FROM edges pub
                      WHERE pub.edge_type = 'publishing'
                        AND (pub.source_id = e.target_id OR pub.source_id = e.source_id)
                  )
            ) AS has_publishing_edge,
            COALESCE(n.metadata ? 'tts' OR n.metadata->>'generated' = 'true', false) AS is_generated,
            EXISTS(
                SELECT 1 FROM transcription_segments ts
                WHERE ts.node_id = n.id
            ) AS has_transcription
        FROM nodes n
        WHERE n.node_kind = 'media'
          AND n.metadata->>'cas_hash' IS NOT NULL
        ORDER BY n.created_at ASC
        "#,
    )
    .fetch_all(db)
    .await
    .map_err(|e| format!("TTL-kandidatspørring feilet: {e}"))?;
    let now = Utc::now();
    let mut result = PhaseResult {
        checked: candidates.len(),
        ..Default::default()
    };
    for c in &candidates {
        // Text/transcriptions are never deleted.
        if c.mime_category == "other" {
            continue;
        }
        // Publishing edge = keep forever (except at "critical").
        if c.has_publishing_edge && emergency_level != "critical" {
            continue;
        }
        // Pick the TTL from modality and emergency level.
        let ttl_days = match emergency_level {
            "critical" => 0,
            "aggressive" => match c.mime_category.as_str() {
                "audio" => audio_ttl / 3,
                "image" => image_ttl / 3,
                "video" => 1,
                _ => i64::MAX, // unreachable given the "other" check above
            },
            _ => match c.mime_category.as_str() {
                "audio" => audio_ttl,
                "image" => image_ttl,
                "video" => video_ttl,
                _ => i64::MAX,
            },
        };
        if ttl_days == i64::MAX {
            continue;
        }
        // Effective age — a recent access restarts the TTL clock.
        let reference_time = c.last_accessed_at.unwrap_or(c.created_at);
        let age_days = (now - reference_time).num_days();
        if age_days < ttl_days {
            continue;
        }
        // Audio without transcription: keep for now (needs transcribing first).
        if c.mime_category == "audio" && !c.has_transcription && emergency_level == "normal" {
            tracing::debug!(node_id = %c.id, "Beholder utranskribert lyd");
            continue;
        }
        // Generated content is phase 1's responsibility.
        // NOTE(review): phase 1 only runs when disk >= 85%, so below that
        // threshold generated files are never TTL-pruned at all — confirm
        // this is the intended policy.
        if c.is_generated {
            continue;
        }
        // Prefer the recorded metadata size; fall back to stat'ing the file.
        let size = if c.size_bytes > 0 {
            c.size_bytes as u64
        } else {
            cas_file_size(cas_root, &c.cas_hash).await
        };
        if size == 0 {
            continue;
        }
        result.would_delete += 1;
        if dry_run {
            result.bytes_freed += size;
            tracing::info!(
                hash = %c.cas_hash,
                node_id = %c.id,
                mime = %c.mime_category,
                age_days,
                ttl_days,
                size,
                "Fase 2: ville slettet TTL-utløpt fil"
            );
        } else {
            match cas_delete(cas_root, &c.cas_hash).await {
                Ok(bytes) if bytes > 0 => {
                    result.deleted += 1;
                    result.bytes_freed += bytes;
                    log_prune(db, c.id, &c.cas_hash, bytes, "ttl_expired").await;
                }
                // File vanished between stat and delete — nothing freed.
                Ok(_) => {}
                // One failed delete is logged but does not abort the phase.
                Err(e) => tracing::warn!(hash = %c.cas_hash, error = %e, "Kunne ikke slette CAS-fil"),
            }
        }
    }
    Ok(result)
}
/// Fase 3: Kritisk — slett ALT uten publishing-edge (unntatt tekst).
/// Phase 3: critical emergency — delete EVERYTHING without a publishing edge.
///
/// Only invoked by `run` at disk usage >= 95%. Text content is implicitly
/// protected because the query selects only `node_kind = 'media'` rows with
/// a CAS hash; the NOT EXISTS clause spares anything reachable through a
/// publishing edge.
async fn phase_critical(
    db: &sqlx::PgPool,
    cas_root: &Path,
    dry_run: bool,
) -> Result<PhaseResult, String> {
    let rows: Vec<(String, Uuid, i64)> = sqlx::query_as(
        r#"
        SELECT
            n.metadata->>'cas_hash' AS cas_hash,
            n.id,
            COALESCE((n.metadata->>'size_bytes')::bigint, 0) AS size_bytes
        FROM nodes n
        WHERE n.node_kind = 'media'
          AND n.metadata->>'cas_hash' IS NOT NULL
          AND NOT EXISTS(
              SELECT 1 FROM edges e
              WHERE (e.source_id = n.id OR e.target_id = n.id)
                AND e.edge_type IN ('belongs_to', 'has_media')
                AND EXISTS(
                    SELECT 1 FROM edges pub
                    WHERE pub.edge_type = 'publishing'
                      AND (pub.source_id = e.target_id OR pub.source_id = e.source_id)
                )
          )
        "#,
    )
    .fetch_all(db)
    .await
    .map_err(|e| format!("Kritisk pruning-spørring feilet: {e}"))?;
    let mut result = PhaseResult {
        checked: rows.len(),
        ..Default::default()
    };
    for (hash, node_id, size_bytes) in &rows {
        // Prefer the recorded metadata size; fall back to stat'ing the file.
        let size = if *size_bytes > 0 {
            *size_bytes as u64
        } else {
            cas_file_size(cas_root, hash).await
        };
        if size == 0 {
            continue;
        }
        result.would_delete += 1;
        if dry_run {
            result.bytes_freed += size;
            tracing::info!(hash = %hash, node_id = %node_id, size, "Fase 3: ville slettet (kritisk)");
        } else {
            match cas_delete(cas_root, hash).await {
                Ok(bytes) if bytes > 0 => {
                    result.deleted += 1;
                    result.bytes_freed += bytes;
                    log_prune(db, *node_id, hash, bytes, "critical_emergency").await;
                }
                // File vanished between stat and delete — nothing freed.
                Ok(_) => {}
                Err(e) => tracing::warn!(hash = %hash, error = %e, "Kritisk: kunne ikke slette CAS-fil"),
            }
        }
    }
    Ok(result)
}
// --- Main ---
#[tokio::main]
async fn main() {
    let cli = Cli::parse();

    // Logging goes to stderr so stdout stays reserved for the JSON result.
    // Filter is taken from RUST_LOG when set, otherwise defaults to "info".
    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
    tracing_subscriber::fmt()
        .with_env_filter(filter)
        .with_target(false)
        .with_writer(std::io::stderr)
        .init();

    let dry_run = cli.is_dry_run();
    match run(cli, dry_run).await {
        Ok(()) => {}
        Err(e) => {
            tracing::error!(error = %e, "synops-prune feilet");
            eprintln!("{e}");
            process::exit(1);
        }
    }
}
/// Connect to the database, measure disk pressure, run the three pruning
/// phases, and print a JSON summary to stdout.
///
/// Emergency levels from `df` usage of the CAS partition:
/// >= 95% -> "critical", >= 90% -> "aggressive", >= 85% -> "warning",
/// otherwise "normal". Phase 1 runs at >= 85%, phase 2 always,
/// phase 3 at >= 95%.
///
/// # Errors
/// Returns `Err` when DATABASE_URL is missing, the database is unreachable,
/// the CAS directory does not exist, disk usage cannot be measured, a phase
/// query fails, or the JSON output cannot be serialized.
async fn run(cli: Cli, dry_run: bool) -> Result<(), String> {
    let database_url = std::env::var("DATABASE_URL")
        .map_err(|_| "DATABASE_URL er ikke satt".to_string())?;
    // Tiny pool: this is a short-lived CLI; two connections are plenty.
    let db = sqlx::postgres::PgPoolOptions::new()
        .max_connections(2)
        .connect(&database_url)
        .await
        .map_err(|e| format!("Kunne ikke koble til database: {e}"))?;
    // Refuse to run against a missing CAS directory — almost certainly a
    // configuration error, and df would measure the wrong partition.
    if !cli.cas_root.exists() {
        return Err(format!("CAS-katalog finnes ikke: {}", cli.cas_root.display()));
    }
    let disk_pct_before = disk_usage_percent(&cli.cas_root).await?;
    let emergency_level = if disk_pct_before >= 95.0 {
        "critical"
    } else if disk_pct_before >= 90.0 {
        "aggressive"
    } else if disk_pct_before >= 85.0 {
        "warning"
    } else {
        "normal"
    };
    tracing::info!(
        disk_pct = disk_pct_before,
        level = emergency_level,
        dry_run,
        "Pruning startet"
    );
    // Phase 1: delete regenerable generated content once disk >= 85%.
    let generated = if disk_pct_before >= 85.0 {
        phase_generated(&db, &cli.cas_root, dry_run).await?
    } else {
        PhaseResult::default()
    };
    if generated.would_delete > 0 || generated.deleted > 0 {
        tracing::info!(
            would_delete = generated.would_delete,
            deleted = generated.deleted,
            bytes = generated.bytes_freed,
            "Fase 1: Generert innhold"
        );
    }
    // Phase 2: TTL-based pruning (always runs; TTLs tighten with the level).
    let ttl = phase_ttl(
        &db,
        &cli.cas_root,
        cli.audio_ttl,
        cli.image_ttl,
        cli.video_ttl,
        emergency_level,
        dry_run,
    )
    .await?;
    if ttl.would_delete > 0 || ttl.deleted > 0 {
        tracing::info!(
            checked = ttl.checked,
            would_delete = ttl.would_delete,
            deleted = ttl.deleted,
            bytes = ttl.bytes_freed,
            "Fase 2: TTL-basert pruning"
        );
    }
    // Phase 3: critical pruning at disk >= 95% — everything unpublished goes.
    let critical = if disk_pct_before >= 95.0 {
        phase_critical(&db, &cli.cas_root, dry_run).await?
    } else {
        PhaseResult::default()
    };
    if critical.would_delete > 0 || critical.deleted > 0 {
        tracing::warn!(
            would_delete = critical.would_delete,
            deleted = critical.deleted,
            bytes = critical.bytes_freed,
            "Fase 3: KRITISK pruning"
        );
    }
    // Re-measure disk usage only when files may actually have been deleted.
    let disk_pct_after = if !dry_run {
        disk_usage_percent(&cli.cas_root).await.unwrap_or(disk_pct_before)
    } else {
        disk_pct_before // No change in dry-run mode
    };
    let total_deleted = generated.deleted + ttl.deleted + critical.deleted;
    let total_would_delete = generated.would_delete + ttl.would_delete + critical.would_delete;
    let total_bytes = generated.bytes_freed + ttl.bytes_freed + critical.bytes_freed;
    let output = PruneOutput {
        status: if dry_run { "dry_run" } else { "completed" },
        dry_run,
        candidates_checked: generated.checked + ttl.checked + critical.checked,
        // In dry-run mode this field reports what WOULD have been deleted.
        files_deleted: if dry_run { total_would_delete } else { total_deleted },
        bytes_freed: total_bytes,
        disk_pct_before,
        disk_pct_after,
        emergency_level,
        phases: PrunePhases {
            generated,
            ttl,
            critical,
        },
    };
    // JSON to stdout, logs to stderr — keeps the output pipe-friendly.
    println!(
        "{}",
        serde_json::to_string_pretty(&output).map_err(|e| format!("JSON-serialisering feilet: {e}"))?
    );
    Ok(())
}