Implementerer automatisk opprydding av CAS-filer basert på dokumentert spec i docs/retninger/maskinrommet.md: - TTL per modalitet: lyd 30d, bilde 30d, video 14d, tekst aldri - Signaler som forlenger levetid: publishing-edge, siste tilgang (last_accessed_at), utranskribert lyd beholdes - Tre-trinns disk-nødventil: - >85%: slett generert innhold (TTS osv, kan regenereres) - >90%: aggressiv pruning med kraftig redusert TTL - >95%: kritisk — alt uten publishing-edge slettes - Periodisk bakgrunnsloop: hvert 6. time, oftere ved høy disk - Tilgangslogging: serving oppdaterer last_accessed_at (fire-and-forget) - Pruning-hendelser logges til resource_usage_log Ny modul: maskinrommet/src/pruning.rs Ny migrasjon: 010_pruning.sql (last_accessed_at kolonne + indeks) CasStore utvidet med delete(), disk_usage_bytes(), disk_usage_percent() Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
273 lines
8.5 KiB
Rust
273 lines
8.5 KiB
Rust
//! Content-Addressable Store (CAS)
|
|
//!
|
|
//! Lagrer binærfiler identifisert av SHA-256 hash.
|
|
//! Katalogstruktur: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}`
|
|
//! Deduplisering er innebygd — identisk innhold gir identisk hash.
|
|
|
|
use sha2::{Digest, Sha256};
|
|
use std::path::{Path, PathBuf};
|
|
use tokio::io::AsyncWriteExt;
|
|
|
|
/// Content-addressable store backed by the local filesystem.
///
/// A clone copies only the root path, so every clone addresses the same
/// directory on disk.
#[derive(Clone, Debug)]
pub struct CasStore {
    /// Root directory under which all entries are stored.
    root: PathBuf,
}
|
|
|
|
/// Outcome of a CAS write operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StoreResult {
    /// SHA-256 hex digest of the content (64 characters).
    pub hash: String,
    /// Number of bytes in the stored content.
    pub size: u64,
    /// `true` if the content was already present (deduplicated).
    pub already_existed: bool,
}
|
|
|
|
impl CasStore {
|
|
/// Opprett en ny CAS-store med gitt rot-katalog.
|
|
/// Oppretter rot-katalogen hvis den ikke finnes.
|
|
pub async fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
|
|
let root = root.into();
|
|
tokio::fs::create_dir_all(&root).await?;
|
|
Ok(Self { root })
|
|
}
|
|
|
|
/// Lagre bytes i CAS. Returnerer hash og metadata.
|
|
/// Hvis filen allerede finnes (identisk hash), skrives ingenting — deduplisering.
|
|
pub async fn store(&self, data: &[u8]) -> std::io::Result<StoreResult> {
|
|
let hash = Self::sha256(data);
|
|
let size = data.len() as u64;
|
|
let path = self.path_for(&hash);
|
|
|
|
if path.exists() {
|
|
return Ok(StoreResult {
|
|
hash,
|
|
size,
|
|
already_existed: true,
|
|
});
|
|
}
|
|
|
|
// Opprett prefix-kataloger
|
|
if let Some(parent) = path.parent() {
|
|
tokio::fs::create_dir_all(parent).await?;
|
|
}
|
|
|
|
// Skriv til temp-fil, deretter atomisk rename.
|
|
// Dette hindrer at en halvskrevet fil blir lest av andre prosesser.
|
|
let tmp_path = self.tmp_path(&hash);
|
|
if let Some(parent) = tmp_path.parent() {
|
|
tokio::fs::create_dir_all(parent).await?;
|
|
}
|
|
|
|
let mut file = tokio::fs::File::create(&tmp_path).await?;
|
|
file.write_all(data).await?;
|
|
file.flush().await?;
|
|
// Synkroniser til disk for dataintegritet
|
|
file.sync_all().await?;
|
|
drop(file);
|
|
|
|
// Atomisk rename — trygt selv ved strømbrudd
|
|
tokio::fs::rename(&tmp_path, &path).await?;
|
|
|
|
tracing::info!(hash = %hash, size = size, "Lagret ny fil i CAS");
|
|
|
|
Ok(StoreResult {
|
|
hash,
|
|
size,
|
|
already_existed: false,
|
|
})
|
|
}
|
|
|
|
/// Sjekk om en hash finnes i CAS.
|
|
pub fn exists(&self, hash: &str) -> bool {
|
|
self.path_for(hash).exists()
|
|
}
|
|
|
|
/// Full filbane for en gitt hash.
|
|
/// Struktur: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}`
|
|
pub fn path_for(&self, hash: &str) -> PathBuf {
|
|
let (prefix1, rest) = hash.split_at(2.min(hash.len()));
|
|
let (prefix2, _) = rest.split_at(2.min(rest.len()));
|
|
self.root.join(prefix1).join(prefix2).join(hash)
|
|
}
|
|
|
|
/// Beregn SHA-256 hex-digest for gitte bytes.
|
|
pub fn sha256(data: &[u8]) -> String {
|
|
let mut hasher = Sha256::new();
|
|
hasher.update(data);
|
|
hex::encode(hasher.finalize())
|
|
}
|
|
|
|
/// Slett en fil fra CAS. Returnerer antall bytes frigitt, eller 0 hvis filen ikke fantes.
|
|
pub async fn delete(&self, hash: &str) -> std::io::Result<u64> {
|
|
let path = self.path_for(hash);
|
|
match tokio::fs::metadata(&path).await {
|
|
Ok(meta) => {
|
|
let size = meta.len();
|
|
tokio::fs::remove_file(&path).await?;
|
|
tracing::info!(hash = %hash, size = size, "Slettet fil fra CAS");
|
|
Ok(size)
|
|
}
|
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(0),
|
|
Err(e) => Err(e),
|
|
}
|
|
}
|
|
|
|
/// Beregn total diskbruk for CAS-katalogen (bytes).
|
|
/// Bruker `du -sb` for ytelse — traverserer filsystemet direkte.
|
|
pub async fn disk_usage_bytes(&self) -> std::io::Result<u64> {
|
|
let output = tokio::process::Command::new("du")
|
|
.args(["-sb", self.root.to_str().unwrap_or(".")])
|
|
.output()
|
|
.await?;
|
|
if !output.status.success() {
|
|
return Err(std::io::Error::new(
|
|
std::io::ErrorKind::Other,
|
|
"du-kommando feilet",
|
|
));
|
|
}
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
let bytes: u64 = stdout
|
|
.split_whitespace()
|
|
.next()
|
|
.and_then(|s| s.parse().ok())
|
|
.unwrap_or(0);
|
|
Ok(bytes)
|
|
}
|
|
|
|
/// Sjekk diskbruk-prosent for partisjonen CAS ligger på.
|
|
pub async fn disk_usage_percent(&self) -> std::io::Result<f64> {
|
|
let output = tokio::process::Command::new("df")
|
|
.args(["--output=pcent", self.root.to_str().unwrap_or("/")])
|
|
.output()
|
|
.await?;
|
|
if !output.status.success() {
|
|
return Err(std::io::Error::new(
|
|
std::io::ErrorKind::Other,
|
|
"df-kommando feilet",
|
|
));
|
|
}
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
// Output: "Use%\n 42%\n"
|
|
let pct: f64 = stdout
|
|
.lines()
|
|
.nth(1)
|
|
.and_then(|line| line.trim().trim_end_matches('%').parse().ok())
|
|
.unwrap_or(0.0);
|
|
Ok(pct)
|
|
}
|
|
|
|
/// Rot-katalogen for CAS.
|
|
pub fn root(&self) -> &Path {
|
|
&self.root
|
|
}
|
|
|
|
/// Temp-filbane brukt under skriving (unngår korrupte reads).
|
|
fn tmp_path(&self, hash: &str) -> PathBuf {
|
|
self.root.join("tmp").join(format!("{hash}.tmp"))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Base directory under which each test creates its own store.
    fn test_root() -> PathBuf {
        let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        dir.push("target");
        dir.push("test-cas");
        dir
    }

    /// Wipes any leftovers for scenario `name` and opens a fresh store there.
    /// Returns the store together with its directory so the test can clean up.
    async fn fresh_store(name: &str) -> (CasStore, PathBuf) {
        let dir = test_root().join(name);
        let _ = tokio::fs::remove_dir_all(&dir).await;
        let cas = CasStore::new(&dir).await.unwrap();
        (cas, dir)
    }

    #[tokio::test]
    async fn store_and_retrieve() {
        let (cas, dir) = fresh_store("store_and_retrieve").await;

        let payload = b"hello world";
        let stored = cas.store(payload).await.unwrap();

        // SHA-256 of "hello world"
        assert_eq!(
            stored.hash,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
        assert_eq!(stored.size, 11);
        assert!(!stored.already_existed);

        // The file must exist at the expected path with the original bytes.
        let location = cas.path_for(&stored.hash);
        assert!(location.exists());
        let on_disk = tokio::fs::read(&location).await.unwrap();
        assert_eq!(on_disk, payload);

        // Directory layout: b9/4d/{full hash}
        assert!(location.to_string_lossy().contains("/b9/4d/"));

        // Cleanup
        let _ = tokio::fs::remove_dir_all(&dir).await;
    }

    #[tokio::test]
    async fn deduplication() {
        let (cas, dir) = fresh_store("deduplication").await;
        let payload = b"duplicate content";

        let initial = cas.store(payload).await.unwrap();
        assert!(!initial.already_existed);

        let repeat = cas.store(payload).await.unwrap();
        assert!(repeat.already_existed);
        assert_eq!(initial.hash, repeat.hash);

        let _ = tokio::fs::remove_dir_all(&dir).await;
    }

    #[tokio::test]
    async fn exists_check() {
        let (cas, dir) = fresh_store("exists_check").await;

        assert!(!cas.exists("nonexistent_hash"));

        let stored = cas.store(b"test").await.unwrap();
        assert!(cas.exists(&stored.hash));

        let _ = tokio::fs::remove_dir_all(&dir).await;
    }

    #[tokio::test]
    async fn different_content_different_hash() {
        let (cas, dir) = fresh_store("different_content").await;

        let left = cas.store(b"content A").await.unwrap();
        let right = cas.store(b"content B").await.unwrap();

        assert_ne!(left.hash, right.hash);
        assert!(cas.exists(&left.hash));
        assert!(cas.exists(&right.hash));

        let _ = tokio::fs::remove_dir_all(&dir).await;
    }

    #[test]
    fn path_structure() {
        // Built directly (no I/O) because only the path layout is under test.
        let cas = CasStore {
            root: PathBuf::from("/srv/synops/media/cas"),
        };
        let digest = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9";

        let expected = PathBuf::from(
            "/srv/synops/media/cas/b9/4d/b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9",
        );
        assert_eq!(cas.path_for(digest), expected);
    }
}
|