//! Content-Addressable Store (CAS) //! //! Lagrer binærfiler identifisert av SHA-256 hash. //! Katalogstruktur: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}` //! Deduplisering er innebygd — identisk innhold gir identisk hash. use sha2::{Digest, Sha256}; use std::path::{Path, PathBuf}; use tokio::io::AsyncWriteExt; /// CAS-lagring med filsystem-backend. #[derive(Clone)] pub struct CasStore { root: PathBuf, } /// Resultat fra en CAS-skriveoperasjon. pub struct StoreResult { /// SHA-256 hex-digest (64 tegn). pub hash: String, /// Antall bytes lagret. pub size: u64, /// `true` hvis filen allerede fantes (deduplisert). pub already_existed: bool, } impl CasStore { /// Opprett en ny CAS-store med gitt rot-katalog. /// Oppretter rot-katalogen hvis den ikke finnes. pub async fn new(root: impl Into) -> std::io::Result { let root = root.into(); tokio::fs::create_dir_all(&root).await?; Ok(Self { root }) } /// Lagre bytes i CAS. Returnerer hash og metadata. /// Hvis filen allerede finnes (identisk hash), skrives ingenting — deduplisering. pub async fn store(&self, data: &[u8]) -> std::io::Result { let hash = Self::sha256(data); let size = data.len() as u64; let path = self.path_for(&hash); if path.exists() { return Ok(StoreResult { hash, size, already_existed: true, }); } // Opprett prefix-kataloger if let Some(parent) = path.parent() { tokio::fs::create_dir_all(parent).await?; } // Skriv til temp-fil, deretter atomisk rename. // Dette hindrer at en halvskrevet fil blir lest av andre prosesser. let tmp_path = self.tmp_path(&hash); if let Some(parent) = tmp_path.parent() { tokio::fs::create_dir_all(parent).await?; } let mut file = tokio::fs::File::create(&tmp_path).await?; file.write_all(data).await?; file.flush().await?; // Synkroniser til disk for dataintegritet file.sync_all().await?; drop(file); // Atomisk rename — trygt selv ved strømbrudd tokio::fs::rename(&tmp_path, &path).await?; tracing::info!(hash = %hash, size = size, "Lagret ny fil i CAS"); Ok(StoreResult { hash, size, already_existed: false, }) } /// Sjekk om en hash finnes i CAS. pub fn exists(&self, hash: &str) -> bool { self.path_for(hash).exists() } /// Full filbane for en gitt hash. /// Struktur: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}` pub fn path_for(&self, hash: &str) -> PathBuf { let (prefix1, rest) = hash.split_at(2.min(hash.len())); let (prefix2, _) = rest.split_at(2.min(rest.len())); self.root.join(prefix1).join(prefix2).join(hash) } /// Beregn SHA-256 hex-digest for gitte bytes. pub fn sha256(data: &[u8]) -> String { let mut hasher = Sha256::new(); hasher.update(data); hex::encode(hasher.finalize()) } /// Slett en fil fra CAS. Returnerer antall bytes frigitt, eller 0 hvis filen ikke fantes. pub async fn delete(&self, hash: &str) -> std::io::Result { let path = self.path_for(hash); match tokio::fs::metadata(&path).await { Ok(meta) => { let size = meta.len(); tokio::fs::remove_file(&path).await?; tracing::info!(hash = %hash, size = size, "Slettet fil fra CAS"); Ok(size) } Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(0), Err(e) => Err(e), } } /// Beregn total diskbruk for CAS-katalogen (bytes). /// Bruker `du -sb` for ytelse — traverserer filsystemet direkte. pub async fn disk_usage_bytes(&self) -> std::io::Result { let output = tokio::process::Command::new("du") .args(["-sb", self.root.to_str().unwrap_or(".")]) .output() .await?; if !output.status.success() { return Err(std::io::Error::new( std::io::ErrorKind::Other, "du-kommando feilet", )); } let stdout = String::from_utf8_lossy(&output.stdout); let bytes: u64 = stdout .split_whitespace() .next() .and_then(|s| s.parse().ok()) .unwrap_or(0); Ok(bytes) } /// Sjekk diskbruk-prosent for partisjonen CAS ligger på. pub async fn disk_usage_percent(&self) -> std::io::Result { let output = tokio::process::Command::new("df") .args(["--output=pcent", self.root.to_str().unwrap_or("/")]) .output() .await?; if !output.status.success() { return Err(std::io::Error::new( std::io::ErrorKind::Other, "df-kommando feilet", )); } let stdout = String::from_utf8_lossy(&output.stdout); // Output: "Use%\n 42%\n" let pct: f64 = stdout .lines() .nth(1) .and_then(|line| line.trim().trim_end_matches('%').parse().ok()) .unwrap_or(0.0); Ok(pct) } /// Rot-katalogen for CAS. pub fn root(&self) -> &Path { &self.root } /// Temp-filbane brukt under skriving (unngår korrupte reads). fn tmp_path(&self, hash: &str) -> PathBuf { self.root.join("tmp").join(format!("{hash}.tmp")) } /// Slett temp-filer eldre enn `max_age` fra CAS tmp-katalogen. /// Returnerer (antall slettet, bytes frigitt). pub async fn cleanup_tmp(&self, max_age: std::time::Duration) -> std::io::Result<(usize, u64)> { let tmp_dir = self.root.join("tmp"); if !tmp_dir.exists() { return Ok((0, 0)); } let mut deleted = 0usize; let mut freed = 0u64; let now = std::time::SystemTime::now(); let mut entries = tokio::fs::read_dir(&tmp_dir).await?; while let Some(entry) = entries.next_entry().await? { let meta = match entry.metadata().await { Ok(m) => m, Err(_) => continue, }; if !meta.is_file() { continue; } let modified = match meta.modified() { Ok(t) => t, Err(_) => continue, }; let age = now.duration_since(modified).unwrap_or_default(); if age > max_age { let size = meta.len(); if let Ok(()) = tokio::fs::remove_file(entry.path()).await { deleted += 1; freed += size; tracing::debug!( file = %entry.file_name().to_string_lossy(), age_secs = age.as_secs(), size, "Slettet gammel temp-fil" ); } } } Ok((deleted, freed)) } } /// Start periodisk opprydding av CAS tmp-filer. /// Kjører hver time, sletter filer eldre enn 1 time. pub fn start_tmp_cleanup_loop(cas: CasStore) { tokio::spawn(async move { // Vent 45 sekunder etter oppstart tokio::time::sleep(std::time::Duration::from_secs(45)).await; tracing::info!("CAS tmp-opprydding startet (intervall: 1t, TTL: 1t)"); let max_age = std::time::Duration::from_secs(3600); // 1 time let interval = std::time::Duration::from_secs(3600); // 1 time loop { match cas.cleanup_tmp(max_age).await { Ok((deleted, freed)) => { if deleted > 0 { tracing::info!( deleted, freed_kb = freed / 1024, "CAS tmp-opprydding: {} filer slettet, {} KB frigitt", deleted, freed / 1024, ); } } Err(e) => { tracing::warn!(error = %e, "CAS tmp-opprydding feilet"); } } tokio::time::sleep(interval).await; } }); } #[cfg(test)] mod tests { use super::*; use std::path::PathBuf; fn test_root() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("target").join("test-cas") } #[tokio::test] async fn store_and_retrieve() { let root = test_root().join("store_and_retrieve"); let _ = tokio::fs::remove_dir_all(&root).await; let cas = CasStore::new(&root).await.unwrap(); let data = b"hello world"; let result = cas.store(data).await.unwrap(); // SHA-256 av "hello world" assert_eq!( result.hash, "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" ); assert_eq!(result.size, 11); assert!(!result.already_existed); // Filen skal finnes på riktig path let path = cas.path_for(&result.hash); assert!(path.exists()); let content = tokio::fs::read(&path).await.unwrap(); assert_eq!(content, data); // Katalogstruktur: b9/4d/{full hash} assert!(path.to_string_lossy().contains("/b9/4d/")); // Cleanup let _ = tokio::fs::remove_dir_all(&root).await; } #[tokio::test] async fn deduplication() { let root = test_root().join("deduplication"); let _ = tokio::fs::remove_dir_all(&root).await; let cas = CasStore::new(&root).await.unwrap(); let data = b"duplicate content"; let first = cas.store(data).await.unwrap(); assert!(!first.already_existed); let second = cas.store(data).await.unwrap(); assert!(second.already_existed); assert_eq!(first.hash, second.hash); let _ = tokio::fs::remove_dir_all(&root).await; } #[tokio::test] async fn exists_check() { let root = test_root().join("exists_check"); let _ = tokio::fs::remove_dir_all(&root).await; let cas = CasStore::new(&root).await.unwrap(); assert!(!cas.exists("nonexistent_hash")); let result = cas.store(b"test").await.unwrap(); assert!(cas.exists(&result.hash)); let _ = tokio::fs::remove_dir_all(&root).await; } #[tokio::test] async fn different_content_different_hash() { let root = test_root().join("different_content"); let _ = tokio::fs::remove_dir_all(&root).await; let cas = CasStore::new(&root).await.unwrap(); let a = cas.store(b"content A").await.unwrap(); let b = cas.store(b"content B").await.unwrap(); assert_ne!(a.hash, b.hash); assert!(cas.exists(&a.hash)); assert!(cas.exists(&b.hash)); let _ = tokio::fs::remove_dir_all(&root).await; } #[test] fn path_structure() { let cas = CasStore { root: PathBuf::from("/srv/synops/media/cas"), }; let hash = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"; let path = cas.path_for(hash); assert_eq!( path, PathBuf::from("/srv/synops/media/cas/b9/4d/b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9") ); } }