synops/maskinrommet/src/cas.rs
vegard ce56e31de2 Fullfører oppgave 17.6: Periodisk CAS tmp-opprydding
Legger til cleanup_tmp() i CasStore som sletter orphaned .tmp-filer
eldre enn 1 time. Disse oppstår når en skriveprosess krasjer midt i
en atomisk CAS-skriveoperasjon (skriv til tmp, rename til endelig path).

Ny bakgrunnsloop start_tmp_cleanup_loop() kjører hver time og fjerner
foreldede temp-filer. Følger samme mønster som pruning- og
disk-monitor-loopene.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 05:57:10 +00:00

350 lines
11 KiB
Rust

//! Content-Addressable Store (CAS)
//!
//! Lagrer binærfiler identifisert av SHA-256 hash.
//! Katalogstruktur: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}`
//! Deduplisering er innebygd — identisk innhold gir identisk hash.
use sha2::{Digest, Sha256};
use std::path::{Path, PathBuf};
use tokio::io::AsyncWriteExt;
/// Content-addressable store backed by the local filesystem.
///
/// Blobs live under `root` in a two-level prefix layout (see `path_for`),
/// and in-flight writes are staged in `root/tmp`. The handle only holds the
/// root path, so cloning it is cheap and every clone addresses the same
/// on-disk store.
#[derive(Debug, Clone)]
pub struct CasStore {
    /// Root directory that contains all blobs and the `tmp` staging directory.
    root: PathBuf,
}
/// Outcome of a CAS write operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StoreResult {
    /// SHA-256 hex digest of the content (64 characters).
    pub hash: String,
    /// Size of the content in bytes.
    pub size: u64,
    /// `true` when a blob with this hash was already present (deduplicated),
    /// in which case nothing was written.
    pub already_existed: bool,
}
impl CasStore {
/// Opens a CAS store rooted at `root`.
///
/// The root directory (including any missing parents) is created if it does
/// not exist yet.
///
/// # Errors
/// Returns the I/O error from creating the root directory.
pub async fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
    let store = Self { root: root.into() };
    tokio::fs::create_dir_all(&store.root).await?;
    Ok(store)
}
/// Stores `data` in the CAS and returns its hash and metadata.
///
/// If a blob with the same hash already exists nothing is written and
/// `already_existed` is `true`. Otherwise the content is written to a
/// uniquely named file in the `tmp` directory, synced to disk and then
/// renamed into its final location, so readers never observe a partially
/// written blob.
///
/// The temp file name includes the process id and a per-process sequence
/// number. Concurrent writers of the same content therefore never share a
/// temp file; with a shared name one writer could truncate the other's file
/// and the loser's rename would fail with `NotFound`.
///
/// # Errors
/// Returns any I/O error from creating directories, writing, syncing or
/// renaming. On failure the temp file is removed on a best-effort basis.
pub async fn store(&self, data: &[u8]) -> std::io::Result<StoreResult> {
    // Process-wide counter that keeps temp names unique across concurrent calls.
    static TMP_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let hash = Self::sha256(data);
    let size = data.len() as u64;
    let path = self.path_for(&hash);

    // Same semantics as `Path::exists`, but without blocking the async runtime.
    if tokio::fs::metadata(&path).await.is_ok() {
        return Ok(StoreResult {
            hash,
            size,
            already_existed: true,
        });
    }

    // Create the prefix directories for the final location.
    if let Some(parent) = path.parent() {
        tokio::fs::create_dir_all(parent).await?;
    }

    // Stage under `{root}/tmp/{hash}.{pid}.{seq}.tmp`; the `.tmp` suffix and
    // directory are unchanged, so `cleanup_tmp` still finds orphaned files.
    let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let tmp_path = self.tmp_path(&format!("{hash}.{}.{seq}", std::process::id()));
    if let Some(parent) = tmp_path.parent() {
        tokio::fs::create_dir_all(parent).await?;
    }

    let written = async {
        let mut file = tokio::fs::File::create(&tmp_path).await?;
        file.write_all(data).await?;
        file.flush().await?;
        // Make sure the bytes are on disk before the blob becomes visible.
        file.sync_all().await?;
        drop(file);
        // Rename within the same filesystem replaces the target atomically.
        tokio::fs::rename(&tmp_path, &path).await
    }
    .await;

    if let Err(err) = written {
        // Best effort: do not leave a half-written temp file behind.
        let _ = tokio::fs::remove_file(&tmp_path).await;
        return Err(err);
    }

    tracing::info!(hash = %hash, size = size, "Lagret ny fil i CAS");
    Ok(StoreResult {
        hash,
        size,
        already_existed: false,
    })
}
/// Reports whether a blob for `hash` is present in the store.
///
/// This is a synchronous filesystem check on the path computed by `path_for`.
pub fn exists(&self, hash: &str) -> bool {
    let blob = self.path_for(hash);
    blob.exists()
}
/// Returns the full on-disk path for `hash`.
///
/// Layout: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}`. Inputs shorter than
/// four characters yield shorter (possibly empty) prefix components.
///
/// The prefixes are cut on character boundaries, so a non-ASCII `hash` no
/// longer panics the way a raw byte-offset `split_at(2)` does. For ASCII
/// input (every real hex digest) the result is unchanged.
///
/// NOTE(review): `hash` is not validated here; callers that accept hashes
/// from untrusted input should verify they are hex digests before building
/// filesystem paths from them.
pub fn path_for(&self, hash: &str) -> PathBuf {
    // Byte offset just past the first `n` characters, or the end of `s`
    // when it has fewer than `n` characters.
    fn prefix_end(s: &str, n: usize) -> usize {
        s.char_indices().nth(n).map_or(s.len(), |(idx, _)| idx)
    }

    let (prefix1, rest) = hash.split_at(prefix_end(hash, 2));
    let (prefix2, _) = rest.split_at(prefix_end(rest, 2));
    self.root.join(prefix1).join(prefix2).join(hash)
}
/// Computes the lowercase hex-encoded SHA-256 digest of `data`.
pub fn sha256(data: &[u8]) -> String {
    let digest = Sha256::digest(data);
    hex::encode(digest)
}
/// Deletes the blob for `hash` from the store.
///
/// Returns the number of bytes freed, or `0` if the blob did not exist.
/// A blob that disappears between the size lookup and the removal (for
/// example because another task deleted it concurrently) is also reported
/// as `0` rather than as an error.
///
/// # Errors
/// Returns any I/O error other than `NotFound` from reading the metadata or
/// removing the file.
pub async fn delete(&self, hash: &str) -> std::io::Result<u64> {
    let path = self.path_for(hash);

    let size = match tokio::fs::metadata(&path).await {
        Ok(meta) => meta.len(),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
        Err(e) => return Err(e),
    };

    match tokio::fs::remove_file(&path).await {
        Ok(()) => {
            tracing::info!(hash = %hash, size = size, "Slettet fil fra CAS");
            Ok(size)
        }
        // Someone else removed it first: nothing was freed by this call.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(0),
        Err(e) => Err(e),
    }
}
/// Computes the total disk usage of the CAS root directory in bytes.
///
/// Delegates to the external `du -sb` command instead of walking the tree
/// in-process.
///
/// The root path is passed to `du` as-is (no UTF-8 conversion), so a
/// non-UTF-8 root is measured correctly instead of silently falling back to
/// the current directory.
///
/// # Errors
/// Returns an error if `du` cannot be spawned, exits unsuccessfully, or
/// prints output whose first field is not a byte count (`InvalidData`).
pub async fn disk_usage_bytes(&self) -> std::io::Result<u64> {
    let output = tokio::process::Command::new("du")
        .arg("-sb")
        .arg(&self.root)
        .output()
        .await?;
    if !output.status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "du-kommando feilet",
        ));
    }

    // Expected output: "<bytes>\t<path>\n"; the first field is the byte count.
    let stdout = String::from_utf8_lossy(&output.stdout);
    stdout
        .split_whitespace()
        .next()
        .and_then(|field| field.parse::<u64>().ok())
        .ok_or_else(|| {
            // Reporting 0 here would make a broken measurement look like an empty store.
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "kunne ikke tolke output fra du",
            )
        })
}
/// Returns the usage percentage of the partition that holds the CAS root.
///
/// Delegates to the external `df --output=pcent` command and parses the
/// second output line (the first line is the `Use%` header).
///
/// The root path is passed to `df` as-is (no UTF-8 conversion), so a
/// non-UTF-8 root no longer silently falls back to `/`.
///
/// # Errors
/// Returns an error if `df` cannot be spawned, exits unsuccessfully, or
/// prints a value that is not a percentage (`InvalidData`).
pub async fn disk_usage_percent(&self) -> std::io::Result<f64> {
    let output = tokio::process::Command::new("df")
        .arg("--output=pcent")
        .arg(&self.root)
        .output()
        .await?;
    if !output.status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "df-kommando feilet",
        ));
    }

    // Output: "Use%\n 42%\n"
    let stdout = String::from_utf8_lossy(&output.stdout);
    stdout
        .lines()
        .nth(1)
        .and_then(|line| line.trim().trim_end_matches('%').parse::<f64>().ok())
        .ok_or_else(|| {
            // Reporting 0.0 here would make a failed check look like an empty disk.
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "kunne ikke tolke output fra df",
            )
        })
}
/// Returns the root directory of this CAS store.
pub fn root(&self) -> &Path {
    self.root.as_path()
}
/// Builds the staging path `{root}/tmp/{hash}.tmp` used while a blob is
/// being written, so that readers never see a partially written file at the
/// final location.
fn tmp_path(&self, hash: &str) -> PathBuf {
    let mut staged = self.root.join("tmp");
    staged.push(format!("{hash}.tmp"));
    staged
}
/// Deletes regular files in the CAS `tmp` directory whose modification time
/// is more than `max_age` in the past.
///
/// Returns `(files_deleted, bytes_freed)`. A missing `tmp` directory is not
/// an error and yields `(0, 0)`.
///
/// Entries whose metadata or modification time cannot be read are skipped,
/// as are non-file entries. Files with a modification time in the future
/// are treated as having age zero. A file that fails to delete is skipped
/// and logged (unless it simply vanished in the meantime); it does not abort
/// the sweep.
///
/// # Errors
/// Returns an I/O error if the `tmp` directory exists but cannot be listed.
pub async fn cleanup_tmp(&self, max_age: std::time::Duration) -> std::io::Result<(usize, u64)> {
    let tmp_dir = self.root.join("tmp");

    // Open the directory directly instead of checking for existence first:
    // no blocking call and no window between the check and the listing.
    let mut entries = match tokio::fs::read_dir(&tmp_dir).await {
        Ok(entries) => entries,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok((0, 0)),
        Err(e) => return Err(e),
    };

    let now = std::time::SystemTime::now();
    let mut deleted = 0usize;
    let mut freed = 0u64;

    while let Some(entry) = entries.next_entry().await? {
        let meta = match entry.metadata().await {
            Ok(m) => m,
            Err(_) => continue,
        };
        if !meta.is_file() {
            continue;
        }
        let modified = match meta.modified() {
            Ok(t) => t,
            Err(_) => continue,
        };

        // A modification time ahead of `now` yields an error; treat it as age 0.
        let age = now.duration_since(modified).unwrap_or_default();
        if age <= max_age {
            continue;
        }

        let size = meta.len();
        match tokio::fs::remove_file(entry.path()).await {
            Ok(()) => {
                deleted += 1;
                freed += size;
                tracing::debug!(
                    file = %entry.file_name().to_string_lossy(),
                    age_secs = age.as_secs(),
                    size,
                    "Slettet gammel temp-fil"
                );
            }
            // Already gone (e.g. renamed into place or removed concurrently).
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => {
                // Keep sweeping, but make undeletable files visible to operators.
                tracing::warn!(
                    file = %entry.file_name().to_string_lossy(),
                    error = %e,
                    "Kunne ikke slette gammel temp-fil"
                );
            }
        }
    }

    Ok((deleted, freed))
}
}
/// Spawns a background task that periodically removes stale CAS temp files.
///
/// The task waits 45 seconds after being spawned, then runs
/// `CasStore::cleanup_tmp` with a one-hour max age, sleeping one hour
/// between runs. Failures are logged and the loop keeps going. The task is
/// detached: its join handle is dropped.
pub fn start_tmp_cleanup_loop(cas: CasStore) {
    const STARTUP_DELAY: std::time::Duration = std::time::Duration::from_secs(45);
    const MAX_AGE: std::time::Duration = std::time::Duration::from_secs(3600); // 1 hour
    const INTERVAL: std::time::Duration = std::time::Duration::from_secs(3600); // 1 hour

    let worker = async move {
        // Give the rest of the service time to start before the first sweep.
        tokio::time::sleep(STARTUP_DELAY).await;
        tracing::info!("CAS tmp-opprydding startet (intervall: 1t, TTL: 1t)");

        loop {
            match cas.cleanup_tmp(MAX_AGE).await {
                // Nothing removed: stay quiet.
                Ok((0, _)) => {}
                Ok((deleted, freed)) => {
                    let freed_kb = freed / 1024;
                    tracing::info!(
                        deleted,
                        freed_kb,
                        "CAS tmp-opprydding: {} filer slettet, {} KB frigitt",
                        deleted,
                        freed_kb,
                    );
                }
                Err(e) => {
                    tracing::warn!(error = %e, "CAS tmp-opprydding feilet");
                }
            }
            tokio::time::sleep(INTERVAL).await;
        }
    };

    tokio::spawn(worker);
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use std::time::Duration;

    /// Base directory under `target/` that holds one CAS root per test.
    fn test_root() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("target").join("test-cas")
    }

    /// Creates an empty store rooted at `test_root()/name`, wiping anything
    /// left behind by an earlier (possibly failed) run.
    async fn fresh_store(name: &str) -> (CasStore, PathBuf) {
        let root = test_root().join(name);
        let _ = tokio::fs::remove_dir_all(&root).await;
        let cas = CasStore::new(&root).await.unwrap();
        (cas, root)
    }

    #[tokio::test]
    async fn store_and_retrieve() {
        let (cas, root) = fresh_store("store_and_retrieve").await;

        let data = b"hello world";
        let result = cas.store(data).await.unwrap();

        // SHA-256 of "hello world"
        assert_eq!(
            result.hash,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
        assert_eq!(result.size, 11);
        assert!(!result.already_existed);

        // The blob must exist at the expected path with the expected content.
        let path = cas.path_for(&result.hash);
        assert!(path.exists());
        let content = tokio::fs::read(&path).await.unwrap();
        assert_eq!(content, data);

        // Directory layout: b9/4d/{full hash}
        assert!(path.to_string_lossy().contains("/b9/4d/"));

        // A successful store must not leave anything in the staging directory.
        let mut staged = tokio::fs::read_dir(root.join("tmp")).await.unwrap();
        assert!(staged.next_entry().await.unwrap().is_none());

        let _ = tokio::fs::remove_dir_all(&root).await;
    }

    #[tokio::test]
    async fn deduplication() {
        let (cas, root) = fresh_store("deduplication").await;

        let data = b"duplicate content";
        let first = cas.store(data).await.unwrap();
        assert!(!first.already_existed);
        let second = cas.store(data).await.unwrap();
        assert!(second.already_existed);
        assert_eq!(first.hash, second.hash);

        let _ = tokio::fs::remove_dir_all(&root).await;
    }

    #[tokio::test]
    async fn exists_check() {
        let (cas, root) = fresh_store("exists_check").await;

        assert!(!cas.exists("nonexistent_hash"));
        let result = cas.store(b"test").await.unwrap();
        assert!(cas.exists(&result.hash));

        let _ = tokio::fs::remove_dir_all(&root).await;
    }

    #[tokio::test]
    async fn different_content_different_hash() {
        let (cas, root) = fresh_store("different_content").await;

        let a = cas.store(b"content A").await.unwrap();
        let b = cas.store(b"content B").await.unwrap();
        assert_ne!(a.hash, b.hash);
        assert!(cas.exists(&a.hash));
        assert!(cas.exists(&b.hash));

        let _ = tokio::fs::remove_dir_all(&root).await;
    }

    #[tokio::test]
    async fn delete_frees_bytes_and_is_idempotent() {
        let (cas, root) = fresh_store("delete").await;

        let stored = cas.store(b"to be deleted").await.unwrap();
        assert!(cas.exists(&stored.hash));

        // First delete reports the blob size and removes the file.
        assert_eq!(cas.delete(&stored.hash).await.unwrap(), stored.size);
        assert!(!cas.exists(&stored.hash));

        // Deleting a missing blob is not an error and frees nothing.
        assert_eq!(cas.delete(&stored.hash).await.unwrap(), 0);

        let _ = tokio::fs::remove_dir_all(&root).await;
    }

    #[tokio::test]
    async fn cleanup_tmp_removes_only_stale_files() {
        let (cas, root) = fresh_store("cleanup_tmp").await;
        let one_hour = Duration::from_secs(3600);

        // No tmp directory yet: nothing to do, no error.
        assert_eq!(cas.cleanup_tmp(one_hour).await.unwrap(), (0, 0));

        let tmp_dir = root.join("tmp");
        tokio::fs::create_dir_all(&tmp_dir).await.unwrap();
        let orphan = tmp_dir.join("orphan.tmp");
        tokio::fs::write(&orphan, b"partial").await.unwrap();

        // A fresh file is younger than max_age and must survive.
        assert_eq!(cas.cleanup_tmp(one_hour).await.unwrap(), (0, 0));
        assert!(orphan.exists());

        // Let the file age past a tiny max_age, then expect it to be removed.
        tokio::time::sleep(Duration::from_millis(50)).await;
        assert_eq!(
            cas.cleanup_tmp(Duration::from_millis(1)).await.unwrap(),
            (1, 7)
        );
        assert!(!orphan.exists());

        let _ = tokio::fs::remove_dir_all(&root).await;
    }

    #[test]
    fn path_structure() {
        let cas = CasStore {
            root: PathBuf::from("/srv/synops/media/cas"),
        };
        let hash = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9";
        let path = cas.path_for(hash);
        assert_eq!(
            path,
            PathBuf::from("/srv/synops/media/cas/b9/4d/b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9")
        );
    }
}