Fullfør oppgave 6.1: CAS-lagring med SHA-256 og deduplisering
Implementerer content-addressable store som Rust-modul i maskinrommet:
- CasStore med SHA-256 hashing (sha2 + hex crates)
- Katalogstruktur: {root}/{hash[0..2]}/{hash[2..4]}/{hash}
- Atomisk skriving via temp-fil + rename (hindrer korrupt read)
- Innebygd deduplisering — identisk hash = ingen skriving
- Konfigurerbar rot via CAS_ROOT env (default: /srv/synops/media/cas)
- 5 enhetstester: store, dedup, exists, ulike filer, path-struktur
Neste steg: 6.2 (upload-endepunkt) bruker CasStore for filmottak.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
c85f0d4c92
commit
b7a6f8e7ef
5 changed files with 231 additions and 3 deletions
2
maskinrommet/Cargo.lock
generated
2
maskinrommet/Cargo.lock
generated
|
|
@ -953,10 +953,12 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"axum",
|
||||
"chrono",
|
||||
"hex",
|
||||
"jsonwebtoken",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"sqlx",
|
||||
"tokio",
|
||||
"tower-http",
|
||||
|
|
|
|||
|
|
@ -16,3 +16,5 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
|
|||
tower-http = { version = "0.6", features = ["cors", "trace"] }
|
||||
jsonwebtoken = "9"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
|
||||
sha2 = "0.10"
|
||||
hex = "0.4"
|
||||
|
|
|
|||
214
maskinrommet/src/cas.rs
Normal file
214
maskinrommet/src/cas.rs
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
//! Content-Addressable Store (CAS)
|
||||
//!
|
||||
//! Lagrer binærfiler identifisert av SHA-256 hash.
|
||||
//! Katalogstruktur: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}`
|
||||
//! Deduplisering er innebygd — identisk innhold gir identisk hash.
|
||||
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tokio::io::AsyncWriteExt;
|
||||
|
||||
/// Content-addressable store (CAS) backed by the local filesystem.
///
/// A clone only copies the root path, so all clones refer to the same
/// on-disk store.
#[derive(Clone, Debug)]
pub struct CasStore {
    /// Directory under which all stored objects live.
    root: PathBuf,
}
|
||||
|
||||
/// Outcome of a CAS write operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StoreResult {
    /// SHA-256 hex digest of the content (64 characters).
    pub hash: String,
    /// Number of bytes in the stored content.
    pub size: u64,
    /// `true` if an object with this hash was already present (deduplicated).
    pub already_existed: bool,
}
|
||||
|
||||
impl CasStore {
|
||||
/// Opprett en ny CAS-store med gitt rot-katalog.
|
||||
/// Oppretter rot-katalogen hvis den ikke finnes.
|
||||
pub async fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
|
||||
let root = root.into();
|
||||
tokio::fs::create_dir_all(&root).await?;
|
||||
Ok(Self { root })
|
||||
}
|
||||
|
||||
/// Lagre bytes i CAS. Returnerer hash og metadata.
|
||||
/// Hvis filen allerede finnes (identisk hash), skrives ingenting — deduplisering.
|
||||
pub async fn store(&self, data: &[u8]) -> std::io::Result<StoreResult> {
|
||||
let hash = Self::sha256(data);
|
||||
let size = data.len() as u64;
|
||||
let path = self.path_for(&hash);
|
||||
|
||||
if path.exists() {
|
||||
return Ok(StoreResult {
|
||||
hash,
|
||||
size,
|
||||
already_existed: true,
|
||||
});
|
||||
}
|
||||
|
||||
// Opprett prefix-kataloger
|
||||
if let Some(parent) = path.parent() {
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
|
||||
// Skriv til temp-fil, deretter atomisk rename.
|
||||
// Dette hindrer at en halvskrevet fil blir lest av andre prosesser.
|
||||
let tmp_path = self.tmp_path(&hash);
|
||||
if let Some(parent) = tmp_path.parent() {
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
|
||||
let mut file = tokio::fs::File::create(&tmp_path).await?;
|
||||
file.write_all(data).await?;
|
||||
file.flush().await?;
|
||||
// Synkroniser til disk for dataintegritet
|
||||
file.sync_all().await?;
|
||||
drop(file);
|
||||
|
||||
// Atomisk rename — trygt selv ved strømbrudd
|
||||
tokio::fs::rename(&tmp_path, &path).await?;
|
||||
|
||||
tracing::info!(hash = %hash, size = size, "Lagret ny fil i CAS");
|
||||
|
||||
Ok(StoreResult {
|
||||
hash,
|
||||
size,
|
||||
already_existed: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// Sjekk om en hash finnes i CAS.
|
||||
pub fn exists(&self, hash: &str) -> bool {
|
||||
self.path_for(hash).exists()
|
||||
}
|
||||
|
||||
/// Full filbane for en gitt hash.
|
||||
/// Struktur: `{root}/{hash[0..2]}/{hash[2..4]}/{hash}`
|
||||
pub fn path_for(&self, hash: &str) -> PathBuf {
|
||||
let (prefix1, rest) = hash.split_at(2.min(hash.len()));
|
||||
let (prefix2, _) = rest.split_at(2.min(rest.len()));
|
||||
self.root.join(prefix1).join(prefix2).join(hash)
|
||||
}
|
||||
|
||||
/// Beregn SHA-256 hex-digest for gitte bytes.
|
||||
pub fn sha256(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
/// Rot-katalogen for CAS.
|
||||
pub fn root(&self) -> &Path {
|
||||
&self.root
|
||||
}
|
||||
|
||||
/// Temp-filbane brukt under skriving (unngår korrupte reads).
|
||||
fn tmp_path(&self, hash: &str) -> PathBuf {
|
||||
self.root.join("tmp").join(format!("{hash}.tmp"))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Base directory for on-disk test fixtures: `<crate>/target/test-cas`.
    fn test_root() -> PathBuf {
        let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        dir.push("target");
        dir.push("test-cas");
        dir
    }

    /// Wipes any leftovers for the named test and opens a store in a clean
    /// directory. Returns the store together with its root for later cleanup.
    async fn fresh_store(name: &str) -> (CasStore, PathBuf) {
        let dir = test_root().join(name);
        // Best effort: the directory may simply not exist yet.
        let _ = tokio::fs::remove_dir_all(&dir).await;
        let store = CasStore::new(&dir).await.unwrap();
        (store, dir)
    }

    /// Best-effort removal of a test's directory.
    async fn cleanup(dir: &PathBuf) {
        let _ = tokio::fs::remove_dir_all(dir).await;
    }

    #[tokio::test]
    async fn store_and_retrieve() {
        let (cas, root) = fresh_store("store_and_retrieve").await;

        let payload = b"hello world";
        let outcome = cas.store(payload).await.unwrap();

        // Known SHA-256 digest of "hello world".
        assert_eq!(
            outcome.hash,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
        assert_eq!(outcome.size, 11);
        assert!(!outcome.already_existed);

        // The object must be on disk at the computed location with the same bytes.
        let location = cas.path_for(&outcome.hash);
        assert!(location.exists());
        let on_disk = tokio::fs::read(&location).await.unwrap();
        assert_eq!(on_disk, payload);

        // Directory layout: b9/4d/{full hash}
        assert!(location.to_string_lossy().contains("/b9/4d/"));

        cleanup(&root).await;
    }

    #[tokio::test]
    async fn deduplication() {
        let (cas, root) = fresh_store("deduplication").await;

        let payload = b"duplicate content";

        let initial = cas.store(payload).await.unwrap();
        assert!(!initial.already_existed);

        let repeat = cas.store(payload).await.unwrap();
        assert!(repeat.already_existed);
        assert_eq!(initial.hash, repeat.hash);

        cleanup(&root).await;
    }

    #[tokio::test]
    async fn exists_check() {
        let (cas, root) = fresh_store("exists_check").await;

        assert!(!cas.exists("nonexistent_hash"));

        let outcome = cas.store(b"test").await.unwrap();
        assert!(cas.exists(&outcome.hash));

        cleanup(&root).await;
    }

    #[tokio::test]
    async fn different_content_different_hash() {
        let (cas, root) = fresh_store("different_content").await;

        let left = cas.store(b"content A").await.unwrap();
        let right = cas.store(b"content B").await.unwrap();

        assert_ne!(left.hash, right.hash);
        assert!(cas.exists(&left.hash));
        assert!(cas.exists(&right.hash));

        cleanup(&root).await;
    }

    #[test]
    fn path_structure() {
        let cas = CasStore {
            root: PathBuf::from("/srv/synops/media/cas"),
        };
        let digest = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9";

        let expected = PathBuf::from(
            "/srv/synops/media/cas/b9/4d/b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9",
        );
        assert_eq!(cas.path_for(digest), expected);
    }
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
mod auth;
|
||||
pub mod cas;
|
||||
mod intentions;
|
||||
mod queries;
|
||||
mod stdb;
|
||||
|
|
@ -12,6 +13,7 @@ use tower_http::trace::TraceLayer;
|
|||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
|
||||
|
||||
use auth::{AuthUser, JwksKeys};
|
||||
use cas::CasStore;
|
||||
use stdb::StdbClient;
|
||||
|
||||
#[derive(Clone)]
|
||||
|
|
@ -19,6 +21,7 @@ pub struct AppState {
|
|||
pub db: PgPool,
|
||||
pub jwks: JwksKeys,
|
||||
pub stdb: StdbClient,
|
||||
pub cas: CasStore,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -108,7 +111,15 @@ async fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
let state = AppState { db, jwks, stdb };
|
||||
// CAS — content-addressable store for binærfiler
|
||||
let cas_root = std::env::var("CAS_ROOT")
|
||||
.unwrap_or_else(|_| "/srv/synops/media/cas".to_string());
|
||||
let cas = CasStore::new(&cas_root)
|
||||
.await
|
||||
.expect("Kunne ikke opprette CAS-katalog");
|
||||
tracing::info!(root = %cas_root, "CAS initialisert");
|
||||
|
||||
let state = AppState { db, jwks, stdb, cas };
|
||||
|
||||
// Ruter: /health er offentlig, /me krever gyldig JWT
|
||||
let app = Router::new()
|
||||
|
|
|
|||
3
tasks.md
3
tasks.md
|
|
@ -86,8 +86,7 @@ Uavhengige faser kan fortsatt plukkes.
|
|||
|
||||
## Fase 6: CAS og mediefiler
|
||||
|
||||
- [~] 6.1 CAS-lagring: filsystem med content-addressable hashing (SHA-256). Katalogstruktur med hash-prefix. Deduplisering.
|
||||
> Påbegynt: 2026-03-17T16:27
|
||||
- [x] 6.1 CAS-lagring: filsystem med content-addressable hashing (SHA-256). Katalogstruktur med hash-prefix. Deduplisering.
|
||||
- [ ] 6.2 Upload-endepunkt: `POST /intentions/upload_media` → hash fil, lagre i CAS, opprett media-node med `has_media`-edge.
|
||||
- [ ] 6.3 Serving: `GET /cas/{hash}` → stream fil fra disk. Caddy kan serve direkte for ytelse.
|
||||
- [ ] 6.4 Bilder i TipTap: drag-and-drop/paste → upload → CAS-node → inline i `metadata.document` via `node_id`.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue