// Serverhelse-dashboard — tjeneste-status, metrikker, backup-status, logg-tilgang. // // Sjekker alle tjenester i stacken (PG, Caddy, Authentik, LiteLLM, // Whisper, LiveKit) og samler system-metrikker (CPU, minne, disk). // // Ref: docs/concepts/adminpanelet.md § 4 "Serverhelse", oppgave 15.6 use axum::{extract::{Query, State}, http::StatusCode, Json}; use serde::{Deserialize, Serialize}; use sqlx::PgPool; use crate::auth::AdminUser; use crate::AppState; // ============================================================================= // Typer // ============================================================================= #[derive(Serialize)] pub struct ServiceStatus { pub name: String, pub status: String, // "up", "down", "degraded" pub latency_ms: Option, pub details: Option, } #[derive(Serialize)] pub struct SystemMetrics { pub cpu_usage_percent: f32, pub cpu_cores: usize, pub load_avg: [f32; 3], pub memory_total_bytes: u64, pub memory_used_bytes: u64, pub memory_available_bytes: u64, pub memory_usage_percent: f32, pub disk: crate::resources::DiskStatus, pub uptime_seconds: u64, } #[derive(Serialize)] pub struct BackupInfo { pub backup_type: String, pub last_success: Option, pub path: Option, pub status: String, // "ok", "missing", "stale" } #[derive(Serialize)] pub struct LogEntry { pub timestamp: String, pub service: String, pub level: String, pub message: String, } #[derive(Serialize)] pub struct HealthDashboard { pub services: Vec, pub metrics: SystemMetrics, pub backups: Vec, pub pg_stats: PgStats, } #[derive(Serialize)] pub struct PgStats { pub active_connections: i64, pub max_connections: i64, pub database_size_bytes: i64, pub active_queries: i64, } #[derive(Deserialize)] pub struct LogsQuery { pub service: Option, pub lines: Option, } #[derive(Serialize)] pub struct LogsResponse { pub entries: Vec, } // ============================================================================= // Tjeneste-sjekker // ============================================================================= async fn check_pg(db: &PgPool) -> ServiceStatus { let start = std::time::Instant::now(); match sqlx::query_scalar::<_, i32>("SELECT 1").fetch_one(db).await { Ok(_) => ServiceStatus { name: "PostgreSQL".to_string(), status: "up".to_string(), latency_ms: Some(start.elapsed().as_millis() as u64), details: None, }, Err(e) => ServiceStatus { name: "PostgreSQL".to_string(), status: "down".to_string(), latency_ms: None, details: Some(format!("{e}")), }, } } /// Sjekk en HTTP-tjeneste med timeout. async fn check_http_service(name: &str, url: &str) -> ServiceStatus { let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(5)) .build() .unwrap(); let start = std::time::Instant::now(); match client.get(url).send().await { Ok(resp) => { let latency = start.elapsed().as_millis() as u64; let status_code = resp.status(); if status_code.is_success() || status_code.as_u16() == 401 || status_code.as_u16() == 403 { // 401/403 betyr at tjenesten kjører, bare auth mangler ServiceStatus { name: name.to_string(), status: "up".to_string(), latency_ms: Some(latency), details: None, } } else { ServiceStatus { name: name.to_string(), status: "degraded".to_string(), latency_ms: Some(latency), details: Some(format!("HTTP {}", status_code)), } } } Err(e) => ServiceStatus { name: name.to_string(), status: "down".to_string(), latency_ms: None, details: Some(format!("{e}")), }, } } async fn check_caddy() -> ServiceStatus { // Caddy kjører lokalt, sjekk admin-API check_http_service("Caddy", "http://localhost:2019/config/").await } async fn check_authentik() -> ServiceStatus { // Authentik via Caddy check_http_service("Authentik", "https://auth.sidelinja.org/-/health/ready/").await } async fn check_litellm() -> ServiceStatus { let url = std::env::var("AI_GATEWAY_URL") .unwrap_or_else(|_| "http://localhost:4000".to_string()); check_http_service("LiteLLM", &format!("{url}/health")).await } async fn check_whisper() -> ServiceStatus { let url = std::env::var("WHISPER_URL") .unwrap_or_else(|_| "http://localhost:8000".to_string()); check_http_service("Whisper", &format!("{url}/health")).await } async fn check_livekit() -> ServiceStatus { let url = std::env::var("LIVEKIT_URL") .unwrap_or_else(|_| "http://localhost:7880".to_string()); check_http_service("LiveKit", &url).await } // ============================================================================= // System-metrikker // ============================================================================= fn read_cpu_usage() -> f32 { // Les /proc/stat for CPU-bruk (snapshot, ikke gjennomsnitt). // For enkel implementering bruker vi load average i stedet. // CPU-prosent beregnes fra load_avg[0] / antall kjerner. let cores = num_cpus(); let load = read_load_avg(); // Tilnærming: load / cores * 100, capped til 100 ((load[0] / cores as f32) * 100.0).min(100.0) } fn num_cpus() -> usize { std::thread::available_parallelism() .map(|n| n.get()) .unwrap_or(1) } fn read_load_avg() -> [f32; 3] { let content = std::fs::read_to_string("/proc/loadavg").unwrap_or_default(); let parts: Vec = content .split_whitespace() .take(3) .filter_map(|s| s.parse().ok()) .collect(); [ parts.first().copied().unwrap_or(0.0), parts.get(1).copied().unwrap_or(0.0), parts.get(2).copied().unwrap_or(0.0), ] } fn read_memory_info() -> (u64, u64, u64) { // Les /proc/meminfo let content = std::fs::read_to_string("/proc/meminfo").unwrap_or_default(); let mut total: u64 = 0; let mut available: u64 = 0; for line in content.lines() { if let Some(val) = line.strip_prefix("MemTotal:") { total = parse_meminfo_kb(val) * 1024; } else if let Some(val) = line.strip_prefix("MemAvailable:") { available = parse_meminfo_kb(val) * 1024; } } let used = total.saturating_sub(available); (total, used, available) } fn parse_meminfo_kb(s: &str) -> u64 { s.trim().split_whitespace().next() .and_then(|v| v.parse().ok()) .unwrap_or(0) } fn read_uptime() -> u64 { let content = std::fs::read_to_string("/proc/uptime").unwrap_or_default(); content.split_whitespace().next() .and_then(|v| v.parse::().ok()) .map(|v| v as u64) .unwrap_or(0) } fn collect_metrics() -> SystemMetrics { let load = read_load_avg(); let cores = num_cpus(); let cpu = read_cpu_usage(); let (mem_total, mem_used, mem_available) = read_memory_info(); let mem_percent = if mem_total > 0 { (mem_used as f64 / mem_total as f64 * 100.0) as f32 } else { 0.0 }; let cas_root = std::env::var("CAS_ROOT") .unwrap_or_else(|_| "/srv/synops/media/cas".to_string()); let disk = crate::resources::check_disk_usage(&cas_root) .unwrap_or_else(|_| crate::resources::check_disk_usage("/").unwrap_or( crate::resources::DiskStatus { mount_point: "/".to_string(), total_bytes: 0, used_bytes: 0, available_bytes: 0, usage_percent: 0.0, alert_level: None, } )); SystemMetrics { cpu_usage_percent: cpu, cpu_cores: cores, load_avg: load, memory_total_bytes: mem_total, memory_used_bytes: mem_used, memory_available_bytes: mem_available, memory_usage_percent: mem_percent, disk, uptime_seconds: read_uptime(), } } // ============================================================================= // PG-statistikk // ============================================================================= async fn collect_pg_stats(db: &PgPool) -> PgStats { let active = sqlx::query_scalar::<_, i64>( "SELECT count(*) FROM pg_stat_activity WHERE state = 'active'" ) .fetch_one(db) .await .unwrap_or(0); let max_conn = sqlx::query_scalar::<_, i64>( "SELECT setting::bigint FROM pg_settings WHERE name = 'max_connections'" ) .fetch_one(db) .await .unwrap_or(100); let db_size = sqlx::query_scalar::<_, i64>( "SELECT pg_database_size(current_database())" ) .fetch_one(db) .await .unwrap_or(0); let queries = sqlx::query_scalar::<_, i64>( "SELECT count(*) FROM pg_stat_activity WHERE state = 'active' AND query NOT LIKE '%pg_stat_activity%'" ) .fetch_one(db) .await .unwrap_or(0); PgStats { active_connections: active, max_connections: max_conn, database_size_bytes: db_size, active_queries: queries, } } // ============================================================================= // Backup-status // ============================================================================= fn check_backups() -> Vec { let mut backups = Vec::new(); // PG-dump — sjekk /srv/synops/backup/pg/ for nyeste .dump-fil let pg_dir = "/srv/synops/backup/pg"; let mut pg_backup = BackupInfo { backup_type: "PostgreSQL dump".to_string(), last_success: None, path: None, status: "missing".to_string(), }; if let Ok(entries) = std::fs::read_dir(pg_dir) { // Finn nyeste dump-fil let mut newest: Option<(std::time::SystemTime, std::path::PathBuf)> = None; for entry in entries.flatten() { let name = entry.file_name().to_string_lossy().to_string(); if name.ends_with(".dump") { if let Ok(meta) = entry.metadata() { if let Ok(modified) = meta.modified() { if newest.as_ref().map_or(true, |(t, _)| modified > *t) { newest = Some((modified, entry.path())); } } } } } if let Some((modified, path)) = newest { let age = modified.elapsed().unwrap_or_default(); let ts = chrono::DateTime::::from(modified); pg_backup.last_success = Some(ts.to_rfc3339()); pg_backup.path = Some(path.to_string_lossy().to_string()); pg_backup.status = if age.as_secs() < 86400 + 3600 { // Litt slakk: 25 timer (cron kjører daglig) "ok".to_string() } else { "stale".to_string() }; } } backups.push(pg_backup); backups } // ============================================================================= // Logg-tilgang // ============================================================================= fn read_service_logs(service: &str, max_lines: usize) -> Vec { // Bruk journalctl for systemd-tjenester, docker logs for containere let cmd = match service { "maskinrommet" | "caddy" | "sveltekit" => { format!("journalctl -u {service} --no-pager -n {max_lines} --output=short-iso 2>/dev/null") } "postgres" | "authentik" | "litellm" | "whisper" | "livekit" => { let container = match service { "postgres" => "sidelinja-postgres-1", "authentik" => "sidelinja-authentik-server-1", "litellm" => "sidelinja-ai-gateway-1", "whisper" => "sidelinja-faster-whisper-1", "livekit" => "sidelinja-livekit-1", _ => return Vec::new(), }; format!("docker logs --tail {max_lines} --timestamps {container} 2>&1") } _ => return Vec::new(), }; let output = std::process::Command::new("bash") .arg("-c") .arg(&cmd) .output(); match output { Ok(out) => { let text = String::from_utf8_lossy(&out.stdout); text.lines() .rev() // nyeste først .take(max_lines) .map(|line| { // Prøv å parse tidsstempel fra starten av linjen let (ts, msg) = if line.len() > 24 { (line[..24].trim().to_string(), line[24..].trim().to_string()) } else { (String::new(), line.to_string()) }; let level = if msg.contains("ERROR") || msg.contains("error") || msg.contains("ERR") { "error" } else if msg.contains("WARN") || msg.contains("warn") { "warn" } else if msg.contains("INFO") || msg.contains("info") { "info" } else { "debug" }; LogEntry { timestamp: ts, service: service.to_string(), level: level.to_string(), message: msg, } }) .collect() } Err(_) => Vec::new(), } } // ============================================================================= // API-handlers // ============================================================================= /// GET /admin/health — komplett serverhelse-dashboard. pub async fn health_dashboard( State(state): State, _admin: AdminUser, ) -> Result, (StatusCode, Json)> { // Kjør alle tjeneste-sjekker parallelt let (pg, caddy, authentik, litellm, whisper, livekit) = tokio::join!( check_pg(&state.db), check_caddy(), check_authentik(), check_litellm(), check_whisper(), check_livekit(), ); let services = vec![pg, caddy, authentik, litellm, whisper, livekit]; let metrics = collect_metrics(); let backups = check_backups(); let pg_stats = collect_pg_stats(&state.db).await; Ok(Json(HealthDashboard { services, metrics, backups, pg_stats, })) } /// GET /admin/health/logs?service=maskinrommet&lines=50 pub async fn health_logs( _admin: AdminUser, Query(params): Query, ) -> Json { let max_lines = params.lines.unwrap_or(50).min(200); let entries = if let Some(service) = ¶ms.service { read_service_logs(service, max_lines) } else { // Alle tjenester, siste linjer fra hver let services = ["maskinrommet", "caddy", "postgres", "authentik", "litellm", "whisper", "livekit"]; let per_service = (max_lines / services.len()).max(10); let mut all = Vec::new(); for svc in &services { all.extend(read_service_logs(svc, per_service)); } // Sorter etter timestamp (nyeste først) all.sort_by(|a, b| b.timestamp.cmp(&a.timestamp)); all.truncate(max_lines); all }; Json(LogsResponse { entries }) }