Tre fikser funnet under validering: 1. SIKKERHET: Admin-endepunkter manglet autorisasjonssjekk. Alle /admin/*-endepunkter brukte kun AuthUser (autentisert), ikke admin-rolle. Ny AdminUser-extractor sjekker owner/admin-edge til samling — returnerer 403 Forbidden for ikke-admins. Berører: maintenance, jobs, resources, health, ai, usage. 2. Race condition i toggle_effect: les-modifiser-skriv uten transaksjon på active_effects JSON. Erstattet med atomisk PG jsonb-operasjon. 3. Manglende updated_by i set_gain, set_mute, set_mixer_role, toggle_effect. Nå spores hvem som endret mixer-tilstanden.
488 lines
16 KiB
Rust
488 lines
16 KiB
Rust
// Server-health dashboard — service status, metrics, backup status, log access.
//
// Probes every service in the stack (PG, Caddy, Authentik, LiteLLM,
// Whisper, LiveKit) and collects system metrics (CPU, memory, disk).
//
// Ref: docs/concepts/adminpanelet.md § 4 "Serverhelse", task 15.6
|
use axum::{extract::{Query, State}, http::StatusCode, Json};
|
|
use serde::{Deserialize, Serialize};
|
|
use sqlx::PgPool;
|
|
|
|
use crate::auth::AdminUser;
|
|
use crate::AppState;
|
|
|
|
// =============================================================================
|
|
// Typer
|
|
// =============================================================================
|
|
|
|
/// Health-probe result for one backing service.
#[derive(Serialize)]
pub struct ServiceStatus {
    // Human-readable service name, e.g. "PostgreSQL".
    pub name: String,
    pub status: String, // "up", "down", "degraded"
    // Round-trip latency of the probe; None when the probe failed outright.
    pub latency_ms: Option<u64>,
    // Diagnostic text (error message or HTTP status) when status is not "up".
    pub details: Option<String>,
}
|
|
|
|
/// Host-level system metrics, sampled from /proc files and disk stats.
#[derive(Serialize)]
pub struct SystemMetrics {
    // Approximation: load_avg[0] / cpu_cores * 100, capped at 100 (see read_cpu_usage).
    pub cpu_usage_percent: f32,
    pub cpu_cores: usize,
    // 1-, 5- and 15-minute load averages from /proc/loadavg.
    pub load_avg: [f32; 3],
    pub memory_total_bytes: u64,
    pub memory_used_bytes: u64,
    pub memory_available_bytes: u64,
    pub memory_usage_percent: f32,
    // Disk usage for the CAS root (falls back to "/" — see collect_metrics).
    pub disk: crate::resources::DiskStatus,
    pub uptime_seconds: u64,
}
|
|
|
|
/// Freshness report for one backup kind (currently only the PG dump).
#[derive(Serialize)]
pub struct BackupInfo {
    // E.g. "PostgreSQL dump".
    pub backup_type: String,
    // RFC 3339 timestamp of the newest backup file, if any was found.
    pub last_success: Option<String>,
    // Filesystem path of the newest backup file, if any.
    pub path: Option<String>,
    pub status: String, // "ok", "missing", "stale"
}
|
|
|
|
/// One parsed log line from journalctl or `docker logs`.
#[derive(Serialize)]
pub struct LogEntry {
    // Leading timestamp chunk of the raw line; may be empty when the line
    // was too short to split (see read_service_logs).
    pub timestamp: String,
    pub service: String,
    // "error" | "warn" | "info" | "debug" — heuristic keyword match.
    pub level: String,
    pub message: String,
}
|
|
|
|
/// Aggregate payload for GET /admin/health.
#[derive(Serialize)]
pub struct HealthDashboard {
    // One entry per probed service (PG, Caddy, Authentik, LiteLLM, Whisper, LiveKit).
    pub services: Vec<ServiceStatus>,
    pub metrics: SystemMetrics,
    pub backups: Vec<BackupInfo>,
    pub pg_stats: PgStats,
}
|
|
|
|
/// PostgreSQL runtime statistics (see collect_pg_stats for the queries).
#[derive(Serialize)]
pub struct PgStats {
    // Connections in state 'active' per pg_stat_activity.
    pub active_connections: i64,
    // The server's max_connections setting.
    pub max_connections: i64,
    // pg_database_size(current_database()).
    pub database_size_bytes: i64,
    // Active queries, excluding the stats query itself.
    pub active_queries: i64,
}
|
|
|
|
/// Query parameters for GET /admin/health/logs.
#[derive(Deserialize)]
pub struct LogsQuery {
    // Single service to read; omitted = merge the tail of all known services.
    pub service: Option<String>,
    // Max lines to return; defaults to 50, capped at 200 by the handler.
    pub lines: Option<usize>,
}
|
|
|
|
/// Response payload for GET /admin/health/logs.
#[derive(Serialize)]
pub struct LogsResponse {
    pub entries: Vec<LogEntry>,
}
|
|
|
|
// =============================================================================
|
|
// Tjeneste-sjekker
|
|
// =============================================================================
|
|
|
|
async fn check_pg(db: &PgPool) -> ServiceStatus {
|
|
let start = std::time::Instant::now();
|
|
match sqlx::query_scalar::<_, i32>("SELECT 1").fetch_one(db).await {
|
|
Ok(_) => ServiceStatus {
|
|
name: "PostgreSQL".to_string(),
|
|
status: "up".to_string(),
|
|
latency_ms: Some(start.elapsed().as_millis() as u64),
|
|
details: None,
|
|
},
|
|
Err(e) => ServiceStatus {
|
|
name: "PostgreSQL".to_string(),
|
|
status: "down".to_string(),
|
|
latency_ms: None,
|
|
details: Some(format!("{e}")),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Sjekk en HTTP-tjeneste med timeout.
|
|
async fn check_http_service(name: &str, url: &str) -> ServiceStatus {
|
|
let client = reqwest::Client::builder()
|
|
.timeout(std::time::Duration::from_secs(5))
|
|
.build()
|
|
.unwrap();
|
|
|
|
let start = std::time::Instant::now();
|
|
match client.get(url).send().await {
|
|
Ok(resp) => {
|
|
let latency = start.elapsed().as_millis() as u64;
|
|
let status_code = resp.status();
|
|
if status_code.is_success() || status_code.as_u16() == 401 || status_code.as_u16() == 403 {
|
|
// 401/403 betyr at tjenesten kjører, bare auth mangler
|
|
ServiceStatus {
|
|
name: name.to_string(),
|
|
status: "up".to_string(),
|
|
latency_ms: Some(latency),
|
|
details: None,
|
|
}
|
|
} else {
|
|
ServiceStatus {
|
|
name: name.to_string(),
|
|
status: "degraded".to_string(),
|
|
latency_ms: Some(latency),
|
|
details: Some(format!("HTTP {}", status_code)),
|
|
}
|
|
}
|
|
}
|
|
Err(e) => ServiceStatus {
|
|
name: name.to_string(),
|
|
status: "down".to_string(),
|
|
latency_ms: None,
|
|
details: Some(format!("{e}")),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Probe the Caddy reverse proxy via its local admin API.
async fn check_caddy() -> ServiceStatus {
    // Caddy runs locally; probe its admin API on port 2019.
    check_http_service("Caddy", "http://localhost:2019/config/").await
}
|
|
|
|
async fn check_authentik() -> ServiceStatus {
|
|
// Authentik via Caddy
|
|
check_http_service("Authentik", "https://auth.sidelinja.org/-/health/ready/").await
|
|
}
|
|
|
|
async fn check_litellm() -> ServiceStatus {
|
|
let url = std::env::var("AI_GATEWAY_URL")
|
|
.unwrap_or_else(|_| "http://localhost:4000".to_string());
|
|
check_http_service("LiteLLM", &format!("{url}/health")).await
|
|
}
|
|
|
|
async fn check_whisper() -> ServiceStatus {
|
|
let url = std::env::var("WHISPER_URL")
|
|
.unwrap_or_else(|_| "http://localhost:8000".to_string());
|
|
check_http_service("Whisper", &format!("{url}/health")).await
|
|
}
|
|
|
|
async fn check_livekit() -> ServiceStatus {
|
|
let url = std::env::var("LIVEKIT_URL")
|
|
.unwrap_or_else(|_| "http://localhost:7880".to_string());
|
|
check_http_service("LiveKit", &url).await
|
|
}
|
|
|
|
// =============================================================================
|
|
// System-metrikker
|
|
// =============================================================================
|
|
|
|
fn read_cpu_usage() -> f32 {
|
|
// Les /proc/stat for CPU-bruk (snapshot, ikke gjennomsnitt).
|
|
// For enkel implementering bruker vi load average i stedet.
|
|
// CPU-prosent beregnes fra load_avg[0] / antall kjerner.
|
|
let cores = num_cpus();
|
|
let load = read_load_avg();
|
|
// Tilnærming: load / cores * 100, capped til 100
|
|
((load[0] / cores as f32) * 100.0).min(100.0)
|
|
}
|
|
|
|
/// Number of logical CPU cores; falls back to 1 when undetectable.
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(n) => n.get(),
        Err(_) => 1,
    }
}
|
|
|
|
/// 1/5/15-minute load averages from /proc/loadavg; zeros on read/parse failure.
///
/// Unparsable leading fields are skipped (filter semantics), matching the
/// lenient behavior callers rely on for a malformed file.
fn read_load_avg() -> [f32; 3] {
    let content = std::fs::read_to_string("/proc/loadavg").unwrap_or_default();
    let mut values = content
        .split_whitespace()
        .take(3)
        .filter_map(|tok| tok.parse::<f32>().ok());
    let mut next = || values.next().unwrap_or(0.0);
    // Array elements evaluate left to right, so slots fill in order.
    [next(), next(), next()]
}
|
|
|
|
fn read_memory_info() -> (u64, u64, u64) {
|
|
// Les /proc/meminfo
|
|
let content = std::fs::read_to_string("/proc/meminfo").unwrap_or_default();
|
|
let mut total: u64 = 0;
|
|
let mut available: u64 = 0;
|
|
|
|
for line in content.lines() {
|
|
if let Some(val) = line.strip_prefix("MemTotal:") {
|
|
total = parse_meminfo_kb(val) * 1024;
|
|
} else if let Some(val) = line.strip_prefix("MemAvailable:") {
|
|
available = parse_meminfo_kb(val) * 1024;
|
|
}
|
|
}
|
|
|
|
let used = total.saturating_sub(available);
|
|
(total, used, available)
|
|
}
|
|
|
|
/// Parse the numeric kB value from a /proc/meminfo field remainder,
/// e.g. "   16384 kB" -> 16384. Returns 0 when nothing parses.
fn parse_meminfo_kb(s: &str) -> u64 {
    // split_whitespace already skips leading whitespace, so no trim needed.
    match s.split_whitespace().next() {
        Some(tok) => tok.parse().unwrap_or(0),
        None => 0,
    }
}
|
|
|
|
/// System uptime in whole seconds from /proc/uptime; 0 on any failure.
fn read_uptime() -> u64 {
    let content = std::fs::read_to_string("/proc/uptime").unwrap_or_default();
    // First whitespace-separated field is uptime as fractional seconds.
    match content.split_whitespace().next().map(str::parse::<f64>) {
        Some(Ok(secs)) => secs as u64,
        _ => 0,
    }
}
|
|
|
|
fn collect_metrics() -> SystemMetrics {
|
|
let load = read_load_avg();
|
|
let cores = num_cpus();
|
|
let cpu = read_cpu_usage();
|
|
let (mem_total, mem_used, mem_available) = read_memory_info();
|
|
let mem_percent = if mem_total > 0 {
|
|
(mem_used as f64 / mem_total as f64 * 100.0) as f32
|
|
} else {
|
|
0.0
|
|
};
|
|
|
|
let cas_root = std::env::var("CAS_ROOT")
|
|
.unwrap_or_else(|_| "/srv/synops/media/cas".to_string());
|
|
let disk = crate::resources::check_disk_usage(&cas_root)
|
|
.unwrap_or_else(|_| crate::resources::check_disk_usage("/").unwrap_or(
|
|
crate::resources::DiskStatus {
|
|
mount_point: "/".to_string(),
|
|
total_bytes: 0,
|
|
used_bytes: 0,
|
|
available_bytes: 0,
|
|
usage_percent: 0.0,
|
|
alert_level: None,
|
|
}
|
|
));
|
|
|
|
SystemMetrics {
|
|
cpu_usage_percent: cpu,
|
|
cpu_cores: cores,
|
|
load_avg: load,
|
|
memory_total_bytes: mem_total,
|
|
memory_used_bytes: mem_used,
|
|
memory_available_bytes: mem_available,
|
|
memory_usage_percent: mem_percent,
|
|
disk,
|
|
uptime_seconds: read_uptime(),
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// PG-statistikk
|
|
// =============================================================================
|
|
|
|
async fn collect_pg_stats(db: &PgPool) -> PgStats {
|
|
let active = sqlx::query_scalar::<_, i64>(
|
|
"SELECT count(*) FROM pg_stat_activity WHERE state = 'active'"
|
|
)
|
|
.fetch_one(db)
|
|
.await
|
|
.unwrap_or(0);
|
|
|
|
let max_conn = sqlx::query_scalar::<_, i64>(
|
|
"SELECT setting::bigint FROM pg_settings WHERE name = 'max_connections'"
|
|
)
|
|
.fetch_one(db)
|
|
.await
|
|
.unwrap_or(100);
|
|
|
|
let db_size = sqlx::query_scalar::<_, i64>(
|
|
"SELECT pg_database_size(current_database())"
|
|
)
|
|
.fetch_one(db)
|
|
.await
|
|
.unwrap_or(0);
|
|
|
|
let queries = sqlx::query_scalar::<_, i64>(
|
|
"SELECT count(*) FROM pg_stat_activity WHERE state = 'active' AND query NOT LIKE '%pg_stat_activity%'"
|
|
)
|
|
.fetch_one(db)
|
|
.await
|
|
.unwrap_or(0);
|
|
|
|
PgStats {
|
|
active_connections: active,
|
|
max_connections: max_conn,
|
|
database_size_bytes: db_size,
|
|
active_queries: queries,
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// Backup-status
|
|
// =============================================================================
|
|
|
|
fn check_backups() -> Vec<BackupInfo> {
|
|
let mut backups = Vec::new();
|
|
|
|
// PG-dump — sjekk /srv/synops/backup/pg/ for nyeste .dump-fil
|
|
let pg_dir = "/srv/synops/backup/pg";
|
|
let mut pg_backup = BackupInfo {
|
|
backup_type: "PostgreSQL dump".to_string(),
|
|
last_success: None,
|
|
path: None,
|
|
status: "missing".to_string(),
|
|
};
|
|
|
|
if let Ok(entries) = std::fs::read_dir(pg_dir) {
|
|
// Finn nyeste dump-fil
|
|
let mut newest: Option<(std::time::SystemTime, std::path::PathBuf)> = None;
|
|
for entry in entries.flatten() {
|
|
let name = entry.file_name().to_string_lossy().to_string();
|
|
if name.ends_with(".dump") {
|
|
if let Ok(meta) = entry.metadata() {
|
|
if let Ok(modified) = meta.modified() {
|
|
if newest.as_ref().map_or(true, |(t, _)| modified > *t) {
|
|
newest = Some((modified, entry.path()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some((modified, path)) = newest {
|
|
let age = modified.elapsed().unwrap_or_default();
|
|
let ts = chrono::DateTime::<chrono::Utc>::from(modified);
|
|
pg_backup.last_success = Some(ts.to_rfc3339());
|
|
pg_backup.path = Some(path.to_string_lossy().to_string());
|
|
pg_backup.status = if age.as_secs() < 86400 + 3600 {
|
|
// Litt slakk: 25 timer (cron kjører daglig)
|
|
"ok".to_string()
|
|
} else {
|
|
"stale".to_string()
|
|
};
|
|
}
|
|
}
|
|
|
|
backups.push(pg_backup);
|
|
backups
|
|
}
|
|
|
|
// =============================================================================
|
|
// Logg-tilgang
|
|
// =============================================================================
|
|
|
|
fn read_service_logs(service: &str, max_lines: usize) -> Vec<LogEntry> {
|
|
// Bruk journalctl for systemd-tjenester, docker logs for containere
|
|
let cmd = match service {
|
|
"maskinrommet" | "caddy" | "sveltekit" => {
|
|
format!("journalctl -u {service} --no-pager -n {max_lines} --output=short-iso 2>/dev/null")
|
|
}
|
|
"postgres" | "authentik" | "litellm" | "whisper" | "livekit" => {
|
|
let container = match service {
|
|
"postgres" => "sidelinja-postgres-1",
|
|
"authentik" => "sidelinja-authentik-server-1",
|
|
"litellm" => "sidelinja-ai-gateway-1",
|
|
"whisper" => "sidelinja-faster-whisper-1",
|
|
"livekit" => "sidelinja-livekit-1",
|
|
_ => return Vec::new(),
|
|
};
|
|
format!("docker logs --tail {max_lines} --timestamps {container} 2>&1")
|
|
}
|
|
_ => return Vec::new(),
|
|
};
|
|
|
|
let output = std::process::Command::new("bash")
|
|
.arg("-c")
|
|
.arg(&cmd)
|
|
.output();
|
|
|
|
match output {
|
|
Ok(out) => {
|
|
let text = String::from_utf8_lossy(&out.stdout);
|
|
text.lines()
|
|
.rev() // nyeste først
|
|
.take(max_lines)
|
|
.map(|line| {
|
|
// Prøv å parse tidsstempel fra starten av linjen
|
|
let (ts, msg) = if line.len() > 24 {
|
|
(line[..24].trim().to_string(), line[24..].trim().to_string())
|
|
} else {
|
|
(String::new(), line.to_string())
|
|
};
|
|
|
|
let level = if msg.contains("ERROR") || msg.contains("error") || msg.contains("ERR") {
|
|
"error"
|
|
} else if msg.contains("WARN") || msg.contains("warn") {
|
|
"warn"
|
|
} else if msg.contains("INFO") || msg.contains("info") {
|
|
"info"
|
|
} else {
|
|
"debug"
|
|
};
|
|
|
|
LogEntry {
|
|
timestamp: ts,
|
|
service: service.to_string(),
|
|
level: level.to_string(),
|
|
message: msg,
|
|
}
|
|
})
|
|
.collect()
|
|
}
|
|
Err(_) => Vec::new(),
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// API-handlers
|
|
// =============================================================================
|
|
|
|
/// GET /admin/health — komplett serverhelse-dashboard.
|
|
pub async fn health_dashboard(
|
|
State(state): State<AppState>,
|
|
_admin: AdminUser,
|
|
) -> Result<Json<HealthDashboard>, (StatusCode, Json<crate::intentions::ErrorResponse>)> {
|
|
// Kjør alle tjeneste-sjekker parallelt
|
|
let (pg, caddy, authentik, litellm, whisper, livekit) = tokio::join!(
|
|
check_pg(&state.db),
|
|
check_caddy(),
|
|
check_authentik(),
|
|
check_litellm(),
|
|
check_whisper(),
|
|
check_livekit(),
|
|
);
|
|
|
|
let services = vec![pg, caddy, authentik, litellm, whisper, livekit];
|
|
let metrics = collect_metrics();
|
|
let backups = check_backups();
|
|
let pg_stats = collect_pg_stats(&state.db).await;
|
|
|
|
Ok(Json(HealthDashboard {
|
|
services,
|
|
metrics,
|
|
backups,
|
|
pg_stats,
|
|
}))
|
|
}
|
|
|
|
/// GET /admin/health/logs?service=maskinrommet&lines=50
|
|
pub async fn health_logs(
|
|
_admin: AdminUser,
|
|
Query(params): Query<LogsQuery>,
|
|
) -> Json<LogsResponse> {
|
|
let max_lines = params.lines.unwrap_or(50).min(200);
|
|
|
|
let entries = if let Some(service) = ¶ms.service {
|
|
read_service_logs(service, max_lines)
|
|
} else {
|
|
// Alle tjenester, siste linjer fra hver
|
|
let services = ["maskinrommet", "caddy", "postgres", "authentik", "litellm", "whisper", "livekit"];
|
|
let per_service = (max_lines / services.len()).max(10);
|
|
let mut all = Vec::new();
|
|
for svc in &services {
|
|
all.extend(read_service_logs(svc, per_service));
|
|
}
|
|
// Sorter etter timestamp (nyeste først)
|
|
all.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
|
|
all.truncate(max_lines);
|
|
all
|
|
};
|
|
|
|
Json(LogsResponse { entries })
|
|
}
|