From b4ede327133372af1f5ef253af5f50fe3053cc3e Mon Sep 17 00:00:00 2001
From: vegard
Date: Thu, 19 Mar 2026 23:24:23 +0000
Subject: [PATCH] =?UTF-8?q?Admin=20AI-ruting:=20fire=20niv=C3=A5er=20med?=
 =?UTF-8?q?=20test-prompt=20og=20kostnadsestimat?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Ny «Nivåer»-fane i /admin/ai med synops/low, medium, high, extreme
- Per-nivå: fallback-kjede, provider-administrasjon, kostnadsestimat
- Test-knapp sender prompt gjennom LiteLLM og viser respons, latens, tokens, kostnad
- Backend: POST /admin/ai/test_prompt + GET /admin/ai/tier_costs
- Migration 033: oppretter de fire synops/* aliasene med providers
---
 frontend/src/lib/api.ts                   |  48 ++++
 frontend/src/routes/admin/ai/+page.svelte | 294 +++++++++++++++++++++-
 maskinrommet/src/ai_admin.rs              | 186 ++++++++++++++
 maskinrommet/src/main.rs                  |   2 +
 migrations/033_ai_tier_aliases.sql        |  61 +++++
 5 files changed, 581 insertions(+), 10 deletions(-)
 create mode 100644 migrations/033_ai_tier_aliases.sql

diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts
index 0be76c5..93a6938 100644
--- a/frontend/src/lib/api.ts
+++ b/frontend/src/lib/api.ts
@@ -948,6 +948,54 @@ export function deleteAiRouting(
 	return post(accessToken, '/admin/ai/delete_routing', { job_type: jobType });
 }
 
+/** Response from POST /admin/ai/test_prompt: model reply, latency, token counts and estimated cost. */
+export interface AiTestPromptResponse {
+	success: boolean;
+	response_text: string;
+	model_used: string | null;
+	prompt_tokens: number;
+	completion_tokens: number;
+	latency_ms: number;
+	estimated_cost: number;
+}
+
+/** Run the backend's test prompt against an alias; only the alias name is posted. */
+export function testAiPrompt(
+	accessToken: string,
+	alias: string
+): Promise<AiTestPromptResponse> {
+	return post(accessToken, '/admin/ai/test_prompt', { alias });
+}
+
+/** Cost info for one provider of a tier (prices are per million tokens). */
+export interface ProviderCostInfo {
+	model: string;
+	provider: string;
+	priority: number;
+	input_cost_per_mtok: number;
+	output_cost_per_mtok: number;
+	estimated_cost_1k_tokens: number;
+}
+
+/** Cost info for one tier alias and its providers. */
+export interface TierCostInfo {
+	alias: string;
+	description: string | null;
+	providers: ProviderCostInfo[];
+}
+
+/** Fetch cost estimates per tier; throws an Error carrying status and body on a non-OK response. */
+export async function fetchAiTierCosts(accessToken: string): Promise<TierCostInfo[]> {
+	const res = await fetch(`${BASE_URL}/admin/ai/tier_costs`, {
+		headers: { Authorization: `Bearer ${accessToken}` }
+	});
+	if (!res.ok) {
+		const body = await res.text();
+		throw new Error(`tier costs failed (${res.status}): ${body}`);
+	}
+	return res.json();
+}
+
 // =============================================================================
 // Serverhelse-dashboard (oppgave 15.6)
 // =============================================================================
diff --git a/frontend/src/routes/admin/ai/+page.svelte b/frontend/src/routes/admin/ai/+page.svelte
index 9acbf45..c902dea 100644
--- a/frontend/src/routes/admin/ai/+page.svelte
+++ b/frontend/src/routes/admin/ai/+page.svelte
@@ -1,15 +1,17 @@
@@ -322,15 +381,16 @@ {/if} -
+
{#each [ + { key: 'tiers', label: 'Nivåer' }, { key: 'aliases', label: 'Modeller & fallback' }, { key: 'routing', label: 'Ruting' }, { key: 'usage', label: 'Forbruk' }, { key: 'keys', label: 'API-nøkler' } ] as tab}
+ + {#if activeTab === 'tiers'} +
+
+

Synops AI-nivåer

+

+ Fire kvalitetsnivåer med fallback-kjeder. Jobbtyper rutes til et nivå i Ruting-fanen. +

+
+ + {#each TIER_ORDER as tierName} + {@const alias = tierAlias(tierName)} + {@const costs = tierCosts.find((t) => t.alias === tierName)} + {@const providers = alias ? providersForAlias(alias) : []} + {@const routingCount = routingCountForAlias(tierName)} + {@const result = testResults[tierName]} + {@const testing = testLoading[tierName]} + {@const testError = testErrors[tierName]} +
+ +
+
+ +
+
+ {tierName} + {#if alias} + + {alias.is_active ? 'Aktiv' : 'Inaktiv'} + + {:else} + + Ikke opprettet + + {/if} +
+ {#if alias?.description} +

{alias.description}

+ {/if} +
+
+
+ {routingCount} ruting{routingCount !== 1 ? 'er' : ''} + {#if alias} + + {/if} +
+
+ +
+ +
+
+ Fallback-kjede +
+ {#if providers.length === 0} +

Ingen providers konfigurert

+ {:else} +
+ {#each providers as provider, idx} +
+ + {provider.priority} + +
+ {provider.model} +
+ + + + +
+ {#if idx > 0} + + {/if} + {#if idx < providers.length - 1} + + {/if} +
+ + +
+ {/each} +
+ {/if} + + + {#if alias && showNewProvider === alias.id} +
+
+ + + + +
+
+ + +
+
+ {:else if alias} + + {/if} +
+ + +
+
+ Kostnadsestimat +
+ {#if costs && costs.providers.length > 0} +
+ {#each costs.providers as cp} +
+ {cp.model.split('/').pop()} +
+ + ${cp.input_cost_per_mtok}/{cp.output_cost_per_mtok} + + + ~{formatCost(cp.estimated_cost_1k_tokens)}/1k + +
+
+ {/each} +
+ {:else} +

Ingen data

+ {/if} + + +
+ + + {#if result} +
+

{result.response_text}

+
+ Modell: {result.model_used ?? '?'} + {result.latency_ms}ms + {result.prompt_tokens}+{result.completion_tokens} tokens + {formatCost(result.estimated_cost)} +
+
+ {/if} + {#if testError} +
+ {testError} +
+ {/if} +
+
+
+
+ {/each} +
+ - {#if activeTab === 'aliases'} + {:else if activeTab === 'aliases'}
{#each data.aliases as alias}
diff --git a/maskinrommet/src/ai_admin.rs b/maskinrommet/src/ai_admin.rs
index bfcf77f..80e5232 100644
--- a/maskinrommet/src/ai_admin.rs
+++ b/maskinrommet/src/ai_admin.rs
@@ -475,6 +475,192 @@ pub async fn ai_usage(
         .map_err(|e| internal_error(&format!("Feil ved henting av forbruk: {e}")))
 }
 
+// =============================================================================
+// POST /admin/ai/test_prompt — send a fixed test prompt through an alias via the AI gateway
+// =============================================================================
+
+#[derive(Deserialize)]
+pub struct TestPromptRequest {
+    pub alias: String,
+}
+
+#[derive(Serialize)]
+pub struct TestPromptResponse {
+    pub success: bool,
+    pub response_text: String,
+    pub model_used: Option<String>,
+    pub prompt_tokens: i64,
+    pub completion_tokens: i64,
+    pub latency_ms: u64,
+    pub estimated_cost: f64,
+}
+
+/// Known model prices per million tokens as (input, output), used for admin-panel cost estimates.
+/// Arms are substring matches tried top-down: a more specific name must precede any name it contains.
+fn model_cost_per_million(model: &str) -> (f64, f64) {
+    match model {
+        m if m.contains("gemini-2.5-flash-lite") => (0.0, 0.0), // free tier
+        m if m.contains("gemini-2.5-flash") => (0.15, 0.60),
+        m if m.contains("grok-4-1-fast-non-reasoning") => (0.60, 3.00),
+        m if m.contains("claude-sonnet-4") => (3.00, 15.00),
+        m if m.contains("claude-opus") => (15.00, 75.00),
+        m if m.contains("gpt-4o-mini") => (0.15, 0.60), // must precede "gpt-4o", which it contains
+        m if m.contains("gpt-4o") => (2.50, 10.00),
+        _ => (1.00, 3.00), // conservative estimate for unknown models
+    }
+}
+
+pub fn estimate_cost(model: &str, prompt_tokens: i64, completion_tokens: i64) -> f64 {
+    let (input_price, output_price) = model_cost_per_million(model);
+    (prompt_tokens as f64 * input_price + completion_tokens as f64 * output_price) / 1_000_000.0 // prices are per million tokens
+}
+
+pub async fn test_prompt(
+    State(_state): State,
+    _admin: AdminUser,
+    Json(req): Json<TestPromptRequest>,
+) -> Result<Json<TestPromptResponse>, (StatusCode, Json)> {
+    if req.alias.trim().is_empty() {
+        return Err(bad_request("Alias kan ikke være tomt"));
+    }
+
+    let gateway_url = std::env::var("AI_GATEWAY_URL")
+        .unwrap_or_else(|_| "http://localhost:4000".to_string());
+    let api_key = std::env::var("LITELLM_MASTER_KEY")
+        .map_err(|_| internal_error("LITELLM_MASTER_KEY ikke satt"))?;
+
+    let request_body = serde_json::json!({
+        "model": req.alias,
+        "messages": [
+            {"role": "system", "content": "Du er en hjelpsom assistent. Svar kort og konsist."},
+            {"role": "user", "content": "Si «Hei fra Synops!» og beskriv deg selv i én setning."}
+        ],
+        "temperature": 0.3
+    });
+
+    let client = reqwest::Client::new();
+    let start = std::time::Instant::now();
+
+    let resp = client
+        .post(format!("{gateway_url}/v1/chat/completions"))
+        .header("Authorization", format!("Bearer {api_key}"))
+        .header("Content-Type", "application/json")
+        .json(&request_body)
+        .timeout(std::time::Duration::from_secs(30))
+        .send()
+        .await
+        .map_err(|e| internal_error(&format!("AI Gateway-kall feilet: {e}")))?;
+
+    let latency_ms = start.elapsed().as_millis() as u64;
+
+    if !resp.status().is_success() {
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+        return Err(internal_error(&format!("AI Gateway returnerte {status}: {body}")));
+    }
+
+    let body: serde_json::Value = resp.json().await
+        .map_err(|e| internal_error(&format!("Kunne ikke parse respons: {e}")))?;
+
+    let response_text = body["choices"][0]["message"]["content"]
+        .as_str()
+        .unwrap_or("(ingen respons)")
+        .to_string();
+    let model_used = body["model"].as_str().map(|s| s.to_string());
+    let prompt_tokens = body["usage"]["prompt_tokens"].as_i64().unwrap_or(0);
+    let completion_tokens = body["usage"]["completion_tokens"].as_i64().unwrap_or(0);
+
+    let cost_model = model_used.as_deref().unwrap_or(&req.alias);
+    let estimated_cost = estimate_cost(cost_model, prompt_tokens, completion_tokens);
+
+    tracing::info!(
+        alias = %req.alias,
+        model = ?model_used,
+        latency_ms,
+        prompt_tokens,
+        completion_tokens,
+        user = %_admin.node_id,
+        "Admin: test-prompt kjørt"
+    );
+
+    Ok(Json(TestPromptResponse {
+        success: true,
+        response_text,
+        model_used,
+        prompt_tokens,
+        completion_tokens,
+        latency_ms,
+        estimated_cost,
+    }))
+}
+
+// =============================================================================
+// GET /admin/ai/tier_costs — price table for the active providers of every synops/* alias
+// =============================================================================
+
+#[derive(Serialize)]
+pub struct TierCostInfo {
+    pub alias: String,
+    pub description: Option<String>,
+    pub providers: Vec<ProviderCostInfo>,
+}
+
+#[derive(Serialize)]
+pub struct ProviderCostInfo {
+    pub model: String,
+    pub provider: String,
+    pub priority: i16,
+    pub input_cost_per_mtok: f64,
+    pub output_cost_per_mtok: f64,
+    pub estimated_cost_1k_tokens: f64,
+}
+
+pub async fn tier_costs(
+    State(state): State,
+    _admin: AdminUser,
+) -> Result<Json<Vec<TierCostInfo>>, (StatusCode, Json)> {
+    let aliases = sqlx::query_as::<_, AiModelAlias>(
+        "SELECT id, alias, description, is_active, created_at FROM ai_model_aliases
+         WHERE alias LIKE 'synops/%' ORDER BY alias"
+    )
+    .fetch_all(&state.db)
+    .await
+    .map_err(|e| internal_error(&format!("Feil: {e}")))?;
+
+    let providers = sqlx::query_as::<_, AiModelProvider>(
+        "SELECT id, alias_id, provider, model, api_key_env, priority, is_active
+         FROM ai_model_providers ORDER BY priority"
+    )
+    .fetch_all(&state.db)
+    .await
+    .map_err(|e| internal_error(&format!("Feil: {e}")))?;
+
+    let tiers: Vec<TierCostInfo> = aliases.iter().map(|a| {
+        let alias_providers: Vec<ProviderCostInfo> = providers.iter()
+            .filter(|p| p.alias_id == a.id && p.is_active)
+            .map(|p| {
+                let (input_cost, output_cost) = model_cost_per_million(&p.model);
+                ProviderCostInfo {
+                    model: p.model.clone(),
+                    provider: p.provider.clone(),
+                    priority: p.priority,
+                    input_cost_per_mtok: input_cost,
+                    output_cost_per_mtok: output_cost,
+                    // Estimate: 700 input + 300 output tokens per 1k
+                    estimated_cost_1k_tokens: (700.0 * input_cost + 300.0 * output_cost) / 1_000_000.0,
+                }
+            })
+            .collect();
+        TierCostInfo {
+            alias: a.alias.clone(),
+            description: a.description.clone(),
+            providers: alias_providers,
+        }
+    }).collect();
+
+    Ok(Json(tiers))
+}
+
 async fn fetch_usage_for_collection(
     db: &PgPool,
     collection_id: Uuid,
diff --git a/maskinrommet/src/main.rs b/maskinrommet/src/main.rs
index 1a402bb..eded4da 100644
--- a/maskinrommet/src/main.rs
+++ b/maskinrommet/src/main.rs
@@ -257,6 +257,8 @@ async fn main() {
         .route("/admin/ai/delete_provider", post(ai_admin::delete_provider))
         .route("/admin/ai/update_routing", post(ai_admin::update_routing))
         .route("/admin/ai/delete_routing", post(ai_admin::delete_routing))
+        .route("/admin/ai/test_prompt", post(ai_admin::test_prompt))
+        .route("/admin/ai/tier_costs", get(ai_admin::tier_costs))
         // Forbruksoversikt (oppgave 15.8)
         .route("/admin/usage", get(usage_overview::usage_overview))
         // Podcast-statistikk (oppgave 30.4)
diff --git a/migrations/033_ai_tier_aliases.sql b/migrations/033_ai_tier_aliases.sql
new file mode 100644
index 0000000..1e1450f
--- /dev/null
+++ b/migrations/033_ai_tier_aliases.sql
@@ -0,0 +1,61 @@
+-- 033_ai_tier_aliases.sql — Create synops/* tier aliases for AI routing
+--
+-- Four standard tiers: synops/low, synops/medium, synops/high, synops/extreme
+-- Each tier has a fallback chain of providers.
+-- +-- Ref: docs/oppdrag/admin-komplett.md § AI-ruting + +-- Create the four tier aliases (separate INSERTs, each with its own ON CONFLICT). NOTE(review): the provider INSERTs below have no conflict guard — confirm they cannot add duplicate rows to an alias that already existed. +INSERT INTO ai_model_aliases (alias, description) VALUES + ('synops/low', 'Billig, høyt volum — tekstvasking, metadata, klassifisering') + ON CONFLICT (alias) DO NOTHING; +INSERT INTO ai_model_aliases (alias, description) VALUES + ('synops/medium', 'Balansert — oppsummering, research, standard AI-oppgaver') + ON CONFLICT (alias) DO NOTHING; +INSERT INTO ai_model_aliases (alias, description) VALUES + ('synops/high', 'Høy kvalitet — chat, kreativ skriving, analyse') + ON CONFLICT (alias) DO NOTHING; +INSERT INTO ai_model_aliases (alias, description) VALUES + ('synops/extreme', 'Beste tilgjengelige — kritiske avgjørelser, kompleks resonnering') + ON CONFLICT (alias) DO NOTHING; + +-- synops/low: cheap, fast models +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'gemini', 'gemini/gemini-2.5-flash-lite', 'GEMINI_API_KEY', 1 + FROM ai_model_aliases WHERE alias = 'synops/low'; +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'xai', 'xai/grok-4-1-fast-non-reasoning', 'XAI_API_KEY', 2 + FROM ai_model_aliases WHERE alias = 'synops/low'; +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'openrouter', 'openrouter/google/gemini-2.5-flash', 'OPENROUTER_API_KEY', 3 + FROM ai_model_aliases WHERE alias = 'synops/low'; + +-- synops/medium: good all-round models +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'gemini', 'gemini/gemini-2.5-flash', 'GEMINI_API_KEY', 1 + FROM ai_model_aliases WHERE alias = 'synops/medium'; +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'anthropic', 'anthropic/claude-sonnet-4-20250514', 'ANTHROPIC_API_KEY', 2 + FROM ai_model_aliases WHERE alias = 'synops/medium'; +INSERT INTO ai_model_providers 
(alias_id, provider, model, api_key_env, priority) +SELECT id, 'openrouter', 'openrouter/anthropic/claude-sonnet-4', 'OPENROUTER_API_KEY', 3 + FROM ai_model_aliases WHERE alias = 'synops/medium'; + +-- synops/high: high-quality models +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'anthropic', 'anthropic/claude-sonnet-4-20250514', 'ANTHROPIC_API_KEY', 1 + FROM ai_model_aliases WHERE alias = 'synops/high'; +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'openrouter', 'openrouter/anthropic/claude-sonnet-4', 'OPENROUTER_API_KEY', 2 + FROM ai_model_aliases WHERE alias = 'synops/high'; +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'openrouter', 'openrouter/google/gemini-2.5-flash', 'OPENROUTER_API_KEY', 3 + FROM ai_model_aliases WHERE alias = 'synops/high'; + +-- synops/extreme: best models for critical tasks. NOTE(review): the priority-1 model is the same as for synops/high — confirm this is intended. +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'anthropic', 'anthropic/claude-sonnet-4-20250514', 'ANTHROPIC_API_KEY', 1 + FROM ai_model_aliases WHERE alias = 'synops/extreme'; +INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) +SELECT id, 'openrouter', 'openrouter/anthropic/claude-sonnet-4', 'OPENROUTER_API_KEY', 2 + FROM ai_model_aliases WHERE alias = 'synops/extreme';