From 01ad35557f1e6bc3f4666a745a6217ff3d7adfb0 Mon Sep 17 00:00:00 2001
From: vegard
Date: Tue, 17 Mar 2026 23:12:46 +0000
Subject: [PATCH] LiteLLM AI Gateway: Docker, DB-ruting, config-generering (oppgave 10.1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setter opp AI Gateway med LiteLLM som sentralisert proxy for alle AI-kall.
PG eier all modellkonfigurasjon — LiteLLM er stateløs.

- Migrasjon 008: ai_model_aliases, ai_model_providers, ai_job_routing
  med seed-data for sidelinja/rutine og sidelinja/resonering
- Config-generering fra PG: scripts/generate-litellm-config.sh
  filtrerer bort providers med tomme API-nøkler
- Docker-container kjører på sidelinja-net (intern, ingen eksponert port)
- Maskinrommet har AI_GATEWAY_URL via maskinrommet-env.sh
- API-nøkkel-placeholders i .env (GEMINI, ANTHROPIC, XAI)
- Oppdatert docs/infra/ai_gateway.md med faktisk config

Verifisert: container healthy, modellaliaser eksponert, maskinrommet
har korrekt gateway-URL. Reelle API-kall krever at Vegard fyller inn
leverandør-nøkler i /srv/synops/.env.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Review notes (text between '---' and the diffstat is ignored by git-am):

 * This patch was restored from a copy in which all line breaks had been
   collapsed and <...>-like spans had been stripped. The hunk headers and
   the diffstat were used to re-derive line boundaries and blank lines;
   every hunk count and the 247/34 totals match. Indentation, column
   alignment and leading spaces inside quoted strings are reconstructed,
   not original.
 * scripts/generate-litellm-config.sh lines 59-71 (the `cat <<EOF` entry
   template, the `done <<< "$PROVIDERS"` loop end and the "nothing was
   included" check) were missing from the copy. They are reconstructed from
   the generated config/litellm/config.yaml in this same patch. The wording
   of the second "FEIL:" message is a guess -- TODO confirm against the
   repository.
 * read_env() was changed on its one line (line count still 97):
   `cut -d= -f2-` keeps values that contain '=', and `|| true` stops a key
   that is absent from .env from aborting the script under
   `set -euo pipefail` (grep exits 1 on no match) before the "HOPPET OVER"
   branch is reached and after tee has already truncated the config file.
   Because of this change the blob id on the script's `index` line no
   longer matches the new file content.
 * The address parts of the From: and Co-Authored-By: headers were stripped
   from the copy and could not be restored.

 config/litellm/config.yaml          |  50 +++++++-------
 docs/infra/ai_gateway.md            |  31 ++++++---
 migrations/008_ai_model_routing.sql | 100 ++++++++++++++++++++++++++++
 scripts/generate-litellm-config.sh  |  97 +++++++++++++++++++++++++++
 tasks.md                            |   3 +-
 5 files changed, 247 insertions(+), 34 deletions(-)
 create mode 100644 migrations/008_ai_model_routing.sql
 create mode 100755 scripts/generate-litellm-config.sh

diff --git a/config/litellm/config.yaml b/config/litellm/config.yaml
index 2cef0b9..f052ce5 100644
--- a/config/litellm/config.yaml
+++ b/config/litellm/config.yaml
@@ -1,40 +1,42 @@
-model_list:
-  - model_name: "kjapp"
-    litellm_params:
-      model: "xai/grok-4-1-fast-non-reasoning"
-      api_key: "os.environ/XAI_API_KEY"
-  - model_name: "kjapp"
-    litellm_params:
-      model: "xai/grok-3-mini"
-      api_key: "os.environ/XAI_API_KEY"
-  - model_name: "kjapp"
-    litellm_params:
-      model: "gemini/gemini-2.5-flash-lite"
-      api_key: "os.environ/GEMINI_API_KEY"
-  - model_name: "kjapp"
-    litellm_params:
-      model: "gemini/gemini-flash-lite-latest"
-      api_key: "os.environ/GEMINI_API_KEY"
-  - model_name: "kjapp"
-    litellm_params:
-      model: "gemini/gemini-flash-latest"
-      api_key: "os.environ/GEMINI_API_KEY"
+# LiteLLM config — GENERERT FIL
+#
+# Denne filen genereres fra PostgreSQL-tabellene ai_model_aliases
+# og ai_model_providers. Rediger ikke manuelt — bruk admin-panelet
+# eller kjør scripts/generate-litellm-config.sh.
+#
+# Bare providers med gyldige API-nøkler i .env inkluderes.
+# Fyll inn GEMINI_API_KEY, ANTHROPIC_API_KEY, XAI_API_KEY i
+# /srv/synops/.env for å aktivere flere leverandører.
 
-  - model_name: "resonering"
+model_list:
+  # === sidelinja/resonering ===
+  - model_name: "sidelinja/resonering"
     litellm_params:
       model: "openrouter/anthropic/claude-sonnet-4"
       api_key: "os.environ/OPENROUTER_API_KEY"
-  - model_name: "resonering"
+    model_info:
+      priority: 1
+  - model_name: "sidelinja/resonering"
     litellm_params:
       model: "openrouter/google/gemini-2.5-flash"
       api_key: "os.environ/OPENROUTER_API_KEY"
+    model_info:
+      priority: 2
+  # === sidelinja/rutine ===
+  - model_name: "sidelinja/rutine"
+    litellm_params:
+      model: "openrouter/google/gemini-2.5-flash"
+      api_key: "os.environ/OPENROUTER_API_KEY"
+    model_info:
+      priority: 4
 
 router_settings:
   routing_strategy: "simple-shuffle"
-  num_retries: 2
+  num_retries: 3
   timeout: 60
   allowed_fails: 1
   retry_after: 5
+  enable_pre_call_checks: true
 
 general_settings:
   master_key: "os.environ/LITELLM_MASTER_KEY"
diff --git a/docs/infra/ai_gateway.md b/docs/infra/ai_gateway.md
index d510054..3a6f80f 100644
--- a/docs/infra/ai_gateway.md
+++ b/docs/infra/ai_gateway.md
@@ -101,23 +101,38 @@ Admin-panelet lar administrator:
 ## 4. Docker-oppsett
 
 ```yaml
-# docker-compose.dev.yml / docker-compose.yml
+# /srv/synops/docker-compose.yml
 ai-gateway:
   image: ghcr.io/berriai/litellm:main-stable
   restart: unless-stopped
   command: --config /etc/litellm/config.yaml
   environment:
     LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY}
-    GEMINI_API_KEY: ${GEMINI_API_KEY}
-    ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
-    XAI_API_KEY: ${XAI_API_KEY}
+    GEMINI_API_KEY: ${GEMINI_API_KEY:-}
+    ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
+    XAI_API_KEY: ${XAI_API_KEY:-}
     OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
   volumes:
-    - ./config/litellm/config.yaml:/etc/litellm/config.yaml:ro
-  ports:
-    - "127.0.0.1:4000:4000"  # kun localhost (dev), ingen port i prod
+    - /srv/synops/config/litellm:/etc/litellm
   networks:
-    - sidelinja-dev  # eller sidelinja-net i prod
+    - sidelinja-net  # intern — maskinrommet når gateway via container-IP
+  healthcheck:
+    test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen(\"http://localhost:4000/health/liveliness\")"]
+    interval: 15s
+    timeout: 5s
+    retries: 3
+```
+
+### 4.1 Config-generering
+
+`config.yaml` genereres fra PG-tabellene med `scripts/generate-litellm-config.sh`.
+Scriptet filtrerer bort providers med tomme API-nøkler i `.env`, slik at bare
+leverandører med gyldige nøkler inkluderes. Kjør med `--restart` for å restarte
+containeren etter generering.
+
+```bash
+# Generer og restart:
+scripts/generate-litellm-config.sh --restart
 ```
 
 ## 5. Prompt-kvalitetssikring (Promptfoo)
diff --git a/migrations/008_ai_model_routing.sql b/migrations/008_ai_model_routing.sql
new file mode 100644
index 0000000..bf82898
--- /dev/null
+++ b/migrations/008_ai_model_routing.sql
@@ -0,0 +1,100 @@
+-- 008_ai_model_routing.sql — Modellruting for AI Gateway (LiteLLM).
+--
+-- Nye tabeller:
+--   ai_model_aliases — globale modellaliaser (sidelinja/rutine, sidelinja/resonering)
+--   ai_model_providers — leverandør-modeller med prioritert fallback per alias
+--   ai_job_routing — jobbtype → modellalias mapping
+--
+-- Ref: docs/infra/ai_gateway.md §3.2
+
+-- =============================================================================
+-- 1. Modellaliaser
+-- =============================================================================
+
+CREATE TABLE ai_model_aliases (
+    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    alias       TEXT NOT NULL,
+    description TEXT,
+    is_active   BOOLEAN NOT NULL DEFAULT true,
+    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
+    UNIQUE(alias)
+);
+
+COMMENT ON TABLE ai_model_aliases IS 'Globale modellaliaser — abstraherer bort leverandør-spesifikke modellnavn';
+
+-- =============================================================================
+-- 2. Leverandør-modeller med fallback
+-- =============================================================================
+
+CREATE TABLE ai_model_providers (
+    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    alias_id    UUID NOT NULL REFERENCES ai_model_aliases(id) ON DELETE CASCADE,
+    provider    TEXT NOT NULL,
+    model       TEXT NOT NULL,
+    api_key_env TEXT NOT NULL,
+    priority    SMALLINT NOT NULL,
+    is_active   BOOLEAN NOT NULL DEFAULT true,
+    UNIQUE(alias_id, model)
+);
+
+COMMENT ON TABLE ai_model_providers IS 'Leverandør-modeller per alias, sortert etter priority (lavere = prøves først)';
+
+-- =============================================================================
+-- 3. Jobbtype → modellalias mapping
+-- =============================================================================
+
+CREATE TABLE ai_job_routing (
+    job_type    TEXT PRIMARY KEY,
+    alias       TEXT NOT NULL,
+    description TEXT
+);
+
+COMMENT ON TABLE ai_job_routing IS 'Hvilken modellalias brukes for hvilken jobbtype';
+
+-- =============================================================================
+-- 4. Seed: initielle aliaser og providers
+-- =============================================================================
+
+-- Rutine-alias (billig, høyt volum)
+INSERT INTO ai_model_aliases (alias, description) VALUES
+    ('sidelinja/rutine', 'Billig, høyt volum — tekstvasking, research, metadata'),
+    ('sidelinja/resonering', 'Høy kvalitet — resonneringsoppgaver, live-assistent');
+
+-- Providers for sidelinja/rutine
+INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) VALUES
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/rutine'), 'xai', 'xai/grok-4-1-fast-non-reasoning', 'XAI_API_KEY', 1),
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/rutine'), 'gemini', 'gemini/gemini-2.5-flash-lite', 'GEMINI_API_KEY', 2),
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/rutine'), 'gemini', 'gemini/gemini-flash-lite-latest', 'GEMINI_API_KEY', 3),
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/rutine'), 'gemini', 'gemini/gemini-flash-latest', 'GEMINI_API_KEY', 4),
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/rutine'), 'openrouter', 'openrouter/google/gemini-2.5-flash', 'OPENROUTER_API_KEY', 5);
+
+-- Providers for sidelinja/resonering
+INSERT INTO ai_model_providers (alias_id, provider, model, api_key_env, priority) VALUES
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/resonering'), 'anthropic', 'anthropic/claude-sonnet-4-20250514', 'ANTHROPIC_API_KEY', 1),
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/resonering'), 'openrouter', 'openrouter/anthropic/claude-sonnet-4', 'OPENROUTER_API_KEY', 2),
+    ((SELECT id FROM ai_model_aliases WHERE alias = 'sidelinja/resonering'), 'openrouter', 'openrouter/google/gemini-2.5-flash', 'OPENROUTER_API_KEY', 3);
+
+-- Jobbtype → alias routing
+INSERT INTO ai_job_routing (job_type, alias, description) VALUES
+    ('ai_text_process', 'sidelinja/rutine', 'Tekstvasking og ✨-behandling, høyt volum'),
+    ('whisper_postprocess', 'sidelinja/rutine', 'Transkripsjonsvasking etter Whisper'),
+    ('research_clip', 'sidelinja/rutine', 'Research-oppsummering'),
+    ('live_factoid_eval', 'sidelinja/resonering', 'Faktoid-vurdering under live sending — krever presisjon'),
+    ('agent_respond', 'sidelinja/resonering', 'Claude chat-agent svar');
+
+-- =============================================================================
+-- 5. Oppdater agent_identities config til ny alias
+-- =============================================================================
+
+UPDATE agent_identities
+SET config = jsonb_set(config, '{model_alias}', '"sidelinja/resonering"')
+WHERE agent_key = 'claude-main'
+  AND config->>'model_alias' = 'resonering';
+
+-- =============================================================================
+-- 6. Tilganger
+-- =============================================================================
+
+GRANT SELECT ON ai_model_aliases TO synops_reader;
+GRANT SELECT ON ai_model_providers TO synops_reader;
+GRANT SELECT ON ai_job_routing TO synops_reader;
diff --git a/scripts/generate-litellm-config.sh b/scripts/generate-litellm-config.sh
new file mode 100755
index 0000000..6fd8bf6
--- /dev/null
+++ b/scripts/generate-litellm-config.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+# Genererer LiteLLM config.yaml fra ai_model_aliases/providers i PostgreSQL.
+# Kjøres ved oppstart, ved endringer i admin-panelet, eller manuelt.
+#
+# Bruk:
+#   ./scripts/generate-litellm-config.sh [--restart]
+#
+#   --restart: restart ai-gateway containeren etter generering.
+
+set -euo pipefail
+
+CONFIG_FILE="/srv/synops/config/litellm/config.yaml"
+DOCKER_CONTAINER="sidelinja-ai-gateway-1"
+PG_CONTAINER="sidelinja-postgres-1"
+PG_USER="sidelinja"
+PG_DB="synops"
+ENV_FILE="/srv/synops/.env"
+
+read_env() { grep "^$1=" "$ENV_FILE" 2>/dev/null | head -1 | cut -d= -f2- || true; }
+
+psql_cmd() {
+  docker exec "$PG_CONTAINER" psql -U "$PG_USER" -d "$PG_DB" -tAF '|' -c "$1"
+}
+
+# Hent aktive aliaser og deres providers, sortert etter alias + priority
+PROVIDERS=$(psql_cmd "
+  SELECT a.alias, p.model, p.api_key_env, p.priority
+  FROM ai_model_providers p
+  JOIN ai_model_aliases a ON p.alias_id = a.id
+  WHERE a.is_active = true AND p.is_active = true
+  ORDER BY a.alias, p.priority;
+")
+
+if [ -z "$PROVIDERS" ]; then
+  echo "FEIL: Ingen aktive providers funnet i databasen." >&2
+  exit 1
+fi
+
+# Generer YAML — filtrerer bort providers med tomme API-nøkler
+{
+  echo "model_list:"
+
+  current_alias=""
+  included=0
+  while IFS='|' read -r alias model api_key_env priority; do
+    # Sjekk at API-nøkkelen finnes og er ikke-tom i .env
+    key_value=$(read_env "$api_key_env")
+    if [ -z "$key_value" ]; then
+      echo "  # HOPPET OVER: ${model} (${api_key_env} ikke satt)" >&2
+      continue
+    fi
+
+    if [ "$alias" != "$current_alias" ]; then
+      echo "  # === ${alias} ==="
+      current_alias="$alias"
+    fi
+    # priority i DB er 1-basert, LiteLLM priority er 0-basert (lavere = høyere)
+    litellm_priority=$((priority - 1))
+    cat <<EOF
+  - model_name: "${alias}"
+    litellm_params:
+      model: "${model}"
+      api_key: "os.environ/${api_key_env}"
+    model_info:
+      priority: ${litellm_priority}
+EOF
+    included=$((included + 1))
+  done <<< "$PROVIDERS"
+
+  if [ "$included" -eq 0 ]; then
+    echo "FEIL: Ingen providers med gyldige API-nøkler funnet." >&2
+    exit 1
+  fi
+
+  cat <<'FOOTER'
+
+router_settings:
+  routing_strategy: "simple-shuffle"
+  num_retries: 3
+  timeout: 60
+  allowed_fails: 1
+  retry_after: 5
+  enable_pre_call_checks: true
+
+general_settings:
+  master_key: "os.environ/LITELLM_MASTER_KEY"
+FOOTER
+} | sudo tee "$CONFIG_FILE" > /dev/null
+
+echo "Config generert: $CONFIG_FILE"
+
+# Eventuelt restart
+if [[ "${1:-}" == "--restart" ]]; then
+  echo "Restarter $DOCKER_CONTAINER..."
+  sudo docker restart "$DOCKER_CONTAINER" > /dev/null
+  echo "Ferdig."
+fi
diff --git a/tasks.md b/tasks.md
index d4d8aa7..96fcd87 100644
--- a/tasks.md
+++ b/tasks.md
@@ -116,8 +116,7 @@ Uavhengige faser kan fortsatt plukkes.
 
 ## Fase 10: AI og beriking
 
-- [~] 10.1 LiteLLM oppsett: Docker-container, API-nøkler, modell-routing. Ref: `docs/infra/ai_gateway.md`.
-  > Påbegynt: 2026-03-17T23:03
+- [x] 10.1 LiteLLM oppsett: Docker-container, API-nøkler, modell-routing. Ref: `docs/infra/ai_gateway.md`.
 - [ ] 10.2 AI-foreslåtte edges: maskinrommet sender innhold til LLM → foreslår mentions, topics.
 - [ ] 10.3 Oppsummering: kommunikasjonsnode → AI-generert sammendrag som ny node.
 - [ ] 10.4 TTS: tekst → lyd via ElevenLabs. Mottaker-preferanse i metadata.