synops/scripts/benchmark-models.sh
vegard 4be36857e3 Legg til modell-benchmark: script + foreløpig rapport
benchmark-models.sh: sekvensielt 3×3×2 matrise (model × effort × fil)
Foreløpig rapport med Haiku/Sonnet/Opus medium-effort resultater.
Full matrise kjøres kl 04 når serveren er rolig.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 14:28:03 +00:00

73 lines
2.3 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Benchmark every model × effort level sequentially.
# Run it when the server is quiet (no task runner, no users).
#
# Usage:
#   ./scripts/benchmark-models.sh
#
# Output:
#   - Raw model responses plus results.csv in a fresh
#     /tmp/benchmark-<YYYYMMDD-HHMM>-XXXXXX directory.
#   - A copy of results.csv in docs/erfaringer/benchmark_<YYYYMMDD>.csv.
#   NOTE: this script does not itself write docs/erfaringer/modell_benchmark.md.
set -euo pipefail

# Repository root: the parent of the directory this script lives in.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
readonly ROOT

# Files the models are asked to review, relative to ROOT.
readonly SIMPLE_FILE="tools/synops-transcribe/src/main.rs"
readonly COMPLEX_FILE="tools/synops-audio/src/main.rs"

# printf template; the single %s is replaced with the file path.
readonly PROMPT_BASE="Les %s nøye. Gi en vurdering: 1) Hva gjør verktøyet? 2) Er det bugs? 3) Hva kan forbedres? Maks 300 ord."

# Short model name -> model identifier passed to `claude --model`.
declare -rA MODELS=(
  [haiku]="claude-haiku-4-5-20251001"
  [sonnet]="claude-sonnet-4-6"
  [opus]="claude-opus-4-6"
)

# Effort level -> text prepended to the prompt (empty means no prefix).
declare -rA EFFORT_PREFIX=(
  [low]="Svar kort og overfladisk. Ikke les grundig."
  [medium]=""
  [high]="Vær ekstremt grundig. Les hver linje nøye. Tenk dypt på edge cases."
)

# Print an error message to stderr and abort.
die() {
  printf 'FEIL: %s\n' "$*" >&2
  exit 1
}

#######################################
# Derive a label that is unique per benchmarked file.
# Both inputs are called main.rs, so the bare basename would collide and the
# second file's output would overwrite the first. For paths of the form
# <dir>/<tool>/src/<file> the <tool> component is used; otherwise the
# basename without its .rs suffix.
# Arguments: $1 - path to the file
# Outputs:   the label on stdout
#######################################
label_for() {
  local path=$1
  local label
  if [[ "$path" == */src/* ]]; then
    label=${path%%/src/*}
    label=${label##*/}
  else
    label=${path##*/}
    label=${label%.rs}
  fi
  printf '%s\n' "$label"
}

#######################################
# Run the full file × model × effort matrix, print a summary table and copy
# the CSV into the repository.
# Globals: ROOT, SIMPLE_FILE, COMPLEX_FILE, PROMPT_BASE, MODELS,
#          EFFORT_PREFIX (all read-only)
#######################################
main() {
  local outdir csv file file_label base_prompt model_name effort prefix prompt
  local outfile start_time end_time elapsed words bugs rc stamp dest_dir

  # Fail early instead of producing a matrix of meaningless rows.
  command -v claude > /dev/null || die "fant ikke 'claude' i PATH"
  cd "$ROOT" || die "kan ikke bytte til $ROOT"
  for file in "$SIMPLE_FILE" "$COMPLEX_FILE"; do
    [[ -f "$file" ]] || die "finner ikke $ROOT/$file"
  done

  # mktemp gives an unpredictable, collision-free directory even when the
  # script is started twice within the same minute.
  outdir=$(mktemp -d "/tmp/benchmark-$(date +%Y%m%d-%H%M)-XXXXXX")
  csv="$outdir/results.csv"
  printf '%s\n' "modell,effort,fil,tid_sek,ord,bugs_nøkkelord" > "$csv"

  for file in "$SIMPLE_FILE" "$COMPLEX_FILE"; do
    file_label=$(label_for "$file")
    # PROMPT_BASE is a trusted constant used deliberately as the format.
    # shellcheck disable=SC2059
    base_prompt=$(printf "$PROMPT_BASE" "$file")

    # Explicit lists: associative arrays have no guaranteed iteration order.
    for model_name in haiku sonnet opus; do
      for effort in low medium high; do
        prefix="${EFFORT_PREFIX[$effort]}"
        if [[ -n "$prefix" ]]; then
          prompt="$prefix $base_prompt"
        else
          prompt="$base_prompt"
        fi

        outfile="$outdir/${file_label}_${model_name}_${effort}.txt"
        echo "[$(date +%H:%M:%S)] $file_label / $model_name / $effort ..."

        start_time=$(date +%s)
        # Best effort: a failing run must not abort the whole matrix, but it
        # is reported so the row can be discounted afterwards.
        rc=0
        claude -p --model "${MODELS[$model_name]}" \
          --dangerously-skip-permissions "$prompt" > "$outfile" 2>&1 || rc=$?
        end_time=$(date +%s)
        elapsed=$((end_time - start_time))
        if ((rc != 0)); then
          printf 'ADVARSEL: claude avsluttet med status %d; se %s\n' \
            "$rc" "$outfile" >&2
        fi

        words=$(wc -w < "$outfile" || echo 0)
        words=${words//[[:space:]]/} # some wc implementations pad the count

        # grep -c counts matching LINES (not keyword occurrences). It prints
        # "0" itself and exits 1 when nothing matches, so the fallback must
        # not print a second "0".
        bugs=$(grep -ciE 'bug|feil|race|overflow|crash|problem|sårbar' "$outfile" || true)
        bugs=${bugs:-0}

        printf '%s\n' "$model_name,$effort,$file_label,$elapsed,$words,$bugs" >> "$csv"
        echo "${elapsed}s, ${words} ord, ${bugs} bug-nøkkelord"
      done
    done
  done

  echo ""
  echo "=== RESULTATER ==="
  # A missing `column` must not abort the script before the CSV is copied.
  if command -v column > /dev/null; then
    column -t -s',' "$csv"
  else
    cat "$csv"
  fi
  echo ""
  echo "Rå resultater: $outdir/"
  echo "CSV: $csv"

  # Copy the CSV into the repo for reference. The date is computed once so
  # the printed name always matches the file that was written.
  stamp=$(date +%Y%m%d)
  dest_dir="$ROOT/docs/erfaringer"
  mkdir -p "$dest_dir"
  cp "$csv" "$dest_dir/benchmark_${stamp}.csv"
  echo "Kopiert til docs/erfaringer/benchmark_${stamp}.csv"
}

main "$@"