From 67bc564f92167a5d754b8d31844e4ccb2f83ab8b Mon Sep 17 00:00:00 2001
From: vegard
Date: Wed, 18 Mar 2026 15:15:36 +0000
Subject: [PATCH] =?UTF-8?q?Benchmark:=20l=C3=A5s=20til=20git-ref,=20advar?=
 =?UTF-8?q?ved=20dirty=20tree?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Alle modeller testes mot nøyaktig samme kildekode.
Ref og tidsstempel logges i CSV-header.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scripts/benchmark-models.sh | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/scripts/benchmark-models.sh b/scripts/benchmark-models.sh
index 6ecb315..088bef4 100755
--- a/scripts/benchmark-models.sh
+++ b/scripts/benchmark-models.sh
@@ -9,6 +9,30 @@ set -euo pipefail
 
 ROOT="$(cd "$(dirname "$0")/.." && pwd)"
 
+# Pin the run to one git ref so every model is benchmarked against exactly
+# the same source. Override with the BENCHMARK_REF env var; defaults to the
+# HEAD commit at launch time.
+BENCHMARK_REF="${BENCHMARK_REF:-$(git -C "$ROOT" rev-parse HEAD)}"
+# Take the timestamp once so the console banner and the CSV header agree.
+BENCHMARK_TS="$(date -Iseconds)"
+echo "=== Benchmark kjører mot git ref: $BENCHMARK_REF ==="
+echo "=== Tidspunkt: $BENCHMARK_TS ==="
+
+# Nothing in this change checks out BENCHMARK_REF, so the logged ref is only
+# accurate if it resolves to the commit that is currently checked out.
+head_commit="$(git -C "$ROOT" rev-parse --verify --quiet HEAD || true)"
+ref_commit="$(git -C "$ROOT" rev-parse --verify --quiet "${BENCHMARK_REF}^{commit}" || true)"
+if [[ -z "$ref_commit" || "$ref_commit" != "$head_commit" ]]; then
+  echo "ADVARSEL: BENCHMARK_REF ($BENCHMARK_REF) tilsvarer ikke HEAD." >&2
+fi
+
+# Warn (do not abort) when the working tree differs from HEAD. Warnings go to
+# stderr, and git's own error output is no longer discarded.
+if ! git -C "$ROOT" diff --quiet HEAD; then
+  echo "ADVARSEL: working tree har ucommittede endringer." >&2
+  echo "Resultatene kan avvike fra ref $BENCHMARK_REF." >&2
+fi
+
 SIMPLE_FILE="tools/synops-transcribe/src/main.rs"
 COMPLEX_FILE="tools/synops-audio/src/main.rs"
 PROMPT_BASE="Les %s nøye. Gi en vurdering: 1) Hva gjør verktøyet? 2) Er det bugs? 3) Hva kan forbedres? Maks 300 ord."
@@ -28,7 +52,11 @@ declare -A EFFORT_PREFIX=(
 
 OUTDIR="/tmp/benchmark-$(date +%Y%m%d-%H%M)"
 mkdir -p "$OUTDIR"
-echo "modell,effort,fil,tid_sek,ord,bugs_nøkkelord" > "$OUTDIR/results.csv"
+{
+  echo "# git_ref: $BENCHMARK_REF"
+  echo "# tidspunkt: $BENCHMARK_TS"
+  echo "modell,effort,fil,tid_sek,ord,bugs_nøkkelord"
+} > "$OUTDIR/results.csv"
 
 for file in "$SIMPLE_FILE" "$COMPLEX_FILE"; do
   file_label=$(basename "$file" .rs)