From 6bd2787ee8df1435b3d42b522c8dabe1642f3001 Mon Sep 17 00:00:00 2001 From: vegard Date: Thu, 19 Mar 2026 18:20:54 +0000 Subject: [PATCH] Implementer planmodus og selvkritikk i synops-agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extended ReAct Loop: tenk → plan → selvkritikk → utfør. --plan tvinger planmodus, --no-plan deaktiverer. Uten flagg: auto-detect basert på oppgavens nøkkelord/lengde. /plan kommando i interaktiv modus. --- tools/synops-agent/src/main.rs | 269 ++++++++++++++++++++++++++++++--- 1 file changed, 249 insertions(+), 20 deletions(-) diff --git a/tools/synops-agent/src/main.rs b/tools/synops-agent/src/main.rs index fff78b5..41d0dde 100644 --- a/tools/synops-agent/src/main.rs +++ b/tools/synops-agent/src/main.rs @@ -69,6 +69,14 @@ struct Cli { /// Maks antall retries ved API-feil (default: 3) #[arg(long, default_value = "3")] max_retries: u32, + + /// Tving planmodus: tenk → plan → selvkritikk → utfør + #[arg(long)] + plan: bool, + + /// Deaktiver automatisk planmodus (brukes normalt for komplekse oppgaver) + #[arg(long)] + no_plan: bool, } /// Shared state for the agent session. @@ -89,6 +97,15 @@ struct AgentSession { interrupted: Arc, } +/// Whether to use plan mode for a task. +#[derive(Debug, PartialEq)] +enum PlanDecision { + /// No planning needed — execute directly. + Direct, + /// Use plan mode (Extended ReAct Loop). + Plan, +} + /// Result of running one turn of the agent loop. enum TurnResult { /// Agent finished (no more tool calls). @@ -244,6 +261,154 @@ impl AgentSession { } } + /// Run the Extended ReAct Loop: think → plan → self-critique → execute. + /// + /// 1. Ask the LLM to analyze the task and create a step-by-step plan + /// 2. Ask the LLM to critique its own plan + /// 3. Execute with the refined plan as context + async fn run_with_plan(&mut self, task: &str) -> Result> { + // Phase 1: Think + Plan + tracing::info!("Planmodus: Fase 1 — analyse og planlegging"); + let plan_prompt = format!( + "Du skal løse denne oppgaven:\n\n{}\n\n\ + Før du handler, lag en steg-for-steg plan. For hvert steg:\n\ + - Beskriv hva som skal gjøres\n\ + - Hvilke filer som berøres\n\ + - Mulige risikoer\n\n\ + Bruk verktøy for å undersøke kodebasen (les filer, søk, etc.) \ + for å lage en informert plan. Ikke gjør endringer ennå.\n\n\ + Avslutt med en nummerert plan i dette formatet:\n\ + ## Plan\n\ + 1. [steg]\n\ + 2. [steg]\n\ + ...", + task + ); + + self.messages.push(Message { + role: "user".into(), + content: Some(plan_prompt), + tool_calls: None, + tool_call_id: None, + }); + + // Let the LLM explore and create a plan (allows tool use for research) + let plan_result = self.run_turn().await?; + if !matches!(plan_result, TurnResult::Done) { + return Ok(plan_result); + } + + // Phase 2: Self-critique + tracing::info!("Planmodus: Fase 2 — selvkritikk"); + self.messages.push(Message { + role: "user".into(), + content: Some( + "Kritiser planen din. Vurder:\n\ + - Er det steg som mangler?\n\ + - Er rekkefølgen riktig?\n\ + - Kan noe gå galt? Hva er fallback?\n\ + - Er det enklere alternativer?\n\ + - Berører endringene mer enn nødvendig?\n\n\ + Juster planen om nødvendig, og presenter den endelige \ + versjonen. Ikke implementer ennå." + .into(), + ), + tool_calls: None, + tool_call_id: None, + }); + + let critique_result = self.run_turn().await?; + if !matches!(critique_result, TurnResult::Done) { + return Ok(critique_result); + } + + // Phase 3: Execute + tracing::info!("Planmodus: Fase 3 — utføring"); + self.messages.push(Message { + role: "user".into(), + content: Some( + "Planen er godkjent. Implementer den nå, steg for steg. \ + For hvert steg: utfør, verifiser at det fungerer, \ + og gå videre til neste. Rapporter fremgang underveis." + .into(), + ), + tool_calls: None, + tool_call_id: None, + }); + + self.run_turn().await + } + + /// Decide whether to use plan mode based on task analysis. + /// + /// Heuristic: if the task mentions multiple files, complex operations, + /// or design/architecture keywords, use plan mode. + fn should_plan(task: &str) -> PlanDecision { + let task_lower = task.to_lowercase(); + + // Keywords that strongly suggest planning is needed + let plan_keywords = [ + "implementer", + "implement", + "redesign", + "refaktorer", + "refactor", + "migrer", + "migrate", + "arkitektur", + "architecture", + "bygg", + "build", + "lag en", + "create a", + "sett opp", + "set up", + "endre alle", + "change all", + "oppdater alle", + "update all", + ]; + + // Keywords that suggest simple tasks (no plan needed) + let simple_keywords = [ + "les ", + "read ", + "vis ", + "show ", + "hva er", + "what is", + "forklar", + "explain", + "sjekk", + "check", + "finn", + "find", + "søk", + "search", + ]; + + // Simple tasks — direct execution + for kw in &simple_keywords { + if task_lower.starts_with(kw) { + return PlanDecision::Direct; + } + } + + // Complex keywords — plan + for kw in &plan_keywords { + if task_lower.contains(kw) { + return PlanDecision::Plan; + } + } + + // Length heuristic: long tasks are more likely to need planning + if task.len() > 500 { + return PlanDecision::Plan; + } + + PlanDecision::Direct + } + /// Total input tokens across all models. fn total_input_tokens(&self) -> u64 { self.total_usage.values().map(|u| u.input_tokens).sum() @@ -363,16 +528,32 @@ async fn main() -> Result<(), Box> { if cli.interactive { run_interactive(&mut session).await?; } else { - // Batch mode: add task as user message and run + // Batch mode let task = cli.task.unwrap(); - session.messages.push(Message { - role: "user".into(), - content: Some(task), - tool_calls: None, - tool_call_id: None, - }); - let result = session.run_turn().await?; + // Decide whether to use plan mode + let use_plan = if cli.plan { + true + } else if cli.no_plan { + false + } else { + AgentSession::should_plan(&task) == PlanDecision::Plan + }; + + let result = if use_plan { + tracing::info!("Bruker planmodus (Extended ReAct Loop)"); + session.run_with_plan(&task).await? + } else { + // Direct execution + session.messages.push(Message { + role: "user".into(), + content: Some(task), + tool_calls: None, + tool_call_id: None, + }); + session.run_turn().await? + }; + session.print_summary(); if matches!(result, TurnResult::BudgetExhausted) { @@ -494,10 +675,11 @@ async fn run_interactive( } "/help" => { eprintln!("Kommandoer:"); - eprintln!(" /stats — Vis token-forbruk og kostnad"); - eprintln!(" /clear — Nullstill samtalen"); - eprintln!(" /help — Vis denne hjelpen"); - eprintln!(" exit — Avslutt"); + eprintln!(" /plan — Kjør med planmodus (tenk → plan → selvkritikk → utfør)"); + eprintln!(" /stats — Vis token-forbruk og kostnad"); + eprintln!(" /clear — Nullstill samtalen"); + eprintln!(" /help — Vis denne hjelpen"); + eprintln!(" exit — Avslutt"); eprintln!(); eprintln!("Multi-line: Avslutt linje med \\ for å fortsette."); eprintln!("Ctrl+C avbryter pågående LLM-kall/verktøy."); @@ -506,15 +688,28 @@ async fn run_interactive( _ => {} } - // Add user message and run turn - session.messages.push(Message { - role: "user".into(), - content: Some(input), - tool_calls: None, - tool_call_id: None, - }); + // Check for /plan command + let (use_plan, effective_input) = if let Some(plan_task) = trimmed.strip_prefix("/plan ") { + (true, plan_task.to_string()) + } else { + (false, input.clone()) + }; - match session.run_turn().await { + // Run with or without plan mode + let turn_result = if use_plan { + eprintln!("[planmodus aktivert]"); + session.run_with_plan(&effective_input).await + } else { + session.messages.push(Message { + role: "user".into(), + content: Some(effective_input), + tool_calls: None, + tool_call_id: None, + }); + session.run_turn().await + }; + + match turn_result { Ok(TurnResult::Done) => { // Normal completion } @@ -645,3 +840,37 @@ async fn spawn_claude_code( Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_should_plan_complex_tasks() { + assert_eq!(AgentSession::should_plan("implementer plan-modus i agenten"), PlanDecision::Plan); + assert_eq!(AgentSession::should_plan("refaktorer hele auth-systemet"), PlanDecision::Plan); + assert_eq!(AgentSession::should_plan("migrate database to new schema"), PlanDecision::Plan); + assert_eq!(AgentSession::should_plan("lag en ny CLI-kommando for export"), PlanDecision::Plan); + assert_eq!(AgentSession::should_plan("oppdater alle TODO-kommentarer i prosjektet"), PlanDecision::Plan); + } + + #[test] + fn test_should_plan_simple_tasks() { + assert_eq!(AgentSession::should_plan("les filen src/main.rs"), PlanDecision::Direct); + assert_eq!(AgentSession::should_plan("vis innholdet i config"), PlanDecision::Direct); + assert_eq!(AgentSession::should_plan("hva er denne funksjonen?"), PlanDecision::Direct); + assert_eq!(AgentSession::should_plan("finn alle .rs filer"), PlanDecision::Direct); + assert_eq!(AgentSession::should_plan("sjekk om testen passerer"), PlanDecision::Direct); + } + + #[test] + fn test_should_plan_long_tasks() { + let long_task = "x".repeat(600); + assert_eq!(AgentSession::should_plan(&long_task), PlanDecision::Plan); + } + + #[test] + fn test_should_plan_ambiguous_defaults_to_direct() { + assert_eq!(AgentSession::should_plan("fiks buggen i login"), PlanDecision::Direct); + } +}