From 6bd2787ee8df1435b3d42b522c8dabe1642f3001 Mon Sep 17 00:00:00 2001
From: vegard <vnotnes@pm.me>
Date: Thu, 19 Mar 2026 18:20:54 +0000
Subject: [PATCH] Implementer planmodus og selvkritikk i synops-agent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extended ReAct Loop: tenk → plan → selvkritikk → utfør.
--plan tvinger planmodus, --no-plan deaktiverer.
Uten flagg: auto-detect basert på oppgavens nøkkelord/lengde.
/plan kommando i interaktiv modus.
---
 tools/synops-agent/src/main.rs | 269 ++++++++++++++++++++++++++++++---
 1 file changed, 249 insertions(+), 20 deletions(-)
diff --git a/tools/synops-agent/src/main.rs b/tools/synops-agent/src/main.rs
index fff78b5..41d0dde 100644
--- a/tools/synops-agent/src/main.rs
+++ b/tools/synops-agent/src/main.rs
@@ -69,6 +69,14 @@ struct Cli {
     /// Maks antall retries ved API-feil (default: 3)
     #[arg(long, default_value = "3")]
     max_retries: u32,
+
+    /// Tving planmodus: tenk → plan → selvkritikk → utfør
+    #[arg(long)]
+    plan: bool,
+
+    /// Deaktiver automatisk planmodus (brukes normalt for komplekse oppgaver)
+    #[arg(long)]
+    no_plan: bool,
 }
 
 /// Shared state for the agent session.
@@ -89,6 +97,15 @@ struct AgentSession {
     interrupted: Arc<AtomicBool>,
 }
 
+/// Outcome of the plan-mode heuristic for a single task.
+#[derive(Debug, PartialEq)]
+enum PlanDecision {
+    /// Run the task directly, without the planning phases.
+    Direct,
+    /// Run the task in plan mode (Extended ReAct Loop).
+    Plan,
+}
+
 /// Result of running one turn of the agent loop.
 enum TurnResult {
     /// Agent finished (no more tool calls).
@@ -244,6 +261,154 @@ impl AgentSession {
         }
     }
 
+    /// Drive a task through plan mode: think → plan → self-critique → execute.
+    ///
+    /// Three phases run back to back, each appending one user message to the
+    /// conversation and then running a turn:
+    ///
+    /// 1. research the codebase and draft a numbered plan,
+    /// 2. critique that plan and present the final version,
+    /// 3. implement the plan step by step.
+    ///
+    /// If phase 1 or 2 ends in anything other than `TurnResult::Done`, that
+    /// result is returned as-is and the remaining phases are skipped. Errors
+    /// from `run_turn` are propagated to the caller.
+    async fn run_with_plan(&mut self, task: &str) -> Result<TurnResult, Box<dyn std::error::Error>> {
+        // Every phase prompt is sent as a plain user message with no tool-call
+        // data attached, so build them all through one place.
+        let user_message = |text: String| Message {
+            role: "user".into(),
+            content: Some(text),
+            tool_calls: None,
+            tool_call_id: None,
+        };
+
+        // Phase 1: think + plan. The turn may use tools to inspect the codebase;
+        // the prompt itself asks the model not to make changes yet.
+        tracing::info!("Planmodus: Fase 1 — analyse og planlegging");
+        let planning_prompt = format!(
+            "Du skal løse denne oppgaven:\n\n{}\n\n\
+             Før du handler, lag en steg-for-steg plan. For hvert steg:\n\
+             - Beskriv hva som skal gjøres\n\
+             - Hvilke filer som berøres\n\
+             - Mulige risikoer\n\n\
+             Bruk verktøy for å undersøke kodebasen (les filer, søk, etc.) \
+             for å lage en informert plan. Ikke gjør endringer ennå.\n\n\
+             Avslutt med en nummerert plan i dette formatet:\n\
+             ## Plan\n\
+             1. [steg]\n\
+             2. [steg]\n\
+             ...",
+            task
+        );
+
+        self.messages.push(user_message(planning_prompt));
+        match self.run_turn().await? {
+            TurnResult::Done => {}
+            unfinished => return Ok(unfinished),
+        }
+
+        // Phase 2: self-critique. The model reviews the plan it just produced
+        // and presents the final version, still without implementing anything.
+        tracing::info!("Planmodus: Fase 2 — selvkritikk");
+        let critique_prompt =
+            "Kritiser planen din. Vurder:\n\
+             - Er det steg som mangler?\n\
+             - Er rekkefølgen riktig?\n\
+             - Kan noe gå galt? Hva er fallback?\n\
+             - Er det enklere alternativer?\n\
+             - Berører endringene mer enn nødvendig?\n\n\
+             Juster planen om nødvendig, og presenter den endelige \
+             versjonen. Ikke implementer ennå.";
+
+        self.messages.push(user_message(critique_prompt.into()));
+        match self.run_turn().await? {
+            TurnResult::Done => {}
+            unfinished => return Ok(unfinished),
+        }
+
+        // Phase 3: execute. Whatever this last turn yields is the result of the
+        // whole plan-mode run.
+        tracing::info!("Planmodus: Fase 3 — utføring");
+        let execute_prompt =
+            "Planen er godkjent. Implementer den nå, steg for steg. \
+             For hvert steg: utfør, verifiser at det fungerer, \
+             og gå videre til neste. Rapporter fremgang underveis.";
+
+        self.messages.push(user_message(execute_prompt.into()));
+        self.run_turn().await
+    }
+
+    /// Decide whether a task should run in plan mode.
+    ///
+    /// Heuristic, evaluated in order on the trimmed, lowercased task text:
+    /// 1. starts with a simple-task keyword (read, show, explain, ...) → `Direct`
+    /// 2. contains a planning keyword (implement, refactor, ...) → `Plan`
+    /// 3. is longer than `LONG_TASK_CHARS` characters → `Plan`
+    /// 4. anything else → `Direct`
+    ///
+    /// Length is counted in characters, not UTF-8 bytes, so non-ASCII text
+    /// (æ/ø/å) is not treated as longer than ASCII text of the same length.
+    fn should_plan(task: &str) -> PlanDecision {
+        // Tasks longer than this many characters are assumed to need a plan.
+        const LONG_TASK_CHARS: usize = 500;
+
+        // Keywords (Norwegian/English) that strongly suggest planning is needed.
+        const PLAN_KEYWORDS: &[&str] = &[
+            "implementer",
+            "implement",
+            "redesign",
+            "refaktorer",
+            "refactor",
+            "migrer",
+            "migrate",
+            "arkitektur",
+            "architecture",
+            "bygg",
+            "build",
+            "lag en",
+            "create a",
+            "sett opp",
+            "set up",
+            "endre alle",
+            "change all",
+            "oppdater alle",
+            "update all",
+        ];
+
+        // Leading keywords (Norwegian/English) that suggest a simple task.
+        const SIMPLE_KEYWORDS: &[&str] = &[
+            "les ",
+            "read ",
+            "vis ",
+            "show ",
+            "hva er",
+            "what is",
+            "forklar",
+            "explain",
+            "sjekk",
+            "check",
+            "finn",
+            "find",
+            "søk",
+            "search",
+        ];
+
+        // Surrounding whitespace must not defeat the prefix check or add length.
+        let task = task.trim();
+        let task_lower = task.to_lowercase();
+
+        if SIMPLE_KEYWORDS.iter().any(|kw| task_lower.starts_with(*kw)) {
+            return PlanDecision::Direct;
+        }
+
+        let has_plan_keyword = PLAN_KEYWORDS.iter().any(|kw| task_lower.contains(*kw));
+        if has_plan_keyword || task.chars().count() > LONG_TASK_CHARS {
+            return PlanDecision::Plan;
+        }
+        PlanDecision::Direct
+    }
+
     /// Total input tokens across all models.
     fn total_input_tokens(&self) -> u64 {
         self.total_usage.values().map(|u| u.input_tokens).sum()
@@ -363,16 +528,32 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     if cli.interactive {
         run_interactive(&mut session).await?;
     } else {
-        // Batch mode: add task as user message and run
+        // Batch mode
         let task = cli.task.unwrap();
-        session.messages.push(Message {
-            role: "user".into(),
-            content: Some(task),
-            tool_calls: None,
-            tool_call_id: None,
-        });
 
-        let result = session.run_turn().await?;
+        // Decide whether to use plan mode
+        let use_plan = if cli.plan {
+            true
+        } else if cli.no_plan {
+            false
+        } else {
+            AgentSession::should_plan(&task) == PlanDecision::Plan
+        };
+
+        let result = if use_plan {
+            tracing::info!("Bruker planmodus (Extended ReAct Loop)");
+            session.run_with_plan(&task).await?
+        } else {
+            // Direct execution
+            session.messages.push(Message {
+                role: "user".into(),
+                content: Some(task),
+                tool_calls: None,
+                tool_call_id: None,
+            });
+            session.run_turn().await?
+        };
+
         session.print_summary();
 
         if matches!(result, TurnResult::BudgetExhausted) {
@@ -494,10 +675,11 @@ async fn run_interactive(
             }
             "/help" => {
                 eprintln!("Kommandoer:");
-                eprintln!("  /stats    — Vis token-forbruk og kostnad");
-                eprintln!("  /clear    — Nullstill samtalen");
-                eprintln!("  /help     — Vis denne hjelpen");
-                eprintln!("  exit      — Avslutt");
+                eprintln!("  /plan <oppgave> — Kjør med planmodus (tenk → plan → selvkritikk → utfør)");
+                eprintln!("  /stats          — Vis token-forbruk og kostnad");
+                eprintln!("  /clear          — Nullstill samtalen");
+                eprintln!("  /help           — Vis denne hjelpen");
+                eprintln!("  exit            — Avslutt");
                 eprintln!();
                 eprintln!("Multi-line: Avslutt linje med \\ for å fortsette.");
                 eprintln!("Ctrl+C avbryter pågående LLM-kall/verktøy.");
@@ -506,15 +688,28 @@ async fn run_interactive(
             _ => {}
         }
 
-        // Add user message and run turn
-        session.messages.push(Message {
-            role: "user".into(),
-            content: Some(input),
-            tool_calls: None,
-            tool_call_id: None,
-        });
+        // Check for /plan command
+        let (use_plan, effective_input) = if let Some(plan_task) = trimmed.strip_prefix("/plan ") {
+            (true, plan_task.to_string())
+        } else {
+            (false, input.clone())
+        };
 
-        match session.run_turn().await {
+        // Run with or without plan mode
+        let turn_result = if use_plan {
+            eprintln!("[planmodus aktivert]");
+            session.run_with_plan(&effective_input).await
+        } else {
+            session.messages.push(Message {
+                role: "user".into(),
+                content: Some(effective_input),
+                tool_calls: None,
+                tool_call_id: None,
+            });
+            session.run_turn().await
+        };
+
+        match turn_result {
             Ok(TurnResult::Done) => {
                 // Normal completion
             }
@@ -645,3 +840,37 @@ async fn spawn_claude_code(
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::PlanDecision::{Direct, Plan};
+    use super::*;
+
+    #[test]
+    fn test_should_plan_complex_tasks() {
+        assert_eq!(AgentSession::should_plan("implementer plan-modus i agenten"), Plan);
+        assert_eq!(AgentSession::should_plan("refaktorer hele auth-systemet"), Plan);
+        assert_eq!(AgentSession::should_plan("migrate database to new schema"), Plan);
+        assert_eq!(AgentSession::should_plan("lag en ny CLI-kommando for export"), Plan);
+        assert_eq!(AgentSession::should_plan("oppdater alle TODO-kommentarer i prosjektet"), Plan);
+    }
+
+    #[test]
+    fn test_should_plan_simple_tasks() {
+        assert_eq!(AgentSession::should_plan("les filen src/main.rs"), Direct);
+        assert_eq!(AgentSession::should_plan("vis innholdet i config"), Direct);
+        assert_eq!(AgentSession::should_plan("hva er denne funksjonen?"), Direct);
+        assert_eq!(AgentSession::should_plan("finn alle .rs filer"), Direct);
+        assert_eq!(AgentSession::should_plan("sjekk om testen passerer"), Direct);
+    }
+
+    #[test]
+    fn test_should_plan_long_tasks() {
+        assert_eq!(AgentSession::should_plan(&"x".repeat(600)), Plan);
+    }
+
+    #[test]
+    fn test_should_plan_ambiguous_defaults_to_direct() {
+        assert_eq!(AgentSession::should_plan("fiks buggen i login"), Direct);
+    }
+}