Implementer planmodus og selvkritikk i synops-agent

Extended ReAct Loop: tenk → plan → selvkritikk → utfør. --plan tvinger planmodus, --no-plan deaktiverer. Uten flagg: auto-detect basert på oppgavens nøkkelord/lengde. /plan kommando i interaktiv modus.
2026-03-19 18:20:54 +00:00 · 2026-03-19 18:20:54 +00:00 · 6bd2787ee8
commit 6bd2787ee8
parent 3704a02921
1 changed file with 249 additions and 20 deletions
--- a/tools/synops-agent/src/main.rs
+++ b/tools/synops-agent/src/main.rs
@@ -69,6 +69,14 @@ struct Cli {
    /// Maks antall retries ved API-feil (default: 3)
    #[arg(long, default_value = "3")]
    max_retries: u32,
+
+    /// Tving planmodus: tenk → plan → selvkritikk → utfør
+    #[arg(long)]
+    plan: bool,
+
+    /// Deaktiver automatisk planmodus (brukes normalt for komplekse oppgaver)
+    #[arg(long)]
+    no_plan: bool,
 }

 /// Shared state for the agent session.
@@ -89,6 +97,15 @@ struct AgentSession {
    interrupted: Arc<AtomicBool>,
 }

+/// Outcome of the plan-mode decision for a task, as returned by `AgentSession::should_plan`.
+#[derive(Debug, PartialEq)]
+enum PlanDecision {
+    /// No planning needed — the task is executed directly as a single turn.
+    Direct,
+    /// Use plan mode (Extended ReAct Loop): think → plan → self-critique → execute.
+    Plan,
+}
+
 /// Result of running one turn of the agent loop.
 enum TurnResult {
    /// Agent finished (no more tool calls).
@@ -244,6 +261,154 @@ impl AgentSession {
        }
    }

+    /// Run the Extended ReAct Loop (think → plan → self-critique → execute) as three consecutive turns.
+    ///
+    /// 1. Push a planning prompt built from `task` (asks for a numbered plan, no changes yet) and run a turn.
+    /// 2. Push a self-critique prompt and run a second turn; a phase that does not end in `TurnResult::Done` is returned at once.
+    /// 3. Push the "plan approved" prompt and return the final turn's result; errors from `run_turn` propagate to the caller.
+    async fn run_with_plan(&mut self, task: &str) -> Result<TurnResult, Box<dyn std::error::Error>> {
+        // Phase 1: Think + Plan — the task text is embedded verbatim in the (Norwegian) planning prompt
+        tracing::info!("Planmodus: Fase 1 — analyse og planlegging");
+        let plan_prompt = format!(
+            "Du skal løse denne oppgaven:\n\n{}\n\n\
+             Før du handler, lag en steg-for-steg plan. For hvert steg:\n\
+             - Beskriv hva som skal gjøres\n\
+             - Hvilke filer som berøres\n\
+             - Mulige risikoer\n\n\
+             Bruk verktøy for å undersøke kodebasen (les filer, søk, etc.) \
+             for å lage en informert plan. Ikke gjør endringer ennå.\n\n\
+             Avslutt med en nummerert plan i dette formatet:\n\
+             ## Plan\n\
+             1. [steg]\n\
+             2. [steg]\n\
+             ...",
+            task
+        );
+
+        self.messages.push(Message {
+            role: "user".into(),
+            content: Some(plan_prompt),
+            tool_calls: None,
+            tool_call_id: None,
+        });
+
+        // Planning turn: the prompt asks the LLM to research with tools but change nothing; a non-Done result ends plan mode early
+        let plan_result = self.run_turn().await?;
+        if !matches!(plan_result, TurnResult::Done) {
+            return Ok(plan_result);
+        }
+
+        // Phase 2: Self-critique — pushed onto the same `self.messages` history as the planning prompt
+        tracing::info!("Planmodus: Fase 2 — selvkritikk");
+        self.messages.push(Message {
+            role: "user".into(),
+            content: Some(
+                "Kritiser planen din. Vurder:\n\
+                 - Er det steg som mangler?\n\
+                 - Er rekkefølgen riktig?\n\
+                 - Kan noe gå galt? Hva er fallback?\n\
+                 - Er det enklere alternativer?\n\
+                 - Berører endringene mer enn nødvendig?\n\n\
+                 Juster planen om nødvendig, og presenter den endelige \
+                 versjonen. Ikke implementer ennå."
+                    .into(),
+            ),
+            tool_calls: None,
+            tool_call_id: None,
+        });
+
+        let critique_result = self.run_turn().await?;
+        if !matches!(critique_result, TurnResult::Done) {
+            return Ok(critique_result);
+        }
+
+        // Phase 3: Execute — the result of this last turn (whatever variant) is what the caller receives
+        tracing::info!("Planmodus: Fase 3 — utføring");
+        self.messages.push(Message {
+            role: "user".into(),
+            content: Some(
+                "Planen er godkjent. Implementer den nå, steg for steg. \
+                 For hvert steg: utfør, verifiser at det fungerer, \
+                 og gå videre til neste. Rapporter fremgang underveis."
+                    .into(),
+            ),
+            tool_calls: None,
+            tool_call_id: None,
+        });
+
+        self.run_turn().await
+    }
+
+    /// Decide whether to use plan mode, using a keyword and length heuristic over the task text.
+    ///
+    /// Checked in order: a "simple" keyword at the very start of the lowercased task returns `Direct`; otherwise a
+    /// planning keyword anywhere in it returns `Plan`; otherwise a task longer than 500 bytes returns `Plan`; else `Direct`.
+    fn should_plan(task: &str) -> PlanDecision {
+        let task_lower = task.to_lowercase();
+
+        // Keywords that strongly suggest planning is needed (Norwegian and English; matched as substrings)
+        let plan_keywords = [
+            "implementer",
+            "implement",
+            "redesign",
+            "refaktorer",
+            "refactor",
+            "migrer",
+            "migrate",
+            "arkitektur",
+            "architecture",
+            "bygg",
+            "build",
+            "lag en",
+            "create a",
+            "sett opp",
+            "set up",
+            "endre alle",
+            "change all",
+            "oppdater alle",
+            "update all",
+        ];
+
+        // Keywords that suggest simple tasks (no plan needed); these are only matched as a prefix of the task
+        let simple_keywords = [
+            "les ",
+            "read ",
+            "vis ",
+            "show ",
+            "hva er",
+            "what is",
+            "forklar",
+            "explain",
+            "sjekk",
+            "check",
+            "finn",
+            "find",
+            "søk",
+            "search",
+        ];
+
+        // Simple tasks — direct execution; checked first, so a simple prefix wins even if a plan keyword follows
+        for kw in &simple_keywords {
+            if task_lower.starts_with(kw) {
+                return PlanDecision::Direct;
+            }
+        }
+
+        // Complex keywords — plan; substring match, so e.g. "rebuild" also matches "build"
+        for kw in &plan_keywords {
+            if task_lower.contains(kw) {
+                return PlanDecision::Plan;
+            }
+        }
+
+        // Length heuristic: long tasks are more likely to need planning (`len()` counts UTF-8 bytes, not characters)
+        if task.len() > 500 {
+            return PlanDecision::Plan;
+        }
+
+        PlanDecision::Direct
+    }
+
    /// Total input tokens across all models.
    fn total_input_tokens(&self) -> u64 {
        self.total_usage.values().map(|u| u.input_tokens).sum()
@@ -363,16 +528,32 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    if cli.interactive {
        run_interactive(&mut session).await?;
    } else {
-        // Batch mode: add task as user message and run
+        // Batch mode
        let task = cli.task.unwrap();
+
+        // Decide whether to use plan mode
+        let use_plan = if cli.plan {
+            true
+        } else if cli.no_plan {
+            false
+        } else {
+            AgentSession::should_plan(&task) == PlanDecision::Plan
+        };
+
+        let result = if use_plan {
+            tracing::info!("Bruker planmodus (Extended ReAct Loop)");
+            session.run_with_plan(&task).await?
+        } else {
+            // Direct execution
            session.messages.push(Message {
                role: "user".into(),
                content: Some(task),
                tool_calls: None,
                tool_call_id: None,
            });
+            session.run_turn().await?
+        };

-        let result = session.run_turn().await?;
        session.print_summary();

        if matches!(result, TurnResult::BudgetExhausted) {
@@ -494,6 +675,7 @@ async fn run_interactive(
            }
            "/help" => {
                eprintln!("Kommandoer:");
+                eprintln!("  /plan <oppgave> — Kjør med planmodus (tenk → plan → selvkritikk → utfør)");
                eprintln!("  /stats          — Vis token-forbruk og kostnad");
                eprintln!("  /clear          — Nullstill samtalen");
                eprintln!("  /help           — Vis denne hjelpen");
@@ -506,15 +688,28 @@ async fn run_interactive(
            _ => {}
        }

-        // Add user message and run turn
+        // Check for /plan command
+        let (use_plan, effective_input) = if let Some(plan_task) = trimmed.strip_prefix("/plan ") {
+            (true, plan_task.to_string())
+        } else {
+            (false, input.clone())
+        };
+
+        // Run with or without plan mode
+        let turn_result = if use_plan {
+            eprintln!("[planmodus aktivert]");
+            session.run_with_plan(&effective_input).await
+        } else {
            session.messages.push(Message {
                role: "user".into(),
-            content: Some(input),
+                content: Some(effective_input),
                tool_calls: None,
                tool_call_id: None,
            });
+            session.run_turn().await
+        };

-        match session.run_turn().await {
+        match turn_result {
            Ok(TurnResult::Done) => {
                // Normal completion
            }
@@ -645,3 +840,37 @@ async fn spawn_claude_code(

    Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_should_plan_complex_tasks() { // each task contains one of the planning keywords
+        assert_eq!(AgentSession::should_plan("implementer plan-modus i agenten"), PlanDecision::Plan);
+        assert_eq!(AgentSession::should_plan("refaktorer hele auth-systemet"), PlanDecision::Plan);
+        assert_eq!(AgentSession::should_plan("migrate database to new schema"), PlanDecision::Plan);
+        assert_eq!(AgentSession::should_plan("lag en ny CLI-kommando for export"), PlanDecision::Plan);
+        assert_eq!(AgentSession::should_plan("oppdater alle TODO-kommentarer i prosjektet"), PlanDecision::Plan);
+    }
+
+    #[test]
+    fn test_should_plan_simple_tasks() { // each task starts with one of the "simple" keywords
+        assert_eq!(AgentSession::should_plan("les filen src/main.rs"), PlanDecision::Direct);
+        assert_eq!(AgentSession::should_plan("vis innholdet i config"), PlanDecision::Direct);
+        assert_eq!(AgentSession::should_plan("hva er denne funksjonen?"), PlanDecision::Direct);
+        assert_eq!(AgentSession::should_plan("finn alle .rs filer"), PlanDecision::Direct);
+        assert_eq!(AgentSession::should_plan("sjekk om testen passerer"), PlanDecision::Direct);
+    }
+
+    #[test]
+    fn test_should_plan_long_tasks() { // no keyword matches, so only the length rule can trigger
+        let long_task = "x".repeat(600); // 600 bytes, above the 500-byte threshold
+        assert_eq!(AgentSession::should_plan(&long_task), PlanDecision::Plan);
+    }
+
+    #[test]
+    fn test_should_plan_ambiguous_defaults_to_direct() { // short task with no keyword from either list
+        assert_eq!(AgentSession::should_plan("fiks buggen i login"), PlanDecision::Direct);
+    }
+}