Implementer planmodus og selvkritikk i synops-agent

Extended ReAct Loop: tenk → plan → selvkritikk → utfør.
--plan tvinger planmodus, --no-plan deaktiverer.
Uten flagg: auto-detect basert på oppgavens nøkkelord/lengde.
/plan kommando i interaktiv modus.
This commit is contained in:
vegard 2026-03-19 18:20:54 +00:00
parent 3704a02921
commit 6bd2787ee8

View file

@ -69,6 +69,14 @@ struct Cli {
/// Maks antall retries ved API-feil (default: 3) /// Maks antall retries ved API-feil (default: 3)
#[arg(long, default_value = "3")] #[arg(long, default_value = "3")]
max_retries: u32, max_retries: u32,
/// Tving planmodus: tenk → plan → selvkritikk → utfør
#[arg(long)]
plan: bool,
/// Deaktiver automatisk planmodus (brukes normalt for komplekse oppgaver)
#[arg(long)]
no_plan: bool,
} }
/// Shared state for the agent session. /// Shared state for the agent session.
@ -89,6 +97,15 @@ struct AgentSession {
interrupted: Arc<AtomicBool>, interrupted: Arc<AtomicBool>,
} }
/// Whether to use plan mode for a task.
///
/// This is a fieldless two-state marker, so it derives `Clone`/`Copy`/`Eq`
/// in addition to `Debug`/`PartialEq`: values can be passed around by value
/// and compared freely without borrowing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanDecision {
    /// No planning needed — execute directly.
    Direct,
    /// Use plan mode (Extended ReAct Loop).
    Plan,
}
/// Result of running one turn of the agent loop. /// Result of running one turn of the agent loop.
enum TurnResult { enum TurnResult {
/// Agent finished (no more tool calls). /// Agent finished (no more tool calls).
@ -244,6 +261,154 @@ impl AgentSession {
} }
} }
/// Run the Extended ReAct Loop: think → plan → self-critique → execute.
///
/// 1. Ask the LLM to analyze the task and create a step-by-step plan
/// 2. Ask the LLM to critique its own plan
/// 3. Execute with the refined plan as context
async fn run_with_plan(&mut self, task: &str) -> Result<TurnResult, Box<dyn std::error::Error>> {
// Phase 1: Think + Plan
tracing::info!("Planmodus: Fase 1 — analyse og planlegging");
let plan_prompt = format!(
"Du skal løse denne oppgaven:\n\n{}\n\n\
Før du handler, lag en steg-for-steg plan. For hvert steg:\n\
- Beskriv hva som skal gjøres\n\
- Hvilke filer som berøres\n\
- Mulige risikoer\n\n\
Bruk verktøy for å undersøke kodebasen (les filer, søk, etc.) \
for å lage en informert plan. Ikke gjør endringer ennå.\n\n\
Avslutt med en nummerert plan i dette formatet:\n\
## Plan\n\
1. [steg]\n\
2. [steg]\n\
...",
task
);
self.messages.push(Message {
role: "user".into(),
content: Some(plan_prompt),
tool_calls: None,
tool_call_id: None,
});
// Let the LLM explore and create a plan (allows tool use for research)
let plan_result = self.run_turn().await?;
if !matches!(plan_result, TurnResult::Done) {
return Ok(plan_result);
}
// Phase 2: Self-critique
tracing::info!("Planmodus: Fase 2 — selvkritikk");
self.messages.push(Message {
role: "user".into(),
content: Some(
"Kritiser planen din. Vurder:\n\
- Er det steg som mangler?\n\
- Er rekkefølgen riktig?\n\
- Kan noe galt? Hva er fallback?\n\
- Er det enklere alternativer?\n\
- Berører endringene mer enn nødvendig?\n\n\
Juster planen om nødvendig, og presenter den endelige \
versjonen. Ikke implementer ennå."
.into(),
),
tool_calls: None,
tool_call_id: None,
});
let critique_result = self.run_turn().await?;
if !matches!(critique_result, TurnResult::Done) {
return Ok(critique_result);
}
// Phase 3: Execute
tracing::info!("Planmodus: Fase 3 — utføring");
self.messages.push(Message {
role: "user".into(),
content: Some(
"Planen er godkjent. Implementer den nå, steg for steg. \
For hvert steg: utfør, verifiser at det fungerer, \
og videre til neste. Rapporter fremgang underveis."
.into(),
),
tool_calls: None,
tool_call_id: None,
});
self.run_turn().await
}
/// Decide whether a task should be run through plan mode.
///
/// This is a keyword/length heuristic, evaluated in this order:
///
/// 1. The task *starts with* one of the "simple" prefixes (read, show,
///    explain, find, …) → [`PlanDecision::Direct`].
/// 2. The task *contains* one of the planning keywords (implement, refactor,
///    migrate, …) → [`PlanDecision::Plan`].
/// 3. The task is longer than 500 characters → [`PlanDecision::Plan`].
/// 4. Otherwise → [`PlanDecision::Direct`].
///
/// Matching is case-insensitive and ignores leading/trailing whitespace, so a
/// task pasted with a leading space or newline is classified the same as the
/// clean text. Length is measured in characters rather than UTF-8 bytes, so
/// Norwegian text (æ/ø/å take two bytes each) is not considered "long" sooner
/// than the equivalent ASCII text.
///
/// Note: prefix and keyword matching is plain substring matching without word
/// boundaries, so e.g. a task starting with "checkout" matches the "check"
/// prefix.
fn should_plan(task: &str) -> PlanDecision {
    /// Tasks with more characters than this are assumed to need a plan.
    const LONG_TASK_CHARS: usize = 500;

    /// Keywords that strongly suggest planning is needed (matched anywhere).
    const PLAN_KEYWORDS: &[&str] = &[
        "implementer",
        "implement",
        "redesign",
        "refaktorer",
        "refactor",
        "migrer",
        "migrate",
        "arkitektur",
        "architecture",
        "bygg",
        "build",
        "lag en",
        "create a",
        "sett opp",
        "set up",
        "endre alle",
        "change all",
        "oppdater alle",
        "update all",
    ];

    /// Leading words that mark a simple, read-only style request.
    const SIMPLE_PREFIXES: &[&str] = &[
        "les ",
        "read ",
        "vis ",
        "show ",
        "hva er",
        "what is",
        "forklar",
        "explain",
        "sjekk",
        "check",
        "finn",
        "find",
        "søk",
        "search",
    ];

    // Surrounding whitespace carries no meaning and would otherwise defeat
    // the prefix check below.
    let task = task.trim();
    let task_lower = task.to_lowercase();

    // Simple prefixes win over everything else: "explain how the refactor
    // script works" is a question, not a refactoring job.
    if SIMPLE_PREFIXES.iter().any(|&kw| task_lower.starts_with(kw)) {
        return PlanDecision::Direct;
    }

    if PLAN_KEYWORDS.iter().any(|&kw| task_lower.contains(kw)) {
        return PlanDecision::Plan;
    }

    // Length heuristic: long task descriptions are more likely to need
    // planning. Count characters, not bytes (`str::len` is a byte count).
    if task.chars().count() > LONG_TASK_CHARS {
        PlanDecision::Plan
    } else {
        PlanDecision::Direct
    }
}
/// Total input tokens across all models. /// Total input tokens across all models.
fn total_input_tokens(&self) -> u64 { fn total_input_tokens(&self) -> u64 {
self.total_usage.values().map(|u| u.input_tokens).sum() self.total_usage.values().map(|u| u.input_tokens).sum()
@ -363,16 +528,32 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
if cli.interactive { if cli.interactive {
run_interactive(&mut session).await?; run_interactive(&mut session).await?;
} else { } else {
// Batch mode: add task as user message and run // Batch mode
let task = cli.task.unwrap(); let task = cli.task.unwrap();
session.messages.push(Message {
role: "user".into(),
content: Some(task),
tool_calls: None,
tool_call_id: None,
});
let result = session.run_turn().await?; // Decide whether to use plan mode
let use_plan = if cli.plan {
true
} else if cli.no_plan {
false
} else {
AgentSession::should_plan(&task) == PlanDecision::Plan
};
let result = if use_plan {
tracing::info!("Bruker planmodus (Extended ReAct Loop)");
session.run_with_plan(&task).await?
} else {
// Direct execution
session.messages.push(Message {
role: "user".into(),
content: Some(task),
tool_calls: None,
tool_call_id: None,
});
session.run_turn().await?
};
session.print_summary(); session.print_summary();
if matches!(result, TurnResult::BudgetExhausted) { if matches!(result, TurnResult::BudgetExhausted) {
@ -494,10 +675,11 @@ async fn run_interactive(
} }
"/help" => { "/help" => {
eprintln!("Kommandoer:"); eprintln!("Kommandoer:");
eprintln!(" /stats — Vis token-forbruk og kostnad"); eprintln!(" /plan <oppgave> — Kjør med planmodus (tenk → plan → selvkritikk → utfør)");
eprintln!(" /clear — Nullstill samtalen"); eprintln!(" /stats — Vis token-forbruk og kostnad");
eprintln!(" /help — Vis denne hjelpen"); eprintln!(" /clear — Nullstill samtalen");
eprintln!(" exit — Avslutt"); eprintln!(" /help — Vis denne hjelpen");
eprintln!(" exit — Avslutt");
eprintln!(); eprintln!();
eprintln!("Multi-line: Avslutt linje med \\ for å fortsette."); eprintln!("Multi-line: Avslutt linje med \\ for å fortsette.");
eprintln!("Ctrl+C avbryter pågående LLM-kall/verktøy."); eprintln!("Ctrl+C avbryter pågående LLM-kall/verktøy.");
@ -506,15 +688,28 @@ async fn run_interactive(
_ => {} _ => {}
} }
// Add user message and run turn // Check for /plan command
session.messages.push(Message { let (use_plan, effective_input) = if let Some(plan_task) = trimmed.strip_prefix("/plan ") {
role: "user".into(), (true, plan_task.to_string())
content: Some(input), } else {
tool_calls: None, (false, input.clone())
tool_call_id: None, };
});
match session.run_turn().await { // Run with or without plan mode
let turn_result = if use_plan {
eprintln!("[planmodus aktivert]");
session.run_with_plan(&effective_input).await
} else {
session.messages.push(Message {
role: "user".into(),
content: Some(effective_input),
tool_calls: None,
tool_call_id: None,
});
session.run_turn().await
};
match turn_result {
Ok(TurnResult::Done) => { Ok(TurnResult::Done) => {
// Normal completion // Normal completion
} }
@ -645,3 +840,37 @@ async fn spawn_claude_code(
Ok(()) Ok(())
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `should_plan` classifies every task in `tasks` as `expected`.
    fn assert_all(tasks: &[&str], expected: PlanDecision) {
        for task in tasks {
            assert_eq!(
                AgentSession::should_plan(task),
                expected,
                "unexpected decision for task {:?}",
                task
            );
        }
    }

    /// Tasks containing a planning keyword are routed to plan mode.
    #[test]
    fn test_should_plan_complex_tasks() {
        assert_all(
            &[
                "implementer plan-modus i agenten",
                "refaktorer hele auth-systemet",
                "migrate database to new schema",
                "lag en ny CLI-kommando for export",
                "oppdater alle TODO-kommentarer i prosjektet",
            ],
            PlanDecision::Plan,
        );
    }

    /// Tasks that open with a "simple" prefix are executed directly.
    #[test]
    fn test_should_plan_simple_tasks() {
        assert_all(
            &[
                "les filen src/main.rs",
                "vis innholdet i config",
                "hva er denne funksjonen?",
                "finn alle .rs filer",
                "sjekk om testen passerer",
            ],
            PlanDecision::Direct,
        );
    }

    /// A keyword-free task above the length threshold still gets a plan.
    #[test]
    fn test_should_plan_long_tasks() {
        let filler = "x".repeat(600);
        assert_all(&[filler.as_str()], PlanDecision::Plan);
    }

    /// With no keyword match and a short text, the default is direct execution.
    #[test]
    fn test_should_plan_ambiguous_defaults_to_direct() {
        assert_all(&["fiks buggen i login"], PlanDecision::Direct);
    }
}