Implementer planmodus og selvkritikk i synops-agent
Extended ReAct Loop: tenk → plan → selvkritikk → utfør. --plan tvinger planmodus, --no-plan deaktiverer. Uten flagg: auto-detect basert på oppgavens nøkkelord/lengde. /plan kommando i interaktiv modus.
This commit is contained in:
parent
3704a02921
commit
6bd2787ee8
1 changed file with 249 additions and 20 deletions
|
|
@ -69,6 +69,14 @@ struct Cli {
|
||||||
/// Maks antall retries ved API-feil (default: 3)
|
/// Maks antall retries ved API-feil (default: 3)
|
||||||
#[arg(long, default_value = "3")]
|
#[arg(long, default_value = "3")]
|
||||||
max_retries: u32,
|
max_retries: u32,
|
||||||
|
|
||||||
|
/// Tving planmodus: tenk → plan → selvkritikk → utfør
|
||||||
|
#[arg(long)]
|
||||||
|
plan: bool,
|
||||||
|
|
||||||
|
/// Deaktiver automatisk planmodus (brukes normalt for komplekse oppgaver)
|
||||||
|
#[arg(long)]
|
||||||
|
no_plan: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shared state for the agent session.
|
/// Shared state for the agent session.
|
||||||
|
|
@ -89,6 +97,15 @@ struct AgentSession {
|
||||||
interrupted: Arc<AtomicBool>,
|
interrupted: Arc<AtomicBool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether to use plan mode for a task.
///
/// This is a plain, fieldless marker, so it is `Copy` and fully `Eq`:
/// callers can pass it by value and compare it freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanDecision {
    /// No planning needed — execute directly.
    Direct,
    /// Use plan mode (Extended ReAct Loop).
    Plan,
}
|
||||||
|
|
||||||
/// Result of running one turn of the agent loop.
|
/// Result of running one turn of the agent loop.
|
||||||
enum TurnResult {
|
enum TurnResult {
|
||||||
/// Agent finished (no more tool calls).
|
/// Agent finished (no more tool calls).
|
||||||
|
|
@ -244,6 +261,154 @@ impl AgentSession {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Run the Extended ReAct Loop: think → plan → self-critique → execute.
|
||||||
|
///
|
||||||
|
/// 1. Ask the LLM to analyze the task and create a step-by-step plan
|
||||||
|
/// 2. Ask the LLM to critique its own plan
|
||||||
|
/// 3. Execute with the refined plan as context
|
||||||
|
async fn run_with_plan(&mut self, task: &str) -> Result<TurnResult, Box<dyn std::error::Error>> {
|
||||||
|
// Phase 1: Think + Plan
|
||||||
|
tracing::info!("Planmodus: Fase 1 — analyse og planlegging");
|
||||||
|
let plan_prompt = format!(
|
||||||
|
"Du skal løse denne oppgaven:\n\n{}\n\n\
|
||||||
|
Før du handler, lag en steg-for-steg plan. For hvert steg:\n\
|
||||||
|
- Beskriv hva som skal gjøres\n\
|
||||||
|
- Hvilke filer som berøres\n\
|
||||||
|
- Mulige risikoer\n\n\
|
||||||
|
Bruk verktøy for å undersøke kodebasen (les filer, søk, etc.) \
|
||||||
|
for å lage en informert plan. Ikke gjør endringer ennå.\n\n\
|
||||||
|
Avslutt med en nummerert plan i dette formatet:\n\
|
||||||
|
## Plan\n\
|
||||||
|
1. [steg]\n\
|
||||||
|
2. [steg]\n\
|
||||||
|
...",
|
||||||
|
task
|
||||||
|
);
|
||||||
|
|
||||||
|
self.messages.push(Message {
|
||||||
|
role: "user".into(),
|
||||||
|
content: Some(plan_prompt),
|
||||||
|
tool_calls: None,
|
||||||
|
tool_call_id: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Let the LLM explore and create a plan (allows tool use for research)
|
||||||
|
let plan_result = self.run_turn().await?;
|
||||||
|
if !matches!(plan_result, TurnResult::Done) {
|
||||||
|
return Ok(plan_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 2: Self-critique
|
||||||
|
tracing::info!("Planmodus: Fase 2 — selvkritikk");
|
||||||
|
self.messages.push(Message {
|
||||||
|
role: "user".into(),
|
||||||
|
content: Some(
|
||||||
|
"Kritiser planen din. Vurder:\n\
|
||||||
|
- Er det steg som mangler?\n\
|
||||||
|
- Er rekkefølgen riktig?\n\
|
||||||
|
- Kan noe gå galt? Hva er fallback?\n\
|
||||||
|
- Er det enklere alternativer?\n\
|
||||||
|
- Berører endringene mer enn nødvendig?\n\n\
|
||||||
|
Juster planen om nødvendig, og presenter den endelige \
|
||||||
|
versjonen. Ikke implementer ennå."
|
||||||
|
.into(),
|
||||||
|
),
|
||||||
|
tool_calls: None,
|
||||||
|
tool_call_id: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
let critique_result = self.run_turn().await?;
|
||||||
|
if !matches!(critique_result, TurnResult::Done) {
|
||||||
|
return Ok(critique_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 3: Execute
|
||||||
|
tracing::info!("Planmodus: Fase 3 — utføring");
|
||||||
|
self.messages.push(Message {
|
||||||
|
role: "user".into(),
|
||||||
|
content: Some(
|
||||||
|
"Planen er godkjent. Implementer den nå, steg for steg. \
|
||||||
|
For hvert steg: utfør, verifiser at det fungerer, \
|
||||||
|
og gå videre til neste. Rapporter fremgang underveis."
|
||||||
|
.into(),
|
||||||
|
),
|
||||||
|
tool_calls: None,
|
||||||
|
tool_call_id: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
self.run_turn().await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decide whether to use plan mode based on task analysis.
|
||||||
|
///
|
||||||
|
/// Heuristic: if the task mentions multiple files, complex operations,
|
||||||
|
/// or design/architecture keywords, use plan mode.
|
||||||
|
fn should_plan(task: &str) -> PlanDecision {
|
||||||
|
let task_lower = task.to_lowercase();
|
||||||
|
|
||||||
|
// Keywords that strongly suggest planning is needed
|
||||||
|
let plan_keywords = [
|
||||||
|
"implementer",
|
||||||
|
"implement",
|
||||||
|
"redesign",
|
||||||
|
"refaktorer",
|
||||||
|
"refactor",
|
||||||
|
"migrer",
|
||||||
|
"migrate",
|
||||||
|
"arkitektur",
|
||||||
|
"architecture",
|
||||||
|
"bygg",
|
||||||
|
"build",
|
||||||
|
"lag en",
|
||||||
|
"create a",
|
||||||
|
"sett opp",
|
||||||
|
"set up",
|
||||||
|
"endre alle",
|
||||||
|
"change all",
|
||||||
|
"oppdater alle",
|
||||||
|
"update all",
|
||||||
|
];
|
||||||
|
|
||||||
|
// Keywords that suggest simple tasks (no plan needed)
|
||||||
|
let simple_keywords = [
|
||||||
|
"les ",
|
||||||
|
"read ",
|
||||||
|
"vis ",
|
||||||
|
"show ",
|
||||||
|
"hva er",
|
||||||
|
"what is",
|
||||||
|
"forklar",
|
||||||
|
"explain",
|
||||||
|
"sjekk",
|
||||||
|
"check",
|
||||||
|
"finn",
|
||||||
|
"find",
|
||||||
|
"søk",
|
||||||
|
"search",
|
||||||
|
];
|
||||||
|
|
||||||
|
// Simple tasks — direct execution
|
||||||
|
for kw in &simple_keywords {
|
||||||
|
if task_lower.starts_with(kw) {
|
||||||
|
return PlanDecision::Direct;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Complex keywords — plan
|
||||||
|
for kw in &plan_keywords {
|
||||||
|
if task_lower.contains(kw) {
|
||||||
|
return PlanDecision::Plan;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Length heuristic: long tasks are more likely to need planning
|
||||||
|
if task.len() > 500 {
|
||||||
|
return PlanDecision::Plan;
|
||||||
|
}
|
||||||
|
|
||||||
|
PlanDecision::Direct
|
||||||
|
}
|
||||||
|
|
||||||
/// Total input tokens across all models.
|
/// Total input tokens across all models.
|
||||||
fn total_input_tokens(&self) -> u64 {
|
fn total_input_tokens(&self) -> u64 {
|
||||||
self.total_usage.values().map(|u| u.input_tokens).sum()
|
self.total_usage.values().map(|u| u.input_tokens).sum()
|
||||||
|
|
@ -363,16 +528,32 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
if cli.interactive {
|
if cli.interactive {
|
||||||
run_interactive(&mut session).await?;
|
run_interactive(&mut session).await?;
|
||||||
} else {
|
} else {
|
||||||
// Batch mode: add task as user message and run
|
// Batch mode
|
||||||
let task = cli.task.unwrap();
|
let task = cli.task.unwrap();
|
||||||
|
|
||||||
|
// Decide whether to use plan mode
|
||||||
|
let use_plan = if cli.plan {
|
||||||
|
true
|
||||||
|
} else if cli.no_plan {
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
AgentSession::should_plan(&task) == PlanDecision::Plan
|
||||||
|
};
|
||||||
|
|
||||||
|
let result = if use_plan {
|
||||||
|
tracing::info!("Bruker planmodus (Extended ReAct Loop)");
|
||||||
|
session.run_with_plan(&task).await?
|
||||||
|
} else {
|
||||||
|
// Direct execution
|
||||||
session.messages.push(Message {
|
session.messages.push(Message {
|
||||||
role: "user".into(),
|
role: "user".into(),
|
||||||
content: Some(task),
|
content: Some(task),
|
||||||
tool_calls: None,
|
tool_calls: None,
|
||||||
tool_call_id: None,
|
tool_call_id: None,
|
||||||
});
|
});
|
||||||
|
session.run_turn().await?
|
||||||
|
};
|
||||||
|
|
||||||
let result = session.run_turn().await?;
|
|
||||||
session.print_summary();
|
session.print_summary();
|
||||||
|
|
||||||
if matches!(result, TurnResult::BudgetExhausted) {
|
if matches!(result, TurnResult::BudgetExhausted) {
|
||||||
|
|
@ -494,6 +675,7 @@ async fn run_interactive(
|
||||||
}
|
}
|
||||||
"/help" => {
|
"/help" => {
|
||||||
eprintln!("Kommandoer:");
|
eprintln!("Kommandoer:");
|
||||||
|
eprintln!(" /plan <oppgave> — Kjør med planmodus (tenk → plan → selvkritikk → utfør)");
|
||||||
eprintln!(" /stats — Vis token-forbruk og kostnad");
|
eprintln!(" /stats — Vis token-forbruk og kostnad");
|
||||||
eprintln!(" /clear — Nullstill samtalen");
|
eprintln!(" /clear — Nullstill samtalen");
|
||||||
eprintln!(" /help — Vis denne hjelpen");
|
eprintln!(" /help — Vis denne hjelpen");
|
||||||
|
|
@ -506,15 +688,28 @@ async fn run_interactive(
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add user message and run turn
|
// Check for /plan command
|
||||||
|
let (use_plan, effective_input) = if let Some(plan_task) = trimmed.strip_prefix("/plan ") {
|
||||||
|
(true, plan_task.to_string())
|
||||||
|
} else {
|
||||||
|
(false, input.clone())
|
||||||
|
};
|
||||||
|
|
||||||
|
// Run with or without plan mode
|
||||||
|
let turn_result = if use_plan {
|
||||||
|
eprintln!("[planmodus aktivert]");
|
||||||
|
session.run_with_plan(&effective_input).await
|
||||||
|
} else {
|
||||||
session.messages.push(Message {
|
session.messages.push(Message {
|
||||||
role: "user".into(),
|
role: "user".into(),
|
||||||
content: Some(input),
|
content: Some(effective_input),
|
||||||
tool_calls: None,
|
tool_calls: None,
|
||||||
tool_call_id: None,
|
tool_call_id: None,
|
||||||
});
|
});
|
||||||
|
session.run_turn().await
|
||||||
|
};
|
||||||
|
|
||||||
match session.run_turn().await {
|
match turn_result {
|
||||||
Ok(TurnResult::Done) => {
|
Ok(TurnResult::Done) => {
|
||||||
// Normal completion
|
// Normal completion
|
||||||
}
|
}
|
||||||
|
|
@ -645,3 +840,37 @@ async fn spawn_claude_code(
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `AgentSession::should_plan` yields `expected` for every task in `tasks`.
    fn assert_all(tasks: &[&str], expected: PlanDecision) {
        for &task in tasks {
            assert_eq!(AgentSession::should_plan(task), expected);
        }
    }

    /// Tasks containing a planning keyword are routed to plan mode.
    #[test]
    fn test_should_plan_complex_tasks() {
        assert_all(
            &[
                "implementer plan-modus i agenten",
                "refaktorer hele auth-systemet",
                "migrate database to new schema",
                "lag en ny CLI-kommando for export",
                "oppdater alle TODO-kommentarer i prosjektet",
            ],
            PlanDecision::Plan,
        );
    }

    /// Tasks that open with a read-only verb are executed directly.
    #[test]
    fn test_should_plan_simple_tasks() {
        assert_all(
            &[
                "les filen src/main.rs",
                "vis innholdet i config",
                "hva er denne funksjonen?",
                "finn alle .rs filer",
                "sjekk om testen passerer",
            ],
            PlanDecision::Direct,
        );
    }

    /// A task with no keywords still triggers plan mode once it is long enough.
    #[test]
    fn test_should_plan_long_tasks() {
        let filler = "x".repeat(600);
        assert_all(&[filler.as_str()], PlanDecision::Plan);
    }

    /// A short task without any known keyword falls back to direct execution.
    #[test]
    fn test_should_plan_ambiguous_defaults_to_direct() {
        assert_all(&["fiks buggen i login"], PlanDecision::Direct);
    }
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue