Implementer planmodus og selvkritikk i synops-agent
Extended ReAct Loop: tenk → plan → selvkritikk → utfør. --plan tvinger planmodus, --no-plan deaktiverer. Uten flagg: auto-detect basert på oppgavens nøkkelord/lengde. /plan kommando i interaktiv modus.
This commit is contained in:
parent
3704a02921
commit
6bd2787ee8
1 changed file with 249 additions and 20 deletions
|
|
@ -69,6 +69,14 @@ struct Cli {
|
|||
/// Maks antall retries ved API-feil (default: 3)
|
||||
#[arg(long, default_value = "3")]
|
||||
max_retries: u32,
|
||||
|
||||
/// Tving planmodus: tenk → plan → selvkritikk → utfør
|
||||
#[arg(long)]
|
||||
plan: bool,
|
||||
|
||||
/// Deaktiver automatisk planmodus (brukes normalt for komplekse oppgaver)
|
||||
#[arg(long)]
|
||||
no_plan: bool,
|
||||
}
|
||||
|
||||
/// Shared state for the agent session.
|
||||
|
|
@ -89,6 +97,15 @@ struct AgentSession {
|
|||
interrupted: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
/// Outcome of the "should this task be planned first?" decision.
///
/// The enum carries no data, so it is cheap to copy and compare by value;
/// `Clone`, `Copy` and `Eq` are derived alongside `Debug` and `PartialEq`
/// so callers can store it, pass it around and use it in `assert_eq!`
/// without worrying about moves.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PlanDecision {
    /// No planning needed — execute the task directly.
    Direct,
    /// Use plan mode (Extended ReAct Loop).
    Plan,
}
|
||||
|
||||
/// Result of running one turn of the agent loop.
|
||||
enum TurnResult {
|
||||
/// Agent finished (no more tool calls).
|
||||
|
|
@ -244,6 +261,154 @@ impl AgentSession {
|
|||
}
|
||||
}
|
||||
|
||||
/// Run the Extended ReAct Loop: think → plan → self-critique → execute.
|
||||
///
|
||||
/// 1. Ask the LLM to analyze the task and create a step-by-step plan
|
||||
/// 2. Ask the LLM to critique its own plan
|
||||
/// 3. Execute with the refined plan as context
|
||||
async fn run_with_plan(&mut self, task: &str) -> Result<TurnResult, Box<dyn std::error::Error>> {
|
||||
// Phase 1: Think + Plan
|
||||
tracing::info!("Planmodus: Fase 1 — analyse og planlegging");
|
||||
let plan_prompt = format!(
|
||||
"Du skal løse denne oppgaven:\n\n{}\n\n\
|
||||
Før du handler, lag en steg-for-steg plan. For hvert steg:\n\
|
||||
- Beskriv hva som skal gjøres\n\
|
||||
- Hvilke filer som berøres\n\
|
||||
- Mulige risikoer\n\n\
|
||||
Bruk verktøy for å undersøke kodebasen (les filer, søk, etc.) \
|
||||
for å lage en informert plan. Ikke gjør endringer ennå.\n\n\
|
||||
Avslutt med en nummerert plan i dette formatet:\n\
|
||||
## Plan\n\
|
||||
1. [steg]\n\
|
||||
2. [steg]\n\
|
||||
...",
|
||||
task
|
||||
);
|
||||
|
||||
self.messages.push(Message {
|
||||
role: "user".into(),
|
||||
content: Some(plan_prompt),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
|
||||
// Let the LLM explore and create a plan (allows tool use for research)
|
||||
let plan_result = self.run_turn().await?;
|
||||
if !matches!(plan_result, TurnResult::Done) {
|
||||
return Ok(plan_result);
|
||||
}
|
||||
|
||||
// Phase 2: Self-critique
|
||||
tracing::info!("Planmodus: Fase 2 — selvkritikk");
|
||||
self.messages.push(Message {
|
||||
role: "user".into(),
|
||||
content: Some(
|
||||
"Kritiser planen din. Vurder:\n\
|
||||
- Er det steg som mangler?\n\
|
||||
- Er rekkefølgen riktig?\n\
|
||||
- Kan noe gå galt? Hva er fallback?\n\
|
||||
- Er det enklere alternativer?\n\
|
||||
- Berører endringene mer enn nødvendig?\n\n\
|
||||
Juster planen om nødvendig, og presenter den endelige \
|
||||
versjonen. Ikke implementer ennå."
|
||||
.into(),
|
||||
),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
|
||||
let critique_result = self.run_turn().await?;
|
||||
if !matches!(critique_result, TurnResult::Done) {
|
||||
return Ok(critique_result);
|
||||
}
|
||||
|
||||
// Phase 3: Execute
|
||||
tracing::info!("Planmodus: Fase 3 — utføring");
|
||||
self.messages.push(Message {
|
||||
role: "user".into(),
|
||||
content: Some(
|
||||
"Planen er godkjent. Implementer den nå, steg for steg. \
|
||||
For hvert steg: utfør, verifiser at det fungerer, \
|
||||
og gå videre til neste. Rapporter fremgang underveis."
|
||||
.into(),
|
||||
),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
|
||||
self.run_turn().await
|
||||
}
|
||||
|
||||
/// Decide whether to use plan mode based on task analysis.
|
||||
///
|
||||
/// Heuristic: if the task mentions multiple files, complex operations,
|
||||
/// or design/architecture keywords, use plan mode.
|
||||
fn should_plan(task: &str) -> PlanDecision {
|
||||
let task_lower = task.to_lowercase();
|
||||
|
||||
// Keywords that strongly suggest planning is needed
|
||||
let plan_keywords = [
|
||||
"implementer",
|
||||
"implement",
|
||||
"redesign",
|
||||
"refaktorer",
|
||||
"refactor",
|
||||
"migrer",
|
||||
"migrate",
|
||||
"arkitektur",
|
||||
"architecture",
|
||||
"bygg",
|
||||
"build",
|
||||
"lag en",
|
||||
"create a",
|
||||
"sett opp",
|
||||
"set up",
|
||||
"endre alle",
|
||||
"change all",
|
||||
"oppdater alle",
|
||||
"update all",
|
||||
];
|
||||
|
||||
// Keywords that suggest simple tasks (no plan needed)
|
||||
let simple_keywords = [
|
||||
"les ",
|
||||
"read ",
|
||||
"vis ",
|
||||
"show ",
|
||||
"hva er",
|
||||
"what is",
|
||||
"forklar",
|
||||
"explain",
|
||||
"sjekk",
|
||||
"check",
|
||||
"finn",
|
||||
"find",
|
||||
"søk",
|
||||
"search",
|
||||
];
|
||||
|
||||
// Simple tasks — direct execution
|
||||
for kw in &simple_keywords {
|
||||
if task_lower.starts_with(kw) {
|
||||
return PlanDecision::Direct;
|
||||
}
|
||||
}
|
||||
|
||||
// Complex keywords — plan
|
||||
for kw in &plan_keywords {
|
||||
if task_lower.contains(kw) {
|
||||
return PlanDecision::Plan;
|
||||
}
|
||||
}
|
||||
|
||||
// Length heuristic: long tasks are more likely to need planning
|
||||
if task.len() > 500 {
|
||||
return PlanDecision::Plan;
|
||||
}
|
||||
|
||||
PlanDecision::Direct
|
||||
}
|
||||
|
||||
/// Total input tokens across all models.
|
||||
fn total_input_tokens(&self) -> u64 {
|
||||
self.total_usage.values().map(|u| u.input_tokens).sum()
|
||||
|
|
@ -363,16 +528,32 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
if cli.interactive {
|
||||
run_interactive(&mut session).await?;
|
||||
} else {
|
||||
// Batch mode: add task as user message and run
|
||||
// Batch mode
|
||||
let task = cli.task.unwrap();
|
||||
|
||||
// Decide whether to use plan mode
|
||||
let use_plan = if cli.plan {
|
||||
true
|
||||
} else if cli.no_plan {
|
||||
false
|
||||
} else {
|
||||
AgentSession::should_plan(&task) == PlanDecision::Plan
|
||||
};
|
||||
|
||||
let result = if use_plan {
|
||||
tracing::info!("Bruker planmodus (Extended ReAct Loop)");
|
||||
session.run_with_plan(&task).await?
|
||||
} else {
|
||||
// Direct execution
|
||||
session.messages.push(Message {
|
||||
role: "user".into(),
|
||||
content: Some(task),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
session.run_turn().await?
|
||||
};
|
||||
|
||||
let result = session.run_turn().await?;
|
||||
session.print_summary();
|
||||
|
||||
if matches!(result, TurnResult::BudgetExhausted) {
|
||||
|
|
@ -494,6 +675,7 @@ async fn run_interactive(
|
|||
}
|
||||
"/help" => {
|
||||
eprintln!("Kommandoer:");
|
||||
eprintln!(" /plan <oppgave> — Kjør med planmodus (tenk → plan → selvkritikk → utfør)");
|
||||
eprintln!(" /stats — Vis token-forbruk og kostnad");
|
||||
eprintln!(" /clear — Nullstill samtalen");
|
||||
eprintln!(" /help — Vis denne hjelpen");
|
||||
|
|
@ -506,15 +688,28 @@ async fn run_interactive(
|
|||
_ => {}
|
||||
}
|
||||
|
||||
// Add user message and run turn
|
||||
// Check for /plan command
|
||||
let (use_plan, effective_input) = if let Some(plan_task) = trimmed.strip_prefix("/plan ") {
|
||||
(true, plan_task.to_string())
|
||||
} else {
|
||||
(false, input.clone())
|
||||
};
|
||||
|
||||
// Run with or without plan mode
|
||||
let turn_result = if use_plan {
|
||||
eprintln!("[planmodus aktivert]");
|
||||
session.run_with_plan(&effective_input).await
|
||||
} else {
|
||||
session.messages.push(Message {
|
||||
role: "user".into(),
|
||||
content: Some(input),
|
||||
content: Some(effective_input),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
session.run_turn().await
|
||||
};
|
||||
|
||||
match session.run_turn().await {
|
||||
match turn_result {
|
||||
Ok(TurnResult::Done) => {
|
||||
// Normal completion
|
||||
}
|
||||
|
|
@ -645,3 +840,37 @@ async fn spawn_claude_code(
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every task in `tasks` is classified as `expected`.
    fn assert_all(tasks: &[&str], expected: PlanDecision) {
        for &task in tasks {
            assert_eq!(AgentSession::should_plan(task), expected);
        }
    }

    /// Tasks containing a "complex" keyword are routed to plan mode.
    #[test]
    fn test_should_plan_complex_tasks() {
        assert_all(
            &[
                "implementer plan-modus i agenten",
                "refaktorer hele auth-systemet",
                "migrate database to new schema",
                "lag en ny CLI-kommando for export",
                "oppdater alle TODO-kommentarer i prosjektet",
            ],
            PlanDecision::Plan,
        );
    }

    /// Tasks starting with a "simple" prefix are executed directly.
    #[test]
    fn test_should_plan_simple_tasks() {
        assert_all(
            &[
                "les filen src/main.rs",
                "vis innholdet i config",
                "hva er denne funksjonen?",
                "finn alle .rs filer",
                "sjekk om testen passerer",
            ],
            PlanDecision::Direct,
        );
    }

    /// A task above the length threshold is planned even without keywords.
    #[test]
    fn test_should_plan_long_tasks() {
        let long_task = "x".repeat(600);
        assert_all(&[long_task.as_str()], PlanDecision::Plan);
    }

    /// A short task with no matching keyword falls back to direct execution.
    #[test]
    fn test_should_plan_ambiguous_defaults_to_direct() {
        assert_all(&["fiks buggen i login"], PlanDecision::Direct);
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue