Implementer Adaptive Context Compaction (ACC) i synops-agent
Automatisk kontekstkomprimering når meldingshistorikken nærmer seg kontekstvinduets grense. Bruker prompt_tokens fra API-respons som kalibreringsanker. - Ny context.rs-modul med to kompaksjonsnivåer: - Moderat (>70%): trunkerer gamle tool-resultater - Aggressiv (>85%): kollapser eldre historikk til oppsummering - Siste 5 meldinger bevares alltid urørt - context_window() på LlmProvider-trait med automatisk estimering - 5 enhetstester for kompaksjonslogikk
This commit is contained in:
parent
6099d0b209
commit
703a0addca
3 changed files with 417 additions and 0 deletions
385
tools/synops-agent/src/context.rs
Normal file
385
tools/synops-agent/src/context.rs
Normal file
|
|
@ -0,0 +1,385 @@
|
|||
//! Adaptive Context Compaction (ACC).
|
||||
//!
|
||||
//! Monitors token usage and compresses message history when approaching
|
||||
//! the model's context window limit. Two compaction levels:
|
||||
//!
|
||||
//! - **Moderate** (>=70% capacity by default): Truncate old tool results to a short preview
|
||||
//! - **Aggressive** (>=85% capacity by default): Collapse the entire older history into one summary message
|
||||
//!
|
||||
//! The last N messages are always preserved untouched.
|
||||
//! Uses `prompt_tokens` from API responses as the calibration anchor.
|
||||
|
||||
use crate::provider::Message;
|
||||
|
||||
/// Tuning knobs for Adaptive Context Compaction.
#[derive(Clone, Debug)]
pub struct CompactionConfig {
    /// Size of the current model's context window, measured in tokens.
    pub context_window: u64,
    /// Fraction of the window (e.g. 0.70) at which moderate compaction starts.
    pub moderate_threshold: f64,
    /// Fraction of the window (e.g. 0.85) at which aggressive compaction starts.
    pub aggressive_threshold: f64,
    /// How many of the most recent messages are always left untouched.
    pub preserve_recent: usize,
}
|
||||
|
||||
impl Default for CompactionConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
context_window: 128_000,
|
||||
moderate_threshold: 0.70,
|
||||
aggressive_threshold: 0.85,
|
||||
preserve_recent: 5,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a compaction check.
///
/// The enum carries no data, so it is `Copy`; callers can log a level and
/// still pass it on by value afterwards.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionLevel {
    /// Below the moderate threshold — no action needed.
    None,
    /// At or above the moderate threshold (0.70 by default) —
    /// old tool results are truncated.
    Moderate,
    /// At or above the aggressive threshold (0.85 by default) —
    /// the older history is collapsed into a single summary.
    Aggressive,
}
|
||||
|
||||
/// Check what compaction level is needed based on current prompt_tokens.
|
||||
pub fn check_compaction_level(prompt_tokens: u64, config: &CompactionConfig) -> CompactionLevel {
|
||||
let ratio = prompt_tokens as f64 / config.context_window as f64;
|
||||
if ratio >= config.aggressive_threshold {
|
||||
CompactionLevel::Aggressive
|
||||
} else if ratio >= config.moderate_threshold {
|
||||
CompactionLevel::Moderate
|
||||
} else {
|
||||
CompactionLevel::None
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply compaction to the message history.
|
||||
///
|
||||
/// Modifies `messages` in-place. The system message (index 0) and
|
||||
/// the last `preserve_recent` messages are never touched.
|
||||
///
|
||||
/// Returns true if any compaction was performed.
|
||||
pub fn compact_messages(
|
||||
messages: &mut Vec<Message>,
|
||||
level: CompactionLevel,
|
||||
config: &CompactionConfig,
|
||||
) -> bool {
|
||||
if level == CompactionLevel::None {
|
||||
return false;
|
||||
}
|
||||
|
||||
// We need at least: system + user + some history + preserved tail
|
||||
// system is at index 0, user task at index 1
|
||||
let fixed_prefix = 2; // system + initial user message
|
||||
let total = messages.len();
|
||||
|
||||
if total <= fixed_prefix + config.preserve_recent {
|
||||
// Not enough messages to compact
|
||||
return false;
|
||||
}
|
||||
|
||||
let compactable_end = total - config.preserve_recent;
|
||||
|
||||
match level {
|
||||
CompactionLevel::Moderate => {
|
||||
compact_tool_results(messages, fixed_prefix, compactable_end)
|
||||
}
|
||||
CompactionLevel::Aggressive => {
|
||||
compact_history_aggressive(messages, fixed_prefix, compactable_end)
|
||||
}
|
||||
CompactionLevel::None => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Moderate compaction: replace tool result contents with short summaries.
|
||||
/// Keeps assistant messages and their tool_calls intact, but truncates
|
||||
/// the tool response bodies.
|
||||
fn compact_tool_results(
|
||||
messages: &mut Vec<Message>,
|
||||
start: usize,
|
||||
end: usize,
|
||||
) -> bool {
|
||||
let mut compacted = false;
|
||||
|
||||
for i in start..end {
|
||||
if messages[i].role == "tool" {
|
||||
if let Some(ref content) = messages[i].content {
|
||||
if content.len() > 200 {
|
||||
// Keep first 150 chars + note
|
||||
let preview = &content[..content.len().min(150)];
|
||||
messages[i].content = Some(format!(
|
||||
"[compacted] {}... ({} bytes omitted)",
|
||||
preview,
|
||||
content.len() - 150
|
||||
));
|
||||
compacted = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if compacted {
|
||||
tracing::info!("ACC: moderate compaction — truncated old tool results");
|
||||
}
|
||||
|
||||
compacted
|
||||
}
|
||||
|
||||
/// Aggressive compaction: replace the entire compactable range with a
|
||||
/// single summary message. This collapses all old assistant/tool
|
||||
/// exchanges into one user message that describes what happened.
|
||||
fn compact_history_aggressive(
|
||||
messages: &mut Vec<Message>,
|
||||
start: usize,
|
||||
end: usize,
|
||||
) -> bool {
|
||||
if end <= start {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Build a summary of what happened in the compacted range
|
||||
let mut summary_parts: Vec<String> = Vec::new();
|
||||
let mut tool_calls_seen: Vec<String> = Vec::new();
|
||||
let mut assistant_snippets: Vec<String> = Vec::new();
|
||||
|
||||
for msg in &messages[start..end] {
|
||||
match msg.role.as_str() {
|
||||
"assistant" => {
|
||||
if let Some(ref tc) = msg.tool_calls {
|
||||
for call in tc {
|
||||
tool_calls_seen.push(call.function.name.clone());
|
||||
}
|
||||
}
|
||||
if let Some(ref text) = msg.content {
|
||||
if !text.is_empty() {
|
||||
let snippet = if text.len() > 100 {
|
||||
format!("{}...", &text[..100])
|
||||
} else {
|
||||
text.clone()
|
||||
};
|
||||
assistant_snippets.push(snippet);
|
||||
}
|
||||
}
|
||||
}
|
||||
"tool" => {
|
||||
// Already captured via tool_calls on assistant
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if !tool_calls_seen.is_empty() {
|
||||
summary_parts.push(format!(
|
||||
"Tools used: {}",
|
||||
tool_calls_seen.join(", ")
|
||||
));
|
||||
}
|
||||
if !assistant_snippets.is_empty() {
|
||||
let max_snippets = assistant_snippets.len().min(5);
|
||||
summary_parts.push(format!(
|
||||
"Key outputs:\n{}",
|
||||
assistant_snippets[..max_snippets].join("\n- ")
|
||||
));
|
||||
}
|
||||
|
||||
let summary = if summary_parts.is_empty() {
|
||||
"[compacted] Previous conversation history was summarized to save context.".to_string()
|
||||
} else {
|
||||
format!(
|
||||
"[compacted] Previous conversation summary ({} messages compressed):\n{}",
|
||||
end - start,
|
||||
summary_parts.join("\n")
|
||||
)
|
||||
};
|
||||
|
||||
// Replace the range [start..end) with a single summary message
|
||||
let preserved_tail: Vec<Message> = messages.drain(end..).collect();
|
||||
messages.truncate(start);
|
||||
messages.push(Message {
|
||||
role: "user".into(),
|
||||
content: Some(summary),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
messages.extend(preserved_tail);
|
||||
|
||||
tracing::info!(
|
||||
removed = end - start,
|
||||
"ACC: aggressive compaction — collapsed old history into summary"
|
||||
);
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Estimate the context window size (in tokens) from a model name.
///
/// The name is lower-cased and checked against an ordered list of substring
/// rules; the first rule that matches wins. Order matters: the `gpt-4o` /
/// `gpt-4-turbo` rule must come before the generic `gpt-4` rule.
///
/// Returns 128 000 when no rule matches.
pub fn estimate_context_window(model: &str) -> u64 {
    /// Fallback used when the model name matches no known family.
    const DEFAULT_WINDOW: u64 = 128_000;

    /// `(substrings, window)` — a rule matches when the lower-cased model
    /// name contains any of its substrings.
    const KNOWN_WINDOWS: &[(&[&str], u64)] = &[
        // Anthropic
        (&["claude"], 200_000),
        // Google Gemini
        (&["gemini"], 1_000_000),
        // Grok
        (&["grok"], 131_072),
        // OpenAI — specific families first, then the generic `gpt-4` bucket.
        // NOTE(review): every other `gpt-4*` name lands in the 8_192 bucket.
        (&["gpt-4o", "gpt-4-turbo"], 128_000),
        (&["gpt-4"], 8_192),
        (&["o1", "o3", "o4"], 200_000),
        // Llama / local
        (&["llama"], 128_000),
    ];

    let name = model.to_lowercase();

    KNOWN_WINDOWS
        .iter()
        .find(|(needles, _)| needles.iter().any(|needle| name.contains(*needle)))
        .map_or(DEFAULT_WINDOW, |&(_, window)| window)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a plain message with the given role and text content.
    fn make_msg(role: &str, content: &str) -> Message {
        Message {
            role: role.into(),
            content: Some(content.into()),
            tool_calls: None,
            tool_call_id: None,
        }
    }

    /// Build a tool-result message tied to the given tool call id.
    fn tool_result(call_id: &str, body: String) -> Message {
        Message {
            role: "tool".into(),
            content: Some(body),
            tool_calls: None,
            tool_call_id: Some(call_id.into()),
        }
    }

    /// Default configuration with only `preserve_recent` overridden.
    fn keep_recent(count: usize) -> CompactionConfig {
        CompactionConfig {
            preserve_recent: count,
            ..Default::default()
        }
    }

    #[test]
    fn test_compaction_level() {
        let config = CompactionConfig {
            context_window: 100_000,
            ..Default::default()
        };

        let cases = vec![
            (50_000, CompactionLevel::None),
            (75_000, CompactionLevel::Moderate),
            (90_000, CompactionLevel::Aggressive),
        ];

        for (prompt_tokens, expected) in cases {
            assert_eq!(check_compaction_level(prompt_tokens, &config), expected);
        }
    }

    #[test]
    fn test_moderate_compaction_truncates_tool_results() {
        let mut history = vec![
            make_msg("system", "system prompt"),
            make_msg("user", "do something"),
            make_msg("assistant", "I'll read the file"),
            tool_result("call_1", "x".repeat(500)),
            make_msg("assistant", "recent 1"),
            make_msg("user", "recent 2"),
        ];

        let changed = compact_messages(&mut history, CompactionLevel::Moderate, &keep_recent(2));

        assert!(changed);
        let tool_body = history[3].content.as_ref().unwrap();
        assert!(tool_body.starts_with("[compacted]"));
        // Same count, just truncated.
        assert_eq!(history.len(), 6);
    }

    #[test]
    fn test_aggressive_compaction_collapses_history() {
        let mut history = vec![
            make_msg("system", "system prompt"),
            make_msg("user", "do something"),
            make_msg("assistant", "step 1"),
            make_msg("tool", "result 1"),
            make_msg("assistant", "step 2"),
            make_msg("tool", "result 2"),
            make_msg("assistant", "step 3"),
            make_msg("tool", "result 3"),
            // The two messages below form the preserved tail.
            make_msg("assistant", "recent assistant"),
            make_msg("user", "recent user"),
        ];

        let changed = compact_messages(&mut history, CompactionLevel::Aggressive, &keep_recent(2));

        assert!(changed);
        // system + user + summary + 2 preserved = 5
        assert_eq!(history.len(), 5);
        assert_eq!(history[0].role, "system");
        assert_eq!(history[1].role, "user");
        assert!(history[2].content.as_ref().unwrap().contains("[compacted]"));
        assert_eq!(history[3].content.as_deref(), Some("recent assistant"));
        assert_eq!(history[4].content.as_deref(), Some("recent user"));
    }

    #[test]
    fn test_no_compaction_when_too_few_messages() {
        let mut history = vec![
            make_msg("system", "sys"),
            make_msg("user", "task"),
            make_msg("assistant", "done"),
        ];

        let changed = compact_messages(&mut history, CompactionLevel::Aggressive, &keep_recent(3));

        assert!(!changed);
        assert_eq!(history.len(), 3);
    }

    #[test]
    fn test_estimate_context_window() {
        let expectations = vec![
            ("claude-sonnet-4", 200_000),
            ("gemini-2.5-flash", 1_000_000),
            ("grok-3", 131_072),
            ("gpt-4o", 128_000),
            ("unknown-model", 128_000),
        ];

        for (model, window) in expectations {
            assert_eq!(estimate_context_window(model), window);
        }
    }
}
|
||||
|
|
@ -8,10 +8,12 @@
|
|||
//! synops-agent --model gemini/gemini-2.5-flash --task "oppsummer denne filen"
|
||||
//! synops-agent --model ollama/llama3 --task "skriv en test"
|
||||
|
||||
mod context;
|
||||
mod provider;
|
||||
mod tools;
|
||||
|
||||
use clap::Parser;
|
||||
use context::{CompactionConfig, CompactionLevel, check_compaction_level, compact_messages};
|
||||
use provider::{ApiKeys, CompletionResponse, Message, TokenUsage, create_provider};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -98,6 +100,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
let mut total_usage: HashMap<String, TokenUsage> = HashMap::new();
|
||||
let mut iteration = 0;
|
||||
|
||||
// Context compaction config
|
||||
let compaction_config = CompactionConfig {
|
||||
context_window: provider.context_window(),
|
||||
..Default::default()
|
||||
};
|
||||
tracing::info!(
|
||||
context_window = compaction_config.context_window,
|
||||
"ACC konfigurert"
|
||||
);
|
||||
|
||||
// === Agent loop ===
|
||||
loop {
|
||||
iteration += 1;
|
||||
|
|
@ -125,6 +137,21 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
);
|
||||
}
|
||||
|
||||
// === Adaptive Context Compaction ===
|
||||
// Use prompt_tokens from the API response as calibration anchor
|
||||
let level = check_compaction_level(response.usage.input_tokens, &compaction_config);
|
||||
if level != CompactionLevel::None {
|
||||
let ratio = response.usage.input_tokens as f64 / compaction_config.context_window as f64;
|
||||
tracing::warn!(
|
||||
prompt_tokens = response.usage.input_tokens,
|
||||
context_window = compaction_config.context_window,
|
||||
ratio = format!("{:.1}%", ratio * 100.0),
|
||||
level = ?level,
|
||||
"ACC: kontekstkomprimering trigget"
|
||||
);
|
||||
compact_messages(&mut messages, level, &compaction_config);
|
||||
}
|
||||
|
||||
// Check for tool calls
|
||||
let has_tool_calls = response
|
||||
.message
|
||||
|
|
|
|||
|
|
@ -79,6 +79,11 @@ pub trait LlmProvider: Send + Sync {
|
|||
|
||||
/// Model identifier
|
||||
fn model_id(&self) -> &str;
|
||||
|
||||
/// Context window size in tokens.
|
||||
fn context_window(&self) -> u64 {
|
||||
crate::context::estimate_context_window(self.model_id())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue