Implementer Adaptive Context Compaction (ACC) i synops-agent
Automatisk kontekstkomprimering når meldingshistorikken nærmer seg kontekstvinduets grense. Bruker prompt_tokens fra API-respons som kalibreringsanker.

- Ny context.rs-modul med to kompaksjonsnivåer:
  - Moderat (>70%): trunkerer gamle tool-resultater
  - Aggressiv (>85%): kollapser eldre historikk til oppsummering
  - Siste 5 meldinger bevares alltid urørt
- context_window() på LlmProvider-trait med automatisk estimering
- 5 enhetstester for kompaksjonslogikk
This commit is contained in:
parent
6099d0b209
commit
703a0addca
3 changed files with 417 additions and 0 deletions
385
tools/synops-agent/src/context.rs
Normal file
385
tools/synops-agent/src/context.rs
Normal file
|
|
@ -0,0 +1,385 @@
|
||||||
|
//! Adaptive Context Compaction (ACC).
|
||||||
|
//!
|
||||||
|
//! Monitors token usage and compresses message history when approaching
|
||||||
|
//! the model's context window limit. Two compaction levels:
|
||||||
|
//!
|
||||||
|
//! - **Moderate** (>=70% capacity): Truncate old tool results to a short preview
|
||||||
|
//! - **Aggressive** (>=85% capacity): Collapse the older history into one summary message
|
||||||
|
//!
|
||||||
|
//! The last N messages are always preserved untouched.
|
||||||
|
//! Uses `prompt_tokens` from API responses as the calibration anchor.
|
||||||
|
|
||||||
|
use crate::provider::Message;
|
||||||
|
|
||||||
|
/// Tunable parameters that drive Adaptive Context Compaction.
///
/// Both thresholds are fractions of `context_window`; they are compared
/// against the ratio `prompt_tokens / context_window`.
#[derive(Clone, Debug)]
pub struct CompactionConfig {
    /// Size of the current model's context window, measured in tokens.
    pub context_window: u64,
    /// Usage fraction (e.g. `0.70`) at which moderate compaction starts.
    pub moderate_threshold: f64,
    /// Usage fraction (e.g. `0.85`) at which aggressive compaction starts.
    pub aggressive_threshold: f64,
    /// How many of the most recent messages compaction must leave alone.
    pub preserve_recent: usize,
}
|
||||||
|
|
||||||
|
impl Default for CompactionConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
context_window: 128_000,
|
||||||
|
moderate_threshold: 0.70,
|
||||||
|
aggressive_threshold: 0.85,
|
||||||
|
preserve_recent: 5,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Outcome of a compaction check: how strongly the history must be compressed.
///
/// This is a plain field-less tag, so it is `Copy` and can be logged,
/// compared and passed on without being moved out of the caller's binding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionLevel {
    /// Usage is below the moderate threshold — no action needed.
    None,
    /// Usage reached the moderate threshold (70 % by default) — old tool
    /// results are truncated.
    Moderate,
    /// Usage reached the aggressive threshold (85 % by default) — the older
    /// history is collapsed into a single summary message.
    Aggressive,
}
|
||||||
|
|
||||||
|
/// Check what compaction level is needed based on current prompt_tokens.
|
||||||
|
pub fn check_compaction_level(prompt_tokens: u64, config: &CompactionConfig) -> CompactionLevel {
|
||||||
|
let ratio = prompt_tokens as f64 / config.context_window as f64;
|
||||||
|
if ratio >= config.aggressive_threshold {
|
||||||
|
CompactionLevel::Aggressive
|
||||||
|
} else if ratio >= config.moderate_threshold {
|
||||||
|
CompactionLevel::Moderate
|
||||||
|
} else {
|
||||||
|
CompactionLevel::None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply compaction to the message history.
|
||||||
|
///
|
||||||
|
/// Modifies `messages` in-place. The system message (index 0) and
|
||||||
|
/// the last `preserve_recent` messages are never touched.
|
||||||
|
///
|
||||||
|
/// Returns true if any compaction was performed.
|
||||||
|
pub fn compact_messages(
|
||||||
|
messages: &mut Vec<Message>,
|
||||||
|
level: CompactionLevel,
|
||||||
|
config: &CompactionConfig,
|
||||||
|
) -> bool {
|
||||||
|
if level == CompactionLevel::None {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We need at least: system + user + some history + preserved tail
|
||||||
|
// system is at index 0, user task at index 1
|
||||||
|
let fixed_prefix = 2; // system + initial user message
|
||||||
|
let total = messages.len();
|
||||||
|
|
||||||
|
if total <= fixed_prefix + config.preserve_recent {
|
||||||
|
// Not enough messages to compact
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let compactable_end = total - config.preserve_recent;
|
||||||
|
|
||||||
|
match level {
|
||||||
|
CompactionLevel::Moderate => {
|
||||||
|
compact_tool_results(messages, fixed_prefix, compactable_end)
|
||||||
|
}
|
||||||
|
CompactionLevel::Aggressive => {
|
||||||
|
compact_history_aggressive(messages, fixed_prefix, compactable_end)
|
||||||
|
}
|
||||||
|
CompactionLevel::None => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Moderate compaction: replace tool result contents with short summaries.
|
||||||
|
/// Keeps assistant messages and their tool_calls intact, but truncates
|
||||||
|
/// the tool response bodies.
|
||||||
|
fn compact_tool_results(
|
||||||
|
messages: &mut Vec<Message>,
|
||||||
|
start: usize,
|
||||||
|
end: usize,
|
||||||
|
) -> bool {
|
||||||
|
let mut compacted = false;
|
||||||
|
|
||||||
|
for i in start..end {
|
||||||
|
if messages[i].role == "tool" {
|
||||||
|
if let Some(ref content) = messages[i].content {
|
||||||
|
if content.len() > 200 {
|
||||||
|
// Keep first 150 chars + note
|
||||||
|
let preview = &content[..content.len().min(150)];
|
||||||
|
messages[i].content = Some(format!(
|
||||||
|
"[compacted] {}... ({} bytes omitted)",
|
||||||
|
preview,
|
||||||
|
content.len() - 150
|
||||||
|
));
|
||||||
|
compacted = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if compacted {
|
||||||
|
tracing::info!("ACC: moderate compaction — truncated old tool results");
|
||||||
|
}
|
||||||
|
|
||||||
|
compacted
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aggressive compaction: replace the entire compactable range with a
|
||||||
|
/// single summary message. This collapses all old assistant/tool
|
||||||
|
/// exchanges into one user message that describes what happened.
|
||||||
|
fn compact_history_aggressive(
|
||||||
|
messages: &mut Vec<Message>,
|
||||||
|
start: usize,
|
||||||
|
end: usize,
|
||||||
|
) -> bool {
|
||||||
|
if end <= start {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build a summary of what happened in the compacted range
|
||||||
|
let mut summary_parts: Vec<String> = Vec::new();
|
||||||
|
let mut tool_calls_seen: Vec<String> = Vec::new();
|
||||||
|
let mut assistant_snippets: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
for msg in &messages[start..end] {
|
||||||
|
match msg.role.as_str() {
|
||||||
|
"assistant" => {
|
||||||
|
if let Some(ref tc) = msg.tool_calls {
|
||||||
|
for call in tc {
|
||||||
|
tool_calls_seen.push(call.function.name.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(ref text) = msg.content {
|
||||||
|
if !text.is_empty() {
|
||||||
|
let snippet = if text.len() > 100 {
|
||||||
|
format!("{}...", &text[..100])
|
||||||
|
} else {
|
||||||
|
text.clone()
|
||||||
|
};
|
||||||
|
assistant_snippets.push(snippet);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"tool" => {
|
||||||
|
// Already captured via tool_calls on assistant
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !tool_calls_seen.is_empty() {
|
||||||
|
summary_parts.push(format!(
|
||||||
|
"Tools used: {}",
|
||||||
|
tool_calls_seen.join(", ")
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if !assistant_snippets.is_empty() {
|
||||||
|
let max_snippets = assistant_snippets.len().min(5);
|
||||||
|
summary_parts.push(format!(
|
||||||
|
"Key outputs:\n{}",
|
||||||
|
assistant_snippets[..max_snippets].join("\n- ")
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let summary = if summary_parts.is_empty() {
|
||||||
|
"[compacted] Previous conversation history was summarized to save context.".to_string()
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"[compacted] Previous conversation summary ({} messages compressed):\n{}",
|
||||||
|
end - start,
|
||||||
|
summary_parts.join("\n")
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Replace the range [start..end) with a single summary message
|
||||||
|
let preserved_tail: Vec<Message> = messages.drain(end..).collect();
|
||||||
|
messages.truncate(start);
|
||||||
|
messages.push(Message {
|
||||||
|
role: "user".into(),
|
||||||
|
content: Some(summary),
|
||||||
|
tool_calls: None,
|
||||||
|
tool_call_id: None,
|
||||||
|
});
|
||||||
|
messages.extend(preserved_tail);
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
removed = end - start,
|
||||||
|
"ACC: aggressive compaction — collapsed old history into summary"
|
||||||
|
);
|
||||||
|
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimate the context window size (in tokens) from a model name.
///
/// Matching is a case-insensitive substring search, so provider-prefixed
/// names such as `gemini/gemini-2.5-flash` work as well. More specific
/// OpenAI names are tested before the generic `gpt-4` fallback so that
/// large-window variants are not mistaken for the original 8K model.
///
/// Unknown models fall back to 128 000 tokens.
///
/// NOTE(review): local `llama` deployments are often configured with a much
/// smaller window than 128 000 — confirm against the serving configuration.
pub fn estimate_context_window(model: &str) -> u64 {
    let m = model.to_lowercase();
    let has = |needle: &str| m.contains(needle);

    // Anthropic: every Claude variant is given the same window.
    if has("claude") {
        return 200_000;
    }

    // Google Gemini: every variant is given the same window.
    if has("gemini") {
        return 1_000_000;
    }

    // Grok
    if has("grok") {
        return 131_072;
    }

    // OpenAI — most specific names first.
    if has("gpt-4o") || has("gpt-4-turbo") {
        return 128_000;
    }
    if has("gpt-4.1") {
        // GPT-4.1 family: roughly one million tokens, rounded down.
        return 1_000_000;
    }
    if has("gpt-4-32k") {
        return 32_768;
    }
    if has("gpt-4") {
        return 8_192;
    }
    if has("o1") || has("o3") || has("o4") {
        return 200_000;
    }

    // Llama / local
    if has("llama") {
        return 128_000;
    }

    // Fallback for unrecognised model names.
    128_000
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a message that carries only a role and text content.
    fn make_msg(role: &str, content: &str) -> Message {
        Message {
            content: Some(content.into()),
            role: role.into(),
            tool_call_id: None,
            tool_calls: None,
        }
    }

    /// Default configuration with a custom number of preserved tail messages.
    fn config_preserving(preserve_recent: usize) -> CompactionConfig {
        CompactionConfig {
            preserve_recent,
            ..CompactionConfig::default()
        }
    }

    #[test]
    fn test_compaction_level() {
        let config = CompactionConfig {
            context_window: 100_000,
            ..CompactionConfig::default()
        };

        // (prompt tokens, expected level) for a 100 000-token window.
        let expectations = [
            (50_000, CompactionLevel::None),
            (75_000, CompactionLevel::Moderate),
            (90_000, CompactionLevel::Aggressive),
        ];
        for (prompt_tokens, expected) in expectations {
            assert_eq!(check_compaction_level(prompt_tokens, &config), expected);
        }
    }

    #[test]
    fn test_moderate_compaction_truncates_tool_results() {
        let bulky_tool_result = Message {
            tool_call_id: Some("call_1".into()),
            ..make_msg("tool", &"x".repeat(500))
        };
        let mut messages = vec![
            make_msg("system", "system prompt"),
            make_msg("user", "do something"),
            make_msg("assistant", "I'll read the file"),
            bulky_tool_result,
            make_msg("assistant", "recent 1"),
            make_msg("user", "recent 2"),
        ];
        let config = config_preserving(2);

        let changed = compact_messages(&mut messages, CompactionLevel::Moderate, &config);

        assert!(changed);
        let tool_body = messages[3].content.as_ref().unwrap();
        assert!(tool_body.starts_with("[compacted]"));
        // Moderate compaction rewrites content but never removes messages.
        assert_eq!(messages.len(), 6);
    }

    #[test]
    fn test_aggressive_compaction_collapses_history() {
        let mut messages: Vec<Message> = [
            ("system", "system prompt"),
            ("user", "do something"),
            ("assistant", "step 1"),
            ("tool", "result 1"),
            ("assistant", "step 2"),
            ("tool", "result 2"),
            ("assistant", "step 3"),
            ("tool", "result 3"),
            // The final two entries form the preserved tail.
            ("assistant", "recent assistant"),
            ("user", "recent user"),
        ]
        .iter()
        .map(|&(role, content)| make_msg(role, content))
        .collect();
        let config = config_preserving(2);

        let changed = compact_messages(&mut messages, CompactionLevel::Aggressive, &config);

        assert!(changed);
        // system + user + summary + 2 preserved = 5
        assert_eq!(messages.len(), 5);
        assert_eq!(messages[0].role, "system");
        assert_eq!(messages[1].role, "user");
        assert!(messages[2].content.as_ref().unwrap().contains("[compacted]"));
        assert_eq!(messages[3].content.as_deref(), Some("recent assistant"));
        assert_eq!(messages[4].content.as_deref(), Some("recent user"));
    }

    #[test]
    fn test_no_compaction_when_too_few_messages() {
        let mut messages = vec![
            make_msg("system", "sys"),
            make_msg("user", "task"),
            make_msg("assistant", "done"),
        ];
        let config = config_preserving(3);

        let changed = compact_messages(&mut messages, CompactionLevel::Aggressive, &config);

        assert!(!changed);
        assert_eq!(messages.len(), 3);
    }

    #[test]
    fn test_estimate_context_window() {
        let cases = [
            ("claude-sonnet-4", 200_000),
            ("gemini-2.5-flash", 1_000_000),
            ("grok-3", 131_072),
            ("gpt-4o", 128_000),
            ("unknown-model", 128_000),
        ];
        for (model, expected) in cases {
            assert_eq!(estimate_context_window(model), expected);
        }
    }
}
|
||||||
|
|
@ -8,10 +8,12 @@
|
||||||
//! synops-agent --model gemini/gemini-2.5-flash --task "oppsummer denne filen"
|
//! synops-agent --model gemini/gemini-2.5-flash --task "oppsummer denne filen"
|
||||||
//! synops-agent --model ollama/llama3 --task "skriv en test"
|
//! synops-agent --model ollama/llama3 --task "skriv en test"
|
||||||
|
|
||||||
|
mod context;
|
||||||
mod provider;
|
mod provider;
|
||||||
mod tools;
|
mod tools;
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
use context::{CompactionConfig, CompactionLevel, check_compaction_level, compact_messages};
|
||||||
use provider::{ApiKeys, CompletionResponse, Message, TokenUsage, create_provider};
|
use provider::{ApiKeys, CompletionResponse, Message, TokenUsage, create_provider};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
@ -98,6 +100,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut total_usage: HashMap<String, TokenUsage> = HashMap::new();
|
let mut total_usage: HashMap<String, TokenUsage> = HashMap::new();
|
||||||
let mut iteration = 0;
|
let mut iteration = 0;
|
||||||
|
|
||||||
|
// Context compaction config
|
||||||
|
let compaction_config = CompactionConfig {
|
||||||
|
context_window: provider.context_window(),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
tracing::info!(
|
||||||
|
context_window = compaction_config.context_window,
|
||||||
|
"ACC konfigurert"
|
||||||
|
);
|
||||||
|
|
||||||
// === Agent loop ===
|
// === Agent loop ===
|
||||||
loop {
|
loop {
|
||||||
iteration += 1;
|
iteration += 1;
|
||||||
|
|
@ -125,6 +137,21 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// === Adaptive Context Compaction ===
|
||||||
|
// Use prompt_tokens from the API response as calibration anchor
|
||||||
|
let level = check_compaction_level(response.usage.input_tokens, &compaction_config);
|
||||||
|
if level != CompactionLevel::None {
|
||||||
|
let ratio = response.usage.input_tokens as f64 / compaction_config.context_window as f64;
|
||||||
|
tracing::warn!(
|
||||||
|
prompt_tokens = response.usage.input_tokens,
|
||||||
|
context_window = compaction_config.context_window,
|
||||||
|
ratio = format!("{:.1}%", ratio * 100.0),
|
||||||
|
level = ?level,
|
||||||
|
"ACC: kontekstkomprimering trigget"
|
||||||
|
);
|
||||||
|
compact_messages(&mut messages, level, &compaction_config);
|
||||||
|
}
|
||||||
|
|
||||||
// Check for tool calls
|
// Check for tool calls
|
||||||
let has_tool_calls = response
|
let has_tool_calls = response
|
||||||
.message
|
.message
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,11 @@ pub trait LlmProvider: Send + Sync {
|
||||||
|
|
||||||
/// Model identifier
|
/// Model identifier
|
||||||
fn model_id(&self) -> &str;
|
fn model_id(&self) -> &str;
|
||||||
|
|
||||||
|
/// Context window size in tokens.
|
||||||
|
fn context_window(&self) -> u64 {
|
||||||
|
crate::context::estimate_context_window(self.model_id())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue