Implementer Adaptive Context Compaction (ACC) i synops-agent

Automatisk kontekstkomprimering når meldingshistorikken nærmer seg
kontekstvinduets grense. Bruker prompt_tokens fra API-respons som
kalibreringsanker.

- Ny context.rs-modul med to kompaksjonsnivåer:
  - Moderat (>70%): trunkerer gamle tool-resultater
  - Aggressiv (>85%): kollapser eldre historikk til oppsummering
- Siste 5 meldinger bevares alltid urørt
- context_window() på LlmProvider-trait med automatisk estimering
- 5 enhetstester for kompaksjonslogikk
This commit is contained in:
vegard 2026-03-19 18:07:51 +00:00
parent 6099d0b209
commit 703a0addca
3 changed files with 417 additions and 0 deletions

View file

@ -0,0 +1,385 @@
//! Adaptive Context Compaction (ACC).
//!
//! Monitors token usage and compresses message history when approaching
//! the model's context window limit. Two compaction levels:
//!
//! - **Moderate** (>70% capacity): Summarize old tool results
//! - **Aggressive** (>85% capacity): Summarize entire older history
//!
//! The last N messages are always preserved untouched.
//! Uses `prompt_tokens` from API responses as the calibration anchor.
use crate::provider::Message;
/// Configuration for context compaction.
///
/// Thresholds are expressed as fractions of `context_window`; when the
/// observed `prompt_tokens / context_window` ratio crosses a threshold,
/// the corresponding compaction level is triggered (see
/// `check_compaction_level`).
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Context window size in tokens for the current model.
    pub context_window: u64,
    /// Threshold for moderate compaction (fraction, e.g. 0.70).
    pub moderate_threshold: f64,
    /// Threshold for aggressive compaction (fraction, e.g. 0.85).
    pub aggressive_threshold: f64,
    /// Number of recent messages to always preserve.
    pub preserve_recent: usize,
}
impl Default for CompactionConfig {
fn default() -> Self {
Self {
context_window: 128_000,
moderate_threshold: 0.70,
aggressive_threshold: 0.85,
preserve_recent: 5,
}
}
}
/// Result of a compaction check.
///
/// Ordered from least to most invasive action. The enum is a plain
/// value type, so it derives `Copy`/`Clone` (avoids needless moves when
/// passed around) and `Eq` alongside `PartialEq` (the comparison is
/// total — there are no float-like partial cases).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionLevel {
    /// Under threshold — no action needed.
    None,
    /// >70% — summarize old tool results.
    Moderate,
    /// >85% — aggressively summarize entire older history.
    Aggressive,
}
/// Determine which compaction level the current usage calls for.
///
/// `prompt_tokens` comes straight from the provider's API response and
/// serves as the calibration anchor; it is compared against
/// `config.context_window` as a fraction.
pub fn check_compaction_level(prompt_tokens: u64, config: &CompactionConfig) -> CompactionLevel {
    let usage = prompt_tokens as f64 / config.context_window as f64;
    match usage {
        u if u >= config.aggressive_threshold => CompactionLevel::Aggressive,
        u if u >= config.moderate_threshold => CompactionLevel::Moderate,
        _ => CompactionLevel::None,
    }
}
/// Apply compaction to the message history.
///
/// Modifies `messages` in-place. The system message (index 0), the
/// initial user task (index 1), and the last `preserve_recent` messages
/// are never touched.
///
/// Returns true if any compaction was performed.
pub fn compact_messages(
    messages: &mut Vec<Message>,
    level: CompactionLevel,
    config: &CompactionConfig,
) -> bool {
    // Indices 0 and 1 (system prompt + initial user message) are pinned.
    let fixed_prefix = 2;
    let len = messages.len();

    // Nothing to do when there is no compactable middle section between
    // the pinned prefix and the preserved tail.
    if len <= fixed_prefix + config.preserve_recent {
        return false;
    }
    let tail_start = len - config.preserve_recent;

    match level {
        CompactionLevel::None => false,
        CompactionLevel::Moderate => compact_tool_results(messages, fixed_prefix, tail_start),
        CompactionLevel::Aggressive => {
            compact_history_aggressive(messages, fixed_prefix, tail_start)
        }
    }
}
/// Moderate compaction: replace tool result contents with short summaries.
///
/// Keeps assistant messages and their tool_calls intact, but truncates
/// tool response bodies in `messages[start..end]` down to a short
/// preview plus a note stating how many bytes were omitted.
///
/// Returns true if at least one tool result was truncated.
fn compact_tool_results(
    messages: &mut Vec<Message>,
    start: usize,
    end: usize,
) -> bool {
    let mut compacted = false;
    for msg in &mut messages[start..end] {
        if msg.role != "tool" {
            continue;
        }
        if let Some(ref content) = msg.content {
            if content.len() > 200 {
                // Byte index 150 may fall inside a multi-byte UTF-8
                // sequence; back off to the nearest char boundary so
                // the slice below cannot panic on non-ASCII output.
                let mut cut = 150;
                while !content.is_char_boundary(cut) {
                    cut -= 1;
                }
                let preview = &content[..cut];
                msg.content = Some(format!(
                    "[compacted] {}... ({} bytes omitted)",
                    preview,
                    content.len() - cut
                ));
                compacted = true;
            }
        }
    }
    if compacted {
        tracing::info!("ACC: moderate compaction — truncated old tool results");
    }
    compacted
}
/// Aggressive compaction: replace the entire compactable range with a
/// single summary message. This collapses all old assistant/tool
/// exchanges into one user message that describes what happened.
///
/// Returns true if the range `[start..end)` was non-empty and collapsed.
fn compact_history_aggressive(
    messages: &mut Vec<Message>,
    start: usize,
    end: usize,
) -> bool {
    if end <= start {
        return false;
    }

    // Collect tool names and short assistant snippets from the range
    // about to be discarded, so the summary retains some signal.
    let mut tool_calls_seen: Vec<String> = Vec::new();
    let mut assistant_snippets: Vec<String> = Vec::new();
    for msg in &messages[start..end] {
        if msg.role != "assistant" {
            // Tool results are already represented via the assistant's
            // tool_calls; other roles carry nothing worth summarizing.
            continue;
        }
        if let Some(ref tc) = msg.tool_calls {
            tool_calls_seen.extend(tc.iter().map(|call| call.function.name.clone()));
        }
        if let Some(ref text) = msg.content {
            if !text.is_empty() {
                assistant_snippets.push(truncate_snippet(text, 100));
            }
        }
    }

    let mut summary_parts: Vec<String> = Vec::new();
    if !tool_calls_seen.is_empty() {
        summary_parts.push(format!("Tools used: {}", tool_calls_seen.join(", ")));
    }
    if !assistant_snippets.is_empty() {
        let max_snippets = assistant_snippets.len().min(5);
        // Prefix every snippet so the list renders as uniform bullets
        // (the previous join left the first entry without a bullet).
        let bullets: Vec<String> = assistant_snippets[..max_snippets]
            .iter()
            .map(|s| format!("- {}", s))
            .collect();
        summary_parts.push(format!("Key outputs:\n{}", bullets.join("\n")));
    }

    let summary = if summary_parts.is_empty() {
        "[compacted] Previous conversation history was summarized to save context.".to_string()
    } else {
        format!(
            "[compacted] Previous conversation summary ({} messages compressed):\n{}",
            end - start,
            summary_parts.join("\n")
        )
    };

    // Replace the range [start..end) with the single summary message in
    // one pass; splice shifts the preserved tail without a temp Vec.
    messages.splice(
        start..end,
        std::iter::once(Message {
            role: "user".into(),
            content: Some(summary),
            tool_calls: None,
            tool_call_id: None,
        }),
    );

    tracing::info!(
        removed = end - start,
        "ACC: aggressive compaction — collapsed old history into summary"
    );
    true
}

/// Truncate `text` to at most `max_bytes` bytes, appending "..." when
/// cut. Backs off to the nearest char boundary so byte-index slicing
/// cannot panic mid-way through a multi-byte UTF-8 character.
fn truncate_snippet(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_string();
    }
    let mut cut = max_bytes;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &text[..cut])
}
/// Estimate context window size (in tokens) from a model name.
///
/// Matching is case-insensitive and substring-based; the first matching
/// family wins, so longer names (e.g. "gpt-4o") are tested before their
/// prefixes (e.g. "gpt-4"). Returns a conservative 128k default for
/// unknown models.
pub fn estimate_context_window(model: &str) -> u64 {
    let m = model.to_lowercase();
    // Anthropic: all current Claude variants share a 200k window.
    // (The previous per-variant check returned the same value for every
    // branch, so it was dead code.)
    if m.contains("claude") {
        return 200_000;
    }
    // Google: Gemini models advertise a 1M-token window.
    if m.contains("gemini") {
        return 1_000_000;
    }
    // xAI Grok.
    if m.contains("grok") {
        return 131_072;
    }
    // OpenAI — check the longer names before the bare "gpt-4" prefix.
    if m.contains("gpt-4o") || m.contains("gpt-4-turbo") {
        return 128_000;
    }
    if m.contains("gpt-4") {
        return 8_192;
    }
    // OpenAI reasoning models (o1/o3/o4).
    if m.contains("o1") || m.contains("o3") || m.contains("o4") {
        return 200_000;
    }
    // Llama / local models.
    if m.contains("llama") {
        return 128_000;
    }
    // Conservative default for anything unrecognized.
    128_000
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a plain text message for the given role.
    fn make_msg(role: &str, content: &str) -> Message {
        Message {
            role: role.into(),
            content: Some(content.into()),
            tool_calls: None,
            tool_call_id: None,
        }
    }

    #[test]
    fn test_compaction_level() {
        let config = CompactionConfig {
            context_window: 100_000,
            ..Default::default()
        };
        // 50% → none, 75% → moderate (past 70%), 90% → aggressive (past 85%).
        let cases = [
            (50_000, CompactionLevel::None),
            (75_000, CompactionLevel::Moderate),
            (90_000, CompactionLevel::Aggressive),
        ];
        for (tokens, expected) in cases {
            assert_eq!(check_compaction_level(tokens, &config), expected);
        }
    }

    #[test]
    fn test_moderate_compaction_truncates_tool_results() {
        let mut messages = vec![
            make_msg("system", "system prompt"),
            make_msg("user", "do something"),
            make_msg("assistant", "I'll read the file"),
        ];
        // A 500-byte tool result is well past the 200-byte truncation limit.
        messages.push(Message {
            role: "tool".into(),
            content: Some("x".repeat(500)),
            tool_calls: None,
            tool_call_id: Some("call_1".into()),
        });
        messages.push(make_msg("assistant", "recent 1"));
        messages.push(make_msg("user", "recent 2"));

        let config = CompactionConfig {
            preserve_recent: 2,
            ..Default::default()
        };
        assert!(compact_messages(&mut messages, CompactionLevel::Moderate, &config));
        // Message count is unchanged; only the tool body was truncated.
        assert_eq!(messages.len(), 6);
        let tool_body = messages[3].content.as_ref().unwrap();
        assert!(tool_body.starts_with("[compacted]"));
    }

    #[test]
    fn test_aggressive_compaction_collapses_history() {
        let mut messages = vec![
            make_msg("system", "system prompt"),
            make_msg("user", "do something"),
        ];
        for step in 1..=3 {
            messages.push(make_msg("assistant", &format!("step {}", step)));
            messages.push(make_msg("tool", &format!("result {}", step)));
        }
        // The tail below must survive compaction untouched.
        messages.push(make_msg("assistant", "recent assistant"));
        messages.push(make_msg("user", "recent user"));

        let config = CompactionConfig {
            preserve_recent: 2,
            ..Default::default()
        };
        assert!(compact_messages(&mut messages, CompactionLevel::Aggressive, &config));
        // system + user + summary + 2 preserved = 5
        assert_eq!(messages.len(), 5);
        assert_eq!(messages[0].role, "system");
        assert_eq!(messages[1].role, "user");
        assert!(messages[2].content.as_ref().unwrap().contains("[compacted]"));
        assert_eq!(messages[3].content.as_deref(), Some("recent assistant"));
        assert_eq!(messages[4].content.as_deref(), Some("recent user"));
    }

    #[test]
    fn test_no_compaction_when_too_few_messages() {
        // system + user + one assistant message is below the minimum
        // (fixed prefix + preserve_recent), so nothing should happen.
        let mut messages = vec![
            make_msg("system", "sys"),
            make_msg("user", "task"),
            make_msg("assistant", "done"),
        ];
        let config = CompactionConfig {
            preserve_recent: 3,
            ..Default::default()
        };
        assert!(!compact_messages(&mut messages, CompactionLevel::Aggressive, &config));
        assert_eq!(messages.len(), 3);
    }

    #[test]
    fn test_estimate_context_window() {
        for (model, expected) in [
            ("claude-sonnet-4", 200_000u64),
            ("gemini-2.5-flash", 1_000_000),
            ("grok-3", 131_072),
            ("gpt-4o", 128_000),
            ("unknown-model", 128_000),
        ] {
            assert_eq!(estimate_context_window(model), expected, "model: {}", model);
        }
    }
}

View file

@ -8,10 +8,12 @@
//! synops-agent --model gemini/gemini-2.5-flash --task "oppsummer denne filen"
//! synops-agent --model ollama/llama3 --task "skriv en test"
mod context;
mod provider;
mod tools;
use clap::Parser;
use context::{CompactionConfig, CompactionLevel, check_compaction_level, compact_messages};
use provider::{ApiKeys, CompletionResponse, Message, TokenUsage, create_provider};
use std::collections::HashMap;
use std::path::PathBuf;
@ -98,6 +100,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut total_usage: HashMap<String, TokenUsage> = HashMap::new();
let mut iteration = 0;
// Context compaction config
let compaction_config = CompactionConfig {
context_window: provider.context_window(),
..Default::default()
};
tracing::info!(
context_window = compaction_config.context_window,
"ACC konfigurert"
);
// === Agent loop ===
loop {
iteration += 1;
@ -125,6 +137,21 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
);
}
// === Adaptive Context Compaction ===
// Use prompt_tokens from the API response as calibration anchor
let level = check_compaction_level(response.usage.input_tokens, &compaction_config);
if level != CompactionLevel::None {
let ratio = response.usage.input_tokens as f64 / compaction_config.context_window as f64;
tracing::warn!(
prompt_tokens = response.usage.input_tokens,
context_window = compaction_config.context_window,
ratio = format!("{:.1}%", ratio * 100.0),
level = ?level,
"ACC: kontekstkomprimering trigget"
);
compact_messages(&mut messages, level, &compaction_config);
}
// Check for tool calls
let has_tool_calls = response
.message

View file

@ -79,6 +79,11 @@ pub trait LlmProvider: Send + Sync {
/// Model identifier
fn model_id(&self) -> &str;
/// Context window size in tokens.
///
/// Default implementation estimates the window from the model name via
/// `crate::context::estimate_context_window`; providers that know their
/// model's exact limit should override this.
fn context_window(&self) -> u64 {
    crate::context::estimate_context_window(self.model_id())
}
}
#[derive(Debug, thiserror::Error)]