From 703a0addca44ae30d1821560f2c56f116dae8404 Mon Sep 17 00:00:00 2001 From: vegard Date: Thu, 19 Mar 2026 18:07:51 +0000 Subject: [PATCH] Implementer Adaptive Context Compaction (ACC) i synops-agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automatisk kontekstkomprimering når meldingshistorikken nærmer seg kontekstvinduets grense. Bruker prompt_tokens fra API-respons som kalibreringsanker. - Ny context.rs-modul med to kompaksjonsnivåer: - Moderat (>70%): trunkerer gamle tool-resultater - Aggressiv (>85%): kollapser eldre historikk til oppsummering - Siste 5 meldinger bevares alltid urørt - context_window() på LlmProvider-trait med automatisk estimering - 5 enhetstester for kompaksjonslogikk --- tools/synops-agent/src/context.rs | 385 +++++++++++++++++++++++++++++ tools/synops-agent/src/main.rs | 27 ++ tools/synops-agent/src/provider.rs | 5 + 3 files changed, 417 insertions(+) create mode 100644 tools/synops-agent/src/context.rs diff --git a/tools/synops-agent/src/context.rs b/tools/synops-agent/src/context.rs new file mode 100644 index 0000000..b882d2e --- /dev/null +++ b/tools/synops-agent/src/context.rs @@ -0,0 +1,385 @@ +//! Adaptive Context Compaction (ACC). +//! +//! Monitors token usage and compresses message history when approaching +//! the model's context window limit. Two compaction levels: +//! +//! - **Moderate** (>70% capacity): Summarize old tool results +//! - **Aggressive** (>85% capacity): Summarize entire older history +//! +//! The last N messages are always preserved untouched. +//! Uses `prompt_tokens` from API responses as the calibration anchor. + +use crate::provider::Message; + +/// Configuration for context compaction. +#[derive(Debug, Clone)] +pub struct CompactionConfig { + /// Context window size in tokens for the current model. + pub context_window: u64, + /// Threshold for moderate compaction (fraction, e.g. 0.70). + pub moderate_threshold: f64, + /// Threshold for aggressive compaction (fraction, e.g. 0.85). + pub aggressive_threshold: f64, + /// Number of recent messages to always preserve. + pub preserve_recent: usize, +} + +impl Default for CompactionConfig { + fn default() -> Self { + Self { + context_window: 128_000, + moderate_threshold: 0.70, + aggressive_threshold: 0.85, + preserve_recent: 5, + } + } +} + +/// Result of a compaction check. +#[derive(Debug, PartialEq)] +pub enum CompactionLevel { + /// Under threshold — no action needed. + None, + /// >70% — summarize old tool results. + Moderate, + /// >85% — aggressively summarize entire older history. + Aggressive, +} + +/// Check what compaction level is needed based on current prompt_tokens. +pub fn check_compaction_level(prompt_tokens: u64, config: &CompactionConfig) -> CompactionLevel { + let ratio = prompt_tokens as f64 / config.context_window as f64; + if ratio >= config.aggressive_threshold { + CompactionLevel::Aggressive + } else if ratio >= config.moderate_threshold { + CompactionLevel::Moderate + } else { + CompactionLevel::None + } +} + +/// Apply compaction to the message history. +/// +/// Modifies `messages` in-place. The system message (index 0) and +/// the last `preserve_recent` messages are never touched. +/// +/// Returns true if any compaction was performed. +pub fn compact_messages( + messages: &mut Vec, + level: CompactionLevel, + config: &CompactionConfig, +) -> bool { + if level == CompactionLevel::None { + return false; + } + + // We need at least: system + user + some history + preserved tail + // system is at index 0, user task at index 1 + let fixed_prefix = 2; // system + initial user message + let total = messages.len(); + + if total <= fixed_prefix + config.preserve_recent { + // Not enough messages to compact + return false; + } + + let compactable_end = total - config.preserve_recent; + + match level { + CompactionLevel::Moderate => { + compact_tool_results(messages, fixed_prefix, compactable_end) + } + CompactionLevel::Aggressive => { + compact_history_aggressive(messages, fixed_prefix, compactable_end) + } + CompactionLevel::None => false, + } +} + +/// Moderate compaction: replace tool result contents with short summaries. +/// Keeps assistant messages and their tool_calls intact, but truncates +/// the tool response bodies. +fn compact_tool_results( + messages: &mut Vec, + start: usize, + end: usize, +) -> bool { + let mut compacted = false; + + for i in start..end { + if messages[i].role == "tool" { + if let Some(ref content) = messages[i].content { + if content.len() > 200 { + // Keep first 150 chars + note + let preview = &content[..content.len().min(150)]; + messages[i].content = Some(format!( + "[compacted] {}... ({} bytes omitted)", + preview, + content.len() - 150 + )); + compacted = true; + } + } + } + } + + if compacted { + tracing::info!("ACC: moderate compaction — truncated old tool results"); + } + + compacted +} + +/// Aggressive compaction: replace the entire compactable range with a +/// single summary message. This collapses all old assistant/tool +/// exchanges into one user message that describes what happened. +fn compact_history_aggressive( + messages: &mut Vec, + start: usize, + end: usize, +) -> bool { + if end <= start { + return false; + } + + // Build a summary of what happened in the compacted range + let mut summary_parts: Vec = Vec::new(); + let mut tool_calls_seen: Vec = Vec::new(); + let mut assistant_snippets: Vec = Vec::new(); + + for msg in &messages[start..end] { + match msg.role.as_str() { + "assistant" => { + if let Some(ref tc) = msg.tool_calls { + for call in tc { + tool_calls_seen.push(call.function.name.clone()); + } + } + if let Some(ref text) = msg.content { + if !text.is_empty() { + let snippet = if text.len() > 100 { + format!("{}...", &text[..100]) + } else { + text.clone() + }; + assistant_snippets.push(snippet); + } + } + } + "tool" => { + // Already captured via tool_calls on assistant + } + _ => {} + } + } + + if !tool_calls_seen.is_empty() { + summary_parts.push(format!( + "Tools used: {}", + tool_calls_seen.join(", ") + )); + } + if !assistant_snippets.is_empty() { + let max_snippets = assistant_snippets.len().min(5); + summary_parts.push(format!( + "Key outputs:\n{}", + assistant_snippets[..max_snippets].join("\n- ") + )); + } + + let summary = if summary_parts.is_empty() { + "[compacted] Previous conversation history was summarized to save context.".to_string() + } else { + format!( + "[compacted] Previous conversation summary ({} messages compressed):\n{}", + end - start, + summary_parts.join("\n") + ) + }; + + // Replace the range [start..end) with a single summary message + let preserved_tail: Vec = messages.drain(end..).collect(); + messages.truncate(start); + messages.push(Message { + role: "user".into(), + content: Some(summary), + tool_calls: None, + tool_call_id: None, + }); + messages.extend(preserved_tail); + + tracing::info!( + removed = end - start, + "ACC: aggressive compaction — collapsed old history into summary" + ); + + true +} + +/// Estimate context window size from model name. +/// Returns a conservative estimate if unknown. +pub fn estimate_context_window(model: &str) -> u64 { + let m = model.to_lowercase(); + + // Anthropic + if m.contains("claude") { + if m.contains("opus") || m.contains("sonnet") || m.contains("haiku") { + return 200_000; + } + return 200_000; + } + + // Google Gemini + if m.contains("gemini") { + if m.contains("flash") || m.contains("pro") { + return 1_000_000; + } + return 1_000_000; + } + + // Grok + if m.contains("grok") { + return 131_072; + } + + // OpenAI + if m.contains("gpt-4o") || m.contains("gpt-4-turbo") { + return 128_000; + } + if m.contains("gpt-4") { + return 8_192; + } + if m.contains("o1") || m.contains("o3") || m.contains("o4") { + return 200_000; + } + + // Llama / local + if m.contains("llama") { + return 128_000; + } + + // Conservative default + 128_000 +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_msg(role: &str, content: &str) -> Message { + Message { + role: role.into(), + content: Some(content.into()), + tool_calls: None, + tool_call_id: None, + } + } + + #[test] + fn test_compaction_level() { + let config = CompactionConfig { + context_window: 100_000, + ..Default::default() + }; + + assert_eq!( + check_compaction_level(50_000, &config), + CompactionLevel::None + ); + assert_eq!( + check_compaction_level(75_000, &config), + CompactionLevel::Moderate + ); + assert_eq!( + check_compaction_level(90_000, &config), + CompactionLevel::Aggressive + ); + } + + #[test] + fn test_moderate_compaction_truncates_tool_results() { + let long_output = "x".repeat(500); + let mut messages = vec![ + make_msg("system", "system prompt"), + make_msg("user", "do something"), + make_msg("assistant", "I'll read the file"), + Message { + role: "tool".into(), + content: Some(long_output), + tool_calls: None, + tool_call_id: Some("call_1".into()), + }, + make_msg("assistant", "recent 1"), + make_msg("user", "recent 2"), + ]; + + let config = CompactionConfig { + preserve_recent: 2, + ..Default::default() + }; + + let compacted = compact_messages(&mut messages, CompactionLevel::Moderate, &config); + assert!(compacted); + assert!(messages[3].content.as_ref().unwrap().starts_with("[compacted]")); + assert_eq!(messages.len(), 6); // same count, just truncated + } + + #[test] + fn test_aggressive_compaction_collapses_history() { + let mut messages = vec![ + make_msg("system", "system prompt"), + make_msg("user", "do something"), + make_msg("assistant", "step 1"), + make_msg("tool", "result 1"), + make_msg("assistant", "step 2"), + make_msg("tool", "result 2"), + make_msg("assistant", "step 3"), + make_msg("tool", "result 3"), + // These should be preserved: + make_msg("assistant", "recent assistant"), + make_msg("user", "recent user"), + ]; + + let config = CompactionConfig { + preserve_recent: 2, + ..Default::default() + }; + + let compacted = compact_messages(&mut messages, CompactionLevel::Aggressive, &config); + assert!(compacted); + // system + user + summary + 2 preserved = 5 + assert_eq!(messages.len(), 5); + assert_eq!(messages[0].role, "system"); + assert_eq!(messages[1].role, "user"); + assert!(messages[2].content.as_ref().unwrap().contains("[compacted]")); + assert_eq!(messages[3].content.as_deref(), Some("recent assistant")); + assert_eq!(messages[4].content.as_deref(), Some("recent user")); + } + + #[test] + fn test_no_compaction_when_too_few_messages() { + let mut messages = vec![ + make_msg("system", "sys"), + make_msg("user", "task"), + make_msg("assistant", "done"), + ]; + + let config = CompactionConfig { + preserve_recent: 3, + ..Default::default() + }; + + let compacted = compact_messages(&mut messages, CompactionLevel::Aggressive, &config); + assert!(!compacted); + assert_eq!(messages.len(), 3); + } + + #[test] + fn test_estimate_context_window() { + assert_eq!(estimate_context_window("claude-sonnet-4"), 200_000); + assert_eq!(estimate_context_window("gemini-2.5-flash"), 1_000_000); + assert_eq!(estimate_context_window("grok-3"), 131_072); + assert_eq!(estimate_context_window("gpt-4o"), 128_000); + assert_eq!(estimate_context_window("unknown-model"), 128_000); + } +} diff --git a/tools/synops-agent/src/main.rs b/tools/synops-agent/src/main.rs index 5b8dded..73b5c63 100644 --- a/tools/synops-agent/src/main.rs +++ b/tools/synops-agent/src/main.rs @@ -8,10 +8,12 @@ //! synops-agent --model gemini/gemini-2.5-flash --task "oppsummer denne filen" //! synops-agent --model ollama/llama3 --task "skriv en test" +mod context; mod provider; mod tools; use clap::Parser; +use context::{CompactionConfig, CompactionLevel, check_compaction_level, compact_messages}; use provider::{ApiKeys, CompletionResponse, Message, TokenUsage, create_provider}; use std::collections::HashMap; use std::path::PathBuf; @@ -98,6 +100,16 @@ async fn main() -> Result<(), Box> { let mut total_usage: HashMap = HashMap::new(); let mut iteration = 0; + // Context compaction config + let compaction_config = CompactionConfig { + context_window: provider.context_window(), + ..Default::default() + }; + tracing::info!( + context_window = compaction_config.context_window, + "ACC konfigurert" + ); + // === Agent loop === loop { iteration += 1; @@ -125,6 +137,21 @@ async fn main() -> Result<(), Box> { ); } + // === Adaptive Context Compaction === + // Use prompt_tokens from the API response as calibration anchor + let level = check_compaction_level(response.usage.input_tokens, &compaction_config); + if level != CompactionLevel::None { + let ratio = response.usage.input_tokens as f64 / compaction_config.context_window as f64; + tracing::warn!( + prompt_tokens = response.usage.input_tokens, + context_window = compaction_config.context_window, + ratio = format!("{:.1}%", ratio * 100.0), + level = ?level, + "ACC: kontekstkomprimering trigget" + ); + compact_messages(&mut messages, level, &compaction_config); + } + // Check for tool calls let has_tool_calls = response .message diff --git a/tools/synops-agent/src/provider.rs b/tools/synops-agent/src/provider.rs index 5aa3e34..52d2ebd 100644 --- a/tools/synops-agent/src/provider.rs +++ b/tools/synops-agent/src/provider.rs @@ -79,6 +79,11 @@ pub trait LlmProvider: Send + Sync { /// Model identifier fn model_id(&self) -> &str; + + /// Context window size in tokens. + fn context_window(&self) -> u64 { + crate::context::estimate_context_window(self.model_id()) + } } #[derive(Debug, thiserror::Error)]