"""
Memory Store — Processing history and adaptive memory for the agentic system.

Implements short-term (session) and long-term (persistent JSON) memory,
enabling the agent to learn from past processing decisions.
"""

import json
import os
import hashlib
import logging
from typing import Optional, List
from datetime import datetime

logger = logging.getLogger(__name__)

MEMORY_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), ".memory")
HISTORY_FILE = os.path.join(MEMORY_DIR, "processing_history.json")

# Only the most recent records are written to disk; the in-memory list is
# not trimmed during a session.
_MAX_PERSISTED_RECORDS = 100
# find_similar() only inspects this many of the most recent records.
_SIMILARITY_WINDOW = 20
# Two resolutions are "similar" when they differ by strictly less than this.
_DPI_TOLERANCE = 50


def _as_float(value, default=None):
    """Return *value* as a float, or *default* if it is None or not numeric."""
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return default


class MemoryStore:
    """Persistent memory for the agentic OCR system.

    Records are plain dicts held in ``self._history`` (oldest first) and
    mirrored to ``HISTORY_FILE`` as JSON after every mutation.
    """

    def __init__(self):
        """Ensure the memory directory exists and load any saved history."""
        os.makedirs(MEMORY_DIR, exist_ok=True)
        self._history = self._load_history()

    def _load_history(self) -> List[dict]:
        """Load processing history from disk.

        Loading is best-effort: a missing, unreadable, or malformed file
        yields an empty history (with a warning) instead of an exception.

        Returns:
            The list of record dicts found in ``HISTORY_FILE``; entries that
            are not dicts are dropped.
        """
        try:
            if not os.path.exists(HISTORY_FILE):
                return []
            with open(HISTORY_FILE, 'r', encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            # Deliberately broad: a bad history file must never prevent the
            # store from starting.
            logger.warning("Could not load history: %s", e)
            return []

        # Valid JSON is not necessarily a list of records (could be an
        # object, null, a number...). Every other method assumes a list of
        # dicts, so enforce that here.
        if not isinstance(data, list):
            logger.warning(
                "Ignoring history file: expected a JSON list, got %s",
                type(data).__name__,
            )
            return []

        records = [item for item in data if isinstance(item, dict)]
        if len(records) != len(data):
            logger.warning(
                "Dropped %d malformed history entries",
                len(data) - len(records),
            )
        return records

    def _save_history(self):
        """Save the most recent records to disk (best-effort).

        The data is written to a temporary sibling file and then moved over
        ``HISTORY_FILE``, so a failure part-way through serialization leaves
        the previously saved history intact. Failures are logged, not raised.
        """
        tmp_path = HISTORY_FILE + ".tmp"
        try:
            with open(tmp_path, 'w', encoding="utf-8") as f:
                json.dump(
                    self._history[-_MAX_PERSISTED_RECORDS:],
                    f,
                    indent=2,
                    default=str,
                )
            os.replace(tmp_path, HISTORY_FILE)
        except Exception as e:
            logger.warning("Could not save history: %s", e)
            # Do not leave a partial temp file behind; it may not exist at
            # all if open() itself failed, which is fine.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    def save_record(self, record: dict):
        """Save a processing record to long-term memory.

        Args:
            record: The record to store. It is stamped in place with a
                ``"saved_at"`` ISO-8601 timestamp (local time) before being
                appended and persisted.
        """
        record["saved_at"] = datetime.now().isoformat()
        self._history.append(record)
        self._save_history()
        logger.info("Saved processing record (total: %d)", len(self._history))

    @staticmethod
    def _dpi_of(props: dict) -> Optional[float]:
        """Return the resolution stored in *props* as a float.

        A missing or ``None`` value counts as 0; a value that cannot be
        interpreted as a number yields ``None`` (not comparable).
        """
        raw = props.get("resolution_dpi")
        if raw is None:
            return 0.0
        return _as_float(raw)

    def find_similar(self, image_properties: dict) -> Optional[dict]:
        """Find a similar past processing record based on image properties.

        A record matches when its ``image_properties["quality_rating"]``
        equals the target's and its ``resolution_dpi`` differs from the
        target's by less than 50. Only the 20 most recent records are
        searched, newest first.

        Args:
            image_properties: Properties of the image being processed; the
                ``"quality_rating"`` and ``"resolution_dpi"`` keys are read.

        Returns:
            The most recent matching record, or ``None`` if there is none.
        """
        if not self._history:
            return None

        image_properties = image_properties or {}
        target_quality = image_properties.get("quality_rating", "")
        target_dpi = self._dpi_of(image_properties)
        if target_dpi is None:
            # A non-numeric target resolution cannot be compared to anything.
            return None

        for record in reversed(self._history[-_SIMILARITY_WINDOW:]):
            if not isinstance(record, dict):
                continue
            past_props = record.get("image_properties") or {}
            if not isinstance(past_props, dict):
                continue
            if past_props.get("quality_rating") != target_quality:
                continue
            past_dpi = self._dpi_of(past_props)
            if past_dpi is not None and abs(past_dpi - target_dpi) < _DPI_TOLERANCE:
                return record
        return None

    def get_history(self, limit: int = 20) -> List[dict]:
        """Get recent processing history.

        Args:
            limit: Maximum number of records to return.

        Returns:
            Up to *limit* of the most recent records, oldest first. A limit
            of zero or less returns an empty list.
        """
        # Guard needed because a[-0:] is a[0:], i.e. the whole list, and a
        # negative limit would turn into a positive start index.
        if limit <= 0:
            return []
        return self._history[-limit:]

    def get_stats(self) -> dict:
        """Get aggregate processing statistics.

        Returns:
            A dict with ``"total_processed"`` (record count),
            ``"avg_confidence"`` (mean of ``confidence_score``; missing or
            non-numeric scores count as 0) and ``"quality_distribution"``
            (count per ``quality_assessment`` value, ``"unknown"`` when the
            key is absent).
        """
        if not self._history:
            return {
                "total_processed": 0,
                "avg_confidence": 0,
                "quality_distribution": {},
            }

        confidences = [
            _as_float(r.get("confidence_score"), 0.0) for r in self._history
        ]

        qualities = {}
        for r in self._history:
            q = r.get("quality_assessment", "unknown")
            qualities[q] = qualities.get(q, 0) + 1

        return {
            "total_processed": len(self._history),
            "avg_confidence": sum(confidences) / len(confidences),
            "quality_distribution": qualities,
        }

    def clear(self):
        """Clear all history, both in memory and on disk."""
        self._history = []
        self._save_history()