"""
Recursive Context Manager for Clawdbot

CHANGELOG [2025-01-28 - Josh]
CREATED: Initial recursive context manager with ChromaDB vector search,
  file reading, and conversation persistence. Based on MIT Recursive
  Language Model technique for unlimited context.

CHANGELOG [2026-01-31 - Gemini]
ADDED: Phase 1 Orchestrator tools: create_shadow_branch, write_file, shell_execute.
ADDED: Documentation Scanner to mandate Living Changelog headers.
FIXED: PermissionError on /.cache by forcing ONNXMiniLM_L6_V2.DOWNLOAD_PATH.

CHANGELOG [2026-01-31 - Claude/Opus]
ADDED: get_stats() method — was called by app.py but never defined, causing
  crash on startup. Returns dict with file counts, conversation counts,
  collection sizes, and persistence status.
ADDED: list_files() method — directory exploration tool for the agent.
  Returns tree of files/dirs at a given path relative to repo root.
ADDED: search_conversations() method — semantic search over saved conversation
  history in ChromaDB. Essential for persistent memory across sessions.
ADDED: search_testament() method — searches for Testament/architectural decision
  files and returns matching content. Falls back to codebase search if no
  dedicated testament files exist.
ADDED: index_repository() method — actually indexes the repo into ChromaDB on
  init. Without this, search_code() always returned empty because nothing
  was ever added to the codebase collection. Runs in background thread to
  avoid blocking startup.
PRESERVED: All existing functions from prior changelogs remain intact.
  HFDatasetPersistence class, create_shadow_branch, write_file, shell_execute,
  search_code, read_file, save_conversation_turn — all unchanged.
NOTE: get_stats() is critical — app.py calls it at module level during UI
  construction AND in the system prompt. Missing it = instant crash.
"""

from pathlib import Path
from typing import List, Dict, Optional, Tuple
import chromadb
from chromadb.config import Settings
from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2
import hashlib
import json
import os
import time
import threading
import subprocess
import re


# =============================================================================
# CHROMA DB PATH SELECTION
# =============================================================================
# CHANGELOG [2026-01-31 - Gemini]
# HF Spaces Docker containers wipe everything EXCEPT /data on restart.
# We prefer /data/chroma_db (persistent) but fall back to /workspace/chroma_db
# (ephemeral) if /data isn't writable.
# =============================================================================

def _select_chroma_path():
    """HF Spaces Docker containers wipe everything EXCEPT /data on restart."""
    data_path = Path("/data/chroma_db")
    try:
        data_path.mkdir(parents=True, exist_ok=True)
        test_file = data_path / ".write_test"
        test_file.write_text("test")
        test_file.unlink()
        return str(data_path)
    except (OSError, PermissionError):
        workspace_path = Path("/workspace/chroma_db")
        workspace_path.mkdir(parents=True, exist_ok=True)
        return str(workspace_path)


CHROMA_DB_PATH = _select_chroma_path()


# =============================================================================
# HF DATASET PERSISTENCE
# =============================================================================
# CHANGELOG [2026-01-31 - Gemini]
# Handles durable cloud storage via HF Dataset repository. Conversations
# survive Space restarts by backing up to a private dataset repo.
# =============================================================================

class HFDatasetPersistence:
    """Handles durable cloud storage via your 1TB PRO Dataset repository."""

    def __init__(self, repo_id: str = None):
        from huggingface_hub import HfApi
        self.api = HfApi()
        self.repo_id = repo_id or os.getenv("MEMORY_REPO")
        self.token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
        self._repo_ready = False

        if self.repo_id and self.token:
            self._ensure_repo_exists()

    def _ensure_repo_exists(self):
        if self._repo_ready:
            return
        try:
            self.api.repo_info(
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token
            )
            self._repo_ready = True
        except Exception:
            try:
                self.api.create_repo(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    private=True,
                    token=self.token
                )
                self._repo_ready = True
            except Exception:
                pass

    @property
    def is_configured(self):
        return bool(self.repo_id and self.token)

    def save_conversations(self, data: List[Dict]):
        if not self.is_configured:
            return
        temp = Path("/tmp/conv_backup.json")
        temp.write_text(json.dumps(data, indent=2))
        try:
            self.api.upload_file(
                path_or_fileobj=str(temp),
                path_in_repo="conversations.json",
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token
            )
        except Exception:
            pass

    def load_conversations(self) -> List[Dict]:
        if not self.is_configured:
            return []
        try:
            from huggingface_hub import hf_hub_download
            local_path = hf_hub_download(
                repo_id=self.repo_id,
                filename="conversations.json",
                repo_type="dataset",
                token=self.token
            )
            with open(local_path, 'r') as f:
                return json.load(f)
        except Exception:
            return []


# =============================================================================
# RECURSIVE CONTEXT MANAGER
# =============================================================================

class RecursiveContextManager:
    """Manages unlimited context and vibe-coding tools for E-T Systems.

    CHANGELOG [2026-01-31 - Claude/Opus]
    This is the core class. It provides:
    - ChromaDB-backed semantic search over the codebase and conversations
    - File read/write with changelog enforcement
    - Shell execution for build tasks
    - Shadow branching for safe experimentation
    - Stats reporting for the UI sidebar
    - Repository indexing (background thread on init)

    ARCHITECTURE NOTE:
    The class is initialized once at module level in app.py. That means
    __init__ runs during import, so it MUST NOT block or crash. Heavy work
    (like indexing the repo) is dispatched to a background thread.
    get_stats() must return sensible defaults even before indexing completes.
    """

    # =========================================================================
    # FILE EXTENSIONS TO INDEX
    # =========================================================================
    # CHANGELOG [2026-01-31 - Claude/Opus]
    # Only index code/text files. Binary files, images, and large data files
    # would pollute the vector space and waste embedding compute.
    # =========================================================================
    INDEXABLE_EXTENSIONS = {
        '.py', '.js', '.ts', '.jsx', '.tsx', '.mjs', '.cjs',
        '.json', '.yaml', '.yml', '.toml',
        '.md', '.txt', '.rst',
        '.html', '.css', '.scss',
        '.sh', '.bash',
        '.sql',
        '.env.example',  # Not .env itself — that's sensitive
        '.gitignore', '.dockerignore',
        '.cfg', '.ini', '.conf',
    }

    # Max file size to index (256KB). Larger files are likely generated/data.
    MAX_INDEX_SIZE = 256 * 1024

    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path)
        self.persistence = HFDatasetPersistence()

        # =================================================================
        # EMBEDDING CONFIG
        # =================================================================
        # CHANGELOG [2026-01-31 - Gemini]
        # Fixes /.cache PermissionError. ChromaDB's ONNXMiniLM_L6_V2 tries
        # to download model weights to ~/.cache. In Docker as UID 1000,
        # that's /.cache (root-owned). We override DOWNLOAD_PATH to a
        # writable directory.
        # =================================================================
        self.embedding_function = ONNXMiniLM_L6_V2()
        cache_dir = os.getenv("CHROMA_CACHE_DIR", "/tmp/.cache/chroma")
        self.embedding_function.DOWNLOAD_PATH = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

        self.chroma_client = chromadb.PersistentClient(
            path=CHROMA_DB_PATH,
            settings=Settings(anonymized_telemetry=False, allow_reset=True)
        )

        c_name = self._get_collection_name()
        self.collection = self.chroma_client.get_or_create_collection(
            name=c_name,
            embedding_function=self.embedding_function
        )
        self.conversations = self.chroma_client.get_or_create_collection(
            name=f"conv_{c_name.split('_')[1]}",
            embedding_function=self.embedding_function
        )

        # Restore conversations from cloud backup if local is empty
        if self.conversations.count() == 0:
            self._restore_from_cloud()

        # =================================================================
        # BACKGROUND INDEXING
        # =================================================================
        # CHANGELOG [2026-01-31 - Claude/Opus]
        # Index the repository in a background thread so startup isn't
        # blocked. The _indexing flag lets get_stats() report status.
        # =================================================================
        self._indexing = False
        self._index_error = None
        self._indexed_file_count = 0
        if self.repo_path.exists() and self.repo_path.is_dir():
            self._start_background_indexing()

    def _restore_from_cloud(self):
        """Restore conversation history from HF Dataset backup.

        CHANGELOG [2026-01-31 - Gemini]
        Called during init if the local ChromaDB conversations collection
        is empty. Pulls from the cloud dataset repo to recover history
        after a Space restart.
        """
        data = self.persistence.load_conversations()
        for conv in data:
            try:
                self.conversations.add(
                    documents=[conv["document"]],
                    metadatas=[conv["metadata"]],
                    ids=[conv["id"]]
                )
            except Exception:
                pass

    def _get_collection_name(self) -> str:
        """Generate a deterministic collection name from the repo path.

        CHANGELOG [2025-01-28 - Josh]
        Uses MD5 hash of repo path so different repos get different
        collections within the same ChromaDB instance.
        """
        path_hash = hashlib.md5(str(self.repo_path).encode()).hexdigest()[:8]
        return f"codebase_{path_hash}"

    # =====================================================================
    # REPOSITORY INDEXING
    # =====================================================================
    # CHANGELOG [2026-01-31 - Claude/Opus]
    # Without indexing, search_code() always returns empty results because
    # nothing is ever added to the ChromaDB codebase collection. This walks
    # the repo, reads indexable files, chunks them, and upserts into ChromaDB.
    #
    # DESIGN DECISIONS:
    # - Background thread: Don't block Gradio startup. Users can chat while
    #   indexing runs. get_stats() shows indexing progress.
    # - Chunk by logical blocks: Split files into ~50-line chunks with overlap
    #   so semantic search finds relevant sections, not just file-level matches.
    # - Upsert (not add): Safe to re-run. If the file was already indexed
    #   with the same content hash, ChromaDB skips it.
    # - Skip .git, __pycache__, node_modules, venv: No value in indexing these.
    #
    # TESTED ALTERNATIVES (graveyard):
    # - Indexing entire files as single documents: Poor search precision.
    #   A 500-line file matching on line 3 returns all 500 lines.
    # - Line-by-line indexing: Too many tiny documents, poor semantic context.
    # - Synchronous indexing: Blocks startup for 30+ seconds on large repos.
    # =====================================================================

    def _start_background_indexing(self):
        """Kick off repo indexing in a daemon thread."""
        self._indexing = True
        thread = threading.Thread(target=self._index_repository, daemon=True)
        thread.start()

    def _index_repository(self):
        """Walk the repo and index code files into ChromaDB.

        Runs in background thread. Sets self._indexing = False when done.
        """
        try:
            skip_dirs = {
                '.git', '__pycache__', 'node_modules', 'venv', '.venv',
                'env', '.eggs', 'dist', 'build', '.next', '.nuxt',
                'chroma_db', '.chroma'
            }
            count = 0

            for file_path in self.repo_path.rglob('*'):
                # Skip directories and non-indexable files
                if file_path.is_dir():
                    continue

                # Skip files in excluded directories
                if any(skip in file_path.parts for skip in skip_dirs):
                    continue

                # Check extension
                suffix = file_path.suffix.lower()
                if suffix not in self.INDEXABLE_EXTENSIONS:
                    # Also allow extensionless files if they look like configs
                    if file_path.name not in {
                        'Dockerfile', 'Makefile', 'Procfile',
                        '.gitignore', '.dockerignore', '.env.example'
                    }:
                        continue

                # Check size
                try:
                    if file_path.stat().st_size > self.MAX_INDEX_SIZE:
                        continue
                except OSError:
                    continue

                # Read and chunk the file
                try:
                    content = file_path.read_text(encoding='utf-8', errors='ignore')
                except (OSError, UnicodeDecodeError):
                    continue

                if not content.strip():
                    continue

                rel_path = str(file_path.relative_to(self.repo_path))
                chunks = self._chunk_file(content, rel_path)

                for chunk_id, chunk_text, chunk_meta in chunks:
                    try:
                        self.collection.upsert(
                            documents=[chunk_text],
                            metadatas=[chunk_meta],
                            ids=[chunk_id]
                        )
                    except Exception:
                        continue

                count += 1
                self._indexed_file_count = count

        except Exception as e:
            self._index_error = str(e)
        finally:
            self._indexing = False

    def _chunk_file(self, content: str, rel_path: str) -> List[Tuple[str, str, dict]]:
        """Split a file into overlapping chunks for better search precision.

        CHANGELOG [2026-01-31 - Claude/Opus]
        Returns list of (id, text, metadata) tuples ready for ChromaDB upsert.
        Chunks are ~50 lines with 10-line overlap so context isn't lost at
        chunk boundaries.

        Args:
            content: Full file text
            rel_path: Path relative to repo root (used in metadata and IDs)

        Returns:
            List of (chunk_id, chunk_text, metadata_dict) tuples
        """
        lines = content.split('\n')
        chunks = []
        chunk_size = 50
        overlap = 10

        if len(lines) <= chunk_size:
            # Small file — index as single chunk
            content_hash = hashlib.md5(content.encode()).hexdigest()[:12]
            chunk_id = f"{rel_path}::full::{content_hash}"
            meta = {
                'path': rel_path,
                'chunk': 'full',
                'lines': f"1-{len(lines)}",
                'total_lines': len(lines)
            }
            chunks.append((chunk_id, content, meta))
        else:
            # Larger file — split into overlapping chunks
            start = 0
            chunk_num = 0
            while start < len(lines):
                end = min(start + chunk_size, len(lines))
                chunk_text = '\n'.join(lines[start:end])
                content_hash = hashlib.md5(chunk_text.encode()).hexdigest()[:12]
                chunk_id = f"{rel_path}::chunk{chunk_num}::{content_hash}"
                meta = {
                    'path': rel_path,
                    'chunk': f"chunk_{chunk_num}",
                    'lines': f"{start + 1}-{end}",
                    'total_lines': len(lines)
                }
                chunks.append((chunk_id, chunk_text, meta))
                chunk_num += 1
                start += chunk_size - overlap

        return chunks

    # =====================================================================
    # STATS (NEW — was missing, caused crash)
    # =====================================================================
    # CHANGELOG [2026-01-31 - Claude/Opus]
    # app.py calls ctx.get_stats() at module level during Gradio Block
    # construction AND in the system prompt for every message. It expected
    # a dict with 'conversations', 'total_files', etc. Without this method,
    # the app crashes immediately on import.
    #
    # Returns safe defaults during indexing so the UI can render.
    # =====================================================================

    def get_stats(self) -> dict:
        """Return system statistics for the UI sidebar and system prompt.

        Returns:
            dict with keys: total_files, indexed_chunks, conversations,
            chroma_path, persistence_configured, indexing_in_progress,
            index_error
        """
        return {
            'total_files': self._indexed_file_count,
            'indexed_chunks': self.collection.count(),
            'conversations': self.conversations.count(),
            'chroma_path': CHROMA_DB_PATH,
            'persistence_configured': self.persistence.is_configured,
            'indexing_in_progress': self._indexing,
            'index_error': self._index_error,
        }

    # =====================================================================
    # PHASE 1 ORCHESTRATOR TOOLS (preserved from Gemini)
    # =====================================================================

    def create_shadow_branch(self):
        """Creates a timestamped backup branch of the E-T Systems Space.

        CHANGELOG [2026-01-31 - Gemini]
        Safety net before any destructive operations. Creates a branch
        named vibe-backup-YYYYMMDD-HHMMSS on the E-T Systems HF Space
        so you can always roll back.
        """
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        branch_name = f"vibe-backup-{timestamp}"
        try:
            repo_id = os.getenv(
                "ET_SYSTEMS_SPACE",
                "Executor-Tyrant-Framework/Executor-Framworks_Full_VDB"
            )
            self.persistence.api.create_branch(
                repo_id=repo_id,
                branch=branch_name,
                repo_type="space",
                token=self.persistence.token
            )
            return f"🛡️ Shadow branch created: {branch_name}"
        except Exception as e:
            return f"⚠️ Shadow branch failed: {e}"

    def write_file(self, path: str, content: str):
        """Writes file strictly if valid CHANGELOG is present.

        CHANGELOG [2026-01-31 - Gemini]
        Enforces the living changelog pattern. Any code written by an agent
        MUST include a CHANGELOG [YYYY-MM-DD - AgentName] header or the
        write is rejected. This is non-negotiable for the E-T Systems
        development workflow.

        Args:
            path: Relative path within the repo (e.g., "server/routes.ts")
            content: Full file content (must contain CHANGELOG header)

        Returns:
            Success message or rejection reason
        """
        if not re.search(r"CHANGELOG \[\d{4}-\d{2}-\d{2} - \w+\]", content):
            return "REJECTED: Missing mandatory CHANGELOG [YYYY-MM-DD - AgentName] header."

        try:
            full_path = self.repo_path / path
            full_path.parent.mkdir(parents=True, exist_ok=True)
            full_path.write_text(content)
            return f"✅ Successfully wrote {path}"
        except Exception as e:
            return f"Error writing file: {e}"

    def shell_execute(self, command: str):
        """Runs shell commands in the /workspace directory.

        CHANGELOG [2026-01-31 - Gemini]
        Used for build tasks, git operations, dependency installs, etc.
        Timeout of 30 seconds prevents runaway processes. Captures both
        stdout and stderr for full diagnostic output.

        Args:
            command: Shell command string to execute

        Returns:
            Combined stdout/stderr output or error message
        """
        try:
            result = subprocess.run(
                command, shell=True, capture_output=True, text=True,
                cwd=self.repo_path, timeout=30
            )
            return f"STDOUT: {result.stdout}\nSTDERR: {result.stderr}"
        except Exception as e:
            return f"Execution Error: {e}"

    # =====================================================================
    # RECURSIVE SEARCH TOOLS
    # =====================================================================

    def search_code(self, query: str, n: int = 5) -> List[Dict]:
        """Semantic search across the indexed codebase.

        CHANGELOG [2025-01-28 - Josh]
        Core tool for the MIT recursive context technique. The model calls
        this to find relevant code without loading the entire repo into
        context.

        Args:
            query: Natural language search query
            n: Max number of results to return (default 5)

        Returns:
            List of dicts with 'file' (path) and 'snippet' (first 500 chars)
        """
        if self.collection.count() == 0:
            return []
        actual_n = min(n, self.collection.count())
        res = self.collection.query(query_texts=[query], n_results=actual_n)
        return [
            {"file": m['path'], "snippet": d[:500]}
            for d, m in zip(res['documents'][0], res['metadatas'][0])
        ]

    def read_file(self, path: str, start_line: int = None, end_line: int = None) -> str:
        """Read a specific file, optionally a line range.

        CHANGELOG [2025-01-28 - Josh]
        Direct file access for when the model knows exactly what it needs.

        CHANGELOG [2026-01-31 - Claude/Opus]
        Added optional start_line/end_line params for reading specific
        sections without loading entire large files into context.

        Args:
            path: Relative path within repo (e.g., "server/routes.ts")
            start_line: Optional 1-based start line
            end_line: Optional 1-based end line

        Returns:
            File contents (full or sliced) or "File not found." message
        """
        p = self.repo_path / path
        if not p.exists():
            return f"File not found: {path}"
        try:
            content = p.read_text(encoding='utf-8', errors='ignore')
            if start_line is not None or end_line is not None:
                lines = content.split('\n')
                start = (start_line or 1) - 1  # Convert to 0-based
                end = end_line or len(lines)
                sliced = lines[start:end]
                return '\n'.join(sliced)
            return content
        except Exception as e:
            return f"Error reading {path}: {e}"

    def list_files(self, path: str = "", max_depth: int = 3) -> str:
        """List files and directories at a given path.

        CHANGELOG [2026-01-31 - Claude/Opus]
        Directory exploration tool. The agent needs to know what files exist
        before it can read or search them. Returns a tree-formatted listing
        up to max_depth levels deep.

        Args:
            path: Relative path within repo (default "" = repo root)
            max_depth: How many levels deep to list (default 3)

        Returns:
            Formatted string showing directory tree
        """
        target = self.repo_path / path
        if not target.exists():
            return f"Path not found: {path}"
        if not target.is_dir():
            return f"Not a directory: {path}"

        skip_dirs = {
            '.git', '__pycache__', 'node_modules', 'venv', '.venv',
            'chroma_db', '.chroma', 'dist', 'build'
        }

        lines = [f"📂 {path or '(repo root)'}"]

        def _walk(dir_path: Path, prefix: str, depth: int):
            if depth > max_depth:
                return
            try:
                entries = sorted(dir_path.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower()))
            except PermissionError:
                return

            for i, entry in enumerate(entries):
                if entry.name in skip_dirs or entry.name.startswith('.'):
                    continue
                is_last = (i == len(entries) - 1)
                connector = "└── " if is_last else "├── "
                if entry.is_dir():
                    lines.append(f"{prefix}{connector}📁 {entry.name}/")
                    extension = "    " if is_last else "│   "
                    _walk(entry, prefix + extension, depth + 1)
                else:
                    size = entry.stat().st_size
                    size_str = f"{size:,}B" if size < 1024 else f"{size // 1024:,}KB"
                    lines.append(f"{prefix}{connector}📄 {entry.name} ({size_str})")

        _walk(target, "", 1)
        return '\n'.join(lines)

    def search_conversations(self, query: str, n: int = 5) -> List[Dict]:
        """Semantic search over past conversation history.

        CHANGELOG [2026-01-31 - Claude/Opus]
        This is how Clawdbot "remembers" past discussions. Conversations
        are saved to ChromaDB via save_conversation_turn() and backed up
        to the HF Dataset repo. This searches them semantically.

        Args:
            query: Natural language search query
            n: Max results to return

        Returns:
            List of dicts with 'content' and 'metadata' from matched turns
        """
        if self.conversations.count() == 0:
            return []
        actual_n = min(n, self.conversations.count())
        res = self.conversations.query(query_texts=[query], n_results=actual_n)
        results = []
        for doc, meta in zip(res['documents'][0], res['metadatas'][0]):
            results.append({
                'content': doc[:1000],  # Cap at 1000 chars per result
                'metadata': meta
            })
        return results

    def search_testament(self, query: str, n: int = 5) -> List[Dict]:
        """Search for Testament/architectural decision records.

        CHANGELOG [2026-01-31 - Claude/Opus]
        The Testament contains design decisions, constitutional principles,
        and architectural rationale for E-T Systems. This searches for
        testament-specific files first (TESTAMENT.md, DECISIONS.md, etc.),
        then falls back to general codebase search filtered for decision-
        related content.

        Args:
            query: What architectural decision to search for
            n: Max results

        Returns:
            List of dicts with 'file' and 'snippet' from matching documents
        """
        # First, look for dedicated testament/decision files
        testament_names = {
            'testament', 'decisions', 'adr', 'architecture',
            'principles', 'constitution', 'changelog', 'design'
        }

        testament_results = []
        if self.collection.count() > 0:
            # Search the codebase but prefer testament-like files
            actual_n = min(n * 2, self.collection.count())  # Get extra, then filter
            res = self.collection.query(query_texts=[query], n_results=actual_n)
            for doc, meta in zip(res['documents'][0], res['metadatas'][0]):
                path_lower = meta.get('path', '').lower()
                # Check if this is a testament/decision file
                is_testament = any(name in path_lower for name in testament_names)
                testament_results.append({
                    'file': meta['path'],
                    'snippet': doc[:500],
                    'is_testament': is_testament
                })

        # Sort: testament files first, then other matches
        testament_results.sort(key=lambda r: (not r.get('is_testament', False)))
        return testament_results[:n]

    def get_stats(self) -> dict:
        """Fetch current system statistics for the sidebar."""
        try:
            return {
                "total_files": self.collection.count(),
                "indexed_chunks": self.collection.count(),
                "conversations": self.conversations.count(),
                "chroma_path": CHROMA_DB_PATH,
                "persistence_configured": self.persistence.is_configured,
                "indexing_in_progress": False
            }
        except Exception as e:
            return {"index_error": str(e)}

    def save_conversation_turn(self, u, a, t_id):
        """Save turn locally and push the FULL history to the cloud to prevent memory loss."""
        combined = f"USER: {u}\n\nASSISTANT: {a}"
        u_id = f"turn_{int(time.time())}"
        
        # 1. Save locally
        self.conversations.add(documents=[combined], metadatas=[{"turn": t_id}], ids=[u_id])
        
        # 2. To prevent amnesia, we must retrieve ALL historical turns from the local database
        all_convs = self.conversations.get()
        data_to_save = []
        for i in range(len(all_convs['ids'])):
            data_to_save.append({
                "document": all_convs['documents'][i],
                "metadata": all_convs['metadatas'][i],
                "id": all_convs['ids'][i]
            })
            
        # 3. Push the COMPLETE history to your PRO storage (replaces the previous file)
        self.persistence.save_conversations(data_to_save)