import json
import os
import pickle
from typing import Any

from src.config import Settings
from src.retrieval.vectorstore import VectorStore
from src.retrieval.embedder import get_embedder


def _ensure_parent_dir(path: str) -> None:
    """Create the directory that will contain *path*, if it has one."""
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname; os.makedirs("") would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)


def ensure_dirs() -> None:
    """Create the ``data/raw`` and ``storage`` directories if missing.

    Paths are relative to the current working directory.
    """
    for p in ("data/raw", "storage"):
        os.makedirs(p, exist_ok=True)


def save_pickle(obj: Any, path: str) -> None:
    """Serialize *obj* to *path* with ``pickle``.

    The parent directory of *path* is created first so that writing into a
    not-yet-existing storage folder does not fail with ``FileNotFoundError``.
    """
    _ensure_parent_dir(path)
    with open(path, "wb") as f:
        pickle.dump(obj, f)


def load_pickle(path: str) -> Any:
    """Load and return the object pickled at *path*.

    SECURITY: unpickling can execute arbitrary code. Only call this on files
    written by this application (e.g. via ``save_pickle``), never on
    untrusted input.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    with open(path, "rb") as f:
        # NOTE(security): pickle.load is unsafe on untrusted data.
        return pickle.load(f)


def write_json(obj: Any, path: str) -> None:
    """Write *obj* to *path* as UTF-8, human-readable (indent=2) JSON.

    Non-ASCII characters are written as-is rather than escaped. The parent
    directory of *path* is created first if it does not exist.
    """
    _ensure_parent_dir(path)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)


def bootstrap_demo_index() -> None:
    """Create a minimal index so the app works before ingestion.

    Steps, in order:
      1. Pickle a fixed list of ten demo documents to ``settings.docs_path``.
      2. Build a ``VectorStore`` from those documents with the configured
         embedder and call ``save()`` on the result.
      3. Write ``{"demo": True, "count": <n>}`` to ``settings.meta_path``.
    """
    settings = Settings()
    demo_docs = [
        "Directive: Reinforce border surveillance along this area and also that one too. [Source: KGB/1963/Example 1]",
        "Report: Intercepted correspondence near Moscow. Oh no! [Source: KGB/1972/Example 2]",
        "Memo: Field notes suggest supply shortages in the winter. Hopefully it wont be cold. [Source: KGB/1979/Example 3]",
        "Alert: Increased activity detected in the northern territories. Deploy additional units immediately. [Source: KGB/1965/Example 4]",
        "Communication: Agent reports suspicious behavior at the embassy. Further investigation required. [Source: KGB/1968/Example 5]",
        "Analysis: Economic data indicates potential instability in the region. Monitor closely. [Source: KGB/1974/Example 6]",
        "Order: All operatives must report to headquarters by end of month for briefing. No exceptions. [Source: KGB/1977/Example 7]",
        "Intelligence: Foreign delegation arriving next week. Ensure proper surveillance measures are in place. [Source: KGB/1980/Example 8]",
        "Warning: Communication channels may be compromised. Switch to backup protocols effective immediately. [Source: KGB/1982/Example 9]",
        "Summary: Operation in Leningrad completed successfully. All targets accounted for. [Source: KGB/1985/Example 10]",
    ]
    save_pickle(demo_docs, settings.docs_path)

    embedder = get_embedder(settings)
    # build() is chained, so its return value is what gets saved
    # (presumably the store itself — confirm against VectorStore.build).
    vs = VectorStore(settings).build(demo_docs, embedder)
    vs.save()

    write_json({"demo": True, "count": len(demo_docs)}, settings.meta_path)