Spaces:
Sleeping
Sleeping
File size: 2,254 Bytes
4abd84c 1b80ae8 4abd84c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import os
import pickle
import json
from src.config import Settings
from src.retrieval.vectorstore import VectorStore
from src.retrieval.embedder import get_embedder
def ensure_dirs(paths=("data/raw", "storage")):
    """Create the working directories the application writes into.

    Args:
        paths: Directory paths to create. Defaults to the raw-data and
            storage directories, so a plain ``ensure_dirs()`` call behaves
            exactly as before. A single string or path-like object is
            accepted as well.

    Missing intermediate directories are created too, and directories that
    already exist are left untouched.
    """
    # A lone path would otherwise be iterated character by character.
    if isinstance(paths, (str, os.PathLike)):
        paths = (paths,)
    for directory in paths:
        os.makedirs(directory, exist_ok=True)
def save_pickle(obj, path: str):
    """Serialize ``obj`` with pickle and write it to ``path``.

    Args:
        obj: Any picklable object.
        path: Destination file. It is overwritten if it already exists.

    The parent directory is created when it is missing, so saving into a
    fresh checkout does not fail with ``FileNotFoundError``.
    """
    parent = os.path.dirname(path)
    if parent:  # empty for a bare filename in the current directory
        os.makedirs(parent, exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(obj, f)
def load_pickle(path: str):
    """Return the object unpickled from the file at ``path``.

    NOTE(review): unpickling can execute arbitrary code, so ``path`` should
    only point at files this application wrote itself -- confirm callers.
    """
    with open(path, mode="rb") as stream:
        restored = pickle.load(stream)
    return restored
def write_json(obj, path: str):
    """Write ``obj`` to ``path`` as human-readable UTF-8 JSON.

    Args:
        obj: Any JSON-serializable object.
        path: Destination file. It is overwritten if it already exists.

    Non-ASCII characters are written as-is rather than escaped, and the
    output is indented by two spaces. The parent directory is created when
    it is missing, so writing into a fresh checkout does not fail with
    ``FileNotFoundError``.
    """
    parent = os.path.dirname(path)
    if parent:  # empty for a bare filename in the current directory
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)
def bootstrap_demo_index():
    """Create a minimal index so the app works before ingestion.

    Writes three artifacts, in this order:

    1. the demo documents, pickled to ``settings.docs_path``;
    2. a vector store built from those documents with the configured
       embedder, persisted through the store's own ``save()``;
    3. a JSON metadata record at ``settings.meta_path`` that marks the index
       as demo data and records the document count.
    """
    # This runs before any ingestion, so the working directories may not
    # exist yet; create them up front instead of failing on the first write.
    # NOTE(review): assumes the paths configured in Settings live under the
    # directories ensure_dirs() creates -- confirm against src.config.
    ensure_dirs()

    settings = Settings()
    demo_docs = [
        "Directive: Reinforce border surveillance along this area and also that one too. [Source: KGB/1963/Example 1]",
        "Report: Intercepted correspondence near Moscow. Oh no! [Source: KGB/1972/Example 2]",
        "Memo: Field notes suggest supply shortages in the winter. Hopefully it wont be cold. [Source: KGB/1979/Example 3]",
        "Alert: Increased activity detected in the northern territories. Deploy additional units immediately. [Source: KGB/1965/Example 4]",
        "Communication: Agent reports suspicious behavior at the embassy. Further investigation required. [Source: KGB/1968/Example 5]",
        "Analysis: Economic data indicates potential instability in the region. Monitor closely. [Source: KGB/1974/Example 6]",
        "Order: All operatives must report to headquarters by end of month for briefing. No exceptions. [Source: KGB/1977/Example 7]",
        "Intelligence: Foreign delegation arriving next week. Ensure proper surveillance measures are in place. [Source: KGB/1980/Example 8]",
        "Warning: Communication channels may be compromised. Switch to backup protocols effective immediately. [Source: KGB/1982/Example 9]",
        "Summary: Operation in Leningrad completed successfully. All targets accounted for. [Source: KGB/1985/Example 10]",
    ]

    # Persist the raw texts first; the vector store is built from this list.
    save_pickle(demo_docs, settings.docs_path)

    embedder = get_embedder(settings)
    vs = VectorStore(settings).build(demo_docs, embedder)
    vs.save()

    # The metadata flags the index as demo content and records its size.
    write_json({"demo": True, "count": len(demo_docs)}, settings.meta_path)
|