Spaces:

Omar10lfc
/

Fil-RAG-Goal

Running

App Files Files Community

Fil-RAG-Goal / qa_engine /cache.py

Omar10lfc

Initial Space deployment

18fd039 3 days ago

raw

history blame contribute delete

2.32 kB

	"""
	FilGoalBot — On-disk LLM response cache.

	Keyed on (model, intent, sorted chunk_ids, normalised query). Intentionally
	file-based + JSON: trivial to inspect, trivial to invalidate by deletion, and
	survives across processes (eval re-runs, API restarts).

	Saves Groq tokens on:
	- eval re-runs (same 50 questions, same retrieved chunks)
	- production duplicates (the same query within the eviction window)
	"""

	import hashlib
	import json
	import time
	from pathlib import Path

	# Directory holding one JSON file per cached response. It is created as a
	# side effect of importing this module.
	CACHE_DIR = Path(".cache") / "llm"
	CACHE_DIR.mkdir(exist_ok=True, parents=True)

	# Default entry lifetime: 30 days, expressed in seconds. Football news goes
	# stale much faster than that, but the cache key includes the retrieved chunk
	# IDs, so when new articles arrive the chunk set shifts and the key changes
	# on its own.
	DEFAULT_TTL_SECONDS = 30 * 24 * 60 * 60


	def _make_key(
	    model: str,
	    intent: str,
	    chunk_ids: list[str],
	    query: str,
	) -> str:
	    """Build the cache key for one (model, intent, chunks, query) request.

	    The chunk IDs are sorted and the query is stripped of surrounding
	    whitespace and lower-cased, so chunk order, padding, and letter case do
	    not affect the key. The four fields are serialised to JSON with sorted
	    keys and hashed with SHA-256.

	    Returns:
	        The first 32 hexadecimal characters of the digest.
	    """
	    normalised_query = query.strip().lower()
	    fields = dict(
	        chunks=sorted(chunk_ids),
	        intent=intent,
	        model=model,
	        query=normalised_query,
	    )
	    # sort_keys makes the serialisation independent of dict insertion order.
	    serialised = json.dumps(fields, sort_keys=True, ensure_ascii=False)
	    digest = hashlib.sha256(serialised.encode("utf-8"))
	    return digest.hexdigest()[:32]


	def get(model: str, intent: str, chunk_ids: list[str], query: str,
	        ttl_seconds: int = DEFAULT_TTL_SECONDS) -> str | None:
	    """Return the cached answer for this request, or ``None`` on a miss.

	    A lookup counts as a miss when the cache file is absent or unreadable,
	    is not valid UTF-8 JSON, does not contain a JSON object, carries a
	    non-numeric ``ts``, or is at least ``ttl_seconds`` old. An entry with no
	    ``ts`` field is treated as written at epoch 0.

	    Args:
	        model: Model name, part of the cache key.
	        intent: Intent label, part of the cache key.
	        chunk_ids: IDs of the retrieved chunks, part of the cache key.
	        query: User query, part of the cache key.
	        ttl_seconds: Maximum accepted age of an entry, in seconds.

	    Returns:
	        The stored ``answer`` value, or ``None`` if there is no usable entry.
	    """
	    key = _make_key(model, intent, chunk_ids, query)
	    path = CACHE_DIR / f"{key}.json"
	    # Read directly rather than checking exists() first: a missing file
	    # raises FileNotFoundError (an OSError), and the file cannot vanish
	    # between a check and the read. ValueError covers both
	    # json.JSONDecodeError and the UnicodeDecodeError that read_text raises
	    # on a file that is not valid UTF-8.
	    try:
	        entry = json.loads(path.read_text(encoding="utf-8"))
	    except (OSError, ValueError):
	        return None
	    # A hand-edited or foreign file may hold valid JSON that is not an
	    # object; treat it as a miss instead of failing on .get().
	    if not isinstance(entry, dict):
	        return None
	    written_at = entry.get("ts", 0)
	    if not isinstance(written_at, (int, float)):
	        return None
	    if time.time() - written_at >= ttl_seconds:
	        return None
	    return entry.get("answer")


	def put(model: str, intent: str, chunk_ids: list[str], query: str,
	        answer: str) -> None:
	    """Write *answer* to the on-disk cache under the key for this request.

	    The entry is a JSON object with the current time (``ts``), the answer,
	    and the raw query and intent. Any existing entry for the same key is
	    overwritten.

	    Args:
	        model: Model name, part of the cache key.
	        intent: Intent label, part of the cache key; also stored in the entry.
	        chunk_ids: IDs of the retrieved chunks, part of the cache key.
	        query: User query, part of the cache key; also stored in the entry.
	        answer: Response text to cache.

	    Raises:
	        OSError: If the cache directory or file cannot be written.
	    """
	    key = _make_key(model, intent, chunk_ids, query)
	    path = CACHE_DIR / f"{key}.json"
	    # The directory is only created at import time. Recreate it here so the
	    # cache keeps working after someone invalidates it by deleting the
	    # directory while the process is running.
	    CACHE_DIR.mkdir(parents=True, exist_ok=True)
	    path.write_text(
	        json.dumps(
	            {"ts": time.time(), "answer": answer, "query": query, "intent": intent},
	            ensure_ascii=False,
	        ),
	        encoding="utf-8",
	    )


	def estimate_tokens(text: str) -> int:
	    """Return a rough token count for *text*, for use by the budget guard.

	    The estimate is one token per three characters, using integer division,
	    and is never lower than 1, so empty and very short strings count as one
	    token. The divisor of 3 follows the author's note that Arabic text
	    averages about 3 characters per token on llama tokenizers.
	    """
	    chars_per_token = 3
	    approx = len(text) // chars_per_token
	    return approx if approx > 1 else 1