Spaces:

Khatwanigaurav
/

RAG

Sleeping

RAG / app.py

Gaurav Khatwani

app.py 2

a850731 about 2 months ago

33 kB


	import os
	import re
	import time
	from pathlib import Path
	from typing import Any, Dict, List


	os.environ.setdefault("TRANSFORMERS_NO_TF", "1")
	os.environ.setdefault("USE_TF", "0")

	import gradio as gr
	import numpy as np
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity

	# spaCy and its English model are optional. Semantic_chunking() wraps its
	# call to `nlp` in try/except and falls back to a regex sentence splitter,
	# so a missing package or model must not stop the module from importing.
	try:
	    import spacy
	    nlp = spacy.load('en_core_web_md')
	except Exception:
	    # Broad on purpose, matching the other optional-dependency guards in
	    # this file: any import or model-loading failure disables spaCy.
	    spacy = None
	    nlp = None

	try:
	from dotenv import load_dotenv
	load_dotenv(override=True)
	except Exception:
	pass

	try:
	from pymongo import ASCENDING, MongoClient, ReplaceOne
	except Exception:
	ASCENDING = None
	MongoClient = None
	ReplaceOne = None

	try:
	from rank_bm25 import BM25Okapi
	except Exception:
	BM25Okapi = None

	try:
	from pinecone import Pinecone, ServerlessSpec
	except Exception:
	Pinecone = None
	ServerlessSpec = None

	try:
	from groq import Groq
	except Exception:
	Groq = None


	# Model names: the GROQ_-prefixed variable wins, then the unprefixed one,
	# then the hard-coded default.
	GENERATOR_MODEL = os.getenv("GROQ_GENERATOR_MODEL", os.getenv("GENERATOR_MODEL", "llama-3.1-8b-instant"))
	JUDGE_MODEL = os.getenv("GROQ_JUDGE_MODEL", os.getenv("JUDGE_MODEL", "llama-3.3-70b-versatile"))
	TRANSLATION_MODEL = os.getenv("GROQ_TRANSLATION_MODEL", os.getenv("TRANSLATION_MODEL", "deepseek-r1-distill-qwen-14b"))
	# Normalised chunking-method label; an empty value falls back to "semantic".
	CHUNKING_METHOD = os.getenv("CHUNKING_METHOD", "semantic").strip().lower() or "semantic"
	# Version tag that is embedded in chunk-cache file names.
	CHUNK_CACHE_VERSION = os.getenv("CHUNK_CACHE_VERSION", "v6")
	# Cache directory lives next to this file and is created at import time.
	CHUNK_CACHE_DIR = Path(__file__).with_name("chunk_cache")
	CHUNK_CACHE_DIR.mkdir(parents=True, exist_ok=True)

	# Process-wide cache of expensive objects (embedding models, Groq client).
	_MODEL_CACHE: Dict[str, Any] = {}

	# Code points in U+0600-06FF, U+0750-077F and U+08A0-08FF (Arabic-script ranges).
	URDU_CHAR_RE = re.compile(r"[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]")
	# Any ASCII letter.
	LATIN_CHAR_RE = re.compile(r"[A-Za-z]")


	def recursive_chunking(text: str, target_size: int = 800) -> List[str]:
	    """Chunk text by ``## `` headings, then by paragraphs for long sections.

	    Sections of at most 1000 characters are kept whole. Longer sections are
	    split on blank lines and paragraphs are packed greedily while the
	    running buffer stays below ``target_size`` characters.

	    Args:
	        text: Text to split.
	        target_size: Soft size limit for a packed paragraph buffer.

	    Returns:
	        Non-empty, stripped chunks in document order.
	    """
	    pieces: List[str] = []

	    def emit(candidate: str) -> None:
	        # Whitespace-only buffers are never emitted.
	        trimmed = candidate.strip()
	        if trimmed:
	            pieces.append(trimmed)

	    for block in text.split("\n## "):
	        if len(block) <= 1000:
	            emit(block)
	            continue

	        buffer = ""
	        for paragraph in block.split("\n\n"):
	            if len(buffer) + len(paragraph) >= target_size:
	                # The paragraph does not fit: flush and start a new buffer.
	                emit(buffer)
	                buffer = ""
	            buffer += paragraph + "\n\n"
	        emit(buffer)

	    return pieces


	def Semantic_chunking(text, max_chars=800):
	    """Pack consecutive sentences into chunks of fewer than ``max_chars`` chars.

	    Sentences come from the module-level spaCy pipeline ``nlp``. If calling
	    it fails for any reason, a regex split after ``.``, ``!`` or ``?`` is
	    used instead. A single sentence longer than ``max_chars`` is kept whole
	    as its own chunk.

	    Args:
	        text: Text to split.
	        max_chars: Soft upper bound on chunk length.

	    Returns:
	        List of non-empty, stripped chunk strings in document order.
	    """
	    try:
	        doc = nlp(text)
	        sentences = [sent.text.strip() for sent in doc.sents if sent.text.strip()]
	    except Exception:
	        # Fallback if spaCy (or its model) is unavailable.
	        sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]

	    chunks = []
	    current_chunk = ""
	    for sentence in sentences:
	        if len(current_chunk) + len(sentence) < max_chars:
	            current_chunk += sentence + " "
	            continue
	        # Flush only a non-empty buffer. When the very first sentence is
	        # already too long the buffer is still empty, and appending it
	        # would create an empty chunk.
	        if current_chunk.strip():
	            chunks.append(current_chunk.strip())
	        current_chunk = sentence + " "
	    if current_chunk.strip():
	        chunks.append(current_chunk.strip())
	    return chunks

	def read_corpus_documents(corpus_path: str) -> List[Dict[str, str]]:
	    """Load documents from a file or, recursively, from a directory.

	    Supported inputs are ``.txt``/``.md`` (one document per file) and
	    ``.csv``/``.parquet`` (one document per row, rendered as
	    ``"column: value"`` lines). For parquet files with more than one
	    column, the first column is dropped before rows are rendered.

	    Args:
	        corpus_path: Path to a single corpus file or a directory.

	    Returns:
	        List of ``{"source": ..., "text": ...}`` dicts. Empty when the path
	        does not exist or a single file has an unsupported suffix.

	    Raises:
	        ImportError: If a parquet file is read without a parquet engine.
	    """
	    path = Path(corpus_path)
	    docs: List[Dict[str, str]] = []

	    def load_text_file(fp: Path) -> None:
	        text = fp.read_text(encoding="utf-8", errors="ignore").strip()
	        if text:
	            docs.append({"source": str(fp), "text": text})

	    def add_table_rows(fp: Path, df) -> None:
	        # Shared by CSV and parquet: render each row's text columns.
	        text_cols = [
	            c
	            for c in df.columns
	            if pd.api.types.is_string_dtype(df[c]) or pd.api.types.is_object_dtype(df[c])
	        ]
	        if not text_cols:
	            # No textual columns detected: use every column.
	            text_cols = list(df.columns)

	        for idx, row in df.iterrows():
	            row_parts = []
	            for col in text_cols:
	                value = row.get(col, None)
	                if pd.notna(value):
	                    s = str(value).strip()
	                    if s:
	                        row_parts.append(f"{col}: {s}")
	            if row_parts:
	                docs.append({"source": f"{fp}#row={idx}", "text": "\n".join(row_parts)})

	    def load_csv_file(fp: Path) -> None:
	        add_table_rows(fp, pd.read_csv(fp))

	    def load_parquet_file(fp: Path) -> None:
	        try:
	            df = pd.read_parquet(fp)
	        except ImportError as exc:
	            raise ImportError("Reading parquet files requires 'pyarrow' or 'fastparquet'.") from exc

	        if len(df.columns) > 1:
	            # Matches notebook behavior: ignore likely index/id first column for parquet corpora.
	            df = df.iloc[:, 1:].copy()
	        add_table_rows(fp, df)

	    loaders = {
	        ".txt": load_text_file,
	        ".md": load_text_file,
	        ".csv": load_csv_file,
	        ".parquet": load_parquet_file,
	    }

	    if path.is_file():
	        loader = loaders.get(path.suffix.lower())
	        if loader is not None:
	            loader(path)
	        return docs

	    if path.is_dir():
	        # Patterns need the "*" wildcard: rglob(".txt") only matches files
	        # literally named ".txt". Sorting keeps document order, and hence
	        # chunk ids, stable across runs and filesystems.
	        for pattern, loader in (
	            ("*.txt", load_text_file),
	            ("*.md", load_text_file),
	            ("*.csv", load_csv_file),
	            ("*.parquet", load_parquet_file),
	        ):
	            for fp in sorted(path.rglob(pattern)):
	                loader(fp)

	    return docs


	def build_chunks(docs: List[Dict[str, str]]) -> List[Dict[str, str]]:
	    """Chunk every document and assign sequential ids ``ch_0``, ``ch_1``, ...

	    Args:
	        docs: Documents with ``"text"`` and ``"source"`` keys.

	    Returns:
	        One ``{"id", "text", "source"}`` dict per chunk, in document order.
	    """
	    pieces = [
	        (doc["source"], piece)
	        for doc in docs
	        for piece in Semantic_chunking(doc["text"])
	    ]
	    return [
	        {"id": f"ch_{position}", "text": piece, "source": origin}
	        for position, (origin, piece) in enumerate(pieces)
	    ]


	def get_cached_embedding_model(model_name: str):
	    """Return the SentenceTransformer for ``model_name``, loading it once.

	    Loaded models are kept in ``_MODEL_CACHE`` under the key
	    ``"embedding::<model_name>"``.
	    """
	    key = f"embedding::{model_name}"
	    cached = _MODEL_CACHE.get(key)
	    if cached is not None:
	        return cached

	    fresh = SentenceTransformer(model_name)
	    _MODEL_CACHE[key] = fresh
	    return fresh


	def _corpus_signature(corpus_path: Path) -> str:
	    """Return a 12-hex-character fingerprint of the corpus contents.

	    The fingerprint hashes the resolved path, modification time (ns) and
	    size of the corpus file, or of every supported file under a corpus
	    directory. A path that is neither a file nor a directory is hashed by
	    its string form.

	    Args:
	        corpus_path: Corpus file or directory (``str`` is accepted too).

	    Returns:
	        First 12 hex characters of the SHA-1 digest.
	    """
	    import hashlib

	    corpus_path = Path(corpus_path)
	    h = hashlib.sha1()

	    def add_file(fp: Path) -> None:
	        st = fp.stat()
	        # Plain "|" separators: the former "\|" is an invalid escape
	        # sequence that newer Python versions warn about.
	        h.update(f"{fp.resolve()}|{st.st_mtime_ns}|{st.st_size}".encode("utf-8"))

	    if corpus_path.is_file():
	        add_file(corpus_path)
	    elif corpus_path.is_dir():
	        supported = {".txt", ".md", ".csv", ".parquet"}
	        # Sorted so the digest does not depend on directory listing order.
	        for fp in sorted(corpus_path.rglob("*")):
	            if fp.is_file() and fp.suffix.lower() in supported:
	                add_file(fp)
	    else:
	        h.update(str(corpus_path).encode("utf-8"))

	    return h.hexdigest()[:12]


	def get_chunk_cache_path(corpus_path: str, chunking_method: str = "semantic") -> Path:
	    """Build the JSONL chunk-cache path for a corpus and chunking method.

	    The file name combines a sanitised corpus name, the chunking method,
	    ``CHUNK_CACHE_VERSION`` and a digest of the resolved path, the corpus
	    signature and the chunking configuration.

	    Args:
	        corpus_path: Corpus file or directory.
	        chunking_method: Label of the chunking strategy in use.

	    Returns:
	        Path inside ``CHUNK_CACHE_DIR``.
	    """
	    import hashlib

	    corpus_path_obj = Path(corpus_path)
	    corpus_name = corpus_path_obj.stem if corpus_path_obj.is_file() else corpus_path_obj.name
	    corpus_name = re.sub(r"[^a-zA-Z0-9._-]+", "_", corpus_name or "corpus").strip("_").lower()
	    # Sanitising can strip everything (e.g. a name made only of symbols).
	    corpus_name = corpus_name or "corpus"

	    resolved = str(corpus_path_obj.resolve()) if corpus_path_obj.exists() else str(corpus_path_obj)
	    source_sig = _corpus_signature(corpus_path_obj)
	    # Plain "|" separators: "\|" is an invalid escape sequence.
	    chunk_cfg = f"{CHUNK_CACHE_VERSION}|{chunking_method}"
	    digest = hashlib.sha1(f"{resolved}|{source_sig}|{chunk_cfg}".encode("utf-8")).hexdigest()[:12]
	    return CHUNK_CACHE_DIR / f"{corpus_name}_{chunking_method}_{CHUNK_CACHE_VERSION}_{digest}.jsonl"


	def save_chunks_to_cache(chunks: List[Dict[str, str]], cache_path: Path) -> None:
	    """Write chunks to ``cache_path`` as UTF-8 JSON Lines.

	    Parent directories are created as needed. Chunks with an empty id or
	    empty text are skipped; a missing or blank source is stored as
	    ``"unknown"``.
	    """
	    import json

	    target = Path(cache_path)
	    target.parent.mkdir(parents=True, exist_ok=True)

	    with target.open("w", encoding="utf-8") as handle:
	        for item in chunks:
	            identifier = str(item.get("id", "")).strip()
	            body = str(item.get("text", "")).strip()
	            origin = str(item.get("source", "unknown")).strip() or "unknown"
	            if not (identifier and body):
	                continue
	            row = {"id": identifier, "text": body, "source": origin}
	            handle.write(json.dumps(row, ensure_ascii=False) + "\n")


	def load_chunks_from_cache(cache_path: Path) -> List[Dict[str, str]]:
	    """Read chunks from a JSON Lines cache file.

	    Blank lines, lines that are not valid JSON, JSON values that are not
	    objects, and records with an empty id or text are skipped.

	    Args:
	        cache_path: Path of the JSONL cache file.

	    Returns:
	        List of ``{"id", "text", "source"}`` dicts; empty if the file does
	        not exist.
	    """
	    import json

	    cache_path = Path(cache_path)
	    if not cache_path.exists():
	        return []

	    chunks: List[Dict[str, str]] = []
	    with cache_path.open("r", encoding="utf-8") as f:
	        for line in f:
	            line = line.strip()
	            if not line:
	                continue
	            try:
	                record = json.loads(line)
	            except json.JSONDecodeError:
	                continue
	            if not isinstance(record, dict):
	                # Valid JSON but not an object (e.g. a list or a number):
	                # treat it as a corrupt line, like undecodable ones.
	                continue

	            chunk_id = str(record.get("id", "")).strip()
	            text = str(record.get("text", "")).strip()
	            source = str(record.get("source", "unknown")).strip() or "unknown"
	            if chunk_id and text:
	                chunks.append({"id": chunk_id, "text": text, "source": source})

	    return chunks


	def get_semantic_cache_path(cache_path: Path) -> Path:
	    """Return the embedding-matrix path that accompanies a chunk cache file.

	    The final suffix of ``cache_path`` is replaced with ``.semantic.npy``.
	    """
	    chunk_cache = Path(cache_path)
	    embeddings_suffix = ".semantic.npy"
	    return chunk_cache.with_suffix(embeddings_suffix)


	def save_semantic_embeddings_to_cache(embedding_matrix, semantic_cache_path: Path) -> None:
	    """Save the embedding matrix with ``np.save``; do nothing for ``None``.

	    Parent directories of ``semantic_cache_path`` are created as needed.
	    """
	    if embedding_matrix is not None:
	        destination = Path(semantic_cache_path)
	        destination.parent.mkdir(parents=True, exist_ok=True)
	        np.save(destination, embedding_matrix)


	def load_semantic_embeddings_from_cache(semantic_cache_path: Path, expected_rows: int \| None = None):
	semantic_cache_path = Path(semantic_cache_path)
	if not semantic_cache_path.exists():
	return None

	try:
	matrix = np.load(semantic_cache_path, allow_pickle=False)
	except Exception:
	return None

	if not isinstance(matrix, np.ndarray) or matrix.ndim != 2:
	return None

	if expected_rows is not None and matrix.shape[0] != int(expected_rows):
	return None

	return matrix


	def get_mongo_collection():
	    """Return the MongoDB collection used for chunk storage, or ``None``.

	    Reads ``MONGODB_URI``, ``MONGODB_DB`` (default ``rag_db``) and
	    ``MONGODB_COLLECTION`` (default ``rag_chunks``). MongoDB is optional:
	    ``None`` is returned when no URI is set, ``pymongo`` is not installed,
	    or the server cannot be reached or set up.

	    Returns:
	        A collection with indexes on ``chunk_id`` (unique) and ``source``,
	        or ``None``.
	    """
	    mongo_uri = os.getenv("MONGODB_URI", "").strip()
	    if not mongo_uri or MongoClient is None:
	        return None

	    db_name = os.getenv("MONGODB_DB", "rag_db")
	    coll_name = os.getenv("MONGODB_COLLECTION", "rag_chunks")
	    client = None
	    try:
	        client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
	        # Fail fast if the server is unreachable.
	        client.admin.command("ping")
	        collection = client[db_name][coll_name]
	        if ASCENDING is not None:
	            collection.create_index([("chunk_id", ASCENDING)], unique=True)
	            collection.create_index([("source", ASCENDING)])
	        return collection
	    except Exception as exc:
	        # Best effort: report why MongoDB is skipped (type only, so no
	        # connection details reach the log) and release the client instead
	        # of leaking it.
	        print(f"MongoDB unavailable ({type(exc).__name__}); continuing without it.")
	        if client is not None:
	            client.close()
	        return None


	def upsert_chunks_to_mongodb(collection, chunks: List[Dict[str, str]]) -> None:
	    """Upsert chunks into MongoDB, keyed by ``chunk_id``.

	    Does nothing when there is no collection, ``pymongo`` is unavailable,
	    or ``chunks`` is empty. All replacements go out in one unordered bulk
	    write.
	    """
	    nothing_to_do = collection is None or ReplaceOne is None or not chunks
	    if nothing_to_do:
	        return None

	    operations = [
	        ReplaceOne(
	            {"chunk_id": item["id"]},
	            {
	                "chunk_id": item["id"],
	                "text": item["text"],
	                "source": item.get("source", "unknown"),
	            },
	            upsert=True,
	        )
	        for item in chunks
	    ]
	    if operations:
	        collection.bulk_write(operations, ordered=False)
	    return None


	def load_chunks_from_mongodb(collection, limit: int = 20000) -> List[Dict[str, str]]:
	    """Fetch up to ``limit`` chunks from MongoDB.

	    Records with an empty ``chunk_id`` or ``text`` are dropped; a missing
	    or blank source becomes ``"unknown"``. Returns ``[]`` when
	    ``collection`` is ``None``.
	    """
	    if collection is None:
	        return []

	    projection = {"_id": 0, "chunk_id": 1, "text": 1, "source": 1}
	    cursor = collection.find({}, projection).limit(limit)

	    loaded = []
	    for record in list(cursor):
	        identifier = str(record.get("chunk_id", "")).strip()
	        body = str(record.get("text", "")).strip()
	        origin = str(record.get("source", "unknown")).strip() or "unknown"
	        if not (identifier and body):
	            continue
	        loaded.append({"id": identifier, "text": body, "source": origin})
	    return loaded


	class HybridRetriever:
	    """Hybrid retriever: BM25 + dense search, RRF fusion, cosine re-ranking.

	    Dense search uses a Pinecone index when one was initialised and
	    otherwise a local embedding matrix. Keyword search needs ``rank_bm25``;
	    without it only dense results are produced.
	    """

	    def __init__(self, embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
	        """Load (or reuse) the embedding model and reset all indexes."""
	        self.embedding_model = get_cached_embedding_model(embedding_model_name)
	        self.bm25_model = None
	        self.bm25_chunks: List[Dict[str, Any]] = []
	        self.local_chunk_matrix = None
	        self.pc_client = None
	        self.pc_index = None
	        self.index_name = os.getenv("PINECONE_INDEX_NAME", "rag-assignment3-index")

	    def set_corpus(self, chunks: List[Dict[str, Any]], semantic_matrix=None) -> None:
	        """Index ``chunks`` for keyword and local dense search.

	        Args:
	            chunks: Chunk dicts with at least a ``"text"`` key.
	            semantic_matrix: Optional precomputed embeddings, one row per
	                chunk. Ignored (and recomputed) if the row count differs.
	        """
	        self.bm25_chunks = chunks
	        if not chunks:
	            # An empty corpus cannot be indexed (BM25 averages over the
	            # document count); leave both indexes unset so searches return
	            # no results instead of failing.
	            self.bm25_model = None
	            self.local_chunk_matrix = None
	            return

	        if BM25Okapi is not None:
	            tokenized = [c["text"].lower().split() for c in chunks]
	            self.bm25_model = BM25Okapi(tokenized)

	        if semantic_matrix is not None and len(semantic_matrix) == len(chunks):
	            self.local_chunk_matrix = np.array(semantic_matrix)
	        else:
	            vectors = self.embedding_model.encode([c["text"] for c in chunks], show_progress_bar=False)
	            self.local_chunk_matrix = np.array(vectors)

	    def try_init_pinecone(self) -> None:
	        """Connect to Pinecone if configured, creating the index if missing.

	        On any failure both ``pc_client`` and ``pc_index`` are reset to
	        ``None`` so dense search uses the local matrix.
	        """
	        api_key = os.getenv("PINECONE_API_KEY")
	        region = os.getenv("PINECONE_ENVIRONMENT", "us-east-1")
	        if not (Pinecone and ServerlessSpec and api_key):
	            return

	        try:
	            self.pc_client = Pinecone(api_key=api_key)
	            existing = [idx.name for idx in self.pc_client.list_indexes()]
	            if self.index_name not in existing:
	                # NOTE(review): 384 presumably matches the default
	                # all-MiniLM-L6-v2 model; confirm before changing models.
	                self.pc_client.create_index(
	                    name=self.index_name,
	                    dimension=384,
	                    metric="cosine",
	                    spec=ServerlessSpec(cloud="aws", region=region),
	                )
	            self.pc_index = self.pc_client.Index(self.index_name)
	        except Exception:
	            self.pc_client = None
	            self.pc_index = None

	    def upsert_to_pinecone(self, chunks: List[Dict[str, Any]], batch_size: int = 100) -> None:
	        """Embed ``chunks`` and upsert them to Pinecone in batches.

	        Each batch is embedded with one ``encode`` call and sent right
	        away, so only one batch of vectors is held in memory at a time.
	        """
	        if self.pc_index is None:
	            return

	        for start in range(0, len(chunks), batch_size):
	            batch = chunks[start : start + batch_size]
	            embeddings = self.embedding_model.encode(
	                [chunk["text"] for chunk in batch], show_progress_bar=False
	            )
	            vectors = [
	                {
	                    "id": chunk["id"],
	                    "values": np.asarray(vec).tolist(),
	                    "metadata": {"text": chunk["text"], "source": chunk.get("source", "unknown")},
	                }
	                for chunk, vec in zip(batch, embeddings)
	            ]
	            self.pc_index.upsert(vectors=vectors)

	    def _bm25_search(self, query: str, top_k: int) -> List[Dict[str, Any]]:
	        """Return the ``top_k`` chunks by BM25 score (``[]`` without BM25)."""
	        if self.bm25_model is None:
	            return []
	        scores = self.bm25_model.get_scores(query.lower().split())
	        indices = np.argsort(scores)[::-1][:top_k]
	        out = []
	        for i in indices:
	            doc = dict(self.bm25_chunks[i])
	            doc["score"] = float(scores[i])
	            doc["search_type"] = "keyword"
	            out.append(doc)
	        return out

	    def _semantic_search(self, query: str, top_k: int) -> List[Dict[str, Any]]:
	        """Return the ``top_k`` chunks by embedding similarity.

	        Pinecone is tried first when available; if the query fails, or no
	        index is configured, the local matrix is searched.
	        """
	        if self.pc_index is not None:
	            try:
	                qv = self.embedding_model.encode(query).tolist()
	                response = self.pc_index.query(vector=qv, top_k=top_k, include_metadata=True)
	                out = []
	                for m in response.matches:
	                    meta = m.metadata or {}
	                    out.append(
	                        {
	                            "id": m.id,
	                            "text": meta.get("text", ""),
	                            "source": meta.get("source", "unknown"),
	                            "score": float(m.score),
	                            "search_type": "semantic",
	                        }
	                    )
	                return out
	            except Exception:
	                # Deliberate best effort: fall through to the local search.
	                pass

	        if self.local_chunk_matrix is None or len(self.bm25_chunks) == 0:
	            return []

	        qv = self.embedding_model.encode(query)
	        sims = cosine_similarity([qv], self.local_chunk_matrix)[0]
	        indices = np.argsort(sims)[::-1][:top_k]
	        out = []
	        for i in indices:
	            doc = dict(self.bm25_chunks[i])
	            doc["score"] = float(sims[i])
	            doc["search_type"] = "semantic"
	            out.append(doc)
	        return out

	    @staticmethod
	    def _rrf_fusion(keyword_results: List[Dict[str, Any]], semantic_results: List[Dict[str, Any]], k: int = 60):
	        """Merge two ranked lists with Reciprocal Rank Fusion.

	        Each document scores ``1 / (k + rank)`` per list it appears in
	        (ranks start at 1). Documents found by both searches are tagged
	        ``search_type="hybrid"``. The input dicts are not modified.

	        Returns:
	            Copies of the documents with an added ``"rrf_score"``, sorted
	            from highest to lowest score.
	        """
	        scores: Dict[str, float] = {}
	        merged: Dict[str, Dict[str, Any]] = {}

	        for rank, doc in enumerate(keyword_results, start=1):
	            doc_id = doc["id"]
	            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
	            # Copy so the "hybrid" tag below never leaks into caller data.
	            merged[doc_id] = dict(doc)

	        for rank, doc in enumerate(semantic_results, start=1):
	            doc_id = doc["id"]
	            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
	            if doc_id in merged:
	                merged[doc_id]["search_type"] = "hybrid"
	            else:
	                merged[doc_id] = dict(doc)

	        fused = []
	        for doc_id, score in scores.items():
	            d = dict(merged[doc_id])
	            d["rrf_score"] = score
	            fused.append(d)

	        fused.sort(key=lambda x: x.get("rrf_score", 0.0), reverse=True)
	        return fused

	    def _rerank(self, query: str, results: List[Dict[str, Any]], top_k: int) -> List[Dict[str, Any]]:
	        """Re-rank ``results`` by cosine similarity to the query embedding.

	        All candidate texts are embedded in one batched ``encode`` call.

	        Returns:
	            The ``top_k`` most similar documents (copies) with an added
	            ``"rerank_score"``.
	        """
	        if not results:
	            return []
	        qv = self.embedding_model.encode(query)
	        doc_vectors = self.embedding_model.encode(
	            [doc.get("text", "") for doc in results], show_progress_bar=False
	        )
	        sims = cosine_similarity([qv], doc_vectors)[0]
	        reranked = []
	        for doc, sim in zip(results, sims):
	            d = dict(doc)
	            d["rerank_score"] = float(sim)
	            reranked.append(d)
	        reranked.sort(key=lambda x: x["rerank_score"], reverse=True)
	        return reranked[:top_k]

	    def retrieve_hybrid(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
	        """Run keyword + dense search, fuse with RRF, and re-rank.

	        Each search contributes at least 8 candidates, and more when
	        ``top_k`` is larger, so enough candidates exist to fill ``top_k``.
	        """
	        candidate_k = max(8, top_k)
	        keyword = self._bm25_search(query, top_k=candidate_k)
	        semantic = self._semantic_search(query, top_k=candidate_k)
	        fused = self._rrf_fusion(keyword, semantic)
	        return self._rerank(query, fused, top_k=top_k)


	def create_rag_prompt(query: str, context_chunks: List[Dict[str, Any]]) -> str:
	    """Build the generation prompt: context block, question, instructions.

	    Each chunk is rendered as ``"Source <n>: <text>"`` (numbered from 1)
	    and chunks are separated by a blank line.
	    """
	    numbered_sources = [
	        f"Source {position}: {chunk.get('text', '')}"
	        for position, chunk in enumerate(context_chunks, start=1)
	    ]
	    context_text = "\n\n".join(numbered_sources)

	    # Explicit line list, so the prompt text never depends on how this
	    # function is indented. The final "" yields the trailing newline.
	    prompt_lines = [
	        "CONTEXT:",
	        context_text,
	        "",
	        f"QUESTION: {query}",
	        "",
	        "INSTRUCTIONS:",
	        "1. Answer only from the provided context.",
	        "2. If information is missing, say clearly what is missing.",
	        "3. Keep answer concise and factual.",
	        "",
	    ]
	    return "\n".join(prompt_lines)


	def _resolve_groq_api_key() -> str:
	    """Return the Groq API key from the environment, or ``""`` if unset.

	    ``GROQ_API_KEY`` is checked first, then the ``Groq_API_KEY`` spelling.
	    """
	    for variable in ("GROQ_API_KEY", "Groq_API_KEY"):
	        candidate = os.getenv(variable, "").strip()
	        if candidate:
	            return candidate
	    return ""


	def get_cached_groq_client():
	    """Return the shared Groq client, creating it on first use.

	    Returns ``None`` when no API key is configured, the ``groq`` package is
	    not installed, or the client cannot be constructed. A created client is
	    stored in ``_MODEL_CACHE`` under ``"groq_client"``.
	    """
	    cached = _MODEL_CACHE.get("groq_client")
	    if cached is not None:
	        return cached

	    key = _resolve_groq_api_key()
	    if Groq is None or not key:
	        return None

	    try:
	        fresh = Groq(api_key=key)
	    except Exception:
	        return None

	    _MODEL_CACHE["groq_client"] = fresh
	    return fresh


	def _dedupe_models(*models):
	    """Return the non-empty model names, de-duplicated in first-seen order.

	    Every argument is stringified and stripped; ``None`` and blank values
	    are dropped.
	    """
	    cleaned = (str(candidate or "").strip() for candidate in models)
	    # dict.fromkeys keeps the first occurrence of each key, in order.
	    return list(dict.fromkeys(name for name in cleaned if name))


	def _groq_chat_completion(prompt: str, model_name: str, max_tokens: int = 450, temperature: float = 0.2) -> str:
	    """Send ``prompt`` to a Groq chat model and return the stripped reply.

	    Args:
	        prompt: User message content.
	        model_name: Groq model identifier.
	        max_tokens: Completion token limit.
	        temperature: Sampling temperature.

	    Returns:
	        The first choice's text, or ``""`` when the response has no choices
	        or no content.

	    Raises:
	        RuntimeError: If no Groq client is available.
	    """
	    groq_client = get_cached_groq_client()
	    if groq_client is None:
	        raise RuntimeError("Groq client is not initialized. Set GROQ_API_KEY first.")

	    system_message = {
	        "role": "system",
	        "content": "You are a concise RAG assistant. Answer using only the provided context.",
	    }
	    user_message = {"role": "user", "content": prompt}

	    completion = groq_client.chat.completions.create(
	        model=model_name,
	        messages=[system_message, user_message],
	        temperature=temperature,
	        max_tokens=max_tokens,
	    )

	    choices = getattr(completion, "choices", None) if completion else None
	    if not choices:
	        return ""
	    reply = choices[0].message.content
	    return (reply or "").strip()


	def _clean_generated_answer(text: str) -> str:
	    """Collapse whitespace runs to single spaces and cap at 1800 characters."""
	    raw = str(text or "")
	    single_spaced = re.sub(r"\s+", " ", raw)
	    trimmed = single_spaced.strip()
	    return trimmed[:1800]


	def is_pure_urdu_text(text: str, min_urdu_ratio: float = 0.85) -> bool:
	    """Return True if ``text`` is written (almost) entirely in Urdu script.

	    Text is rejected when it is empty, contains any ASCII letter, or has no
	    alphabetic characters. Otherwise the share of alphabetic characters
	    matching ``URDU_CHAR_RE`` must be at least ``min_urdu_ratio``.
	    """
	    candidate = str(text or "").strip()
	    if not candidate or LATIN_CHAR_RE.search(candidate):
	        return False

	    alphabetic = [ch for ch in candidate if ch.isalpha()]
	    if not alphabetic:
	        return False

	    urdu_count = sum(1 for ch in alphabetic if URDU_CHAR_RE.fullmatch(ch))
	    return urdu_count / max(1, len(alphabetic)) >= min_urdu_ratio


	def _build_translation_prompt(text: str, source_lang: str, target_lang: str, enforce_pure_urdu: bool = False) -> str:
	    """Build the translation prompt, with real newlines between lines."""
	    rules = [
	        "- Preserve meaning faithfully and keep tone natural.",
	        "- Keep names, numbers, and dates unchanged when possible.",
	    ]
	    if enforce_pure_urdu:
	        rules.append("- Output must be in Urdu script only. Do not use English words or Roman Urdu.")
	    rules.append("- Return only the translation text, without notes or quotes.")

	    return (
	        f"Translate the following {source_lang} text to {target_lang}.\n"
	        "Rules:\n"
	        + "\n".join(rules)
	        + f"\n\nText:\n{text}"
	    )


	def _translate_text(text: str, source_lang: str, target_lang: str, enforce_pure_urdu: bool = False) -> str:
	    """Translate ``text`` with the Groq translation model.

	    Translation is best effort: if the request fails or yields nothing,
	    the stripped input text is returned unchanged.

	    Args:
	        text: Text to translate.
	        source_lang: Language name of the input, used in the prompt.
	        target_lang: Language name of the output, used in the prompt.
	        enforce_pure_urdu: Add a rule that forbids English and Roman Urdu.

	    Returns:
	        The translation, or the stripped input when translation fails.
	    """
	    text = str(text or "").strip()
	    if not text:
	        return text

	    model_name = os.getenv("GROQ_TRANSLATION_MODEL", "").strip() or TRANSLATION_MODEL or GENERATOR_MODEL
	    prompt = _build_translation_prompt(text, source_lang, target_lang, enforce_pure_urdu)

	    try:
	        translated = _groq_chat_completion(prompt, model_name=model_name, max_tokens=600, temperature=0.0)
	        translated = str(translated or "")
	        # Reasoning models wrap their scratch work in <think>...</think>;
	        # that text must not end up in the translation.
	        translated = re.sub(r"<think>.*?</think>", "", translated, flags=re.DOTALL | re.IGNORECASE).strip()
	        if translated:
	            return translated
	    except Exception:
	        # Deliberate best effort: fall back to the untranslated text.
	        pass

	    return text


	def translate_urdu_to_english(text: str) -> str:
	    """Translate Urdu text to English (input is returned if that fails)."""
	    language_pair = {"source_lang": "Urdu", "target_lang": "English"}
	    return _translate_text(text, **language_pair)


	def translate_english_to_pure_urdu(text: str) -> str:
	    """Translate English text to Urdu, asking for Urdu script only."""
	    options = {
	        "source_lang": "English",
	        "target_lang": "Urdu",
	        "enforce_pure_urdu": True,
	    }
	    return _translate_text(text, **options)


	def _extractive_fallback_answer(prompt: str, max_points: int = 6) -> str:
	    """Build a bullet-list answer from the prompt's own context block.

	    Used when no LLM answer is available. The text between ``CONTEXT:``
	    and ``QUESTION:`` is scanned for lines that start with ``"Source "``,
	    and the part after the first colon of each becomes one bullet.

	    Args:
	        prompt: Prompt containing ``CONTEXT:`` and ``QUESTION:`` markers.
	        max_points: Maximum number of bullets to return.

	    Returns:
	        Newline-joined ``"- ..."`` bullets, or
	        ``"Not found in provided context."`` when nothing can be extracted.
	    """
	    not_found = "Not found in provided context."

	    # Lazy ".*?" with DOTALL captures the whole multi-line context block up
	    # to the first QUESTION: marker.
	    context_match = re.search(r"CONTEXT:\s*(.*?)\s*QUESTION:", prompt, re.DOTALL)
	    if not context_match:
	        return not_found

	    context_block = context_match.group(1).strip()
	    if not context_block:
	        return not_found

	    lines = [line.strip() for line in context_block.split("\n") if line.strip()]
	    picked = []
	    for line in lines:
	        if line.lower().startswith("source "):
	            source_part = line.split(":", 1)
	            if len(source_part) == 2 and source_part[1].strip():
	                picked.append(f"- {source_part[1].strip()}")
	        if len(picked) >= max_points:
	            break

	    if not picked:
	        return not_found
	    return "\n".join(picked)


	def generate_answer_hf(prompt: str, hf_model: str = GENERATOR_MODEL):
	    """Generate an answer for ``prompt``, trying several Groq models.

	    The requested model is tried first, then two fallback models. A model
	    that raises or returns an empty answer is skipped. If every model
	    fails, an extractive answer built from the prompt's context is
	    returned; if that finds nothing either, the last error is reported.

	    Args:
	        prompt: Full RAG prompt.
	        hf_model: Preferred model name; falls back to
	            ``GROQ_GENERATOR_MODEL`` or ``GENERATOR_MODEL`` when empty.

	    Returns:
	        ``(answer, seconds)``: the answer text and the latency of the
	        successful model call (``0.0`` for the fallback paths).
	    """
	    generator_model = hf_model or os.getenv("GROQ_GENERATOR_MODEL") or GENERATOR_MODEL
	    candidate_models = _dedupe_models(generator_model, "llama-3.1-8b-instant", "llama3-8b-8192")
	    last_error = None

	    for model_name in candidate_models:
	        start = time.time()
	        try:
	            out = _groq_chat_completion(prompt, model_name=model_name, max_tokens=450, temperature=0.2)
	        except Exception as e:
	            last_error = repr(e)
	            continue

	        answer = _clean_generated_answer(out)
	        if answer:
	            return answer, time.time() - start
	        # An empty completion is not an answer; try the next model.
	        last_error = f"empty response from {model_name}"

	    extractive_answer = _extractive_fallback_answer(prompt, max_points=6)
	    # The fallback returns this sentence when it extracts nothing; in that
	    # case the generation error is the more useful message.
	    if extractive_answer and extractive_answer != "Not found in provided context.":
	        return extractive_answer, 0.0
	    return f"Groq generation failed: {last_error}", 0.0


	def call_hf_judge(prompt: str, model: str = JUDGE_MODEL) -> str:
	    """Ask the judge model to evaluate ``prompt`` and return its reply.

	    The prompt is cut to its first 4000 characters. Models are tried in
	    order: the requested judge model, ``GROQ_JUDGE_MODEL``,
	    ``GROQ_GENERATOR_MODEL``, then ``llama-3.1-8b-instant``.

	    Returns:
	        The first successful reply, or ``"Groq judge failed: <error>"`` if
	        every model raises.
	    """
	    preferred = model or os.getenv("GROQ_JUDGE_MODEL") or JUDGE_MODEL
	    fallbacks = (
	        os.getenv("GROQ_JUDGE_MODEL"),
	        os.getenv("GROQ_GENERATOR_MODEL"),
	        "llama-3.1-8b-instant",
	    )

	    failure = None
	    for candidate in _dedupe_models(preferred, *fallbacks):
	        try:
	            return _groq_chat_completion(prompt[:4000], model_name=candidate, max_tokens=180, temperature=0.0)
	        except Exception as err:
	            failure = repr(err)

	    return f"Groq judge failed: {failure}"


	def _parse_claims(raw: str, limit: int = 8) -> List[str]:
	    """Parse a judge reply into at most ``limit`` claim strings."""
	    import ast
	    import json

	    arr_match = re.search(r"\[.*\]", raw, re.DOTALL)
	    if arr_match:
	        literal = arr_match.group(0)
	        # SECURITY: the reply is untrusted model output, so it is parsed as
	        # data (JSON, then a Python literal for single-quoted lists) and
	        # never evaluated as code.
	        for parser in (json.loads, ast.literal_eval):
	            try:
	                parsed = parser(literal)
	            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	                continue
	            if isinstance(parsed, list):
	                return [str(x) for x in parsed if str(x).strip()][:limit]

	    # No parseable list: treat each line as one claim.
	    lines = [line.strip("- ").strip() for line in raw.split("\n") if line.strip()]
	    return [line for line in lines if len(line) > 5][:limit]


	def extract_claims(answer_text: str) -> List[str]:
	    """Ask the judge model to split an answer into atomic factual claims.

	    Args:
	        answer_text: The generated answer.

	    Returns:
	        Up to 8 claim strings. If the reply contains no parseable list, its
	        lines longer than 5 characters are used instead.
	    """
	    prompt = (
	        "Extract atomic factual claims from the answer.\n"
	        "Return only a JSON array of short claims.\n"
	        f"Answer: {answer_text}"
	    )
	    return _parse_claims(call_hf_judge(prompt))


	def _build_verification_prompt(claim: str, context_text: str, max_chars: int = 4000) -> str:
	    """Build the judge prompt for one claim, trimming only the context.

	    call_hf_judge() keeps just the first 4000 characters of a prompt and
	    the context comes first, so an untrimmed long context would cut off
	    the claim and the instruction. The context is shortened here so the
	    whole prompt fits in ``max_chars``.
	    """
	    header = "Context:\n"
	    footer = (
	        f"\n\nClaim: {claim}\n\n"
	        "Is this claim supported by context? Reply only with SUPPORTED or UNSUPPORTED."
	    )
	    context_budget = max(0, max_chars - len(header) - len(footer))
	    return header + context_text[:context_budget] + footer


	def verify_claims_against_context(claims: List[str], context_text: str):
	    """Ask the judge model whether each claim is supported by the context.

	    Args:
	        claims: Claims to verify.
	        context_text: Retrieved context the claims are checked against.

	    Returns:
	        One ``{"claim": str, "supported": bool}`` dict per claim, in order.
	        A claim counts as supported only if the reply contains
	        ``SUPPORTED`` and not ``UNSUPPORTED``.
	    """
	    verdicts = []
	    for claim in claims:
	        prompt = _build_verification_prompt(claim, context_text)
	        out = call_hf_judge(prompt).upper()
	        supported = "SUPPORTED" in out and "UNSUPPORTED" not in out
	        verdicts.append({"claim": claim, "supported": supported})
	    return verdicts


	def faithfulness_score(answer_text: str, retrieved_chunks: List[Dict[str, Any]]):
	    """Return the fraction of the answer's claims supported by the context.

	    Claims are extracted from ``answer_text`` and each is checked against
	    the joined chunk texts. Returns ``0.0`` when no claims are extracted.
	    """
	    context_text = "\n\n".join(chunk.get("text", "") for chunk in retrieved_chunks)

	    claims = extract_claims(answer_text)
	    if not claims:
	        return 0.0

	    verdicts = verify_claims_against_context(claims, context_text)
	    supported_count = sum(1 for verdict in verdicts if verdict["supported"])
	    return float(supported_count / len(verdicts))


	def relevancy_score(original_query: str, answer_text: str, embedding_model: SentenceTransformer):
	    """Score how well the answer matches the question that was asked.

	    The judge model proposes up to three questions the answer would fit.
	    The score is the mean cosine similarity between their embeddings and
	    the embedding of ``original_query``. Returns ``0.0`` when the judge
	    proposes nothing.
	    """
	    judge_prompt = (
	        "Generate 3 alternative user questions that would have answer below. "
	        f"Return only one question per line.\n\nAnswer:\n{answer_text}"
	    )
	    reply = call_hf_judge(judge_prompt)

	    proposed = []
	    for line in reply.split("\n"):
	        if line.strip():
	            proposed.append(line.strip(" -").strip())
	    proposed = proposed[:3]
	    if not proposed:
	        return 0.0

	    reference = embedding_model.encode(original_query)
	    similarities = [
	        float(cosine_similarity([reference], [embedding_model.encode(question)])[0][0])
	        for question in proposed
	    ]
	    return float(np.mean(similarities))


	# Process-wide pipeline state, filled in by ensure_pipeline_ready():
	#   ready     - True once initialisation has completed
	#   retriever - the HybridRetriever instance
	#   chunks    - chunk dicts the retriever was built from
	#   docs      - source documents (may stay empty on a cache hit)
	STATE: Dict[str, Any] = {"ready": False, "retriever": None, "chunks": [], "docs": []}


	def _env_flag(name: str, default: str = "false") -> bool:
	    """Return True when env var ``name`` is one of 1/true/yes/y (any case)."""
	    return str(os.getenv(name, default)).strip().lower() in {"1", "true", "yes", "y"}


	def ensure_pipeline_ready() -> None:
	    """Build the retrieval pipeline once and store it in ``STATE``.

	    Chunks come from the first source that has them: the local JSONL
	    cache, then MongoDB, then a fresh chunking of the corpus at
	    ``CORPUS_PATH``. ``FORCE_RECHUNK`` skips both caches and the cached
	    embedding matrix. MongoDB is contacted only when the local cache has
	    no chunks.

	    Environment flags:
	        FORCE_RECHUNK: Always rebuild chunks from the corpus.
	        LOAD_DOCS_ON_CACHE_HIT: Also read the corpus when chunks came
	            from a cache.
	        UPSERT_ON_CACHE_HIT: Upsert to Pinecone even on a cache hit.

	    Raises:
	        ValueError: If chunks must be built but the corpus has no documents.
	    """
	    if STATE["ready"]:
	        return

	    app_dir = Path(__file__).resolve().parent
	    candidate_defaults = [
	        app_dir / "Mental_Health_" / "support_1000.parquet",
	        app_dir / "synthetic_knowledge_items.csv",
	    ]
	    default_corpus = next((p for p in candidate_defaults if p.exists()), candidate_defaults[-1])
	    # Strip before falling back, so a whitespace-only CORPUS_PATH selects
	    # the default corpus instead of resolving to the current directory.
	    corpus_path = os.getenv("CORPUS_PATH", "").strip() or str(default_corpus)

	    force_rechunk = _env_flag("FORCE_RECHUNK")
	    load_docs_on_cache_hit = _env_flag("LOAD_DOCS_ON_CACHE_HIT")
	    upsert_on_cache_hit = _env_flag("UPSERT_ON_CACHE_HIT")

	    cache_path = get_chunk_cache_path(corpus_path, chunking_method=CHUNKING_METHOD)
	    semantic_cache_path = get_semantic_cache_path(cache_path)

	    chunks: List[Dict[str, str]] = []
	    docs: List[Dict[str, str]] = []
	    chunk_cache_hit = False

	    if not force_rechunk:
	        chunks = load_chunks_from_cache(cache_path)
	        chunk_cache_hit = len(chunks) > 0
	        if chunk_cache_hit:
	            print(f"Loaded {len(chunks)} chunks from cache: {cache_path}")
	            print("Reusing cached chunks. Skipping chunking step.")

	    # Connect lazily: a local cache hit needs no MongoDB round trip.
	    mongo_collection = None
	    if not chunks:
	        mongo_collection = get_mongo_collection()
	        # A forced re-chunk must not be satisfied by chunks stored earlier.
	        if mongo_collection is not None and not force_rechunk:
	            chunks = load_chunks_from_mongodb(mongo_collection)
	            if chunks:
	                chunk_cache_hit = True
	                save_chunks_to_cache(chunks, cache_path)
	                print(f"Loaded {len(chunks)} chunks from MongoDB and saved local cache: {cache_path}")

	    if not chunks:
	        docs = read_corpus_documents(corpus_path)
	        if not docs:
	            raise ValueError(f"No documents found at CORPUS_PATH={corpus_path}")
	        chunks = build_chunks(docs)
	        save_chunks_to_cache(chunks, cache_path)
	        upsert_chunks_to_mongodb(mongo_collection, chunks)
	        print(f"Chunked corpus and saved {len(chunks)} chunks to cache: {cache_path}")
	    elif load_docs_on_cache_hit:
	        docs = read_corpus_documents(corpus_path)
	    else:
	        print("Skipping corpus read on cache hit for faster startup.")

	    retriever = HybridRetriever()

	    semantic_matrix = None
	    if not force_rechunk:
	        semantic_matrix = load_semantic_embeddings_from_cache(semantic_cache_path, expected_rows=len(chunks))
	        if semantic_matrix is not None:
	            print(f"Loaded semantic embedding matrix from cache: {semantic_cache_path.name}")

	    retriever.set_corpus(chunks, semantic_matrix=semantic_matrix)

	    # Persist embeddings computed in this run for the next startup.
	    if semantic_matrix is None and retriever.local_chunk_matrix is not None:
	        save_semantic_embeddings_to_cache(retriever.local_chunk_matrix, semantic_cache_path)
	        print(f"Saved semantic embedding matrix cache: {semantic_cache_path.name}")

	    retriever.try_init_pinecone()
	    should_upsert = (not chunk_cache_hit) or bool(upsert_on_cache_hit)
	    if should_upsert:
	        retriever.upsert_to_pinecone(chunks)
	    else:
	        print("Skipping Pinecone upsert on cache hit for faster startup.")

	    STATE["retriever"] = retriever
	    STATE["chunks"] = chunks
	    STATE["docs"] = docs
	    STATE["ready"] = True


	def _format_context(chunks: List[Dict[str, Any]], max_items: int = 3) -> str:
	    """Render the first ``max_items`` chunks as numbered source previews.

	    Each entry shows the source followed by the chunk text, cut to 350
	    characters plus ``"..."`` when longer. Entries are separated by a
	    blank line.
	    """
	    if not chunks:
	        return "No context chunks returned."

	    def render(position: int, chunk: Dict[str, Any]) -> str:
	        body = str(chunk.get("text", "")).strip()
	        origin = str(chunk.get("source", "unknown"))
	        if len(body) > 350:
	            body = body[:350] + "..."
	        return f"[{position}] Source: {origin}\n{body}"

	    return "\n\n".join(
	        render(position, chunk)
	        for position, chunk in enumerate(chunks[:max_items], start=1)
	    )


	def run_rag(query: str):
	    """Answer ``query`` with the RAG pipeline; Gradio event handler.

	    Queries detected as pure Urdu are translated to English for retrieval
	    and generation, and the answer is translated back to Urdu. Both
	    scores are computed on the English answer.

	    Returns:
	        ``(answer, context_preview, faithfulness, relevancy)`` as strings.
	        A pipeline failure is reported in the answer slot with ``"N/A"``
	        scores.
	    """
	    question = (query or "").strip()
	    if not question:
	        return "Please enter a question.", "", "", ""

	    try:
	        ensure_pipeline_ready()
	        retriever: HybridRetriever = STATE["retriever"]

	        asked_in_urdu = is_pure_urdu_text(question)
	        if asked_in_urdu:
	            search_query = translate_urdu_to_english(question)
	        else:
	            search_query = question

	        hits = retriever.retrieve_hybrid(search_query, top_k=5)
	        english_answer, _latency = generate_answer_hf(create_rag_prompt(search_query, hits))

	        if asked_in_urdu:
	            shown_answer = translate_english_to_pure_urdu(english_answer)
	        else:
	            shown_answer = english_answer

	        faithfulness = faithfulness_score(english_answer, hits)
	        relevancy = relevancy_score(search_query, english_answer, retriever.embedding_model)

	        return shown_answer, _format_context(hits), f"{faithfulness:.3f}", f"{relevancy:.3f}"
	    except Exception as e:
	        return f"Pipeline error: {repr(e)}", "", "N/A", "N/A"


	# Gradio UI: one question box, a submit button and four read-only outputs.
	with gr.Blocks(title="RAG Assignment 3") as demo:
	    gr.Markdown("# RAG-based Question Answering System")
	    gr.Markdown(
	        "Set environment variables: GROQ_API_KEY, GROQ_GENERATOR_MODEL, GROQ_JUDGE_MODEL, GROQ_TRANSLATION_MODEL, CORPUS_PATH, CHUNKING_METHOD, MONGODB_URI, MONGODB_DB, MONGODB_COLLECTION, PINECONE_API_KEY, PINECONE_ENVIRONMENT, FORCE_RECHUNK, LOAD_DOCS_ON_CACHE_HIT, UPSERT_ON_CACHE_HIT"
	    )

	    query_input = gr.Textbox(label="Ask a question", lines=2, placeholder="Type your question here...")
	    submit_btn = gr.Button("Generate Answer", variant="primary")

	    answer_output = gr.Textbox(label="Generated Answer", lines=8)
	    context_output = gr.Textbox(label="Retrieved Context (Top Chunks)", lines=10)
	    faithfulness_output = gr.Textbox(label="Faithfulness Score")
	    relevancy_output = gr.Textbox(label="Relevancy Score")

	    # The button click and Enter in the textbox run the same handler with
	    # the same wiring.
	    for register in (submit_btn.click, query_input.submit):
	        register(
	            fn=run_rag,
	            inputs=[query_input],
	            outputs=[answer_output, context_output, faithfulness_output, relevancy_output],
	        )


	if __name__ == "__main__":
	    print("Starting RAG QA system...")

	    # Simple, hardcoded for HF environment stability.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "show_error": True,  # This helps see errors in the browser
	    }
	    demo.queue().launch(**launch_options)