# ----------------------------
# Hugging Face cache bootstrap
# ----------------------------
import os

CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

# Point every Hugging Face cache variable at a writable directory. This must
# run before transformers/sentence-transformers are imported, since they read
# these variables at import time.
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR  # legacy alias, superseded by HF_HOME
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR
print(f"✅ Using Hugging Face cache at {CACHE_DIR}")

# ----------------------------
# Imports AFTER cache bootstrap
# ----------------------------
from sentence_transformers import SentenceTransformer
from transformers import pipeline

from vectorstore import search_faiss

# ----------------------------
# Query embedding model
# ----------------------------
_query_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    cache_folder=CACHE_DIR,
)

# ----------------------------
# LLM for answers
# ----------------------------
MODEL_NAME = "google/flan-t5-small"
# pipeline() does not take cache_dir as a top-level argument; route it through
# model_kwargs so it reaches from_pretrained() (the env vars above already
# cover the default case).
_answer_model = pipeline(
    "text2text-generation",
    model=MODEL_NAME,
    model_kwargs={"cache_dir": CACHE_DIR},
)

# ----------------------------
# Functions
# ----------------------------
def retrieve_chunks(query, index, chunks, top_k=3):
    """Embed the query and return the top_k most similar chunks from FAISS."""
    q_emb = _query_model.encode([query], convert_to_numpy=True)[0]
    return search_faiss(q_emb, index, chunks, top_k)


def generate_answer(query, retrieved_chunks):
    """Answer the query with Flan-T5, grounded in the retrieved chunks."""
    if not retrieved_chunks:
        return "Sorry, I could not find relevant information."

    context = " ".join(retrieved_chunks)
    prompt = (
        "You are an assistant. Use the context to answer the question clearly.\n"
        f"Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"
    )
    result = _answer_model(prompt, max_length=300, do_sample=False)
    return result[0]["generated_text"].strip()
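
# ----------------------------
# Example usage (sketch)
# ----------------------------
# A minimal smoke test, not part of the module proper. It assumes that
# search_faiss() accepts a FAISS index built over the chunk embeddings
# (IndexFlatL2 is an assumption here), a 1-D query vector, and the chunk
# list, and that it returns the matching chunk strings. The sample chunks
# are purely illustrative.
if __name__ == "__main__":
    import faiss
    import numpy as np

    chunks = [
        "FAISS is a library for efficient similarity search.",
        "Flan-T5 is an instruction-tuned sequence-to-sequence model.",
        "MiniLM produces 384-dimensional sentence embeddings.",
    ]

    # Embed the chunks and index them with an exact L2 index; FAISS expects
    # a contiguous float32 matrix of shape (n_chunks, dim).
    embeddings = _query_model.encode(chunks, convert_to_numpy=True)
    index = faiss.IndexFlatL2(int(embeddings.shape[1]))
    index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

    question = "Which model produces the sentence embeddings?"
    hits = retrieve_chunks(question, index, chunks, top_k=2)
    print(generate_answer(question, hits))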