import os

# Redirect every Hugging Face cache to a writable location. These environment
# variables must be set *before* transformers / sentence_transformers are
# imported, or the libraries fall back to their default cache paths.
# (TRANSFORMERS_CACHE is superseded by HF_HOME in newer transformers releases
# but is kept here for compatibility with older versions.)
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR

print(f"Using Hugging Face cache at {CACHE_DIR}")

from sentence_transformers import SentenceTransformer
from transformers import pipeline

from vectorstore import search_faiss

# Embedding model used to encode incoming queries. all-MiniLM-L6-v2 produces
# 384-dimensional sentence embeddings and is small enough to load quickly.
_query_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    cache_folder=CACHE_DIR,
)
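
# The encoder returns one 384-dim vector per input string, e.g.:
#
#     _query_model.encode(["hello world"], convert_to_numpy=True).shape
#     # -> (1, 384)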

# Small instruction-tuned seq2seq model for answer generation.
MODEL_NAME = "google/flan-t5-small"

# Note: `cache_dir` is not a pipeline-level argument; model-loading kwargs
# have to go through `model_kwargs`, otherwise they are forwarded to
# `generate()` and raise an error at inference time.
_answer_model = pipeline(
    "text2text-generation",
    model=MODEL_NAME,
    model_kwargs={"cache_dir": CACHE_DIR},
)
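
# Quick smoke test of the generator (flan-t5-small runs comfortably on CPU);
# the prompt below is illustrative, not part of the pipeline:
#
#     _answer_model("Answer briefly: What is FAISS?", max_length=64)
#     # -> [{"generated_text": "..."}]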

def retrieve_chunks(query, index, chunks, top_k=3):
    """Embed the query and return the top_k most similar chunks from the index."""
    q_emb = _query_model.encode([query], convert_to_numpy=True)[0]
    return search_faiss(q_emb, index, chunks, top_k)
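
# For reference, a minimal sketch of what `vectorstore.search_faiss` might look
# like, assuming a flat FAISS index whose ids are positions into `chunks` (the
# real implementation lives in vectorstore.py and may differ):
#
#     def search_faiss(q_emb, index, chunks, top_k):
#         _, ids = index.search(q_emb.reshape(1, -1).astype("float32"), top_k)
#         return [chunks[i] for i in ids[0] if i != -1]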

def generate_answer(query, retrieved_chunks):
    """Build a prompt from the retrieved chunks and generate an answer."""
    if not retrieved_chunks:
        return "Sorry, I could not find relevant information."

    context = " ".join(retrieved_chunks)
    prompt = (
        "You are an assistant. Use the context to answer the question clearly.\n"
        f"Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"
    )
    # Greedy decoding keeps answers deterministic; max_length caps the number
    # of generated tokens.
    result = _answer_model(prompt, max_length=300, do_sample=False)
    return result[0]["generated_text"].strip()
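
# Usage sketch. `build_faiss_index` and `load_chunks` are hypothetical helpers
# standing in for however the index is actually built; only `search_faiss` is
# known to exist in vectorstore.
#
# if __name__ == "__main__":
#     from vectorstore import build_faiss_index, load_chunks  # hypothetical
#
#     chunks = load_chunks("docs/")
#     index = build_faiss_index(chunks)
#
#     question = "What is in the documents?"
#     hits = retrieve_chunks(question, index, chunks, top_k=3)
#     print(generate_answer(question, hits))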