# =========================================
# RAG QnA System (FIXED FOR HF SPACES)
# =========================================

import os

import faiss
import gradio as gr
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Placeholder corpus entry used when no usable document text is available.
_NO_DOCUMENT = "No document found."

# Maximum number of chunks retrieved per query.
TOP_K = 5


# -----------------------------
# 1. Load Documents (FIXED)
# -----------------------------
def load_documents(file_path):
    """Read a text file and split it into paragraphs on blank lines.

    The file is decoded as UTF-8 first; if that fails with a decoding
    error it is re-read as Latin-1, which accepts any byte sequence.

    Args:
        file_path: Path of the text file to load.

    Returns:
        A list of non-blank paragraph strings. If the file does not exist
        or contains no non-whitespace text, a single-element list holding
        the placeholder "No document found." is returned so that the
        downstream index is never built from an empty corpus.
    """
    if not os.path.exists(file_path):
        return [_NO_DOCUMENT]

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    except UnicodeDecodeError:
        # Only fall back on decoding problems; other errors (permissions,
        # interrupts, ...) should surface instead of being swallowed.
        with open(file_path, "r", encoding="latin-1") as f:
            text = f.read()

    # Blank paragraphs would produce no chunks anyway; dropping them here
    # lets us detect an effectively empty document.
    paragraphs = [part for part in text.split("\n\n") if part.strip()]
    return paragraphs or [_NO_DOCUMENT]


def chunk_text(text, chunk_size=120):
    """Split *text* into consecutive chunks of at most *chunk_size* words.

    Args:
        text: The text to split; words are separated by any whitespace.
        chunk_size: Maximum number of words per chunk. Must be >= 1.

    Returns:
        A list of chunk strings with words joined by single spaces. Empty
        or whitespace-only text yields an empty list.

    Raises:
        ValueError: If *chunk_size* is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    words = text.split()
    return [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]


documents = load_documents("data/data.txt")

all_chunks = []
for doc in documents:
    all_chunks.extend(chunk_text(doc))

# -----------------------------
# 2. Embeddings + FAISS
# -----------------------------
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# FAISS works on float32 matrices; make the dtype explicit.
embeddings = np.asarray(embedder.encode(all_chunks), dtype="float32")

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# -----------------------------
# 3. GENERATIVE MODEL (FIXED)
# -----------------------------
model_name = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


# -----------------------------
# 4. RAG Function (FIXED)
# -----------------------------
def rag_query(query):
    """Answer *query* from the indexed document chunks.

    The query is embedded, the nearest chunks (up to ``TOP_K``) are fetched
    from the FAISS index, and the generative model is prompted with the
    question plus the retrieved context.

    Args:
        query: The user's question.

    Returns:
        A string containing the generated answer followed by the retrieved
        context, or a prompt to enter a question when *query* is empty.
    """
    if not query or not query.strip():
        return "Please enter a question."

    # Retrieve relevant chunks. Never ask for more neighbours than the
    # index holds: FAISS pads missing results with -1, which would
    # otherwise index the *last* chunk via Python's negative indexing.
    k = min(TOP_K, index.ntotal)
    query_embedding = np.asarray(embedder.encode([query]), dtype="float32")
    _distances, neighbours = index.search(query_embedding, k)

    retrieved_docs = [all_chunks[i] for i in neighbours[0] if i >= 0]
    context = " ".join(retrieved_docs)

    # Prompt for model. The question is placed BEFORE the context because
    # the tokenizer truncates from the end: with a long context the
    # question would otherwise be the part that gets cut off.
    prompt = (
        "\n"
        "Answer the question ONLY using the context below.\n"
        'If the answer is not present, say "Not found in document".\n'
        "\n"
        "Question:\n"
        f"{query}\n"
        "\n"
        "Context:\n"
        f"{context}\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )

    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return f"Answer:\n{answer}\n\nContext:\n{context}"


# -----------------------------
# 5. Gradio UI
# -----------------------------
iface = gr.Interface(
    fn=rag_query,
    inputs=gr.Textbox(lines=2, placeholder="Ask your question..."),
    outputs="text",
    title="📚 RAG QnA System (Fixed)",
    description="Retriever + FLAN-T5 (Works on Hugging Face Spaces)",
)

# -----------------------------
# 6. Launch
# -----------------------------
if __name__ == "__main__":
    iface.launch()