"""Gradio RAG demo: FAISS retrieval + FLAN-T5 generation.

Loads a prebuilt FAISS index and pickled passages from ``faiss_index/``,
then serves a simple Retrieval-Augmented Generation question-answering
interface backed by ``google/flan-t5-large``.
"""
import pickle

import faiss
import gradio as gr
import numpy as np
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# ---- Load FAISS index and metadata ----
index = faiss.read_index("faiss_index/index.faiss")
with open("faiss_index/metadata.pkl", "rb") as f:
    # NOTE(review): pickle.load executes arbitrary code on malicious input;
    # only ever point this at the locally built artifact.
    passages = pickle.load(f)

# ---- Load FLAN-T5 model ----
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# HF pipeline wraps tokenization + generation + decoding in one call.
generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


def get_relevant_chunks(query, k=3):
    """Return the concatenated text of ``k`` retrieved passages.

    TODO: still a mock — ``query`` is never embedded; a random vector is
    searched instead. Replace with a real query-embedding lookup for
    meaningful retrieval.

    Args:
        query: User question (currently unused by the mock search).
        k: Number of passages to retrieve.

    Returns:
        The ``k`` retrieved passages joined with single spaces.
    """
    # FAISS requires float32 query vectors; np.random.random yields
    # float64, which the search wrapper rejects — cast explicitly.
    query_vector = np.random.random((1, index.d)).astype(np.float32)
    _, indices = index.search(query_vector, k)
    return " ".join(passages[i] for i in indices[0])


def rag_answer(query):
    """Answer ``query`` by retrieving context and prompting FLAN-T5.

    Args:
        query: The user's question.

    Returns:
        The model's generated answer string.
    """
    context = get_relevant_chunks(query)
    prompt = f"Question: {query}\nContext: {context}\nAnswer:"
    # Greedy decoding (do_sample=False) keeps answers deterministic.
    result = generator(prompt, max_new_tokens=150, do_sample=False)
    return result[0]["generated_text"]


iface = gr.Interface(
    fn=rag_answer,
    inputs=gr.Textbox(label="Ask about Śrīla Prabhupāda"),
    outputs=gr.Textbox(label="Answer"),
    title="Śrīla Prabhupāda RAG Assistant",
    description=(
        "Retrieval-Augmented Generation model using FLAN-T5-Large to "
        "answer spiritual and biographical questions."
    ),
)

# Guard so importing this module (e.g. for testing) does not start the server.
if __name__ == "__main__":
    iface.launch()