"""Gradio RAG demo: FAISS retrieval + FLAN-T5 generation.

Loads a prebuilt FAISS index and pickled passages from ``faiss_index/``,
then serves a simple Retrieval-Augmented Generation question-answering
interface backed by ``google/flan-t5-large``.
"""
import pickle

import faiss
import gradio as gr
import numpy as np
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# ---- Load FAISS index and metadata ----
index = faiss.read_index("faiss_index/index.faiss")
with open("faiss_index/metadata.pkl", "rb") as f:
    # NOTE(review): pickle.load executes arbitrary code on malicious input;
    # only ever point this at the locally built artifact.
    passages = pickle.load(f)

# ---- Load FLAN-T5 model ----
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# HF pipeline wraps tokenization + generation + decoding in one call.
generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


def get_relevant_chunks(query, k=3):
    """Return the concatenated text of ``k`` retrieved passages.

    TODO: still a mock — ``query`` is never embedded; a random vector is
    searched instead. Replace with a real query-embedding lookup for
    meaningful retrieval.

    Args:
        query: User question (currently unused by the mock search).
        k: Number of passages to retrieve.

    Returns:
        The ``k`` retrieved passages joined with single spaces.
    """
    # FAISS requires float32 query vectors; np.random.random yields
    # float64, which the search wrapper rejects — cast explicitly.
    query_vector = np.random.random((1, index.d)).astype(np.float32)
    _, indices = index.search(query_vector, k)
    return " ".join(passages[i] for i in indices[0])


def rag_answer(query):
    """Answer ``query`` by retrieving context and prompting FLAN-T5.

    Args:
        query: The user's question.

    Returns:
        The model's generated answer string.
    """
    context = get_relevant_chunks(query)
    prompt = f"Question: {query}\nContext: {context}\nAnswer:"
    # Greedy decoding (do_sample=False) keeps answers deterministic.
    result = generator(prompt, max_new_tokens=150, do_sample=False)
    return result[0]["generated_text"]


iface = gr.Interface(
    fn=rag_answer,
    inputs=gr.Textbox(label="Ask about Śrīla Prabhupāda"),
    outputs=gr.Textbox(label="Answer"),
    title="Śrīla Prabhupāda RAG Assistant",
    description=(
        "Retrieval-Augmented Generation model using FLAN-T5-Large to "
        "answer spiritual and biographical questions."
    ),
)

# Guard so importing this module (e.g. for testing) does not start the server.
if __name__ == "__main__":
    iface.launch()