# =========================================
# RAG QnA System (FIXED FOR HF SPACES)
# =========================================

import gradio as gr
import numpy as np
import faiss
import os

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# -----------------------------
# 1. Load Documents (FIXED)
# -----------------------------
def load_documents(file_path):
    """Read a text file and split it into paragraph-level documents.

    The file is decoded as UTF-8 first. If that fails with a decoding
    error, it is read again as Latin-1, which can decode any byte sequence.

    Args:
        file_path: Path of the text file to load.

    Returns:
        A list of strings obtained by splitting the file content on double
        newlines. If ``file_path`` does not exist, the single-item list
        ``["No document found."]`` is returned instead.

    Raises:
        OSError: If the path exists but cannot be opened or read.
    """
    if not os.path.exists(file_path):
        return ["No document found."]

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    except UnicodeDecodeError:
        # Only a decoding failure justifies the Latin-1 retry. Any other
        # error (permissions, interrupts, ...) should surface unchanged
        # instead of being masked by a second read attempt.
        with open(file_path, "r", encoding="latin-1") as f:
            text = f.read()

    return text.split("\n\n")


def chunk_text(text, chunk_size=120):
    """Break *text* into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields (whitespace-separated tokens),
    and each chunk is those words re-joined with single spaces.

    Args:
        text: The string to split.
        chunk_size: Maximum number of words per chunk.

    Returns:
        A list of chunk strings in original order; empty when *text*
        contains no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(" ".join(window))
    return pieces


# Load the knowledge-base file and flatten its paragraphs into word chunks.
documents = load_documents("data/data.txt")

all_chunks = []
for doc in documents:
    all_chunks.extend(chunk_text(doc))

# An empty or whitespace-only file produces no chunks at all. Keep one
# placeholder chunk so that everything built from `all_chunks` afterwards
# (embeddings, search index) always has at least one entry to work with.
if not all_chunks:
    all_chunks = ["No document found."]

# -----------------------------
# 2. Embeddings + FAISS
# -----------------------------
# Sentence-embedding model; the same instance also encodes queries in rag_query.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every chunk once at import time. rag_query maps search-result
# positions back into `all_chunks`, so row order here must match that list.
embeddings = embedder.encode(all_chunks)

# Size the L2 index from the embedding width (second axis of the array),
# then add all chunk vectors to it.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# -----------------------------
# 3. GENERATIVE MODEL (FIXED)
# -----------------------------
# Checkpoint identifier shared by the tokenizer and the seq2seq model so the
# two always come from the same pretrained source.
model_name = "google/flan-t5-base"

# Both objects are created once at import time and reused by every
# rag_query call.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# -----------------------------
# 4. RAG Function (FIXED)
# -----------------------------
def rag_query(query):
    """Answer *query* from the indexed document chunks.

    The query is embedded, the closest chunks (at most 5) are fetched from
    the FAISS index, and the generative model is prompted with the question
    plus those chunks.

    Args:
        query: The user's question. ``None``, empty and whitespace-only
            values are treated as "no question".

    Returns:
        A string of the form ``"Answer:\\n<answer>\\n\\nContext:\\n<context>"``,
        or ``"Please enter a question."`` when no question was given.
    """
    if not query or not query.strip():
        return "Please enter a question."

    # Never ask for more neighbours than there are chunks: FAISS pads the
    # missing results with the id -1, which as a Python index would silently
    # pick the LAST chunk instead of "nothing".
    top_k = min(5, len(all_chunks))

    retrieved_docs = []
    if top_k > 0:
        query_embedding = embedder.encode([query])
        _, neighbour_ids = index.search(np.array(query_embedding), k=top_k)
        retrieved_docs = [
            all_chunks[int(i)]
            for i in neighbour_ids[0]
            if 0 <= i < len(all_chunks)  # drop any padding / out-of-range id
        ]
    context = " ".join(retrieved_docs)

    # The question goes BEFORE the context. With truncation enabled the
    # tokenizer cuts the tail of an over-long input, so a long context placed
    # first could push the question out of the model input entirely.
    prompt = (
        "Answer the question ONLY using the context below.\n"
        'If the answer is not present, say "Not found in document".\n\n'
        f"Question:\n{query}\n\n"
        f"Context:\n{context}"
    )

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7
    )

    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return f"Answer:\n{answer}\n\nContext:\n{context}"

# -----------------------------
# 5. Gradio UI
# -----------------------------
# Single-input, single-output Gradio interface: the text typed into the
# two-line textbox is passed to rag_query and its returned string is shown
# as plain text.
iface = gr.Interface(
    fn=rag_query,
    inputs=gr.Textbox(lines=2, placeholder="Ask your question..."),
    outputs="text",
    title="📚 RAG QnA System (Fixed)",
    description="Retriever + FLAN-T5 (Works on Hugging Face Spaces)"
)

# -----------------------------
# 6. Launch
# -----------------------------
# Start the web UI only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    iface.launch()