"""Gradio RAG chatbot.

Downloads a prebuilt FAISS index + chunk store from a Hugging Face repo,
retrieves the top-k chunks for each user question, and answers with
TinyLlama via the transformers text-generation pipeline.
"""

import json
import os
import pickle

import faiss
import gradio as gr
import numpy as np
import requests
from sentence_transformers import SentenceTransformer
from transformers import pipeline

HF_REPO = os.getenv("HF_REPO", "Nadun102/RAG-system")

# -----------------------
# 1. Load LLM
# -----------------------
# max_new_tokens (not max_length) so a long retrieved context cannot eat the
# entire generation budget; do_sample=True so temperature actually takes
# effect (it is silently ignored under greedy decoding).
generator = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    max_new_tokens=300,
    do_sample=True,
    temperature=0.3,
)

# -----------------------
# 2. Load embeddings
# -----------------------
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# -----------------------
# 3. Download files from HF repo
# -----------------------
def hf_download(path):
    """Fetch *path* from the HF repo and return its raw bytes.

    Raises requests.HTTPError on a non-2xx response so we never pickle or
    parse an HTML error page by accident.
    """
    url = f"https://huggingface.co/{HF_REPO}/resolve/main/{path}"
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()
    return resp.content


chunks = json.loads(hf_download("chunks.json"))

with open("metadata.pkl", "wb") as f:
    f.write(hf_download("metadata.pkl"))
# NOTE(security): pickle.load executes arbitrary code on load — acceptable
# only because HF_REPO defaults to a repo we control. Never point HF_REPO
# at an untrusted repository.
with open("metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

with open("faiss_index.bin", "wb") as f:
    f.write(hf_download("faiss_index.bin"))
faiss_index = faiss.read_index("faiss_index.bin")

# -----------------------
# 4. RAG Retrieval
# -----------------------
def retrieve(query, k=3):
    """Return the top-*k* chunks most similar to *query*, newline-joined."""
    # FAISS requires float32; be explicit rather than rely on encoder dtype.
    q_emb = np.asarray(embedder.encode([query]), dtype=np.float32)
    _scores, indices = faiss_index.search(q_emb, k)
    # FAISS pads with -1 when the index holds fewer than k vectors;
    # chunks[-1] would silently return the wrong chunk, so filter them out.
    ctx = [chunks[idx] for idx in indices[0] if idx >= 0]
    return "\n".join(ctx)

# -----------------------
# 5. Generate answer
# -----------------------
def answer_fn(message, history):
    """Gradio chat handler: retrieve context, prompt the LLM, return the answer.

    *history* is supplied by gr.ChatInterface and is intentionally unused.
    """
    context = retrieve(message)
    prompt = f"""
Use the following context to answer the question clearly:

CONTEXT:
{context}

QUESTION:
{message}

ANSWER:
"""
    # return_full_text=False strips the echoed prompt so the chat window
    # shows only the model's answer, not CONTEXT/QUESTION boilerplate.
    output = generator(prompt, return_full_text=False)[0]["generated_text"]
    return output.strip()

# -----------------------
# 6. Gradio Chat UI
# -----------------------
chatbot = gr.ChatInterface(fn=answer_fn)

if __name__ == "__main__":
    chatbot.launch()