"""RAG chatbot Gradio app.

Indexes an uploaded PDF / DOCX / text document into a FAISS vector store and
answers typed or spoken questions grounded in the document's content, with
text-to-speech replies for voice queries.
"""

import os

import docx
import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import torch
from gtts import gTTS
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# =============================
# 1) Config
# =============================
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Please set HF_TOKEN in Space secrets")
login(HF_TOKEN)

EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_ID = "google/flan-t5-base"
ASR_MODEL_ID = "openai/whisper-small"

# =============================
# 2) Load Models (cached)
# =============================
embedding_model = SentenceTransformer(EMBED_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID)
llm = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL_ID)

stt_model = pipeline(
    "automatic-speech-recognition",
    model=ASR_MODEL_ID,
    token=HF_TOKEN,
)


# =============================
# 3) Text Extraction
# =============================
def extract_text(file_path: str) -> str:
    """Extract plain text from a PDF, DOCX, or plain-text file.

    Args:
        file_path: Path to the uploaded file; may be empty/None.

    Returns:
        The stripped text content, or "" when the path is empty, the file
        is unreadable, or parsing fails (best-effort by design — callers
        report "no readable text" to the user).
    """
    if not file_path:
        return ""
    text = ""
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext == ".pdf":
            # Context manager closes the PDF handle; the original leaked
            # it on every upload.
            with fitz.open(file_path) as doc:
                for page in doc:
                    text += page.get_text()
        elif ext == ".docx":
            document = docx.Document(file_path)
            for p in document.paragraphs:
                text += p.text + "\n"
        else:
            # Fallback: treat anything else as plain text.
            with open(file_path, "r", errors="ignore") as f:
                text = f.read()
    except Exception:
        # Deliberate silent fallback: any parse error is surfaced upstream
        # as an empty result, not a crash.
        return ""
    return text.strip()


# =============================
# 4) Build FAISS Index
# =============================
def build_faiss(text, chunk_size=500, overlap=50):
    """Split *text* into overlapping chunks and embed them into a FAISS index.

    Args:
        text: Full document text.
        chunk_size: Character length of each chunk.
        overlap: Characters shared between consecutive chunks.

    Returns:
        (index, chunks) on success, or (None, None) when there is nothing
        to index.
    """
    if not text:
        return None, None
    # Guard against chunk_size <= overlap, which gave a non-positive range
    # step in the original and would loop forever.
    step = max(1, chunk_size - overlap)
    chunks = []
    for start in range(0, len(text), step):
        chunk = text[start:start + chunk_size].strip()
        if chunk:
            chunks.append(chunk)
    if not chunks:
        return None, None
    embeds = embedding_model.encode(
        chunks, convert_to_numpy=True, normalize_embeddings=True
    )
    # Inner product over L2-normalized vectors == cosine similarity.
    index = faiss.IndexFlatIP(embeds.shape[1])
    index.add(embeds)
    return index, chunks


# =============================
# 5) Globals
# =============================
# Single-document session state shared by the handlers below.
doc_index = None
doc_chunks = None


# =============================
# 6) Handlers
# =============================
def upload_file(file_path):
    """Extract, chunk, and index the uploaded file; return a status string."""
    global doc_index, doc_chunks
    text = extract_text(file_path)
    if not text:
        return "❌ No readable text found."
    idx, chunks = build_faiss(text)
    if idx is None:
        return "❌ Indexing failed."
    doc_index, doc_chunks = idx, chunks
    return f"✅ Indexed {len(chunks)} chunks."


def answer_query(query):
    """Answer *query* using only retrieved chunks from the indexed document."""
    if not query.strip():
        return "⚠️ Enter a question."
    if doc_index is None:
        return "⚠️ Upload a document first."
    q_vec = embedding_model.encode(
        [query], convert_to_numpy=True, normalize_embeddings=True
    )
    # Clamp k to the index size: with fewer than 5 chunks FAISS pads the
    # result with -1, and doc_chunks[-1] would silently duplicate the last
    # chunk into the context (bug in the original). Filter -1 defensively.
    k = min(5, doc_index.ntotal)
    _, I = doc_index.search(q_vec, k=k)
    context = "\n".join(doc_chunks[i] for i in I[0] if i >= 0)
    prompt = f"""
Answer using only the context below.
If not found, say "Not in document".

Context:
{context}

Question: {query}
"""
    # Truncation keeps the prompt within the model's input window.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = llm.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def voice_query(audio_path):
    """Transcribe the recording, answer it, and synthesize a spoken reply.

    Returns:
        (transcript, answer_text, reply_audio_path); empty values when no
        audio was provided.
    """
    if not audio_path:
        return "", "", None
    speech = stt_model(audio_path)["text"]
    answer = answer_query(speech)
    tts = gTTS(answer)
    tts.save("reply.mp3")
    return speech, answer, "reply.mp3"


# =============================
# 7) UI
# =============================
with gr.Blocks() as demo:
    gr.Markdown("# 📚 RAG Chatbot with Voice")

    # Document upload + indexing.
    file = gr.File(type="filepath")
    status = gr.Textbox()
    gr.Button("Index").click(upload_file, file, status)

    # Typed question flow.
    query = gr.Textbox(label="Question")
    answer = gr.Textbox()
    gr.Button("Ask").click(answer_query, query, answer)

    # Voice question flow: triggered whenever a new recording lands.
    audio = gr.Audio(type="filepath")
    rec = gr.Textbox()
    v_ans = gr.Textbox()
    v_audio = gr.Audio()
    audio.change(voice_query, audio, [rec, v_ans, v_audio])

demo.launch()