"""Arabic PDF question-answering (RAG) Gradio app.

Pipeline: PDF -> chunked text -> FAISS vector store (multilingual-e5-large
embeddings) -> RetrievalQA over Cohere Command A, answering in Arabic.
"""

import os

import gradio as gr
import spaces
import torch
from langchain_classic.chains import RetrievalQA
from langchain_cohere import ChatCohere
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. SETUP: DEFINING THE MODELS
# Multilingual embedding model used for retrieval (handles Arabic).
embedding_model_name = "intfloat/multilingual-e5-large"

# Detect device - use CUDA if available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Lazily-initialized embeddings singleton; created on first use so the model
# loads on the GPU when one is available via ZeroGPU.
embeddings = None


def get_embeddings():
    """Return the process-wide HuggingFaceEmbeddings, creating it on first call."""
    global embeddings
    if embeddings is None:
        embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model_name,
            model_kwargs={"device": device},
        )
    return embeddings


@spaces.GPU(duration=120)
def process_and_chat(file, query):
    """Answer *query* about the uploaded PDF *file* via RAG.

    Parameters
    ----------
    file : str | object
        Path to the PDF (Gradio 6.x passes a plain string; older versions
        pass an object exposing ``.name``).
    query : str
        The user's question (expected in Arabic).

    Returns
    -------
    str
        Markdown with the answer and truncated source snippets, or a
        human-readable error message.
    """
    try:
        if not file:
            return "Please upload a PDF file first."
        if not query:
            return "Please enter a question."

        # Fail fast on a missing API key before any expensive work.
        api_key = os.environ.get("COHERE_API_KEY")
        if not api_key:
            return "**Error:** COHERE_API_KEY environment variable is not set."

        # Initialize LLM (Cohere Command A).
        llm = ChatCohere(
            model="command-a-03-2025", temperature=0.3, cohere_api_key=api_key
        )

        # 2. LOAD & PROCESS DOCUMENT
        file_path = file if isinstance(file, str) else file.name
        loader = PyPDFLoader(file_path)
        documents = loader.load()

        # Split text into chunks (Arabic text needs careful splitting).
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", "。", ".", " ", ""],
        )
        texts = text_splitter.split_documents(documents)

        # 3. CREATE VECTOR STORE (in-memory FAISS - faster than Chroma).
        # NOTE(review): E5 models are trained with "query: "/"passage: "
        # prefixes; retrieval quality may improve if added — TODO confirm.
        db = FAISS.from_documents(texts, get_embeddings())
        retriever = db.as_retriever(search_kwargs={"k": 3})  # top-3 chunks

        # 4. RAG CHAIN
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

        # 5. GENERATE ANSWER - instruct the model to answer in Arabic and
        # stay grounded in the retrieved context.
        augmented_query = (
            "Answer the following question in Arabic based ONLY on the "
            "context provided. If you don't know, say you don't know. "
            f"Question: {query}"
        )
        # RetrievalQA's documented input key is "query"; pass it explicitly
        # rather than relying on implicit string-to-dict coercion.
        response = qa_chain.invoke({"query": augmented_query})

        # Format truncated source snippets for display under the answer.
        sources = [
            doc.page_content[:100] + "..." for doc in response["source_documents"]
        ]
        return f"**Answer:**\n{response['result']}\n\n**Sources:**\n" + "\n".join(
            sources
        )
    except Exception as e:
        # Surface any failure (bad PDF, network, quota) in the UI instead of
        # crashing the Gradio worker.
        return f"**Error:** {str(e)}"


# 6. BUILD UI
iface = gr.Interface(
    fn=process_and_chat,
    inputs=[
        gr.File(label="Upload Arabic PDF"),
        gr.Textbox(
            label="Ask a question in Arabic",
            placeholder="ما هي النقاط الرئيسية في هذا المستند؟",
        ),
    ],
    outputs=gr.Markdown(),
    # Fixed labels: the configured models are Command A (command-a-03-2025)
    # and multilingual-e5-large, not "Command R" / "E5-Small".
    title="Arabic RAG (Command A)",
    description=(
        "Upload a PDF and ask questions. Powered by Cohere Command A and "
        "Multilingual-E5-Large embeddings."
    ),
)

iface.launch(share=True)