import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import PyPDF2
from docx import Document

# Exact sentinel prefixes emitted by the extractors below.  process_resume()
# matches against these instead of a bare "Error" prefix, so a resume whose
# text legitimately begins with the word "Error" is not mistaken for a failure.
_PDF_ERROR_PREFIX = "Error reading PDF:"
_DOCX_ERROR_PREFIX = "Error reading DOCX:"


class ResumeRAG:
    """RAG pipeline over a single uploaded resume.

    Extracts text from a PDF/DOCX file, chunks it, indexes the chunks in a
    FAISS vector store (MiniLM embeddings), and answers questions by
    retrieving the top-k chunks and prompting Mistral-7B-Instruct with them.
    Requires a CUDA GPU (the model is loaded in 4-bit via bitsandbytes).
    """

    def __init__(self) -> None:
        """Load embeddings, the text splitter, tokenizer, and the 4-bit LLM.

        Raises:
            RuntimeError: if no CUDA GPU is available.
        """
        self.has_cuda = torch.cuda.is_available()
        self.device = "cuda" if self.has_cuda else "cpu"
        print(f"Using device: {self.device}")

        # Embeddings (small + fast)
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": self.device},
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500, chunk_overlap=50
        )
        # Populated by process_resume(); None until a resume is indexed.
        self.vector_store = None

        model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        if not self.has_cuda:
            raise RuntimeError(
                "No CUDA GPU detected. Use a GPU Space/ZeroGPU, or switch to a smaller CPU model."
            )

        # 4-bit quantization for GPU efficiency
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )

        print("Loading tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        print("Loading model...")
        # NOTE(security): trust_remote_code=True executes model-repo code at
        # load time. Mistral-7B-Instruct-v0.2 does not normally require it —
        # confirm and drop if possible.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",  # important for Spaces
            trust_remote_code=True,
        )

        # Ensure pad token exists (Mistral's tokenizer ships without one).
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Return all page text of a PDF, or a sentinel error string on failure.

        Pages are joined with newlines so words at page boundaries do not fuse
        together (joining with "" would concatenate them directly).
        """
        try:
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                return "\n".join((p.extract_text() or "") for p in reader.pages)
        except Exception as e:
            return f"{_PDF_ERROR_PREFIX} {e}"

    def extract_text_from_docx(self, file_path: str) -> str:
        """Return all paragraph text of a DOCX, or a sentinel error string on failure."""
        try:
            doc = Document(file_path)
            return "\n".join(p.text for p in doc.paragraphs)
        except Exception as e:
            return f"{_DOCX_ERROR_PREFIX} {e}"

    def process_resume(self, file) -> str:
        """Extract, chunk, and index an uploaded resume file.

        Args:
            file: a Gradio file object (has a ``.name`` path attribute), or None.

        Returns:
            A human-readable status message for the UI.
        """
        if file is None:
            return "Please upload a resume file."

        file_path = file.name
        if file_path.lower().endswith(".pdf"):
            text = self.extract_text_from_pdf(file_path)
        elif file_path.lower().endswith(".docx"):
            text = self.extract_text_from_docx(file_path)
        else:
            return "Unsupported file format. Please upload PDF or DOCX."

        # Match the extractors' exact sentinels, not a bare "Error" prefix,
        # so resume text that starts with "Error" is not misclassified.
        if text.startswith((_PDF_ERROR_PREFIX, _DOCX_ERROR_PREFIX)):
            return text
        if not text.strip():
            return "No text could be extracted from the resume."

        chunks = self.text_splitter.split_text(text)
        if not chunks:
            return "No text chunks could be created from the resume."

        self.vector_store = FAISS.from_texts(chunks, self.embeddings)
        return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks."

    def generate_answer(self, question: str, context: str) -> str:
        """Generate an answer with the LLM, grounded in the retrieved context."""
        prompt = f"""[INST] You are a helpful assistant analyzing a resume.

Context: {context}

Question: {question}

Answer only from the context. If the answer is not in the context, say it is not in the resume.
[/INST]"""

        inputs = self.tokenizer(prompt, return_tensors="pt")
        # Move inputs onto the SAME device as the model's embedding weights;
        # with device_map="auto" the first layer may live on any device.
        target_device = self.model.get_input_embeddings().weight.device
        inputs = {k: v.to(target_device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # decode() returns prompt + completion; keep only the completion.
        if "[/INST]" in text:
            return text.split("[/INST]")[-1].strip()
        return text.strip()

    def query(self, question: str):
        """Answer a question about the indexed resume.

        Returns:
            (answer, context) strings for the UI; context is empty on errors.
        """
        if self.vector_store is None:
            return "Please upload a resume first.", ""
        if not question.strip():
            return "Please enter a question.", ""

        docs = self.vector_store.similarity_search(question, k=3)
        context = "\n\n".join(d.page_content for d in docs)
        answer = self.generate_answer(question, context)

        # Release cached activations between requests to limit VRAM growth.
        if self.has_cuda:
            torch.cuda.empty_cache()
        return answer, context


print("Initializing Resume RAG System...")
rag_system = ResumeRAG()

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(
        """
        # 📄 Resume RAG Q&A System
        Powered by Mistral-7B + FAISS vector search

        Upload your resume and ask questions about experience, skills, education, and more.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Resume")
            file_input = gr.File(
                label="Upload PDF or DOCX", file_types=[".pdf", ".docx"]
            )
            upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
            upload_status = gr.Textbox(label="Status", interactive=False)
            gr.Markdown(
                """
                ---
                **Example Questions:**
                - What programming languages does the candidate know?
                - Summarize the work experience
                - What is the education background?
                - List all technical skills
                """
            )

        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What are the candidate's key skills?",
                lines=2,
            )
            submit_btn = gr.Button("Get Answer", variant="primary", size="lg")
            answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)
            with gr.Accordion("📚 Retrieved Context", open=False):
                context_output = gr.Textbox(
                    label="Relevant Resume Sections", lines=6, interactive=False
                )

    # GPU-decorated handler for ZeroGPU/Spaces GPU: the decorator attaches a
    # GPU only for the duration of each call.
    @spaces.GPU
    def query_gpu(q):
        return rag_system.query(q)

    upload_btn.click(
        fn=rag_system.process_resume, inputs=[file_input], outputs=[upload_status]
    )
    submit_btn.click(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output],
    )
    question_input.submit(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)