"""Smart Resume Matcher.

Streamlit app that extracts structured information from a ZIP of resume
PDFs and a job description (PDF/TXT/pasted text) via a HuggingFace-hosted
LLM, ranks the top-N candidates, and renders a skill-match heatmap.
"""

import os
import tempfile  # kept for compatibility; no longer used directly
import zipfile

import fitz  # PyMuPDF
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from langchain_community.llms import HuggingFaceHub

# Fail fast with a clear message when the token is missing — assigning
# os.getenv(...) directly into os.environ raises an opaque TypeError if
# the variable is unset.
_hf_token = os.getenv("HF_Token")
if not _hf_token:
    st.error("Environment variable HF_Token is not set; cannot reach Hugging Face.")
    st.stop()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Initialize the language model (Mistral)
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 512},
)


def extract_text_from_pdf(file_bytes: bytes) -> str:
    """Return the concatenated text of every page of a PDF given as bytes.

    Opens the document directly from memory — the original wrote the bytes
    to a ``NamedTemporaryFile(delete=False)`` that was never removed (one
    leaked temp file per call) and never closed the ``fitz`` document.

    Args:
        file_bytes: Raw PDF file content.

    Returns:
        All page text joined with no separator (same as the original
        per-page ``+=`` accumulation, but O(n) via ``join``).
    """
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def create_resume_prompt(text: str) -> str:
    """Build the LLM prompt that extracts structured fields from a resume."""
    return (
        "Extract structured information from the resume:\n"
        "1. Full Name\n2. Education\n3. Total Experience (years)\n4. Key Skills\n5. Projects (if any)\n"
        f"\nResume Text:\n{text}"
    )


def create_jd_prompt(text: str) -> str:
    """Build the LLM prompt that extracts structured fields from a JD."""
    return (
        "Extract structured information from the job description:\n"
        "1. Job ID\n2. Company Name\n3. Role\n4. Experience Required\n5. Skills Required\n6. Education Required\n7. Location\n"
        f"\nJD Text:\n{text}"
    )


def matching_prompt(jd: str, resumes: str, top_n: int = 3) -> str:
    """Build the ranking prompt asking the LLM for the top-N candidates."""
    return (
        f"You are a resume screening expert.\n"
        f"Job Description:\n{jd}\n"
        f"Resumes:\n{resumes}\n"
        f"Rank the top {top_n} matching candidates with reasons. Format:\n"
        "1. Candidate Name - Reason\n2. Candidate Name - Reason\n..."
    )


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Resume Matcher", layout="wide")
st.title("🤖 Smart Resume Matcher with JD Insights")

zip_file = st.file_uploader("Upload ZIP of Resumes (PDF only)", type=["zip"])
jd_file = st.file_uploader("Upload Job Description (PDF or TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("Or paste JD directly below")
top_n = st.slider("Select number of top matches", 1, 10, 3)

if st.button("🔍 Match Resumes"):
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both resumes and a JD.")
        st.stop()

    # Resolve the JD text from whichever input the user supplied;
    # an uploaded file wins over pasted text (original precedence).
    jd_text = ""
    if jd_file:
        jd_text = (
            extract_text_from_pdf(jd_file.read())
            if jd_file.name.endswith(".pdf")
            else jd_file.read().decode("utf-8")
        )
    elif jd_text_input:
        jd_text = jd_text_input

    jd_structured = llm.invoke(create_jd_prompt(jd_text))
    st.subheader("📋 Extracted JD Details")
    st.markdown(jd_structured)

    resume_chunks = []  # per-resume summaries, joined once (avoids quadratic +=)
    resume_skills_list = []
    resume_names = []

    with zipfile.ZipFile(zip_file, "r") as z:
        # Match .pdf case-insensitively and skip macOS "__MACOSX/" metadata
        # entries that real-world zips often contain.
        pdf_files = [
            f
            for f in z.namelist()
            if f.lower().endswith(".pdf") and not f.startswith("__MACOSX/")
        ]
        for file in pdf_files:
            with z.open(file) as resume_pdf:
                text = extract_text_from_pdf(resume_pdf.read())
            result = llm.invoke(create_resume_prompt(text))
            resume_chunks.append(f"\n\nResume File: {file}\n{result}")

            # Pull the "Key Skills" line out of the LLM answer for the heatmap.
            skills_line = next(
                (line for line in result.split("\n") if "Key Skills" in line), ""
            )
            skills = [
                skill.strip().lower()
                for skill in skills_line.replace("Key Skills:", "").split(",")
                if skill.strip()
            ]
            resume_skills_list.append(skills)
            resume_names.append(file.replace(".pdf", ""))

    resumes_info = "".join(resume_chunks)

    # Matching logic
    match_response = llm.invoke(matching_prompt(jd_text, resumes_info, top_n))
    st.subheader("🏆 Top Matches")
    st.markdown(match_response)

    # Skill heatmap: cell is 1 when a JD keyword appears among a resume's skills.
    st.subheader("🔬 Skill Match Heatmap")
    jd_keywords = list(
        {
            kw.strip().lower()
            for kw in jd_structured.split()
            if len(kw) > 2 and kw.isalpha()
        }
    )
    heatmap_data = pd.DataFrame(0, index=resume_names, columns=jd_keywords)
    for i, skills in enumerate(resume_skills_list):
        for kw in jd_keywords:
            if kw in skills:
                heatmap_data.loc[resume_names[i], kw] = 1

    if not heatmap_data.empty:
        fig, ax = plt.subplots(figsize=(12, len(resume_names) * 0.5 + 2))
        sns.heatmap(heatmap_data, annot=True, cmap="YlGnBu", cbar=False, ax=ax)
        st.pyplot(fig)
    else:
        st.info("No matching skills found for heatmap.")