import os
import zipfile
import tempfile
import fitz  # PyMuPDF
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# Set HuggingFace API keys
# The token is read from the "hf1" environment variable. Without it the app
# cannot call the model, so show an error in the UI and halt this script run.
hf_token = os.getenv("hf1")
if not hf_token:
    st.error('HuggingFace token not found. Please set "hf1" as an environment variable.')
    st.stop()

# Re-export the token under HUGGINGFACEHUB_API_TOKEN for the HuggingFace client
# code used below. "hf1" itself is already set in the environment, so it does
# not need to be written back.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Load LLM
# Base endpoint configuration; the chat wrapper below is built on top of it.
# NOTE(review): max_new_tokens=150 may be too small for the structured
# summaries requested by the prompts in this file — confirm outputs are not
# being cut off.
llm_base = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    provider="novita",
    temperature=0.7,
    max_new_tokens=150,
    task="conversational"
)
# Chat wrapper; this is the object every llm.invoke(...) call in this script uses.
# NOTE(review): repo_id here ("meta-llama/Llama-3.2-3B-Instruct") differs from
# the one given to llm_base ("meta-llama/Llama-3.1-8B-Instruct"). Confirm which
# model is intended and whether ChatHuggingFace uses these extra keyword
# arguments or simply defers to the wrapped llm_base.
llm = ChatHuggingFace(
    llm=llm_base,
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    provider="novita",
    temperature=0.7,
    max_new_tokens=150,
    task="conversational"
)

# Text extraction from PDF
def extract_text_from_pdf(file_bytes):
    """Return the plain text of every page of a PDF, concatenated in page order.

    The document is opened directly from the in-memory bytes. No temporary
    file is written, so nothing is left behind on disk and there is no risk of
    reading a file whose contents have not been flushed yet.

    Args:
        file_bytes: Raw bytes of a PDF document.

    Returns:
        The text of all pages joined together with no separator.

    Raises:
        Whatever PyMuPDF raises when the bytes cannot be opened as a PDF;
        the exception is propagated unchanged.
    """
    # The context manager closes the document even if text extraction fails.
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

# Prompt creators
def create_resume_prompt(text):
    """Build the prompt asking the model for structured fields from a resume.

    Args:
        text: Resume text to embed at the end of the prompt.

    Returns:
        The prompt string: a leading newline, an instruction line, the
        numbered list of fields to extract, a blank line, and finally the
        resume text under a "Resume Text:" heading with a trailing newline.
    """
    requested_fields = (
        "Full Name",
        "Education",
        "Total Experience (in years)",
        "Key Skills",
        "Projects (Names and Outcomes if any)",
    )
    numbered = [f"{idx}. {label}" for idx, label in enumerate(requested_fields, start=1)]
    # The empty first and last entries produce the leading and trailing newline.
    prompt_lines = [
        "",
        "Extract structured information from the resume:",
        *numbered,
        "",
        "Resume Text:",
        f"{text}",
        "",
    ]
    return "\n".join(prompt_lines)

def create_jd_prompt(text):
    """Build the prompt asking the model for structured job-description fields.

    Args:
        text: Job description text to embed at the end of the prompt.

    Returns:
        The prompt string: a leading newline, an instruction line, the
        numbered list of fields to extract, a blank line, and finally the job
        description under a "Job Description:" heading with a trailing newline.
    """
    requested_fields = (
        "Job ID",
        "Company Name",
        "Role",
        "Experience Required",
        "Skills Required",
        "Education Required",
        "Location",
    )
    numbered = [f"{idx}. {label}" for idx, label in enumerate(requested_fields, start=1)]
    # The empty first and last entries produce the leading and trailing newline.
    prompt_lines = [
        "",
        "Extract structured job description info:",
        *numbered,
        "",
        "Job Description:",
        f"{text}",
        "",
    ]
    return "\n".join(prompt_lines)

def matching_prompt(jd_text, resumes_info, top_n=3):
    """Build the prompt asking the model to rank resumes against a job description.

    Args:
        jd_text: Job description text placed under "Job Description:".
        resumes_info: Combined resume summaries placed under "Resumes:".
        top_n: Number of candidates the model is asked to return (default 3).

    Returns:
        The prompt string, starting and ending with a newline, containing the
        instruction, the matching criteria, both inputs and the expected
        answer format.
    """
    instruction = (
        "You are a resume screening expert. Based on the JD and resume "
        f"summaries below, return the top {top_n} matching candidates."
    )
    criteria = (
        "Skill alignment",
        "Relevant experience",
        "Education",
        "Domain-specific keywords",
    )
    # The empty first and last entries produce the leading and trailing newline.
    prompt_lines = [
        "",
        instruction,
        "",
        "Criteria:",
        *(f"- {criterion}" for criterion in criteria),
        "",
        "Job Description:",
        f"{jd_text}",
        "",
        "Resumes:",
        f"{resumes_info}",
        "",
        "Format:",
        "1. Candidate Name: Reason",
        "2. Candidate Name: Reason",
        f"... up to {top_n}",
        "",
    ]
    return "\n".join(prompt_lines)

# UI setup
# Page title and a centered layout, followed by the visible heading and a
# short description of what the app does.
st.set_page_config(page_title="Resume Matcher + JD Extractor", layout="centered")
st.title("🤖 Resume Matcher & JD Extractor")
st.markdown("Upload resumes in a ZIP file and a Job Description. The app will extract, match, and visualize candidate alignment.")

# Upload section
# Resumes arrive as a single ZIP archive; the job description can be supplied
# either as an uploaded PDF/TXT file or as pasted text.
zip_file = st.file_uploader("📁 Upload ZIP of Resumes (PDF)", type=["zip"])
jd_file = st.file_uploader("📄 Upload Job Description (PDF/TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("✍️ Or Paste Job Description Text")
# Number of candidates requested from the matching prompt (1-10, default 3).
top_n = st.slider("How many top candidates to return?", min_value=1, max_value=10, value=3)

if st.button("🔍 Match Candidates"):
    # A resume archive and a job description (uploaded file or pasted text)
    # are both required before anything is sent to the model.
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both a ZIP of resumes and a JD.")
        st.stop()

    # Extract JD
    if jd_file:
        jd_bytes = jd_file.read()
        # Compare the extension case-insensitively so "JD.PDF" is parsed as a
        # PDF instead of being decoded as text.
        if jd_file.name.lower().endswith(".pdf"):
            jd_text = extract_text_from_pdf(jd_bytes)
        else:
            # errors="replace" keeps a TXT file that is not valid UTF-8 from
            # aborting the whole run; undecodable bytes become U+FFFD.
            jd_text = jd_bytes.decode("utf-8", errors="replace")
    else:
        jd_text = jd_text_input.strip()

    jd_structured = llm.invoke(create_jd_prompt(jd_text)).content
    st.subheader("📌 Extracted JD Information")
    st.markdown(jd_structured)

    # Extract resumes
    resume_texts = {}
    resume_sections = []

    # A file with a .zip extension is not necessarily a valid archive; report
    # that in the UI instead of surfacing a traceback.
    try:
        archive = zipfile.ZipFile(zip_file, "r")
    except zipfile.BadZipFile:
        st.error("The uploaded file is not a valid ZIP archive.")
        st.stop()

    with archive as z:
        # Keep PDF entries only (case-insensitive), and skip the "__MACOSX/"
        # folder and "._*" companion files that macOS adds to archives: they
        # carry a .pdf name but are not PDF documents.
        pdf_files = [
            f for f in z.namelist()
            if f.lower().endswith(".pdf")
            and not f.startswith("__MACOSX/")
            and not os.path.basename(f).startswith("._")
        ]
        if not pdf_files:
            st.error("No PDF resumes found.")
            st.stop()

        st.success(f"Found {len(pdf_files)} resumes. Extracting...")

        for file in pdf_files:
            pdf_data = z.read(file)
            text = extract_text_from_pdf(pdf_data)
            summary = llm.invoke(create_resume_prompt(text)).content
            resume_sections.append(f"\n\nResume File: {file}\n{summary}")
            resume_texts[file] = summary

    # Join once instead of growing a string inside the loop.
    resumes_info = "".join(resume_sections)

    # Match candidates
    st.info("🔗 Matching resumes to JD...")
    match_result = llm.invoke(matching_prompt(jd_text, resumes_info, top_n)).content
    st.subheader("✅ Top Matched Candidates")
    st.markdown(match_result)

    # Visualize Match Heatmap
    def extract_required_skills_and_experience(jd_structured_text):
        """Pull the required skills and minimum years of experience from JD text.

        Looks for a "Skills Required" label and an "Experience Required" label
        (case-insensitive). Markdown emphasis, colons, dashes and bullet
        markers between the label and its value are skipped, so a line such as
        "**Skills Required:** Python, SQL" yields clean skill names.

        Args:
            jd_structured_text: Text containing the structured JD fields.

        Returns:
            A tuple (skills, min_exp): skills is a list of lower-cased skill
            strings split on commas, semicolons, slashes and newlines (empty
            if the label is missing); min_exp is the first integer found in
            the experience value, or 0 if none is found.
        """
        # Characters that may sit between a label and its value: whitespace,
        # ":", "-", en dash, and markdown emphasis ("*", "_").
        separator = r"[\s:*_\-\u2013]*"
        skills_match = re.search(r"Skills Required" + separator + r"(.*)", jd_structured_text, re.IGNORECASE)
        exp_match = re.search(r"Experience Required" + separator + r"(.*)", jd_structured_text, re.IGNORECASE)

        skills = []
        if skills_match:
            for raw_skill in re.split(r"[,;/\n]", skills_match.group(1)):
                # Drop surrounding whitespace and stray markdown emphasis so
                # the token can be found verbatim in a resume summary.
                cleaned = raw_skill.strip().strip("*_`").strip().lower()
                if cleaned:
                    skills.append(cleaned)

        min_exp = 0
        if exp_match:
            # The first run of digits is taken as the minimum (e.g. "3+" or "3-5").
            match_years = re.search(r"(\d+)\+?", exp_match.group(1))
            if match_years:
                min_exp = int(match_years.group(1))

        return skills, min_exp

    required_skills, required_exp = extract_required_skills_and_experience(jd_structured)

    # Build one row per resume: a 0/1 experience flag and the fraction of
    # required skills whose text appears in the resume summary.
    match_matrix = []
    for file_name, summary in resume_texts.items():
        # The resume prompt labels this field "Total Experience (in years)" and
        # the model may add markdown emphasis, so skip any non-digit text on
        # the same line before the number. Staying on one line also avoids
        # picking up the list number of the following field.
        exp_match = re.search(r"Total Experience[^\d\n]*(\d+(?:\.\d+)?)", summary, re.IGNORECASE)
        candidate_exp = float(exp_match.group(1)) if exp_match else 0
        exp_score = 1 if candidate_exp >= required_exp else 0

        # Lower-case the summary once rather than once per skill.
        summary_lower = summary.lower()
        skill_matches = sum(skill.lower() in summary_lower for skill in required_skills)
        # Stored as a 0-1 fraction; with no required skills the score is 0.
        skill_score = round(skill_matches / len(required_skills), 2) if required_skills else 0

        match_matrix.append({
            "Resume": file_name,
            "Experience Match": exp_score,
            "Skill Match %": skill_score
        })

    df_match = pd.DataFrame(match_matrix).set_index("Resume")

    st.subheader("📊 Heatmap: Skills & Experience Match")
    # Height grows with the number of resumes so the rows stay readable.
    fig, ax = plt.subplots(figsize=(8, len(df_match) * 0.5 + 1))
    sns.heatmap(df_match, annot=True, cmap="YlGnBu", linewidths=0.5, cbar=False, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release it so
    # repeated button clicks do not accumulate figures in memory.
    plt.close(fig)