# app.py
"""
Quantum Scrutiny Platform | Groq-Powered
Single-file Streamlit app (refactored & debugged).

Flow: users upload PDF/DOCX resumes -> text is extracted -> Groq LLM returns a
strict JSON profile -> profile is validated (Pydantic), scored, and stored in
``st.session_state.analyzed_data`` -> admins review/shortlist/export via a
password-protected dashboard tab.
"""

# --- 0. Imports & environment -------------------------------------------------
import os

from dotenv import load_dotenv

load_dotenv()  # load local .env if present (during local dev)

import ast
import io
import json
import re
import traceback
from typing import List, Optional

import pandas as pd
import streamlit as st

# resume parsing
import fitz  # PyMuPDF
from docx import Document  # python-docx

# Groq client (keep same import name as before)
from groq import Groq

# Pydantic for schema validation
from pydantic import BaseModel, Field, ValidationError

# --- Streamlit UI config: must be the first Streamlit command executed ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # set in environment, .env, or deploy secrets
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")  # optional override via env

# --- Initialize Groq client with safe error messaging ---
groq_client = None
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY not found. Please set GROQ_API_KEY as an environment variable or in Hugging Face secrets.")
    # We won't stop here to allow UI to display, but analysis will error if used.
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
        groq_client = None

# --- Session state defaults ---
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False

if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)',
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)

if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []

if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False


# --- Pydantic schema for Groq output ---
class ResumeAnalysis(BaseModel):
    """Validated shape of the JSON profile the LLM must return for one resume."""

    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="Concise summary of experience.")
    education_summary: str = Field(default="", description="Concise summary of education.")
    communication_skills: str = Field(description="Communication score as a STRING ('8') or description.")
    technical_skills: List[str] = Field(default_factory=list, description="List of skills/technologies.")
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="ABA Therapy score as STRING or 'N/A'.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="'Yes'/'No'/'N/A'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="Autism care experience score as STRING or 'N/A'.")


def _failed_result() -> ResumeAnalysis:
    """Return the canonical 'Extraction Failed' fallback object.

    Centralized here because four different failure paths previously built the
    same literal by hand.
    """
    return ResumeAnalysis(
        name="Extraction Failed", email="", phone="", certifications=[],
        experience_summary="", education_summary="", communication_skills="N/A",
        technical_skills=[], aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A", autism_care_experience_score="N/A",
    )


def _leading_number(value, default: float = 0.0) -> float:
    """Extract the leading numeric token from strings like '8', '8/10', '8 - good'.

    Returns *default* when no digits are found (e.g. a prose description).
    """
    m = re.search(r"(\d+(\.\d+)?)", str(value))
    return float(m.group(1)) if m else default


# --- Helper: File text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """
    Accept a Streamlit UploadedFile object and return extracted text.

    Supports PDF (via PyMuPDF) and DOCX (via python-docx); anything else is
    decoded as UTF-8 best-effort. Returns an empty string on failure — callers
    treat "" as "skip this file".
    """
    try:
        content = uploaded_file.read()  # consume once; reuse `content` downstream
        name_lower = uploaded_file.name.lower()

        # Detect PDF by extension OR magic header bytes.
        if name_lower.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "\n".join(page.get_text() for page in doc).strip()
            except Exception as e:
                st.warning(f"PDF extraction issue for {uploaded_file.name}: {e}")
                return ""

        if name_lower.endswith(".docx"):
            # python-docx accepts a file-like object.
            try:
                doc = Document(io.BytesIO(content))
                paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(paragraphs).strip()
            except Exception as e:
                st.warning(f"DOCX extraction issue for {uploaded_file.name}: {e}")
                return ""

        # Fallback: simple decode for text-like files.
        try:
            return content.decode('utf-8', errors='ignore')
        except Exception:
            return ""
    except Exception as e:
        st.error(f"Unexpected file extraction error: {e}")
        return ""


# --- Helper: call Groq (safe wrapper) ---
def call_groq_chat_system(resume_text: str, job_role: str) -> Optional[str]:
    """
    Call Groq chat completion and return the model's text content, or None on error.

    Requires ``groq_client`` to be initialized (GROQ_API_KEY set).
    """
    if not groq_client:
        st.error("Groq client is not initialized. Set GROQ_API_KEY in environment or secrets.")
        return None

    # Role-specific instructions: therapist roles get specialized scored fields.
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide a score from 1-10 as a STRING "
            "(e.g., '7') for the specialized fields. If any specialized field is not present, return 'N/A'."
        )
    else:
        therapist_instructions = (
            "This is NOT a Therapist role. Set 'aba_therapy_skills', 'autism_care_experience_score', "
            "and 'rbt_bcba_certification' to 'N/A' if not applicable."
        )

    system_prompt = (
        "You are a professional Resume Analyzer. Extract the requested fields and return a strict JSON object "
        "matching the schema: name, email, phone, certifications (array), experience_summary, education_summary, "
        "communication_skills (AS A STRING, e.g., '8'), technical_skills (array), aba_therapy_skills, "
        "rbt_bcba_certification, autism_care_experience_score. " + therapist_instructions
    )
    user_prompt = f"Analyze the following resume text and return a JSON object:\n\n---\n{resume_text}\n---\nReturn only valid JSON."

    try:
        result = groq_client.chat.completions.create(
            # NOTE(review): mixtral-8x7b-32768 has been decommissioned on Groq —
            # confirm and switch to a currently supported model id if calls fail.
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.0,
            max_tokens=2000,
        )
        # SDK response structure varies by version; prefer the attribute form,
        # fall back to dict-style access, then to str() as a last resort.
        try:
            return result.choices[0].message.content
        except Exception:
            try:
                return result["choices"][0]["message"]["content"]
            except Exception:
                return str(result)
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        st.exception(e)
        return None


# --- Cached wrapper for analysis (cache by resume_text + role) ---
@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Call Groq and return a validated ResumeAnalysis object.

    Cached to avoid repeated API calls for identical text+role. On any failure
    (API error, unparseable output, schema violation) a safe fallback object
    with name 'Extraction Failed' is returned instead of raising.

    Note: only *display* elements (st.warning/st.error/st.code) are used here —
    input widgets such as st.text_area are not allowed inside cached functions.
    """
    raw_response = call_groq_chat_system(resume_text, job_role)
    if not raw_response:
        return _failed_result()

    # The model may wrap the JSON in commentary; grab the first {...} span.
    json_text = None
    try:
        match = re.search(r"(\{.*\})", raw_response, re.DOTALL)
        json_text = match.group(1) if match else raw_response
        parsed = json.loads(json_text)
    except Exception:
        # Forgiving fallback: the model may have emitted a Python-dict literal.
        # ast.literal_eval only evaluates literals — safe on untrusted output,
        # unlike the eval() it replaces.
        try:
            parsed = ast.literal_eval(json_text) if json_text else None
            if not isinstance(parsed, dict):
                raise ValueError("Parsed non-dict from model response fallback.")
        except Exception:
            st.warning("Failed to parse Groq output as JSON. Returning fallback extraction.")
            st.code(raw_response)  # display-only debug dump (widget-free for cache)
            return _failed_result()

    # Validate & coerce to the Pydantic model, applying safe defaults first.
    try:
        parsed.setdefault("certifications", [])
        parsed.setdefault("technical_skills", [])

        # communication_skills must be a string per the schema.
        if parsed.get("communication_skills") is not None:
            parsed["communication_skills"] = str(parsed["communication_skills"])
        else:
            parsed["communication_skills"] = "N/A"

        # Therapist-specific fields default to "N/A" and are stringified.
        for k in ["aba_therapy_skills", "rbt_bcba_certification", "autism_care_experience_score"]:
            parsed[k] = "N/A" if parsed.get(k) is None else str(parsed[k])

        # parse_obj kept for Pydantic v1 compatibility (matches .dict() usage below).
        analysis = ResumeAnalysis.parse_obj(parsed)

        # Final coercions to guarantee string types for display fields.
        analysis.communication_skills = str(analysis.communication_skills or "N/A")
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or "N/A")
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or "N/A")
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or "N/A")
        return analysis
    except ValidationError as ve:
        st.error("Model output failed schema validation. Returning fallback object.")
        st.code(raw_response)  # display-only debug dump (widget-free for cache)
        st.exception(ve)
        return _failed_result()
    except Exception as e:
        st.error("Unexpected error while validating model output.")
        st.exception(e)
        return _failed_result()


# --- Scoring function ---
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    """Compute a 0-100 score from the extracted profile.

    Weights: experience length 40, skills count 30, communication 20,
    certifications 10, plus a therapist-only specialization bonus up to 10.
    Capped at 100.
    """
    total_score = 0.0

    # Experience summary length -> up to 40 points (100+ chars = full points).
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0

    # Skills count -> up to 30 points (10+ skills = full points).
    skills_count = len(analysis.technical_skills or [])
    total_score += min(skills_count / 10.0, 1.0) * 30.0

    # Communication -> up to 20 points; accepts '8', '8/10', '8 - good' etc.
    # Falls back to a neutral 5.0 when no number can be extracted.
    comm_val = _leading_number(str(analysis.communication_skills).strip(), default=5.0)
    comm_val = max(0.0, min(10.0, comm_val))
    total_score += (comm_val / 10.0) * 20.0

    # Certifications -> up to 10 points (1 point each, capped at 10).
    total_score += min(len(analysis.certifications or []), 10) * 1.0

    # Therapist-specific bonus: average of ABA + autism-care scores, scaled to 10.
    if role == "Therapist":
        try:
            aba = _leading_number(analysis.aba_therapy_skills)
            autism = _leading_number(analysis.autism_care_experience_score)
            total_score += ((aba + autism) / 20.0) * 10.0
        except Exception:
            pass

    return float(round(min(total_score, 100)))


# --- Append to session DataFrame helper ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Flatten one ResumeAnalysis into a row of st.session_state.analyzed_data."""
    data = analysis.dict()
    df_data = {
        'Name': data.get('name') or "",
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get('email') or "",
        'Phone': data.get('phone') or "",
        'Shortlisted': 'No',
        'Experience Summary': data.get('experience_summary') or "",
        'Education Summary': data.get('education_summary') or "",
        'Communication Rating (1-10)': str(data.get('communication_skills') or "N/A"),
        'Skills/Technologies': ", ".join(data.get('technical_skills') or []),
        'Certifications': ", ".join(data.get('certifications') or []),
        'ABA Skills (1-10)': str(data.get('aba_therapy_skills') or "N/A"),
        'RBT/BCBA Cert': str(data.get('rbt_bcba_certification') or "N/A"),
        'Autism-Care Exp (1-10)': str(data.get('autism_care_experience_score') or "N/A"),
    }
    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, new_df], ignore_index=True
    )


# --- Utility: Excel bytes for st.download_button ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to .xlsx bytes (requires openpyxl)."""
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return output.getvalue()


# --- App Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# -------------------------
# User Panel
# -------------------------
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader(
        "2. Upload Resumes (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True,
    )

    # Analyze button sets a session_state flag and reruns so processing happens
    # on the next script pass.
    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    # If run_analysis flag is set, process uploads.
    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            continue  # finally still updates the progress bar

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue  # finally still updates the progress bar

                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name,
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        st.exception(e)  # st.exception expects an Exception, not a string
                        st.code(traceback.format_exc())
                    finally:
                        progress.progress(idx / total)

            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False  # reset flag

    # Show last analysis summary.
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")

# -------------------------
# Admin Panel (Password Protected)
# -------------------------
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # Stop further admin rendering while not logged in (admin tab is last,
        # so nothing below it is lost).
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()

        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        # data_editor with SelectboxColumn so admins can toggle 'Shortlisted'.
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True,
        )

        # Propagate the 'Shortlisted' edits back to the session dataframe.
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            # Fallback for indexing mismatches: copy row by row.
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("📥 Download Data")
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)
        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status.",
        )

# --- End of app.py ---