"""
Quantum Scrutiny Platform | Groq-Powered
Single-file Streamlit app (refactored, Groq streaming-compatible)

The script has two tabs:

* a user panel that uploads resumes, sends their text to a Groq chat model,
  validates the returned JSON and scores it, and
* a password-protected admin panel that shows the accumulated results,
  lets the admin toggle the "Shortlisted" flag and exports everything to Excel.
"""

import os
import io
import re
import json
import hmac
import base64
import traceback
from typing import Optional, List

from dotenv import load_dotenv

load_dotenv()

import streamlit as st
import pandas as pd

# File parsing
import fitz                   # PyMuPDF
from docx import Document     # python-docx

# Groq client
from groq import Groq

# Validation
from pydantic import BaseModel, Field, ValidationError

# --- Page config ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# SECURITY NOTE: the fallback password "admin" is trivially guessable.
# Always set ADMIN_PASSWORD in the environment for any real deployment.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Name stored in a ResumeAnalysis to signal that extraction did not succeed.
EXTRACTION_FAILED_NAME = "Extraction Failed"

# Initialize Groq client (no API key -> UI warning but app still loads)
groq_client = None
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
else:
    st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")

# --- Session state defaults ---
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Phone',
        # Component score columns
        'Experience Score (40)', 'Skills Score (30)', 'Communication Score (20)', 'Certifications Score (10)',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications',
        # Therapist-specific fields
        'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []
if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False


# --- Pydantic schema ---
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume by the model.

    Score-like fields are kept as strings (e.g. '8' or 'N/A'); numeric
    scores are derived later by ``calculate_resume_score``.
    """

    name: str = Field(default="Unknown")
    email: str = Field(default="")
    phone: str = Field(default="")
    certifications: List[str] = Field(default_factory=list)
    experience_summary: str = Field(default="")
    education_summary: str = Field(default="")
    communication_skills: str = Field(default="N/A")
    technical_skills: List[str] = Field(default_factory=list)
    aba_therapy_skills: Optional[str] = Field(default="N/A")
    rbt_bcba_certification: Optional[str] = Field(default="N/A")
    autism_care_experience_score: Optional[str] = Field(default="N/A")


# --- Helpers: file text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """Extract text from an uploaded PDF or DOCX file.

    Anything that is neither a PDF (by extension or ``%PDF-`` magic bytes)
    nor a ``.docx`` is decoded as UTF-8 text, ignoring undecodable bytes.

    Args:
        uploaded_file: File-like object with a ``name`` attribute and either
            ``getvalue()`` or ``read()`` (e.g. a Streamlit upload).

    Returns:
        The extracted text, or an empty string on any failure.
    """
    try:
        # Prefer getvalue(): it does not depend on the current stream
        # position, so the same upload object can be analyzed repeatedly.
        if hasattr(uploaded_file, "getvalue"):
            content = uploaded_file.getvalue()
        else:
            rewind = getattr(uploaded_file, "seek", None)
            if callable(rewind):
                rewind(0)
            content = uploaded_file.read()

        filename = uploaded_file.name.lower()

        if filename.endswith(".pdf") or content[:5] == b"%PDF-":
            with fitz.open(stream=content, filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc).strip()

        if filename.endswith(".docx"):
            doc = Document(io.BytesIO(content))
            paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
            return "\n".join(paragraphs).strip()

        # Fallback: decode bytes as text
        return content.decode('utf-8', errors='ignore')
    except Exception:
        # Deliberate best-effort: callers treat "" as "could not extract".
        return ""


# --- Groq call with streaming (collects chunks) ---
def _extract_chunk_text(chunk) -> str:
    """Return the text carried by one streamed chunk, or '' if it has none.

    Handles both attribute-style chunks (``chunk.choices[0].delta.content``)
    and dict-style chunks, and falls back to ``message.content`` when the
    delta carries no text.
    """
    if isinstance(chunk, dict):
        choices = chunk.get("choices") or []
        if not choices:
            return ""
        first = choices[0] or {}
        text = (first.get("delta") or {}).get("content")
        if not text:
            text = (first.get("message") or {}).get("content")
    else:
        choices = getattr(chunk, "choices", None) or []
        if not choices:
            return ""
        first = choices[0]
        text = getattr(getattr(first, "delta", None), "content", None)
        if not text:
            text = getattr(getattr(first, "message", None), "content", None)
    return text if isinstance(text, str) else ""


def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """Call Groq with streaming enabled and collect the textual output.

    Args:
        prompt: User message sent to the model.
        model_name: Groq model identifier.
        temperature: Sampling temperature.
        max_completion_tokens: Upper bound on generated tokens.
        top_p: Nucleus sampling parameter.

    Returns:
        The full model text (stripped), or ``None`` if the client is not
        initialized or the API call fails. Errors are reported via ``st.error``.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None
    try:
        completion = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True
        )
        pieces = []
        for chunk in completion:
            try:
                piece = _extract_chunk_text(chunk)
            except Exception:
                # A chunk with an unexpected shape is skipped. Appending its
                # repr would corrupt the JSON the caller parses afterwards.
                continue
            if piece:
                pieces.append(piece)
        return "".join(pieces).strip()
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return None


# --- Parsing model output safely to JSON ---
def extract_first_json(text: str) -> Optional[dict]:
    """Find the first JSON object in ``text`` and parse it.

    Strategy:
      1. Decode a JSON value starting at the first ``{``; trailing text after
         the object is ignored.
      2. Otherwise take the span from the first ``{`` to the last ``}`` and
         try it as-is, then with single quotes replaced by double quotes.

    Returns:
        The parsed ``dict``, or ``None`` when no JSON object can be recovered.
        Non-object JSON (lists, strings, numbers) is never returned.
    """
    if not text:
        return None

    start = text.find("{")
    if start == -1:
        return None

    try:
        candidate, _ = json.JSONDecoder().raw_decode(text, start)
        if isinstance(candidate, dict):
            return candidate
    except ValueError:
        pass

    end = text.rfind("}")
    if end <= start:
        return None
    span = text[start:end + 1]
    # Second attempt repairs the common "single quotes instead of double"
    # mistake; it is a heuristic and may fail on apostrophes inside values.
    for attempt in (span, span.replace("'", '"')):
        try:
            candidate = json.loads(attempt)
        except ValueError:
            continue
        if isinstance(candidate, dict):
            return candidate
    return None


# --- Analyze with Groq (cached by resume text + role) ---
class _ResumeAnalysisError(Exception):
    """Raised inside the cached analysis when no valid result was produced.

    Attributes:
        kind: One of 'empty', 'parse' or 'validation'.
        raw: Raw model output, when there is any.
        cause: The underlying ValidationError for kind == 'validation'.
    """

    def __init__(self, kind: str, raw: Optional[str] = None, cause: Optional[Exception] = None):
        super().__init__(kind)
        self.kind = kind
        self.raw = raw
        self.cause = cause


_TEXT_FIELD_DEFAULTS = {
    "name": "Unknown",
    "email": "",
    "phone": "",
    "experience_summary": "",
    "education_summary": "",
    "communication_skills": "N/A",
    "aba_therapy_skills": "N/A",
    "rbt_bcba_certification": "N/A",
    "autism_care_experience_score": "N/A",
}
_LIST_FIELDS = ("certifications", "technical_skills")


def _coerce_text(value, default: str) -> str:
    """Turn a model-provided value into a string, using ``default`` for None/blank.

    Unlike ``str(value or default)``, a numeric ``0`` is preserved as '0'.
    """
    if value is None:
        return default
    if isinstance(value, (list, tuple)):
        value = ", ".join(str(item) for item in value if item is not None)
    text = str(value).strip()
    return text or default


def _coerce_str_list(value) -> List[str]:
    """Turn a model-provided value into a list of non-empty strings."""
    if value is None:
        return []
    if isinstance(value, str):
        items = re.split(r"[,;\n]", value)
    elif isinstance(value, (list, tuple, set)):
        items = [item for item in value if item is not None]
    else:
        items = [value]
    cleaned = (str(item).strip() for item in items)
    return [item for item in cleaned if item]


def _failed_analysis() -> ResumeAnalysis:
    """Return the placeholder object callers recognize as a failed extraction."""
    return ResumeAnalysis(name=EXTRACTION_FAILED_NAME)


@st.cache_data(show_spinner=False)
def _analyze_resume_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Run the model and return a validated ResumeAnalysis (cached on success).

    Failures raise ``_ResumeAnalysisError`` instead of returning a placeholder
    so that they are not stored in the cache and a later retry can succeed.
    """
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

    system_user_prompt = (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )

    raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
    if not raw:
        raise _ResumeAnalysisError("empty")

    parsed = extract_first_json(raw)
    if not parsed:
        raise _ResumeAnalysisError("parse", raw=raw)

    # Build a clean payload: fill missing keys and coerce loosely-typed model
    # output (null, numbers, comma-separated strings) into the schema's types.
    payload = {
        key: _coerce_text(parsed.get(key), default)
        for key, default in _TEXT_FIELD_DEFAULTS.items()
    }
    for key in _LIST_FIELDS:
        payload[key] = _coerce_str_list(parsed.get(key))

    try:
        return ResumeAnalysis(**payload)
    except ValidationError as ve:
        raise _ResumeAnalysisError("validation", raw=raw, cause=ve) from ve


def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Analyze a resume with Groq and return a ResumeAnalysis instance.

    Successful results are cached per (resume_text, job_role). On failure a
    placeholder whose ``name`` is 'Extraction Failed' is returned and debug
    information is rendered in the UI; failures are not cached.
    """
    try:
        return _analyze_resume_cached(resume_text, job_role)
    except _ResumeAnalysisError as err:
        if err.kind == "parse":
            st.warning("Failed to parse model JSON output. See raw output below for debugging.")
        elif err.kind == "validation":
            st.error("Model output failed schema validation.")
        if err.raw:
            # Static element instead of an input widget: safe to render
            # several times in one run without widget-ID collisions.
            st.caption("Raw model output (debug)")
            st.code(err.raw)
        if err.cause is not None:
            st.exception(err.cause)
        return _failed_analysis()


# --- Scoring logic ---
_NUMBER_RE = re.compile(r"(\d+(\.\d+)?)")


def _first_number(value) -> Optional[float]:
    """Return the first decimal number found in ``str(value)``, or None."""
    match = _NUMBER_RE.search(str(value))
    return float(match.group(1)) if match else None


def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> tuple[float, float, float, float, float]:
    """Calculate the overall score and the individual component scores.

    Components:
      * Experience (max 40): length of the experience summary, saturating at
        100 characters.
      * Skills (max 30): number of technical skills, saturating at 10.
      * Communication (max 20): first number in ``communication_skills``
        clamped to 0-10; 5 is assumed when no number is present.
      * Certifications (max 10): one point per certification, capped at 10.
      * Therapist bonus (max 10, role 'therapist' only): derived from the ABA
        and autism-care scores; added to the total but not returned separately.

    Returns:
        ``(final_score, exp_score, skills_score, comm_score, certs_score)``
        where ``final_score`` is capped at 100.
    """
    # 1. Experience
    exp_len = len(analysis.experience_summary or "")
    exp_score = round(min(exp_len / 100.0, 1.0) * 40.0)

    # 2. Skills
    skills_count = len(analysis.technical_skills or [])
    skills_score = round(min(skills_count / 10.0, 1.0) * 30.0)

    # 3. Communication
    comm_val = _first_number(analysis.communication_skills)
    if comm_val is None:
        comm_val = 5.0  # Default if the model gave no usable rating
    comm_val = max(0.0, min(10.0, comm_val))
    comm_score = round((comm_val / 10.0) * 20.0)

    # 4. Certifications
    certs_score = min(len(analysis.certifications or []), 10) * 1.0

    total_score = exp_score + skills_score + comm_score + certs_score

    # 5. Therapist bonus
    if role.lower() == "therapist":
        aba = _first_number(analysis.aba_therapy_skills) or 0.0
        autism = _first_number(analysis.autism_care_experience_score) or 0.0
        # Average of the two specialized scores, scaled to a max of 10 points
        total_score += ((aba + autism) / 20.0) * 10.0

    final_score = round(min(total_score, 100))
    return (float(final_score), float(exp_score), float(skills_score), float(comm_score), float(certs_score))


# --- Append to DataFrame ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, scores: tuple[float, float, float, float, float]):
    """Append one analyzed resume as a new row of ``st.session_state.analyzed_data``.

    Args:
        job_role: Role the resume was evaluated for.
        analysis: Validated extraction result.
        scores: Tuple returned by ``calculate_resume_score``.
    """
    final_score, exp_score, skills_score, comm_score, certs_score = scores

    # model_dump() is the Pydantic v2 spelling; .dict() covers v1.
    data = analysis.model_dump() if hasattr(analysis, "model_dump") else analysis.dict()
    tech = ", ".join(data.get("technical_skills") or [])
    certs = ", ".join(data.get("certifications") or [])

    row = {
        'Name': data.get("name") or "",
        'Job Role': job_role,
        'Resume Score (100)': final_score,
        'Shortlisted': 'No',
        'Email': data.get("email") or "",
        'Phone': data.get("phone") or "",
        # Component score columns
        'Experience Score (40)': exp_score,
        'Skills Score (30)': skills_score,
        'Communication Score (20)': comm_score,
        'Certifications Score (10)': certs_score,
        'Experience Summary': data.get("experience_summary") or "",
        'Education Summary': data.get("education_summary") or "",
        'Communication Rating (1-10)': str(data.get("communication_skills") or "N/A"),
        'Skills/Technologies': tech,
        'Certifications': certs,
        'ABA Skills (1-10)': str(data.get("aba_therapy_skills") or "N/A"),
        'RBT/BCBA Cert': str(data.get("rbt_bcba_certification") or "N/A"),
        'Autism-Care Exp (1-10)': str(data.get("autism_care_experience_score") or "N/A"),
    }
    new_df = pd.DataFrame([row])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)


# --- Excel export helper ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize ``df`` to an in-memory .xlsx workbook and return its bytes."""
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return output.getvalue()


# --- UI Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# --- User Panel ---
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            # Set the flag and rerun so the analysis executes in a fresh pass.
            st.session_state.run_analysis = True
            st.rerun()

    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            with st.spinner("Processing resumes..."):
                for idx, f in enumerate(uploaded_files, start=1):
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            continue

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == EXTRACTION_FAILED_NAME:
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue

                        scores = calculate_resume_score(analysis, selected_role)
                        final_score = scores[0]

                        append_analysis_to_dataframe(selected_role, analysis, scores)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': final_score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # Pass the exception object itself so Streamlit can
                        # render its traceback.
                        st.exception(e)
                    finally:
                        # Runs on success, on `continue` and on error alike.
                        progress.progress(idx / total)
            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False

    # Display last results summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")

# --- Admin Panel ---
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            # Constant-time comparison; bytes are used because
            # compare_digest rejects non-ASCII str arguments.
            if hmac.compare_digest(password.encode("utf-8"), ADMIN_PASSWORD.encode("utf-8")):
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # Nothing below may render for unauthenticated visitors.
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = [
            'Name',
            'Job Role',
            'Resume Score (100)',
            'Experience Score (40)',
            'Skills Score (30)',
            'Communication Score (20)',
            'Certifications Score (10)',
            'Shortlisted',
            'Email',
            'Skills/Technologies'
        ]
        # Filter columns to only those present in the current dataframe (safety check)
        current_display_cols = [col for col in display_cols if col in df.columns]
        # Only 'Shortlisted' is written back below, so every other column is
        # locked to avoid edits that would be silently discarded.
        read_only_cols = [col for col in current_display_cols if col != 'Shortlisted']

        edited_df = st.data_editor(
            df[current_display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                ),
                "Resume Score (100)": st.column_config.ProgressColumn(
                    "Total Score",
                    format="%f",
                    min_value=0, max_value=100,
                ),
                "Experience Score (40)": st.column_config.ProgressColumn(
                    "Experience (40)",
                    format="%f",
                    min_value=0, max_value=40,
                ),
                "Skills Score (30)": st.column_config.ProgressColumn(
                    "Skills (30)",
                    format="%f",
                    min_value=0, max_value=30,
                ),
                "Communication Score (20)": st.column_config.ProgressColumn(
                    "Comms (20)",
                    format="%f",
                    min_value=0, max_value=20,
                ),
                "Certifications Score (10)": st.column_config.ProgressColumn(
                    "Certs (10)",
                    format="%f",
                    min_value=0, max_value=10,
                ),
            },
            disabled=read_only_cols,
            key="dashboard_editor",
            hide_index=True
        )

        # Write the edited 'Shortlisted' values back to the master dataframe.
        if 'Shortlisted' in edited_df.columns:
            master = st.session_state.analyzed_data
            edited_flags = edited_df['Shortlisted'].tolist()
            if len(edited_flags) == len(master):
                if edited_flags != master['Shortlisted'].tolist():
                    master.loc[:, 'Shortlisted'] = edited_flags
            else:
                # Row counts differ: update positionally as far as possible.
                for i, val in enumerate(edited_flags[:len(master)]):
                    master.at[master.index[i], 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("📥 Download Data")
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)
        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# --- End of file ---