# app.py
"""
Quantum Scrutiny Platform | Groq-Powered
Single-file Streamlit app (refactored & debugged).

Flow: users upload PDF/DOCX resumes -> text is extracted -> Groq LLM returns a
strict JSON profile -> profile is validated (Pydantic), scored, and stored in
``st.session_state.analyzed_data`` -> admins review/shortlist/export via a
password-protected dashboard tab.
"""

# --- 0. Imports & environment -------------------------------------------------
import os

from dotenv import load_dotenv

load_dotenv()  # load local .env if present (during local dev)

import ast
import io
import json
import re
import traceback
from typing import List, Optional

import pandas as pd
import streamlit as st

# resume parsing
import fitz  # PyMuPDF
from docx import Document  # python-docx

# Groq client (keep same import name as before)
from groq import Groq

# Pydantic for schema validation
from pydantic import BaseModel, Field, ValidationError

# --- Streamlit UI config: must be the first Streamlit command executed ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # set in environment, .env, or deploy secrets
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")  # optional override via env

# --- Initialize Groq client with safe error messaging ---
groq_client = None
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY not found. Please set GROQ_API_KEY as an environment variable or in Hugging Face secrets.")
    # We won't stop here to allow UI to display, but analysis will error if used.
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
        groq_client = None

# --- Session state defaults ---
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False

if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)',
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)

if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []

if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False


# --- Pydantic schema for Groq output ---
class ResumeAnalysis(BaseModel):
    """Validated shape of the JSON profile the LLM must return for one resume."""

    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="Concise summary of experience.")
    education_summary: str = Field(default="", description="Concise summary of education.")
    communication_skills: str = Field(description="Communication score as a STRING ('8') or description.")
    technical_skills: List[str] = Field(default_factory=list, description="List of skills/technologies.")
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="ABA Therapy score as STRING or 'N/A'.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="'Yes'/'No'/'N/A'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="Autism care experience score as STRING or 'N/A'.")


def _failed_result() -> ResumeAnalysis:
    """Return the canonical 'Extraction Failed' fallback object.

    Centralized here because four different failure paths previously built the
    same literal by hand.
    """
    return ResumeAnalysis(
        name="Extraction Failed", email="", phone="", certifications=[],
        experience_summary="", education_summary="", communication_skills="N/A",
        technical_skills=[], aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A", autism_care_experience_score="N/A",
    )


def _leading_number(value, default: float = 0.0) -> float:
    """Extract the leading numeric token from strings like '8', '8/10', '8 - good'.

    Returns *default* when no digits are found (e.g. a prose description).
    """
    m = re.search(r"(\d+(\.\d+)?)", str(value))
    return float(m.group(1)) if m else default


# --- Helper: File text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """
    Accept a Streamlit UploadedFile object and return extracted text.

    Supports PDF (via PyMuPDF) and DOCX (via python-docx); anything else is
    decoded as UTF-8 best-effort. Returns an empty string on failure — callers
    treat "" as "skip this file".
    """
    try:
        content = uploaded_file.read()  # consume once; reuse `content` downstream
        name_lower = uploaded_file.name.lower()

        # Detect PDF by extension OR magic header bytes.
        if name_lower.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "\n".join(page.get_text() for page in doc).strip()
            except Exception as e:
                st.warning(f"PDF extraction issue for {uploaded_file.name}: {e}")
                return ""

        if name_lower.endswith(".docx"):
            # python-docx accepts a file-like object.
            try:
                doc = Document(io.BytesIO(content))
                paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(paragraphs).strip()
            except Exception as e:
                st.warning(f"DOCX extraction issue for {uploaded_file.name}: {e}")
                return ""

        # Fallback: simple decode for text-like files.
        try:
            return content.decode('utf-8', errors='ignore')
        except Exception:
            return ""
    except Exception as e:
        st.error(f"Unexpected file extraction error: {e}")
        return ""


# --- Helper: call Groq (safe wrapper) ---
def call_groq_chat_system(resume_text: str, job_role: str) -> Optional[str]:
    """
    Call Groq chat completion and return the model's text content, or None on error.

    Requires ``groq_client`` to be initialized (GROQ_API_KEY set).
    """
    if not groq_client:
        st.error("Groq client is not initialized. Set GROQ_API_KEY in environment or secrets.")
        return None

    # Role-specific instructions: therapist roles get specialized scored fields.
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide a score from 1-10 as a STRING "
            "(e.g., '7') for the specialized fields. If any specialized field is not present, return 'N/A'."
        )
    else:
        therapist_instructions = (
            "This is NOT a Therapist role. Set 'aba_therapy_skills', 'autism_care_experience_score', "
            "and 'rbt_bcba_certification' to 'N/A' if not applicable."
        )

    system_prompt = (
        "You are a professional Resume Analyzer. Extract the requested fields and return a strict JSON object "
        "matching the schema: name, email, phone, certifications (array), experience_summary, education_summary, "
        "communication_skills (AS A STRING, e.g., '8'), technical_skills (array), aba_therapy_skills, "
        "rbt_bcba_certification, autism_care_experience_score. " + therapist_instructions
    )
    user_prompt = f"Analyze the following resume text and return a JSON object:\n\n---\n{resume_text}\n---\nReturn only valid JSON."

    try:
        result = groq_client.chat.completions.create(
            # NOTE(review): mixtral-8x7b-32768 has been decommissioned on Groq —
            # confirm and switch to a currently supported model id if calls fail.
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.0,
            max_tokens=2000,
        )
        # SDK response structure varies by version; prefer the attribute form,
        # fall back to dict-style access, then to str() as a last resort.
        try:
            return result.choices[0].message.content
        except Exception:
            try:
                return result["choices"][0]["message"]["content"]
            except Exception:
                return str(result)
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        st.exception(e)
        return None


# --- Cached wrapper for analysis (cache by resume_text + role) ---
@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Call Groq and return a validated ResumeAnalysis object.

    Cached to avoid repeated API calls for identical text+role. On any failure
    (API error, unparseable output, schema violation) a safe fallback object
    with name 'Extraction Failed' is returned instead of raising.

    Note: only *display* elements (st.warning/st.error/st.code) are used here —
    input widgets such as st.text_area are not allowed inside cached functions.
    """
    raw_response = call_groq_chat_system(resume_text, job_role)
    if not raw_response:
        return _failed_result()

    # The model may wrap the JSON in commentary; grab the first {...} span.
    json_text = None
    try:
        match = re.search(r"(\{.*\})", raw_response, re.DOTALL)
        json_text = match.group(1) if match else raw_response
        parsed = json.loads(json_text)
    except Exception:
        # Forgiving fallback: the model may have emitted a Python-dict literal.
        # ast.literal_eval only evaluates literals — safe on untrusted output,
        # unlike the eval() it replaces.
        try:
            parsed = ast.literal_eval(json_text) if json_text else None
            if not isinstance(parsed, dict):
                raise ValueError("Parsed non-dict from model response fallback.")
        except Exception:
            st.warning("Failed to parse Groq output as JSON. Returning fallback extraction.")
            st.code(raw_response)  # display-only debug dump (widget-free for cache)
            return _failed_result()

    # Validate & coerce to the Pydantic model, applying safe defaults first.
    try:
        parsed.setdefault("certifications", [])
        parsed.setdefault("technical_skills", [])

        # communication_skills must be a string per the schema.
        if parsed.get("communication_skills") is not None:
            parsed["communication_skills"] = str(parsed["communication_skills"])
        else:
            parsed["communication_skills"] = "N/A"

        # Therapist-specific fields default to "N/A" and are stringified.
        for k in ["aba_therapy_skills", "rbt_bcba_certification", "autism_care_experience_score"]:
            parsed[k] = "N/A" if parsed.get(k) is None else str(parsed[k])

        # parse_obj kept for Pydantic v1 compatibility (matches .dict() usage below).
        analysis = ResumeAnalysis.parse_obj(parsed)

        # Final coercions to guarantee string types for display fields.
        analysis.communication_skills = str(analysis.communication_skills or "N/A")
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or "N/A")
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or "N/A")
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or "N/A")
        return analysis
    except ValidationError as ve:
        st.error("Model output failed schema validation. Returning fallback object.")
        st.code(raw_response)  # display-only debug dump (widget-free for cache)
        st.exception(ve)
        return _failed_result()
    except Exception as e:
        st.error("Unexpected error while validating model output.")
        st.exception(e)
        return _failed_result()


# --- Scoring function ---
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    """Compute a 0-100 score from the extracted profile.

    Weights: experience length 40, skills count 30, communication 20,
    certifications 10, plus a therapist-only specialization bonus up to 10.
    Capped at 100.
    """
    total_score = 0.0

    # Experience summary length -> up to 40 points (100+ chars = full points).
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0

    # Skills count -> up to 30 points (10+ skills = full points).
    skills_count = len(analysis.technical_skills or [])
    total_score += min(skills_count / 10.0, 1.0) * 30.0

    # Communication -> up to 20 points; accepts '8', '8/10', '8 - good' etc.
    # Falls back to a neutral 5.0 when no number can be extracted.
    comm_val = _leading_number(str(analysis.communication_skills).strip(), default=5.0)
    comm_val = max(0.0, min(10.0, comm_val))
    total_score += (comm_val / 10.0) * 20.0

    # Certifications -> up to 10 points (1 point each, capped at 10).
    total_score += min(len(analysis.certifications or []), 10) * 1.0

    # Therapist-specific bonus: average of ABA + autism-care scores, scaled to 10.
    if role == "Therapist":
        try:
            aba = _leading_number(analysis.aba_therapy_skills)
            autism = _leading_number(analysis.autism_care_experience_score)
            total_score += ((aba + autism) / 20.0) * 10.0
        except Exception:
            pass

    return float(round(min(total_score, 100)))


# --- Append to session DataFrame helper ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Flatten one ResumeAnalysis into a row of st.session_state.analyzed_data."""
    data = analysis.dict()
    df_data = {
        'Name': data.get('name') or "",
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get('email') or "",
        'Phone': data.get('phone') or "",
        'Shortlisted': 'No',
        'Experience Summary': data.get('experience_summary') or "",
        'Education Summary': data.get('education_summary') or "",
        'Communication Rating (1-10)': str(data.get('communication_skills') or "N/A"),
        'Skills/Technologies': ", ".join(data.get('technical_skills') or []),
        'Certifications': ", ".join(data.get('certifications') or []),
        'ABA Skills (1-10)': str(data.get('aba_therapy_skills') or "N/A"),
        'RBT/BCBA Cert': str(data.get('rbt_bcba_certification') or "N/A"),
        'Autism-Care Exp (1-10)': str(data.get('autism_care_experience_score') or "N/A"),
    }
    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, new_df], ignore_index=True
    )


# --- Utility: Excel bytes for st.download_button ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to .xlsx bytes (requires openpyxl)."""
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return output.getvalue()


# --- App Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# -------------------------
# User Panel
# -------------------------
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader(
        "2. Upload Resumes (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True,
    )

    # Analyze button sets a session_state flag and reruns so processing happens
    # on the next script pass.
    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    # If run_analysis flag is set, process uploads.
    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            continue  # finally still updates the progress bar

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue  # finally still updates the progress bar

                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name,
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        st.exception(e)  # st.exception expects an Exception, not a string
                        st.code(traceback.format_exc())
                    finally:
                        progress.progress(idx / total)

            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False  # reset flag

    # Show last analysis summary.
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")

# -------------------------
# Admin Panel (Password Protected)
# -------------------------
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # Stop further admin rendering while not logged in (admin tab is last,
        # so nothing below it is lost).
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()

        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        # data_editor with SelectboxColumn so admins can toggle 'Shortlisted'.
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True,
        )

        # Propagate the 'Shortlisted' edits back to the session dataframe.
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            # Fallback for indexing mismatches: copy row by row.
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("📥 Download Data")
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)
        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status.",
        )

# --- End of app.py ---