import io
import math
import os
import re
import tempfile
from typing import Optional, List

import docx2txt
import fitz
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from pydantic import BaseModel, Field, ValidationError

# --------------------
# Config & Secrets
# --------------------
# Ensure page config is the very first Streamlit command (done here)
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# Load local .env if present (useful for local testing)
load_dotenv()

# Try multiple locations for the API key: environment variables, Streamlit secrets
GROQ_API_KEY = os.getenv("GROQ_API_KEY") or os.getenv("GROQ_APIKEY")
if not GROQ_API_KEY:
    # If deployed on Streamlit Cloud or similar, users might put secrets in st.secrets
    try:
        GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
    except Exception:
        GROQ_API_KEY = None

# Admin password (for demo). In production, store this in secrets.
# NOTE(review): the "admin" default is a guessable credential; set
# ADMIN_PASSWORD explicitly in any real deployment.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Model used for extraction. Overridable through the environment so the app
# can follow provider model changes without a code edit.
# NOTE(review): confirm the default model is still served by Groq.
GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")

# Initialize Groq Client (if key present)
groq_client = None
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.warning(f"Warning: Failed to initialize Groq client: {e}")
        groq_client = None
else:
    st.warning("GROQ_API_KEY not found in environment or Streamlit secrets. The app will run in fallback mode.")

# --------------------
# Session state init
# --------------------
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False

if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)


# --------------------
# Pydantic Schema
# --------------------
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume (by the LLM or the fallback)."""

    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(default="", description="A concise summary of the candidate's highest education.")
    communication_skills: str = Field(default="N/A", description="A score as a STRING (e.g., '8') or description of communication skills.")
    technical_skills: List[str] = Field(default_factory=list, description="List of technical skills/technologies mentioned.")
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7').")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9').")


# --------------------
# Helpers
# --------------------
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF/DOCX via a temporary file.

    Args:
        uploaded_file: File-like object exposing ``name`` and either
            ``getvalue()`` or ``read()`` (e.g. a Streamlit upload).

    Returns:
        The extracted text, or ``""`` when the type is unsupported or
        extraction fails. Failures are reported through ``st.error`` /
        ``st.warning`` instead of being raised.
    """
    tmp_path = None
    try:
        suffix = os.path.splitext(uploaded_file.name)[1].lower()
        if suffix not in ('.pdf', '.docx', '.doc'):
            st.warning(f"Unsupported file type: {suffix}")
            return ""

        # getvalue() returns the whole buffer regardless of the current read
        # position, so a file that was already consumed on a previous run
        # does not come back empty.
        if hasattr(uploaded_file, "getvalue"):
            payload = uploaded_file.getvalue()
        else:
            payload = uploaded_file.read()

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path before writing so cleanup still happens if the
            # write itself fails.
            tmp_path = tmp.name
            tmp.write(payload)

        if suffix == '.pdf':
            try:
                # Context manager closes the document even if a page fails.
                with fitz.open(tmp_path) as doc:
                    return "".join(page.get_text() for page in doc)
            except Exception as e:
                st.error(f"PDF extraction error for {uploaded_file.name}: {e}")
                return ""

        try:
            return docx2txt.process(tmp_path) or ""
        except Exception as e:
            st.error(f"DOCX extraction error for {uploaded_file.name}: {e}")
            return ""
    except Exception as e:
        st.error(f"Failed to extract text: {e}")
        return ""
    finally:
        # Always remove the temp file, whichever path was taken above.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass


def _build_system_prompt(job_role: str) -> str:
    """Return the system prompt, including role-specific scoring instructions."""
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', carefully look for ABA Therapy Skills, RBT/BCBA Certification, "
            "and Autism-Care Experience. Provide a score from 1-10 as a STRING (e.g., '7') for these fields. "
            "If not found, return 'N/A'."
        )
    else:
        therapist_instructions = (
            "Since the role is not 'Therapist', set specialized therapist fields to 'N/A' if not present."
        )
    return (
        f"You are a professional Resume Analyzer. Extract fields exactly matching the JSON schema: name, email, phone, certifications (list), "
        f"experience_summary, education_summary, communication_skills (STRING), technical_skills (list), aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score. "
        f"The candidate is applying for '{job_role}'. {therapist_instructions} Return valid JSON only."
    )


@st.cache_data
def _cached_groq_analysis(resume_text: str, job_role: str) -> dict:
    """Call Groq and return the validated analysis as a plain dict.

    Raises on any API or validation failure so that only successful results
    are stored in the cache; a transient failure is retried on the next call.
    """
    chat_completion = groq_client.chat.completions.create(
        model=GROQ_MODEL,
        messages=[
            {"role": "system", "content": _build_system_prompt(job_role)},
            {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"},
        ],
        # JSON mode is requested through `response_format`; the field list
        # itself is spelled out in the system prompt.
        response_format={"type": "json_object"},
        temperature=0.0,
    )
    raw = chat_completion.choices[0].message.content
    if not raw:
        raise ValueError("Groq returned an empty response")

    analysis = ResumeAnalysis.parse_raw(raw)

    # Ensure string coercions
    analysis.communication_skills = str(analysis.communication_skills or 'N/A')
    analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
    analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
    analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
    return analysis.dict()


def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Extract structured data from a resume, preferring the Groq LLM.

    Falls back to ``fallback_simple_extraction`` when no Groq client is
    configured, when the API call fails, or when the response does not
    validate against ``ResumeAnalysis``.

    Args:
        resume_text: Plain text of the resume.
        job_role: Target role; "Therapist" enables therapy-specific prompts.

    Returns:
        A populated ``ResumeAnalysis``.
    """
    # If no groq client, skip to fallback
    if not groq_client:
        return fallback_simple_extraction(resume_text, job_role)

    try:
        return ResumeAnalysis(**_cached_groq_analysis(resume_text, job_role))
    except ValidationError as ve:
        st.warning(f"Groq returned invalid format; falling back to heuristic extraction. Details: {ve}")
    except Exception as e:
        st.warning(f"Groq API call failed: {e}. Using fallback extraction.")
    return fallback_simple_extraction(resume_text, job_role)


# Keywords the heuristic extractor looks for (order is preserved in output).
_FALLBACK_KEYWORDS = (
    'python', 'java', 'c++', 'machine learning', 'deep learning',
    'tensorflow', 'pytorch', 'rbt', 'bcba', 'aba', 'autism',
)


def _mentions(term: str, lower_text: str) -> bool:
    """Return True if *term* occurs in *lower_text* as a standalone token.

    Alphanumeric neighbours are rejected so that e.g. 'aba' does not match
    'database' and 'java' does not match 'javascript'.
    """
    pattern = rf"(?<![a-z0-9]){re.escape(term)}(?![a-z0-9])"
    return re.search(pattern, lower_text) is not None


def fallback_simple_extraction(text: str, job_role: str) -> ResumeAnalysis:
    """A minimal heuristic extractor used when the LLM call is unavailable.

    It tries to find name/email/phone and picks up some keywords for skills
    and certifications.

    Args:
        text: Plain text of the resume.
        job_role: Accepted for signature parity with the LLM path; unused.

    Returns:
        A ``ResumeAnalysis`` with best-effort values; unknown fields are
        ``""`` or ``'N/A'`` and communication defaults to ``'5'``.
    """
    # Very simple heuristics (intended as a fallback only)
    email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
    phone_match = re.search(r"(\+?\d[\d\-\s]{7,}\d)", text)

    name = "Unknown"
    # Heuristic: first short line near the top that starts with a capital
    # letter and contains at least one alphabetic character.
    lines = [l.strip() for l in text.splitlines() if l.strip()]
    for line in lines[:5]:
        if len(line.split()) <= 4 and any(ch.isalpha() for ch in line) and line[0].isupper():
            name = line
            break

    email = email_match.group(0) if email_match else ""
    phone = phone_match.group(0) if phone_match else ""

    # Skills: gather common programming / therapy keywords
    lower_text = text.lower()
    skills_candidates = [kw for kw in _FALLBACK_KEYWORDS if _mentions(kw, lower_text)]
    certifications = [kw.upper() for kw in skills_candidates if kw in ('rbt', 'bcba')]

    experience_summary = ' '.join(lines[:4]) if lines else ''
    education_summary = ''

    # Therapist-specific small heuristics
    has_cert = any(
        _mentions(term, lower_text)
        for term in (
            'rbt', 'bcba',
            'registered behavior technician',
            'board certified behavior analyst',
        )
    )

    return ResumeAnalysis(
        name=name,
        email=email,
        phone=phone,
        certifications=certifications,
        experience_summary=experience_summary,
        education_summary=education_summary,
        communication_skills='5',
        technical_skills=skills_candidates,
        aba_therapy_skills='N/A',
        rbt_bcba_certification='Yes' if has_cert else 'N/A',
        autism_care_experience_score='N/A',
    )


def _parse_score(value, default: float = 0.0) -> float:
    """Parse a 0-10 rating from an LLM string such as '8' or '8 - strong'.

    Returns *default* when no finite number can be read; otherwise the value
    clamped to the range [0, 10].
    """
    head = str(value).split('-')[0].strip()
    try:
        score = float(head)
    except ValueError:
        return default
    if not math.isfinite(score):
        return default
    return max(0.0, min(score, 10.0))


def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Calculate a weighted score out of 100 from the extracted values.

    Components: experience summary length (max 40), number of skills
    (max 30), communication rating (max 20), certifications (max 10) and,
    when the selected role in session state is 'Therapist', a bonus of up
    to 10. The total is capped at 100 and rounded to a whole number.
    """
    total_score = 0.0

    # 1. Experience Score (Max 40)
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0

    # 2. Skills Score (Max 30)
    total_score += min(len(analysis.technical_skills or []) / 10.0, 1.0) * 30.0

    # 3. Communication (Max 20); an unreadable rating counts as a neutral 5.
    comm_rating = _parse_score(analysis.communication_skills, default=5.0)
    total_score += (comm_rating / 10.0) * 20.0

    # 4. Certifications (Max 10)
    total_score += min(len(analysis.certifications or []), 10) * 1.0

    # Therapist bonus (max 10). Each rating is parsed independently so one
    # missing value does not discard the other.
    if st.session_state.get('selected_role') == 'Therapist':
        aba = _parse_score(analysis.aba_therapy_skills)
        autism = _parse_score(analysis.autism_care_experience_score)
        total_score += ((aba + autism) / 20.0) * 10.0

    return float(round(min(total_score, 100)))


def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Append one analyzed candidate as a row of the session-state table.

    Args:
        job_role: Role the candidate was analyzed for.
        analysis: Extracted resume fields.
        score: Score returned by ``calculate_resume_score``.
    """
    data = analysis.dict()

    df_data = {
        'Name': data.get('name', ''),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get('email', ''),
        'Phone': data.get('phone', ''),
        'Shortlisted': 'No',
        'Experience Summary': data.get('experience_summary', ''),
        'Education Summary': data.get('education_summary', ''),
        'Communication Rating (1-10)': str(data.get('communication_skills', 'N/A')),
        'Skills/Technologies': ", ".join(map(str, data.get('technical_skills') or [])),
        'Certifications': ", ".join(map(str, data.get('certifications') or [])),
        'ABA Skills (1-10)': str(data.get('aba_therapy_skills', 'N/A')),
        'RBT/BCBA Cert': str(data.get('rbt_bcba_certification', 'N/A')),
        'Autism-Care Exp (1-10)': str(data.get('autism_care_experience_score', 'N/A')),
    }

    new_df = pd.DataFrame([df_data])
    existing = st.session_state.analyzed_data
    if existing.empty:
        # Concatenating with an empty frame is deprecated in recent pandas;
        # the first row simply becomes the table.
        st.session_state.analyzed_data = new_df
    else:
        st.session_state.analyzed_data = pd.concat([existing, new_df], ignore_index=True)
# --------------------
# App layout
# --------------------
def _rerun_app() -> None:
    """Trigger a script rerun on both newer and older Streamlit releases."""
    # `st.rerun` supersedes `st.experimental_rerun`; fall back to the older
    # name only when the newer one is not available.
    rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
    rerun()


st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key data. If the API key is missing, a fallback heuristic extractor will run.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    # key='selected_role' also exposes the choice via st.session_state, which
    # the scoring helper reads for the therapist bonus.
    selected_role = st.selectbox("**1. Select the Target Job Role**", options=job_role_options, key='selected_role')

    uploaded_files = st.file_uploader("**2. Upload Resumes** (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0.0)
            st.session_state.individual_analysis = []

            with st.spinner("Processing resumes..."):
                for position, file in enumerate(uploaded_files, start=1):
                    try:
                        file_name = file.name
                        st.write(f"Analyzing **{file_name}**...")

                        resume_text = extract_text_from_file(file)
                        if not resume_text:
                            st.error(f"Could not extract text from {file_name}. Skipping.")
                            continue

                        analysis = analyze_resume_with_groq(resume_text, selected_role)

                        if isinstance(analysis, ResumeAnalysis) and analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {file_name}. Skipping.")
                            continue

                        score = calculate_resume_score(analysis)
                        append_analysis_to_dataframe(selected_role, analysis, score)

                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': file_name
                        })
                    finally:
                        # Advance the bar for skipped files too, so it always
                        # reaches 100% once every upload has been visited.
                        progress_bar.progress(position / total_files)

            st.success(f"**✅ Successfully processed {len(st.session_state.individual_analysis)} / {total_files} resumes.**")

    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

        st.markdown("---")
        st.caption("All analyzed data is stored in the **Admin Dashboard**.")

with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                _rerun_app()
            else:
                st.error("Incorrect password.")
        # Halt the script here so nothing below renders for anonymous users.
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")

    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        _rerun_app()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()

        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist shortlist changes back to session state
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")
        st.subheader("📥 Download Data")

        df_export = st.session_state.analyzed_data.copy()
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_buffer.getvalue(),
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# End of file