# src/streamlit_app.py
#
# Quantum Scrutiny Platform: Groq-powered resume analysis built on Streamlit.
# This section handles configuration, API-client setup, and session-state
# bootstrapping; it must run top-to-bottom before any UI is rendered.

import streamlit as st
import pandas as pd
import io
import os
import fitz
import docx2txt
from groq import Groq
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# --- 0. SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
# st.set_page_config() must be the very first st.* call or Streamlit raises.
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- 1. CONFIGURATION AND INITIALIZATION ---

# Local development: load the .env that lives one directory above src/.
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))

# Hugging Face deployment reads the key from Secrets; locally it comes from .env.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Admin password: overridable via the ADMIN_PASSWORD environment variable.
# Falls back to the original hard-coded default so existing deployments keep
# working unchanged. NOTE(security): set ADMIN_PASSWORD as a secret in
# production rather than relying on the default.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Initialize the Groq client, failing fast with a visible error if misconfigured.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
    st.stop()

# Initialize session state: the admin-login flag and the accumulated results
# table that the dashboard reads and the analyzer appends to.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False

if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications',
        'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---

class ResumeAnalysis(BaseModel):
    """Structured record the Groq model must return for every resume.

    Each field description is serialized into the JSON schema that is sent to
    the model, so the descriptions double as extraction instructions. Scores
    are deliberately typed as strings (e.g. '8' or 'N/A') so the model can
    express "not applicable" without violating the schema.
    """

    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # The STRING typing below is intentional: it lets the model answer with
    # either a numeric score in quotes or 'N/A' without a schema violation.
    communication_skills: str = Field(description="A score as a STRING (e.g., '8') or brief description of communication skills.")
    technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
    aba_therapy_skills: str = Field(description="Specific mention or score as a STRING (e.g., '7') for ABA Therapy skills, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
    rbt_bcba_certification: str = Field(description="Indicate the STRING 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
    autism_care_experience_score: str = Field(description="A score as a STRING (e.g., '9') for Autism-Care Experience, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
# --- 3. HELPER FUNCTIONS ---

def extract_text_from_file(uploaded_file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        uploaded_file: A Streamlit UploadedFile; its MIME type selects the parser.

    Returns:
        The extracted text, or "" when the type is unsupported or extraction fails.
    """
    file_type = uploaded_file.type
    try:
        if file_type == "application/pdf":
            # PyMuPDF can read the in-memory upload stream directly.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                text = ""
                for page in doc:
                    text += page.get_text()
                return text
        elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            return docx2txt.process(uploaded_file)
        else:
            return ""
    except Exception as e:
        # Best-effort extraction: log and let the caller skip this file.
        print(f"Error extracting text: {e}")
        return ""


@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Use Groq JSON mode plus the Pydantic schema for structured extraction.

    Returns:
        A populated ResumeAnalysis, or a sentinel object whose ``name`` is
        "Extraction Failed" when the API call or response parsing fails.
    """
    # Role-specific instructions for the model.
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for: "
            "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "2. Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
            "3. If any specialized therapist field is not found, you MUST use the **STRING** 'N/A'. "
            "4. Set 'rbt_bcba_certification' to the **STRING** 'Yes' or 'No'."
        )
    else:
        # For non-therapist roles, explicitly instruct the model to use 'N/A'.
        therapist_instructions = (
            "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to the **STRING** 'N/A'."
        )

    # System prompt. The schema is embedded in the prompt because Groq's
    # json_object response mode guarantees valid JSON but does not itself
    # accept a schema parameter.
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
        f"**IMPORTANT:** All values must be returned as the data type specified. Numerical scores must be enclosed in quotes to be treated as **STRING** types (e.g., \"8\"). "
        f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions} "
        f"The JSON object must conform exactly to this JSON Schema:\n{ResumeAnalysis.schema_json()}"
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            # FIX: mixtral-8x7b-32768 was decommissioned by Groq; use a current
            # fast model instead.
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # FIX: the Groq SDK has no `response_model` kwarg (that belongs to
            # the `instructor` wrapper library); passing it raises TypeError.
            # JSON mode is requested via `response_format` instead.
            response_format={"type": "json_object"},
            temperature=0.0
        )
        # The response content is a JSON string; parse it into the model.
        analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
        return analysis
    except Exception as e:
        # Catches API/connection errors and structural validation failures.
        st.error(f"Groq API Error: {e}")
        # Return a sentinel analysis so the caller can detect and skip it.
        return ResumeAnalysis(
            name="Extraction Failed", email="", phone="", certifications=[],
            experience_summary="", education_summary="",
            communication_skills="N/A", technical_skills=[],
            aba_therapy_skills="N/A", rbt_bcba_certification="N/A",
            autism_care_experience_score="N/A"
        )


def _parse_score(value: str, default: float = 0.0) -> float:
    """Parse a score string like '8' or '8-High' into a float.

    Returns `default` when the string cannot be parsed.
    """
    try:
        return float(value.split('-')[0].strip())
    except (ValueError, IndexError, AttributeError):
        return default


def calculate_resume_score(analysis: ResumeAnalysis, job_role=None) -> float:
    """Calculate the weighted resume score out of 100.

    Args:
        analysis: The structured extraction result.
        job_role: Role used for the therapist bonus. Defaults to the role
            currently selected in session state (the original behavior), so
            existing callers are unaffected.

    Returns:
        A float score in [0, 100], rounded to a whole number.
    """
    if job_role is None:
        job_role = st.session_state.get('selected_role')

    total_score = 0.0

    # 1. Experience (max 40 points): saturates at a 100-character summary.
    total_score += min(len(analysis.experience_summary) / 100.0, 1.0) * 40.0

    # 2. Skills (max 30 points): saturates at 10 listed technologies.
    total_score += min(len(analysis.technical_skills) / 10.0, 1.0) * 30.0

    # 3. Communication (max 20 points): default to 5/10 when unparsable.
    total_score += (_parse_score(analysis.communication_skills, default=5.0) / 10.0) * 20.0

    # 4. Certifications (max 10 points): one point each.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # Therapist-specific bonus (max 10 points): average of the two specialized
    # scores; 'N/A' or unparsable values contribute 0 instead of voiding the
    # whole bonus (original skipped the bonus entirely on any parse failure).
    if job_role == "Therapist":
        aba_score = _parse_score(analysis.aba_therapy_skills) if analysis.aba_therapy_skills != 'N/A' else 0.0
        autism_score = _parse_score(analysis.autism_care_experience_score) if analysis.autism_care_experience_score != 'N/A' else 0.0
        total_score += ((aba_score + autism_score) / 20.0) * 10.0

    return float(round(min(total_score, 100)))


def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Format one analysis result as a row and append it to the session table."""
    data = analysis.dict()  # pydantic v1-style export; deprecated-but-supported in v2
    df_data = {
        'Name': data['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data['email'],
        'Phone': data['phone'],
        'Shortlisted': 'No',  # every new candidate starts un-shortlisted
        'Experience Summary': data['experience_summary'],
        'Education Summary': data['education_summary'],
        'Communication Rating (1-10)': data['communication_skills'],
        'Skills/Technologies': ", ".join(data['technical_skills']),
        'Certifications': ", ".join(data['certifications']),
        'ABA Skills (1-10)': data['aba_therapy_skills'],
        'RBT/BCBA Cert': data['rbt_bcba_certification'],
        'Autism-Care Exp (1-10)': data['autism_care_experience_score'],
    }
    new_df = pd.DataFrame([df_data])
    # FIX: concatenating onto the empty seed frame triggers pandas'
    # all-NA-columns FutureWarning; replace it on the first append instead.
    if st.session_state.analyzed_data.empty:
        st.session_state.analyzed_data = new_df
    else:
        st.session_state.analyzed_data = pd.concat(
            [st.session_state.analyzed_data, new_df], ignore_index=True
        )


# --- 4. APP LAYOUT AND LOGIC ---

st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )

    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            analyzed_count = 0  # FIX: report files that actually succeeded
            progress_bar = st.progress(0)
            st.session_state.individual_analysis = []

            with st.status("Processing Resumes...", expanded=True) as status_box:
                for i, file in enumerate(uploaded_files):
                    try:
                        file_name = file.name
                        st.write(f"Analyzing **{file_name}**...")

                        resume_text = extract_text_from_file(file)
                        if not resume_text:
                            st.error(f"Could not extract text from {file_name}. Skipping.")
                            continue

                        analysis = analyze_resume_with_groq(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Groq extraction failed for {file_name}. Skipping.")
                            continue

                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)
                        analyzed_count += 1

                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': file_name
                        })
                    finally:
                        # FIX: advance the bar even when a file is skipped;
                        # the original `continue` left it stuck.
                        progress_bar.progress((i + 1) / total_files)

                status_box.update(label="Analysis Complete!", state="complete", expanded=False)

            # FIX: the original claimed success for every upload even when
            # some files were skipped; report the real counts.
            st.success(f"**✅ Successfully analyzed {analyzed_count} of {total_files} resumes.**")

    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")

# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # Halt rendering of the dashboard until the login succeeds.
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")

    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()

        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        # Persist shortlist edits back into the master table (index-aligned;
        # hide_index only hides the display, the underlying index is kept).
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")
        st.subheader("📥 Download Data")

        # Export the full table (all columns, not just the display subset).
        df_export = st.session_state.analyzed_data.copy()
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# --- End of src/streamlit_app.py ---