# src/streamlit_app.py import streamlit as st import pandas as pd import io import os import fitz # PyMuPDF import docx2txt from groq import Groq from dotenv import load_dotenv from pydantic import BaseModel, Field # --- 1. CONFIGURATION AND INITIALIZATION --- # 🚨 FIX for .env: Load environment variables by explicitly pointing up one directory. # This ensures the script finds the .env file even though it's run from the 'src' folder. load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env')) GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Initialize Groq Client if GROQ_API_KEY: try: groq_client = Groq(api_key=GROQ_API_KEY) except Exception as e: st.error(f"Error initializing Groq Client: {e}") st.stop() else: # This message should no longer appear if the .env fix works st.error("GROQ_API_KEY not found. Please ensure the .env file is in the project root and contains your key.") st.stop() # Admin Password (as requested) ADMIN_PASSWORD = "admin" # Initialize Session State if 'is_admin_logged_in' not in st.session_state: st.session_state.is_admin_logged_in = False if 'analyzed_data' not in st.session_state: # Define DataFrame with columns for initial structure initial_cols = [ 'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted', 'Experience Summary', 'Education Summary', 'Communication Rating (1-10)', 'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)' ] st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols) # --- 2. 
# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---

class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction.

    The ``Field(description=...)`` strings double as extraction instructions:
    the model's schema is referenced by the Groq API call, so editing a
    description changes what the LLM is asked to return.  Note that the three
    therapist-only fields are still *required* for every role; the prompt in
    ``analyze_resume_with_groq`` tells the LLM when to populate them
    meaningfully.
    """

    # --- Contact details ---
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")

    # --- General qualifications ---
    certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # Free-form: scoring code parses the leading number (e.g. '7-High' -> 7).
    communication_skills: str = Field(description="A rating (1-10) or brief description of communication skills based on the resume language.")
    technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")

    # --- Therapist-role-specific fields (prompt instructs the LLM to fill
    # these only when the selected job role is 'Therapist') ---
    aba_therapy_skills: str = Field(description="Specific mention or score (1-10) for ABA Therapy skills, ONLY if the role is 'Therapist'.")
    rbt_bcba_certification: str = Field(description="Indicate 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'.")
    autism_care_experience_score: str = Field(description="A score (1-10) for Autism-Care Experience, ONLY if the role is 'Therapist'.")
# --- 3. HELPER FUNCTIONS ---

def extract_text_from_file(uploaded_file):
    """Extract plain text from an uploaded resume file.

    Args:
        uploaded_file: Streamlit UploadedFile (PDF or DOCX).

    Returns:
        str: The extracted text, or "" for unsupported types or on any
        extraction error (the error is surfaced to the UI via st.error).
    """
    file_type = uploaded_file.type
    try:
        if file_type == "application/pdf":
            # Use PyMuPDF directly on the in-memory byte stream (no temp file).
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                # join() avoids quadratic string concatenation on long PDFs.
                return "".join(page.get_text() for page in doc)
        elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            # docx2txt accepts a file-like object directly.
            return docx2txt.process(uploaded_file)
        else:
            return ""
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return ""


@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Use Groq's JSON mode plus the Pydantic schema for structured extraction.

    Args:
        resume_text: Raw text pulled from the resume file.
        job_role: Target role; 'Therapist' triggers extra specialized fields.

    Returns:
        ResumeAnalysis: Parsed analysis, or a sentinel object with
        ``name == "Extraction Failed"`` if the API call or parsing fails.
    """
    import json  # local import: keeps the file's top-level import block untouched

    # Custom instructions for Therapist role
    therapist_instructions = ""
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for: "
            "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "2. Provide a score from 1-10 for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
            "3. Set 'rbt_bcba_certification' to 'Yes' or 'No'."
        )

    # Works on both Pydantic v1 (.schema) and v2 (.model_json_schema).
    schema = (ResumeAnalysis.model_json_schema()
              if hasattr(ResumeAnalysis, "model_json_schema")
              else ResumeAnalysis.schema())

    # Groq's JSON mode requires the word "JSON" to appear in the prompt, and
    # the SDK cannot consume a Pydantic model directly, so the schema is
    # inlined into the system prompt instead.
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Follow the instructions precisely and return a JSON object that strictly adheres to this JSON schema: {json.dumps(schema)}. "
        f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions}"
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            # BUG FIX: 'mixtral-8x7b-32768' has been decommissioned by Groq;
            # use a currently supported production model.
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # BUG FIX: the Groq SDK has no 'response_model' parameter (that is
            # an 'instructor'-library extension); passing it raised TypeError,
            # so every request fell into the except branch and returned the
            # "Extraction Failed" sentinel.  'response_format' is the
            # supported way to force a JSON-object response.
            response_format={"type": "json_object"},
            temperature=0.0
        )
        # Parse the JSON string into the model.  ResumeAnalysis(**payload)
        # validates on both Pydantic v1 and v2 (unlike the removed parse_raw).
        payload = json.loads(chat_completion.choices[0].message.content)
        return ResumeAnalysis(**payload)
    except Exception as e:
        st.error(f"Groq API Error: {e}")
        # Return an empty/default sentinel object on failure so callers can skip it.
        return ResumeAnalysis(
            name="Extraction Failed", email="", phone="", certifications=[],
            experience_summary="", education_summary="", communication_skills="0",
            technical_skills=[], aba_therapy_skills="0",
            rbt_bcba_certification="No", autism_care_experience_score="0"
        )


def _leading_number(rating_text: str, default: float) -> float:
    """Parse the leading number from a rating string like '7-High' -> 7.0.

    Returns ``default`` when no usable number is present.
    """
    try:
        return float(rating_text.split('-')[0].strip())
    except (ValueError, IndexError, AttributeError):
        return default


def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Calculate the weighted resume score out of 100.

    Weights: Experience 40%, Skills 30%, Communication 20%, Certifications 10%,
    plus a Therapist-only bonus of up to 10 points.  Result is capped at 100
    and rounded to a whole number (returned as float).
    """
    total_score = 0.0

    # 1. Experience Score (max 40).  Simple heuristic: a longer summary means
    # more experience was found; 100 characters earns full credit.
    exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0)
    total_score += exp_factor * 40.0

    # 2. Skills Score (max 30), based on number of skills found (up to 10).
    skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # 3. Communication Score (max 20): scale the 1-10 rating to 20 points,
    # defaulting to 5 when Groq returns unparsable text.
    comm_rating = _leading_number(analysis.communication_skills, default=5.0)
    total_score += (comm_rating / 10.0) * 20.0

    # 4. Certification Score (max 10): one point per certification, capped.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # --- Therapist-Specific Bonus ---
    # Average of the two specialized 1-10 scores, scaled to a max of 10
    # points.  An unparsable score now counts as 0 instead of voiding the
    # whole bonus.
    if st.session_state.get('selected_role') == "Therapist":
        aba_score = _leading_number(analysis.aba_therapy_skills, default=0.0)
        autism_score = _leading_number(analysis.autism_care_experience_score, default=0.0)
        total_score += ((aba_score + autism_score) / 20.0) * 10.0

    # Final cleanup and capping.
    return float(round(min(total_score, 100)))


def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Format one analysis result and append it to the session-state DataFrame.

    The keys here must match the ``initial_cols`` used when the DataFrame was
    created, otherwise pd.concat would silently introduce new columns.
    """
    # Read model attributes directly — this sidesteps the deprecated
    # .dict()/.model_dump() split between Pydantic versions, and drops the
    # unused 'Resume Score'/'Shortlisted' scratch keys the old code built.
    df_data = {
        'Name': analysis.name,
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': analysis.email,
        'Phone': analysis.phone,
        'Shortlisted': 'No',  # Default status; the admin flips this later.
        'Experience Summary': analysis.experience_summary,
        'Education Summary': analysis.education_summary,
        'Communication Rating (1-10)': analysis.communication_skills,
        # Flatten list fields for display/Excel.
        'Skills/Technologies': ", ".join(analysis.technical_skills),
        'Certifications': ", ".join(analysis.certifications),
        'ABA Skills (1-10)': analysis.aba_therapy_skills,
        'RBT/BCBA Cert': analysis.rbt_bcba_certification,
        'Autism-Care Exp (1-10)': analysis.autism_care_experience_score,
    }
    # Convert to a single-row DataFrame and concatenate.
    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, new_df], ignore_index=True
    )


# --- 4. APP LAYOUT AND LOGIC ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

# --- Tabs for User and Admin ---
tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")

    # Job Role Selection
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'  # Store role in session state for scoring logic
    )

    # File Uploader
    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)
            # Clear previous individual file analysis displays
            st.session_state.individual_analysis = []
            analyzed_count = 0  # successes only, for an accurate summary

            with st.status("Processing Resumes...", expanded=True) as status_box:
                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")

                    # BUG FIX: advance the progress bar up front — previously
                    # the 'continue' branches skipped the update, stalling it.
                    progress_bar.progress((i + 1) / total_files)

                    # 1. Extract Text
                    resume_text = extract_text_from_file(file)
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue

                    # 2. Analyze with Groq
                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    if analysis.name == "Extraction Failed":
                        st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        continue

                    # 3. Calculate Score
                    score = calculate_resume_score(analysis)

                    # 4. Store Data
                    append_analysis_to_dataframe(selected_role, analysis, score)
                    analyzed_count += 1

                    # Store data for individual display below
                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })

                status_box.update(label="Analysis Complete!", state="complete", expanded=False)

            # BUG FIX: report the number actually analyzed, not merely uploaded.
            st.success(f"**✅ Successfully analyzed {analyzed_count} resumes.**")

    # Display results of the last batch of analysis
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the **Admin Dashboard**.")

# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:
    # --- Login Logic ---
    # Until the admin logs in, st.stop() below halts the script, so nothing
    # after this block renders (in either tab) for an unauthenticated session.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            # NOTE(review): compares against the hard-coded ADMIN_PASSWORD
            # ("admin"); fine for a demo, not for production.
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()  # Stop execution until logged in

    # --- Dashboard Content (Logged In) ---
    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")

    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        # Work on a copy so the editor widget never mutates session state directly.
        df = st.session_state.analyzed_data.copy()

        # --- 1. Shortlisting & Data Display ---
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Key columns for display (the export below still contains all fields).
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        # Editable Data Table (allowing admin to change 'Shortlisted' status)
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Update the session state DataFrame with the edited shortlisting status.
        # This keeps the changes persistent.  Assignment aligns on the shared
        # row index (hide_index only affects display, not the index itself).
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")

        # --- 2. Download Excel File ---
        st.subheader("📥 Download Data")

        # The full DataFrame to export (all columns, including shortlist status).
        df_export = st.session_state.analyzed_data.copy()

        # Create an in-memory Excel file buffer; nothing is written to disk.
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        # Rewind so download_button reads the buffer from the beginning.
        excel_buffer.seek(0)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# --- End of src/streamlit_app.py ---