# NOTE: Hugging Face web-UI chrome stripped from the top of this paste
# ("meesamraza's picture / Update app.py / 6e2aa61 verified / raw /
# history blame / 17.2 kB") -- preserved here as a comment so the file parses.
# src/streamlit_app.py
import streamlit as st
import pandas as pd
import io
import os
import fitz
import docx2txt
from groq import Groq
from dotenv import load_dotenv
from pydantic import BaseModel, Field, ValidationError # Added ValidationError
from typing import Optional, List # Added Optional and List
# --- 0. SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# FIX: load a local .env file (if present) before reading the key.
# `load_dotenv` was imported at the top of the file but never called, so the
# ".env file" fallback promised by the error message below could never work.
# On Hugging Face the key still arrives via Secrets (environment variables).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Admin password for the dashboard tab (plain-text by request; not suitable
# for anything security-sensitive).
ADMIN_PASSWORD = "admin"

# Initialize the Groq client, failing fast with a visible error so the rest
# of the app never runs without a working client.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
    st.stop()

# Session-state defaults: admin login flag and the accumulated results table.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---
class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction.

    Score-like fields are deliberately typed as ``str`` rather than ``int``:
    the LLM is prompted to emit scores as strings (e.g. '8'), and the
    analyze function re-coerces the values with ``str()`` after validation
    as a safety net.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(description="List of professional certifications.")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # Kept as str because the model was observed returning an int (8)
    # instead of a str ('8'); the system prompt re-emphasizes string output
    # and the caller coerces the value once more after parsing.
    communication_skills: str = Field(description="A score as a STRING (e.g., '8') or description of communication skills.")
    technical_skills: List[str] = Field(description="List of technical skills/technologies mentioned.")
    # Therapist-only fields: Optional with an 'N/A' default because the model
    # may return null for non-therapist roles; the analyze function maps any
    # remaining None back to 'N/A' before the value is used downstream.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7'). Use 'N/A' if not applicable.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'. Use 'N/A' if not applicable.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9'). Use 'N/A' if not applicable.")
# --- 3. HELPER FUNCTIONS ---
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Any unrecognized MIME type or extraction failure yields an empty
    string; the caller treats '' as "skip this file".
    """
    mime = uploaded_file.type
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    try:
        if mime == "application/pdf":
            # PyMuPDF reads the raw bytes of the upload; concatenate the
            # text of every page.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        if mime == docx_mime:
            return docx2txt.process(uploaded_file)
        return ""
    except Exception as e:
        # Best-effort extraction: log to the server console and signal
        # failure with an empty string rather than crashing the UI.
        print(f"Error extracting text: {e}")
        return ""
def _extraction_failed() -> ResumeAnalysis:
    """Sentinel result returned when the Groq call or validation fails."""
    return ResumeAnalysis(
        name="Extraction Failed", email="", phone="", certifications=[],
        experience_summary="", education_summary="", communication_skills="N/A",
        technical_skills=[], aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A", autism_care_experience_score="N/A"
    )

@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Extract structured candidate data from resume text via Groq.

    Args:
        resume_text: Raw text pulled from the uploaded resume.
        job_role: Target role; 'Therapist' activates the specialized fields.

    Returns:
        A validated ResumeAnalysis, or the 'Extraction Failed' sentinel on
        any API or validation error (the caller checks ``analysis.name``).
    """
    # Role-specific guidance for the specialized therapist fields.
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields. "
            "If any specialized therapist field is not found, you MUST return **null** or **N/A** for that field."
        )
    else:
        # Explicitly ask for null/'N/A' so Optional[str] validates cleanly.
        therapist_instructions = (
            "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to **null** or **N/A**."
        )
    # The JSON schema is embedded directly in the prompt because Groq's JSON
    # mode (`response_format`) guarantees syntactically valid JSON but does
    # not accept a schema parameter.
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Return a JSON object that strictly adheres to the following JSON schema: {ResumeAnalysis.schema_json()}. "
        f"**CRITICAL:** Ensure 'communication_skills' is returned as a **STRING** value, even if it's a number (e.g., \"8\" NOT 8). "
        f"{therapist_instructions}"
    )
    try:
        chat_completion = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # FIX: the Groq SDK parameter is `response_format`; the previous
            # `response_model={...}` kwarg is not accepted by
            # chat.completions.create (it belongs to the `instructor`
            # wrapper) and made every call raise TypeError.
            response_format={"type": "json_object"},
            temperature=0.0
        )
        analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
        # Normalize Optional/None fields to plain strings so scoring and the
        # DataFrame code can treat every field uniformly.
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
        analysis.communication_skills = str(analysis.communication_skills)
        return analysis
    except ValidationError as ve:
        st.error(f"Groq API Validation Error: The model returned incompatible data. Details: {ve}")
        # Safe to reference: ValidationError can only arise from parse_raw,
        # which runs after chat_completion is assigned.
        print(f"Failed JSON: {chat_completion.choices[0].message.content}")
        return _extraction_failed()
    except Exception as e:
        st.error(f"Groq API Error: {e}")
        return _extraction_failed()
def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Compute a weighted 0-100 score for one analyzed candidate.

    Weights: experience 40, technical skills 30, communication 20,
    certifications 10, plus an up-to-10-point therapist-specialization
    bonus. The total is capped at 100 and rounded.
    """
    score = 0.0

    # Experience (max 40): scales with summary length, saturating at 100 chars.
    score += min(len(analysis.experience_summary) / 100.0, 1.0) * 40.0

    # Technical skills (max 30): one "unit" per listed skill, capped at 10.
    score += min(len(analysis.technical_skills) / 10.0, 1.0) * 30.0

    # Communication (max 20): take the leading number from values such as
    # "8 - articulate"; anything unparseable falls back to a neutral 5/10.
    leading = str(analysis.communication_skills).split('-')[0].strip()
    try:
        comm_rating = float(leading)
    except (ValueError, IndexError):
        comm_rating = 5.0
    score += (comm_rating / 10.0) * 20.0

    # Certifications (max 10): one point each, capped.
    score += float(min(len(analysis.certifications), 10))

    # Therapist-only bonus (max 10): average of the two specialized scores.
    if st.session_state.get('selected_role') == "Therapist":
        try:
            aba_raw = str(analysis.aba_therapy_skills)
            autism_raw = str(analysis.autism_care_experience_score)
            aba = 0.0 if aba_raw.upper() in ['N/A', 'NONE'] else float(aba_raw.split('-')[0].strip())
            autism = 0.0 if autism_raw.upper() in ['N/A', 'NONE'] else float(autism_raw.split('-')[0].strip())
            score += ((aba + autism) / 20.0) * 10.0
        except (ValueError, IndexError, TypeError):
            # Matching the original guard: if either specialized value is
            # corrupt, the whole bonus is skipped.
            pass

    return float(round(min(score, 100)))
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Format one analysis result and append it to the session-state table.

    Args:
        job_role: Role the candidate was evaluated against.
        analysis: Validated extraction result from Groq.
        score: Weighted 0-100 score from calculate_resume_score.

    Side effect: replaces ``st.session_state.analyzed_data`` with a new
    DataFrame containing the extra row.
    """
    data = analysis.dict()
    # FIX: removed dead writes -- the old code stuffed 'Job Role',
    # 'Resume Score' and 'Shortlisted' into `data` but the row below is
    # built from job_role/score directly and 'Resume Score' was never read.
    # List fields are flattened and possibly-None fields normalized to
    # strings so the DataFrame stays uniformly typed.
    row = {
        'Name': data['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data['email'],
        'Phone': data['phone'],
        'Shortlisted': 'No',  # default; toggled later in the admin editor
        'Experience Summary': data['experience_summary'],
        'Education Summary': data['education_summary'],
        'Communication Rating (1-10)': str(data['communication_skills'] or 'N/A'),
        'Skills/Technologies': ", ".join(data['technical_skills']),
        'Certifications': ", ".join(data['certifications']),
        'ABA Skills (1-10)': str(data['aba_therapy_skills'] or 'N/A'),
        'RBT/BCBA Cert': str(data['rbt_bcba_certification'] or 'N/A'),
        'Autism-Care Exp (1-10)': str(data['autism_care_experience_score'] or 'N/A'),
    }
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])], ignore_index=True
    )
# --- 4. APP LAYOUT AND LOGIC ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
# Two top-level tabs: the public uploader and the password-gated dashboard.
# NOTE(review): several emoji in these labels look mojibake'd (e.g. "πŸ‘€");
# they are kept byte-for-byte here -- confirm the source file's encoding.
tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    # key='selected_role' also exposes the choice to calculate_resume_score
    # via st.session_state.get('selected_role').
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )
    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )
    if st.button("πŸš€ Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)
            # Reset the per-run summary shown below the button.
            st.session_state.individual_analysis = []
            with st.status("Processing Resumes...", expanded=True) as status_box:
                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")
                    resume_text = extract_text_from_file(file)
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue
                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    # The sentinel name marks an API/validation failure.
                    if analysis.name == "Extraction Failed":
                        st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        continue
                    score = calculate_resume_score(analysis)
                    append_analysis_to_dataframe(selected_role, analysis, score)
                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })
                    progress_bar.progress((i + 1) / total_files)
                status_box.update(label="Analysis Complete!", state="complete", expanded=False)
            # NOTE(review): reports total_files even when some files were
            # skipped above -- confirm whether a skipped count is wanted.
            st.success(f"**βœ… Successfully analyzed {total_files} resumes.**")
    # NOTE(review): indentation reconstructed from a flattened paste -- this
    # summary is placed at tab level (rendered on every rerun), not only
    # immediately after the Analyze click; confirm the intended scope.
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")
# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:
    # Login gate: show the form and halt rendering until authenticated.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("πŸ”‘ Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # st.stop() keeps the dashboard below from rendering while logged
        # out. NOTE(review): indentation reconstructed from a flattened
        # paste -- confirm st.stop() sits inside the not-logged-in branch.
        st.stop()
    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")
    if st.button("πŸšͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()
    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")
        # Condensed, editable view; 'Shortlisted' is the only field meant to
        # be changed (Yes/No dropdown).
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        # Write the edited shortlist column back into the full table;
        # pandas aligns the assignment by index, which the editor preserves.
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']
        st.markdown("---")
        st.subheader("πŸ“₯ Download Data")
        df_export = st.session_state.analyzed_data.copy()
        # Build the .xlsx entirely in memory (requires the openpyxl engine).
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)
        st.download_button(
            label="πŸ’Ύ Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
# --- End of src/streamlit_app.py ---