# Hugging Face Spaces page residue (not code) — kept as comments so the file parses:
# meesamraza's picture
# Update app.py
# 80e6947 verified
# raw
# history blame
# 22.9 kB
# app.py
"""
Quantum Scrutiny Platform | Groq-Powered
Single-file Streamlit app (refactored & debugged)
"""
# --- 0. Always set page config as the first Streamlit command ---
import os
from dotenv import load_dotenv
load_dotenv() # load local .env if present (during local dev)
import io
import base64
import traceback
from typing import Optional, List
import streamlit as st
import pandas as pd
# resume parsing
import fitz # PyMuPDF
from docx import Document # python-docx
# Groq client (keep same import name as you used)
from groq import Groq
# Pydantic for schema validation
from pydantic import BaseModel, Field, ValidationError
# --- Streamlit UI config ---
# Must run before any other Streamlit call in the script.
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # set in environment or .env or deploy secrets
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")  # optional override via env; insecure default "admin"
# --- Initialize Groq client with safe error messaging ---
# groq_client stays None when the key is absent or init fails; downstream
# calls check for None and surface an error instead of crashing the UI.
groq_client = None
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY not found. Please set GROQ_API_KEY as an environment variable or in Hugging Face secrets.")
    # We won't stop here to allow UI to display, but analysis will error if used.
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
        groq_client = None
# --- Session state defaults ---
# is_admin_logged_in: gates rendering of the admin dashboard tab.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
# analyzed_data: accumulating table of every candidate analyzed this session.
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
# individual_analysis: per-file summaries from the most recent analysis run.
if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []
# run_analysis: flag set by the Analyze button; processing happens after rerun.
if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False
# --- Pydantic schema for Groq output ---
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from a resume by the LLM.

    Score-like fields are deliberately strings (e.g. '8' or 'N/A') because the
    model is prompted to return them as strings; numeric parsing happens later
    in calculate_resume_score.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="Concise summary of experience.")
    education_summary: str = Field(default="", description="Concise summary of education.")
    communication_skills: str = Field(description="Communication score as a STRING ('8') or description.")
    technical_skills: List[str] = Field(default_factory=list, description="List of skills/technologies.")
    # Therapist-role-only fields; "N/A" for every other role.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="ABA Therapy score as STRING or 'N/A'.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="'Yes'/'No'/'N/A'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="Autism care experience score as STRING or 'N/A'.")
# --- Helper: File text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """
    Extract plain text from an uploaded resume file.

    Accepts a Streamlit ``UploadedFile`` (or any object exposing ``.name`` and
    ``.read()``) and returns the extracted text. PDFs are parsed with PyMuPDF,
    DOCX with python-docx; anything else is decoded leniently as UTF-8.
    Returns an empty string on any extraction failure.
    """
    try:
        # Rewind before reading: Streamlit reruns can hand us an
        # already-consumed buffer, in which case read() would return b""
        # and every resume would be silently skipped.
        try:
            uploaded_file.seek(0)
        except Exception:
            pass  # not seekable; fall through to a plain read
        content = uploaded_file.read()
        name_lower = uploaded_file.name.lower()
        # Detect PDFs by extension or by the %PDF- magic header.
        if name_lower.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "\n".join(page.get_text() for page in doc).strip()
            except Exception as e:
                st.warning(f"PDF extraction issue for {uploaded_file.name}: {e}")
                return ""
        elif name_lower.endswith(".docx"):
            try:
                # python-docx accepts a file-like object.
                doc = Document(io.BytesIO(content))
                paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(paragraphs).strip()
            except Exception as e:
                st.warning(f"DOCX extraction issue for {uploaded_file.name}: {e}")
                return ""
        else:
            # Fall back to a lenient UTF-8 decode for text-like files.
            try:
                return content.decode('utf-8', errors='ignore')
            except Exception:
                return ""
    except Exception as e:
        st.error(f"Unexpected file extraction error: {e}")
        return ""
# --- Helper: call Groq (safe wrapper) ---
def call_groq_chat_system(resume_text: str, job_role: str) -> Optional[str]:
    """
    Send the resume text to the Groq chat-completion endpoint.

    Returns the raw model response text, or None when the client is not
    configured or the API call fails (errors are surfaced via Streamlit).
    """
    if not groq_client:
        st.error("Groq client is not initialized. Set GROQ_API_KEY in environment or secrets.")
        return None

    # Role-specific guidance appended to the system prompt.
    if job_role == "Therapist":
        role_guidance = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide a score from 1-10 as a STRING "
            "(e.g., '7') for the specialized fields. If any specialized field is not present, return 'N/A'."
        )
    else:
        role_guidance = (
            "This is NOT a Therapist role. Set 'aba_therapy_skills', 'autism_care_experience_score', "
            "and 'rbt_bcba_certification' to 'N/A' if not applicable."
        )

    system_prompt = (
        "You are a professional Resume Analyzer. Extract the requested fields and return a strict JSON object "
        "matching the schema: name, email, phone, certifications (array), experience_summary, education_summary, "
        "communication_skills (AS A STRING, e.g., '8'), technical_skills (array), aba_therapy_skills, "
        "rbt_bcba_certification, autism_care_experience_score. " + role_guidance
    )
    user_prompt = f"Analyze the following resume text and return a JSON object:\n\n---\n{resume_text}\n---\nReturn only valid JSON."

    try:
        result = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768",  # keep your original model choice; adapt if needed
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.0,
            max_tokens=2000,
            # Not all Groq SDK versions support response_model in the same way; we parse manually below.
        )
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        st.exception(e)
        return None

    # SDK versions differ in response shape: attribute access first, then
    # mapping-style access, then fall back to the stringified response.
    try:
        return result.choices[0].message.content
    except Exception:
        try:
            return result["choices"][0]["message"]["content"]
        except Exception:
            return str(result)
# --- Cached wrapper for analysis (cache by resume_text + role) ---
def _failed_analysis() -> "ResumeAnalysis":
    """Build the sentinel ResumeAnalysis returned on any extraction failure."""
    return ResumeAnalysis(
        name="Extraction Failed",
        email="",
        phone="",
        certifications=[],
        experience_summary="",
        education_summary="",
        communication_skills="N/A",
        technical_skills=[],
        aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A",
        autism_care_experience_score="N/A"
    )


@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Call Groq on the resume text and return a validated ResumeAnalysis.

    Cached on (resume_text, job_role) to avoid repeated API calls for
    identical inputs. On any failure (API error, unparseable model output,
    schema validation error) a sentinel object with name "Extraction Failed"
    is returned instead of raising.
    """
    import json
    import re
    import ast

    raw_response = call_groq_chat_system(resume_text, job_role)
    if not raw_response:
        return _failed_analysis()

    # The model may wrap its JSON in commentary; grab the first {...} span.
    match = re.search(r"(\{.*\})", raw_response, re.DOTALL)
    json_text = match.group(1) if match else raw_response
    try:
        parsed = json.loads(json_text)
    except Exception:
        # Forgiving fallback: the model occasionally emits a Python-dict-like
        # literal instead of strict JSON. ast.literal_eval only evaluates
        # literals, so — unlike eval() — untrusted model output cannot
        # execute code.
        try:
            parsed = ast.literal_eval(json_text)
            if not isinstance(parsed, dict):
                raise ValueError("Parsed non-dict from model response fallback.")
        except Exception:
            st.warning("Failed to parse Groq output as JSON. Returning fallback extraction.")
            st.text_area("Raw model output (for debugging)", raw_response, height=200)
            return _failed_analysis()

    try:
        # Normalize before validation so minor schema drift doesn't fail.
        parsed.setdefault("certifications", [])
        parsed.setdefault("technical_skills", [])
        # communication_skills must be a string per the schema.
        if parsed.get("communication_skills") is not None:
            parsed["communication_skills"] = str(parsed["communication_skills"])
        else:
            parsed["communication_skills"] = "N/A"
        # Therapist-specific fields default to "N/A" when missing or null.
        for k in ("aba_therapy_skills", "rbt_bcba_certification", "autism_care_experience_score"):
            parsed[k] = "N/A" if parsed.get(k) is None else str(parsed[k])
        analysis = ResumeAnalysis.parse_obj(parsed)
        # Final coercions to guarantee string types on score-like fields.
        analysis.communication_skills = str(analysis.communication_skills or "N/A")
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or "N/A")
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or "N/A")
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or "N/A")
        return analysis
    except ValidationError as ve:
        st.error("Model output failed schema validation. Returning fallback object.")
        st.text_area("Model raw response (for debugging)", raw_response, height=200)
        st.exception(ve)
        return _failed_analysis()
    except Exception as e:
        st.error("Unexpected error while validating model output.")
        st.exception(e)
        return _failed_analysis()
# --- Scoring function ---
def calculate_resume_score(analysis: "ResumeAnalysis", role: str) -> float:
    """
    Compute a 0-100 resume score from the extracted analysis.

    Weighting: experience-summary length up to 40 points, skills count up to
    30, communication rating up to 20, certifications up to 10, plus a
    Therapist-specialization bonus of up to 10. The total is clamped to 100
    and rounded to a whole number (returned as float).
    """
    import re  # hoisted: previously buried in a try block but used in two places

    def _leading_number(value, default=0.0):
        """Extract the first numeric token from a free-form score string.

        Accepts forms like '8', '8/10', '8 - good'; returns `default` when no
        number can be found.
        """
        try:
            m = re.search(r"(\d+(\.\d+)?)", str(value))
            return float(m.group(1)) if m else default
        except Exception:
            return default

    total_score = 0.0
    # Experience summary length -> up to 40 points (100+ chars = full credit).
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0
    # Skills count -> up to 30 points (10+ skills = full credit).
    skills_count = len(analysis.technical_skills or [])
    total_score += min(skills_count / 10.0, 1.0) * 30.0
    # Communication -> up to 20 points; unparseable ratings score a neutral 5.
    comm_val = _leading_number(analysis.communication_skills, default=5.0)
    comm_val = max(0.0, min(10.0, comm_val))
    total_score += (comm_val / 10.0) * 20.0
    # Certifications -> 1 point each, capped at 10.
    total_score += float(min(len(analysis.certifications or []), 10))
    # Therapist-specific bonus: average of ABA + autism-care scores, scaled to 10.
    if role == "Therapist":
        aba = _leading_number(analysis.aba_therapy_skills)
        autism = _leading_number(analysis.autism_care_experience_score)
        total_score += ((aba + autism) / 20.0) * 10.0
    # Clamp to 100 and round to a whole-number score.
    return float(round(min(total_score, 100)))
# --- Append to session DataFrame helper ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Append one candidate's analysis as a new row of the session DataFrame."""
    payload = analysis.dict()

    def _text(key):
        # Missing/empty values render as "" in the table.
        return payload.get(key) or ""

    def _score_text(key):
        # Score-like fields are stored as strings; default to "N/A".
        return str(payload.get(key) or "N/A")

    row = {
        'Name': _text('name'),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': _text('email'),
        'Phone': _text('phone'),
        'Shortlisted': 'No',
        'Experience Summary': _text('experience_summary'),
        'Education Summary': _text('education_summary'),
        'Communication Rating (1-10)': _score_text('communication_skills'),
        'Skills/Technologies': ", ".join(payload.get('technical_skills') or []),
        'Certifications': ", ".join(payload.get('certifications') or []),
        'ABA Skills (1-10)': _score_text('aba_therapy_skills'),
        'RBT/BCBA Cert': _score_text('rbt_bcba_certification'),
        'Autism-Care Exp (1-10)': _score_text('autism_care_experience_score'),
    }
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])],
        ignore_index=True,
    )
# --- Utility: Excel download as BytesIO for st.download_button ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to in-memory .xlsx bytes for st.download_button."""
    buffer = io.BytesIO()
    # The context manager finalizes/saves the workbook on exit.
    with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return buffer.getvalue()
# --- App Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
# -------------------------
# User Panel
# -------------------------
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key fields.")
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
    uploaded_files = st.file_uploader(
        "2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True
    )
    # Analyze button sets a session_state flag and reruns; processing happens
    # on the rerun so widget state is settled.
    if st.button("πŸš€ Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()
    # If run_analysis flag is set, process uploads
    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            # The finally clause advances the progress bar.
                            continue
                        # Cached: identical text+role pairs skip the API call.
                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue
                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # Pass the exception object (not a formatted string) so
                        # Streamlit renders a proper traceback.
                        st.exception(e)
                    finally:
                        progress.progress(idx / total)
            st.success(f"βœ… Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False  # reset flag
    # Show last analysis summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
# -------------------------
# Admin Panel (Password Protected)
# -------------------------
with tab_admin:
    # Gate: render only the login form until the admin authenticates.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("πŸ”‘ Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # stop further admin rendering while not logged in
        st.stop()
    st.header("🎯 Recruitment Dashboard")
    if st.button("πŸšͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()
    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")
        # Only a subset of columns is shown/editable; the full table is kept
        # in session state and exported below.
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        # data_editor with SelectboxColumn for 'Shortlisted'
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        # propagate the 'Shortlisted' edits back to session dataframe
        # NOTE(review): positional assignment — assumes the editor preserves
        # row order relative to the session frame; confirm if sorting is added.
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            # fallback for indexing mismatches
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val
        st.markdown("---")
        st.subheader("πŸ“₯ Download Data")
        # Export the full session table (all columns), including shortlist edits.
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)
        st.download_button(
            label="πŸ’Ύ Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
# --- End of app.py ---