meesamraza's picture
Update app.py
b9548de verified
raw
history blame
11.6 kB
# app.py
"""
Quantum Scrutiny Platform — Groq-Powered Resume Analyzer
Fully updated + cleaned single-file Streamlit application
"""
import os
import io
import re
import json
import base64
import traceback
from typing import Optional, List
# Env
from dotenv import load_dotenv
load_dotenv()
import streamlit as st
import pandas as pd
# File parsing
import fitz # PyMuPDF
from docx import Document
# Groq client
from groq import Groq
# Validation
from pydantic import BaseModel, Field, ValidationError
# ---------------------------------------------------------
# Page config
# ---------------------------------------------------------
st.set_page_config(
    page_title="Quantum Scrutiny Platform",
    layout="wide"
)

# ---------------------------------------------------------
# Secrets
# ---------------------------------------------------------
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): the "admin" fallback is a weak default suitable for demos
# only — set ADMIN_PASSWORD in the environment for any real deployment.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Initialized lazily: stays None when no key is configured so the rest of
# the app can degrade gracefully instead of crashing at import time.
groq_client = None
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
else:
    st.warning("GROQ_API_KEY not found — model calls disabled.")
# ---------------------------------------------------------
# Session State
# ---------------------------------------------------------
# Columns of the accumulated results table shown on the admin dashboard.
_RESULT_COLUMNS = [
    'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
    'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
    'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
    'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
]

# Seed session-state keys once per browser session; reruns keep the values.
for _key, _default in (
    ('is_admin_logged_in', False),
    ('run_analysis', False),
    ('individual_analysis', []),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
if 'analyzed_data' not in st.session_state:
    st.session_state.analyzed_data = pd.DataFrame(columns=_RESULT_COLUMNS)
# ---------------------------------------------------------
# Pydantic Schema
# ---------------------------------------------------------
class ResumeAnalysis(BaseModel):
    """Validated shape of the JSON object the Groq model is asked to return.

    Every field has a safe default so a partially-filled model response
    still validates instead of failing the whole analysis.
    """
    name: str = Field(default="Unknown")
    email: str = Field(default="")
    phone: str = Field(default="")
    certifications: List[str] = Field(default_factory=list)
    experience_summary: str = Field(default="")
    education_summary: str = Field(default="")
    communication_skills: str = Field(default="N/A")  # free-form; a rating is parsed out downstream
    technical_skills: List[str] = Field(default_factory=list)
    # Therapist-role-only fields; the prompt sets these to "N/A" otherwise.
    aba_therapy_skills: Optional[str] = Field(default="N/A")
    rbt_bcba_certification: Optional[str] = Field(default="N/A")
    autism_care_experience_score: Optional[str] = Field(default="N/A")
# ---------------------------------------------------------
# Text Extraction
# ---------------------------------------------------------
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or plain-text file.

    Detection order: PDF (by extension or the %PDF- magic bytes), DOCX
    (by extension), then a UTF-8 decode fallback with bad bytes ignored.
    Returns "" on any failure so callers can skip unreadable files
    instead of crashing mid-batch.
    """
    try:
        content = uploaded_file.read()
        name = uploaded_file.name.lower()

        # PDF: trust either the extension or the file's magic number.
        if name.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "".join(page.get_text() for page in doc).strip()
            except Exception:
                return ""

        # DOCX: paragraph text only (tables/headers are not extracted).
        if name.endswith(".docx"):
            try:
                doc = Document(io.BytesIO(content))
                return "\n".join(p.text for p in doc.paragraphs).strip()
            except Exception:
                return ""

        # Fallback: treat anything else as UTF-8 text.
        return content.decode("utf-8", errors="ignore")
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still propagate.
        return ""
# ---------------------------------------------------------
# Groq Streaming Wrapper
# ---------------------------------------------------------
def call_groq_stream_collect(prompt: str) -> Optional[str]:
    """Send *prompt* to the Groq chat API and collect the streamed reply.

    Returns the full concatenated completion text, or None when the client
    is unavailable or the API call fails (an error is surfaced in the UI).
    """
    if not groq_client:
        st.error("Groq client not initialized.")
        return None
    try:
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": "You are an AI resume analyzer."},
                {"role": "user", "content": prompt}
            ],
            stream=True,
            temperature=0.0,  # deterministic output helps JSON parsing & caching
            max_completion_tokens=2048
        )
        pieces = []
        for chunk in completion:
            # Narrowed from a bare `except:`; a malformed chunk is skipped,
            # not allowed to abort the whole stream.
            try:
                delta = getattr(chunk.choices[0].delta, "content", None)
                if delta:
                    pieces.append(delta)
            except Exception:
                pass
        return "".join(pieces)
    except Exception as e:
        st.error(f"Groq API error: {e}")
        return None
# ---------------------------------------------------------
# JSON Extraction
# ---------------------------------------------------------
def extract_first_json(text: str):
    """Extract and parse the first {...} span found in *text*.

    The model is asked to return only JSON, but may wrap it in prose or
    emit single-quoted pseudo-JSON. Returns the parsed object, or None
    when nothing parseable can be recovered.
    """
    if not text:
        return None
    # Greedy match from the first "{" to the last "}" in the response.
    match = re.search(r"\{[\s\S]*\}", text)
    if not match:
        return None
    raw_json = match.group(0)
    try:
        return json.loads(raw_json)
    except ValueError:  # json.JSONDecodeError subclasses ValueError
        # Last-ditch repair for single-quoted output; breaks on apostrophes
        # inside values, so it is strictly a fallback.
        try:
            return json.loads(raw_json.replace("'", '"'))
        except ValueError:
            return None
# ---------------------------------------------------------
# Cached Analysis
# ---------------------------------------------------------
@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Run the Groq extraction prompt for one resume and validate the result.

    Cached by (resume_text, job_role) so re-analyzing the same file is free.
    Returns a ResumeAnalysis with name="Extraction Failed" when the model
    output cannot be parsed or validated, rather than raising.
    """
    # Therapist roles get extra clinical fields; everything else gets "N/A".
    therapist_instruction = (
        "If role is Therapist, extract ABA skills, BCBA/RBT, and Autism-care scores."
        if job_role.lower() == "therapist" else
        "For non-therapist roles, set therapist fields to 'N/A'."
    )
    prompt = f"""
Return a JSON object with keys:
name, email, phone, certifications, experience_summary,
education_summary, communication_skills, technical_skills,
aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score.
{therapist_instruction}
Resume Text:
{resume_text}
Return only JSON.
"""
    raw = call_groq_stream_collect(prompt)
    parsed = extract_first_json(raw)
    if not parsed:
        return ResumeAnalysis(name="Extraction Failed")
    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError:
        # Narrowed from a bare `except:` — only schema failures are expected here.
        return ResumeAnalysis(name="Extraction Failed")
# ---------------------------------------------------------
# Scoring
# ---------------------------------------------------------
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    """Heuristic 0-100 resume score.

    Weights: experience-summary length 40, technical-skill count 30,
    communication rating 20, certifications up to 10, plus a therapist
    bonus of up to 10. The total is clamped to 100.
    """
    score = 0
    # Experience length (40): saturates at 100 characters of summary.
    score += min(len(analysis.experience_summary) / 100, 1) * 40
    # Skills count (30): saturates at 10 listed skills.
    score += min(len(analysis.technical_skills) / 10, 1) * 30
    # Communication (20): first number found in the free-form rating; 5 if none.
    try:
        c = float(re.findall(r"\d+", analysis.communication_skills)[0])
    except (IndexError, ValueError, TypeError):
        c = 5
    score += (min(c, 10) / 10) * 20
    # Certifications (10): one point each, capped at 10.
    score += min(len(analysis.certifications), 10)
    # Therapist bonus (10): fields may be "N/A" or None, hence the broad-ish guard.
    if role.lower() == "therapist":
        try:
            aba = float(re.findall(r"\d+", analysis.aba_therapy_skills)[0])
            autism = float(re.findall(r"\d+", analysis.autism_care_experience_score)[0])
            score += ((aba + autism) / 20) * 10
        except (IndexError, ValueError, TypeError):
            pass
    return float(round(min(score, 100)))
# ---------------------------------------------------------
# Add Row
# ---------------------------------------------------------
def append_analysis_to_dataframe(role, analysis: ResumeAnalysis, score: float):
    """Append one analyzed resume as a new row of the session results table."""
    row = [
        analysis.name,
        role,
        score,
        analysis.email,
        analysis.phone,
        "No",  # Shortlisted flag always starts unset
        analysis.experience_summary,
        analysis.education_summary,
        analysis.communication_skills,
        ", ".join(analysis.technical_skills),
        ", ".join(analysis.certifications),
        analysis.aba_therapy_skills,
        analysis.rbt_bcba_certification,
        analysis.autism_care_experience_score,
    ]
    df = st.session_state.analyzed_data
    df.loc[len(df)] = row
    st.session_state.analyzed_data = df
# ---------------------------------------------------------
# Excel Export
# ---------------------------------------------------------
def df_to_excel_bytes(df):
    """Serialize *df* to an in-memory .xlsx workbook and return its bytes."""
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis")
    return buffer.getvalue()
# ---------------------------------------------------------
# UI
# ---------------------------------------------------------
# Page title and top-level tabs. Labels previously contained mojibake
# (cp1252-decoded UTF-8); restored to the intended emoji/em-dash.
st.title("🌌 Quantum Scrutiny Platform — AI Resume Analyzer")
tab_user, tab_admin = st.tabs([
    "👀 User Resume Panel",
    "🔒 Admin Dashboard"
])
# ---------------------------------------------------------
# USER PANEL
# ---------------------------------------------------------
with tab_user:
    st.header("Upload Resumes")
    job_role = st.selectbox(
        "Select Job Role",
        ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    )
    files = st.file_uploader(
        "Upload PDF or DOCX",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )
    # Two-phase flow: the button only arms `run_analysis` and reruns; the
    # actual work happens on the next script run so the progress UI renders
    # in a clean pass. (Button label mojibake restored to the rocket emoji.)
    if st.button("🚀 Analyze All"):
        if not files:
            st.warning("Upload at least one file.")
        else:
            st.session_state.run_analysis = True
            st.rerun()
    if st.session_state.run_analysis:
        if not files:
            st.error("No files found.")
            st.session_state.run_analysis = False
        else:
            total = len(files)
            progress = st.progress(0)
            for i, f in enumerate(files, 1):
                st.write(f"Analyzing **{f.name}**...")
                text = extract_text_from_file(f)
                if not text:
                    # Unreadable file: report, advance the bar, move on.
                    st.error(f"Could not extract text from {f.name}. Skipped.")
                    progress.progress(i / total)
                    continue
                analysis = analyze_resume_with_groq_cached(text, job_role)
                score = calculate_resume_score(analysis, job_role)
                append_analysis_to_dataframe(job_role, analysis, score)
                progress.progress(i / total)
            st.success("All files processed!")
            st.session_state.run_analysis = False
# ---------------------------------------------------------
# ADMIN PANEL
# ---------------------------------------------------------
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        pwd = st.text_input("Admin Password", type="password")
        if st.button("Login"):
            if pwd == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
    else:
        st.subheader("Admin Dashboard — Analyzed Data")
        df = st.session_state.analyzed_data
        st.dataframe(df, use_container_width=True)
        # BUG FIX: st.download_button was previously nested inside an
        # st.button("Download Excel") branch. A widget rendered only inside
        # a button's one-rerun True branch disappears on the next rerun, so
        # the download could never complete. Render it directly instead.
        st.download_button(
            label="Download Excel",
            data=df_to_excel_bytes(df),
            file_name="resume_analysis.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
        if st.button("Clear Database"):
            # Keep the schema, drop all rows.
            st.session_state.analyzed_data = st.session_state.analyzed_data.iloc[0:0]
            st.success("Cleared.")