|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import io |
|
|
import os |
|
|
import fitz |
|
|
import docx2txt |
|
|
import tempfile |
|
|
from groq import Groq |
|
|
from dotenv import load_dotenv |
|
|
from pydantic import BaseModel, Field, ValidationError |
|
|
from typing import Optional, List |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- App configuration & Groq client bootstrap (runs once per script execution) ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# Load variables from a local .env file, if present (no-op otherwise).
load_dotenv()

# Accept either spelling of the env var; fall back to Streamlit secrets.
GROQ_API_KEY = os.getenv("GROQ_API_KEY") or os.getenv("GROQ_APIKEY")
if not GROQ_API_KEY:
    try:
        GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
    except Exception:
        # No secrets file / key not configured — app will run in fallback mode.
        GROQ_API_KEY = None

# NOTE(review): default password "admin" is insecure — set ADMIN_PASSWORD in deployment.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Build the Groq client; any failure downgrades the app to the heuristic extractor.
groq_client = None
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.warning(f"Warning: Failed to initialize Groq client: {e}")
        groq_client = None
else:
    st.warning("GROQ_API_KEY not found in environment or Streamlit secrets. The app will run in fallback mode.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Session-state initialisation (persists across reruns within a browser session) ---
# Whether the admin has authenticated in the Admin tab.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False

# Accumulated per-candidate results displayed/edited on the admin dashboard.
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from a single resume.

    Rating-style fields are deliberately modelled as STRINGS (e.g. '8'),
    because the LLM is instructed to return them that way; downstream
    scoring code parses them back to numbers.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(default="", description="A concise summary of the candidate's highest education.")

    communication_skills: str = Field(default="N/A", description="A score as a STRING (e.g., '8') or description of communication skills.")
    technical_skills: List[str] = Field(default_factory=list, description="List of technical skills/technologies mentioned.")

    # Therapist-role-specific fields; remain 'N/A' for non-therapist roles.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7').")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9').")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded resume file.

    The upload is written to a temporary file so that libraries requiring a
    real filesystem path (PyMuPDF, docx2txt) can read it; the temp file is
    always removed afterwards.

    Args:
        uploaded_file: A Streamlit UploadedFile (exposes .name and .read()).

    Returns:
        The extracted text, or "" when the type is unsupported or extraction fails.
    """
    try:
        suffix = os.path.splitext(uploaded_file.name)[1].lower()
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            tmp_path = tmp.name

        text = ""
        try:
            if suffix == '.pdf':
                try:
                    # PyMuPDF: concatenate the text of every page.
                    doc = fitz.open(tmp_path)
                    for page in doc:
                        text += page.get_text()
                    doc.close()
                except Exception as e:
                    st.error(f"PDF extraction error for {uploaded_file.name}: {e}")
                    text = ""
            elif suffix in ['.docx', '.doc']:
                # NOTE(review): docx2txt only parses .docx; legacy .doc files will
                # likely land in the except branch — confirm if .doc support matters.
                try:
                    text = docx2txt.process(tmp_path) or ""
                except Exception as e:
                    st.error(f"DOCX extraction error for {uploaded_file.name}: {e}")
                    text = ""
            else:
                st.warning(f"Unsupported file type: {suffix}")
        finally:
            # BUG FIX: cleanup moved into a finally block — the original only
            # unlinked on the happy path, leaking the temp file if extraction
            # raised something unexpected.
            try:
                os.unlink(tmp_path)
            except Exception:
                pass

        return text
    except Exception as e:
        st.error(f"Failed to extract text: {e}")
        return ""
|
|
|
|
|
|
|
|
@st.cache_data
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Extract structured resume data via the Groq chat-completions API.

    Falls back to `fallback_simple_extraction` when the Groq client is not
    configured, the API call fails, or the response does not validate against
    `ResumeAnalysis`. Results are cached by (resume_text, job_role) through
    st.cache_data, so identical resumes are only analyzed once per session.
    """
    if not groq_client:
        return fallback_simple_extraction(resume_text, job_role)

    # Role-specific guidance: therapist roles get the ABA/autism fields scored.
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', carefully look for ABA Therapy Skills, RBT/BCBA Certification, "
            "and Autism-Care Experience. Provide a score from 1-10 as a STRING (e.g., '7') for these fields. "
            "If not found, return 'N/A'."
        )
    else:
        therapist_instructions = (
            "Since the role is not 'Therapist', set specialized therapist fields to 'N/A' if not present."
        )

    system_prompt = (
        f"You are a professional Resume Analyzer. Extract fields exactly matching the JSON schema: name, email, phone, certifications (list), "
        f"experience_summary, education_summary, communication_skills (STRING), technical_skills (list), aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score. "
        f"The candidate is applying for '{job_role}'. {therapist_instructions} Return valid JSON only."
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # BUG FIX: the Groq SDK's create() takes `response_format`, not
            # `response_model` — the original kwarg raised TypeError on every
            # call and silently forced the heuristic fallback path.
            response_format={"type": "json_object"},
            temperature=0.0
        )

        # Pull the raw JSON payload out of the completion defensively.
        try:
            raw = chat_completion.choices[0].message.content
        except Exception:
            raw = str(chat_completion)

        # Validate against the schema; invalid output degrades to the heuristic path.
        try:
            analysis = ResumeAnalysis.parse_raw(raw)
        except ValidationError as ve:
            st.warning(f"Groq returned invalid format; falling back to heuristic extraction. Details: {ve}")
            return fallback_simple_extraction(resume_text, job_role)

        # Normalise rating fields to non-empty strings for dashboard display.
        analysis.communication_skills = str(analysis.communication_skills or 'N/A')
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')

        return analysis

    except Exception as e:
        st.warning(f"Groq API call failed: {e}. Using fallback extraction.")
        return fallback_simple_extraction(resume_text, job_role)
|
|
|
|
|
|
|
|
def fallback_simple_extraction(text: str, job_role: str) -> ResumeAnalysis:
    """Heuristic extractor used when the LLM is unavailable or fails.

    Scans the raw text for an email address, a phone number, a likely name
    near the top of the document, and a small fixed set of skill and
    certification keywords.
    """
    import re

    # Contact details via simple regexes.
    email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
    phone_match = re.search(r"(\+?\d[\d\-\s]{7,}\d)", text)
    email = email_match.group(0) if email_match else ""
    phone = phone_match.group(0) if phone_match else ""

    # Name guess: first short, capitalised line among the top five non-blank lines.
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    name = next(
        (ln for ln in lines[:5]
         if len(ln.split()) <= 4 and any(ch.isalpha() for ch in ln) and ln[0].isupper()),
        "Unknown",
    )

    # Keyword scan for skills; RBT/BCBA hits double as certifications.
    keywords = ['python','java','c++','machine learning','deep learning','tensorflow','pytorch','rbt','bcba','aba','autism']
    lower_text = text.lower()
    skills_candidates = [kw for kw in keywords if kw in lower_text]
    certifications = [kw.upper() for kw in skills_candidates if kw in ('rbt', 'bcba')]

    # Crude summaries: first few lines stand in for experience; education unknown.
    experience_summary = ' '.join(lines[:4]) if lines else ''
    education_summary = ''

    has_rbt = 'rbt' in lower_text or 'registered behavior technician' in lower_text

    return ResumeAnalysis(
        name=name,
        email=email,
        phone=phone,
        certifications=certifications,
        experience_summary=experience_summary,
        education_summary=education_summary,
        communication_skills='5',
        technical_skills=list(set(skills_candidates)),
        aba_therapy_skills='N/A',
        rbt_bcba_certification='Yes' if has_rbt else 'N/A',
        autism_care_experience_score='N/A'
    )
|
|
|
|
|
|
|
|
def calculate_resume_score(analysis: "ResumeAnalysis", job_role: Optional[str] = None) -> float:
    """Calculate a weighted resume score out of 100.

    Weights: experience-summary length up to 40, technical skills up to 30,
    communication rating up to 20, certifications up to 10 bonus points, and
    (for the 'Therapist' role) up to 10 extra points from the ABA and
    autism-care scores. The sum is capped at 100 and rounded.

    Args:
        analysis: Extracted resume fields (only attribute access is used).
        job_role: Target role. When None (backward-compatible default) the
            role is read from st.session_state['selected_role'], preserving
            the original behaviour for existing callers.

    Returns:
        The rounded score as a float in [0, 100].
    """
    # Resolve the role without forcing every caller to depend on Streamlit state.
    role = job_role if job_role is not None else st.session_state.get('selected_role')

    total_score = 0.0

    # Experience: saturates at 100 characters of summary -> 40 points.
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0

    # Skills: saturates at 10 listed technologies -> 30 points.
    total_score += min(len(analysis.technical_skills) / 10.0, 1.0) * 30.0

    # Communication: parse a leading number (handles forms like "8" or "8-9");
    # default to a neutral 5 when unparseable.
    try:
        comm_rating = float(str(analysis.communication_skills).split('-')[0].strip())
    except Exception:
        comm_rating = 5.0
    total_score += (comm_rating / 10.0) * 20.0

    # Certifications: one point each, capped at 10.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # Therapist roles: up to 10 extra points from ABA + autism-care scores.
    if role == 'Therapist':
        try:
            aba = float(str(analysis.aba_therapy_skills)) if str(analysis.aba_therapy_skills).upper() not in ['N/A', 'NONE', ''] else 0.0
            autism = float(str(analysis.autism_care_experience_score)) if str(analysis.autism_care_experience_score).upper() not in ['N/A', 'NONE', ''] else 0.0
            total_score += ((aba + autism) / 20.0) * 10.0
        except Exception:
            # Non-numeric specialist scores simply contribute nothing.
            pass

    return float(round(min(total_score, 100)))
|
|
|
|
|
|
|
|
def append_analysis_to_dataframe(job_role: str, analysis: "ResumeAnalysis", score: float):
    """Append one candidate's analysis as a new row of st.session_state.analyzed_data.

    Args:
        job_role: The role the candidate applied for.
        analysis: Structured extraction result for this candidate.
        score: Overall resume score out of 100.
    """
    data = analysis.dict()
    # CLEANUP: the original also wrote 'Job Role', 'Resume Score' and
    # 'Shortlisted' into `data`, but those values were never read back —
    # df_data below is built from job_role/score/literals directly.

    # Flatten list fields into comma-separated strings for tabular display.
    technical_skills_list = ", ".join(data.get('technical_skills', []))
    certifications_list = ", ".join(data.get('certifications', []))

    df_data = {
        'Name': data.get('name', ''),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get('email', ''),
        'Phone': data.get('phone', ''),
        # New rows always start unshortlisted; the admin flips this in the dashboard.
        'Shortlisted': 'No',
        'Experience Summary': data.get('experience_summary', ''),
        'Education Summary': data.get('education_summary', ''),
        'Communication Rating (1-10)': str(data.get('communication_skills', 'N/A')),
        'Skills/Technologies': technical_skills_list,
        'Certifications': certifications_list,
        'ABA Skills (1-10)': str(data.get('aba_therapy_skills', 'N/A')),
        'RBT/BCBA Cert': str(data.get('rbt_bcba_certification', 'N/A')),
        'Autism-Care Exp (1-10)': str(data.get('autism_care_experience_score', 'N/A')),
    }

    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- UI: page header and tab layout ---
st.title("π Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["π€ Resume Uploader (User Panel)", "π Admin Dashboard (Password Protected)"])

# --- User panel: upload resumes, run the analysis pipeline, show per-file summary ---
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key data. If the API key is missing, a fallback heuristic extractor will run.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    # key='selected_role' mirrors the selection into st.session_state for the scorer.
    selected_role = st.selectbox("**1. Select the Target Job Role**", options=job_role_options, key='selected_role')

    uploaded_files = st.file_uploader("**2. Upload Resumes** (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    if st.button("π Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0.0)
            # Reset the per-run summary each time the button is clicked.
            st.session_state.individual_analysis = []

            with st.spinner("Processing resumes..."):
                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")

                    # Pipeline: extract text -> LLM/heuristic analysis -> score -> store.
                    resume_text = extract_text_from_file(file)
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue

                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    # Defensive sentinel check; no current code path sets this name.
                    if isinstance(analysis, ResumeAnalysis) and analysis.name == "Extraction Failed":
                        st.error(f"Extraction failed for {file_name}. Skipping.")
                        continue

                    score = calculate_resume_score(analysis)
                    append_analysis_to_dataframe(selected_role, analysis, score)

                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })

                    progress_bar.progress((i + 1) / total_files)

            # NOTE(review): the leading "β" appears to be a mangled "✅" emoji (the
            # original string was broken across a line) — confirm source encoding.
            st.success(f"**β Successfully processed {len(st.session_state.individual_analysis)} / {total_files} resumes.**")

    # Summary of the most recent analysis run, if any.
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")
|
|
|
|
|
# --- Admin panel: login gate, editable shortlist dashboard, Excel export ---
with tab_admin:
    # Login gate: everything below st.stop() only renders once authenticated.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("π Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                # NOTE(review): st.experimental_rerun() was removed in Streamlit >= 1.30
                # (replaced by st.rerun()) — confirm the pinned Streamlit version.
                st.experimental_rerun()
            else:
                st.error("Incorrect password.")
        st.stop()

    st.header("π― Recruitment Dashboard")
    st.markdown("---")

    if st.button("πͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.experimental_rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Show a trimmed column set; the full frame stays in session state for export.
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist edited shortlist flags; relies on index alignment between the
        # edited view and the stored frame (both derive from the same index).
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")
        st.subheader("π₯ Download Data")

        # Serialise the full frame to an in-memory .xlsx for the download button.
        df_export = st.session_state.analyzed_data.copy()
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)

        st.download_button(
            label="πΎ Download All Data as Excel (.xlsx)",
            data=excel_buffer.getvalue(),
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
|
|
|
|
|
|
|
|
|