# NOTE: Hugging Face web-UI chrome stripped from the top of this paste
# ("meesamraza's picture / Update app.py / 6e2aa61 verified / raw /
# history blame / 17.2 kB") -- preserved here as a comment so the file parses.
# src/streamlit_app.py
import streamlit as st
import pandas as pd
import io
import os
import fitz
import docx2txt
from groq import Groq
from dotenv import load_dotenv
from pydantic import BaseModel, Field, ValidationError # Added ValidationError
from typing import Optional, List # Added Optional and List
# --- 0. SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# FIX: load a local .env file (if present) before reading the key.
# `load_dotenv` was imported at the top of the file but never called, so the
# ".env file" fallback promised by the error message below could never work.
# On Hugging Face the key still arrives via Secrets (environment variables).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Admin password for the dashboard tab (plain-text by request; not suitable
# for anything security-sensitive).
ADMIN_PASSWORD = "admin"

# Initialize the Groq client, failing fast with a visible error so the rest
# of the app never runs without a working client.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
    st.stop()

# Session-state defaults: admin login flag and the accumulated results table.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---
class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction.

    Score-like fields are deliberately typed as ``str`` rather than ``int``:
    the LLM is prompted to emit scores as strings (e.g. '8'), and the
    analyze function re-coerces the values with ``str()`` after validation
    as a safety net.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(description="List of professional certifications.")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # Kept as str because the model was observed returning an int (8)
    # instead of a str ('8'); the system prompt re-emphasizes string output
    # and the caller coerces the value once more after parsing.
    communication_skills: str = Field(description="A score as a STRING (e.g., '8') or description of communication skills.")
    technical_skills: List[str] = Field(description="List of technical skills/technologies mentioned.")
    # Therapist-only fields: Optional with an 'N/A' default because the model
    # may return null for non-therapist roles; the analyze function maps any
    # remaining None back to 'N/A' before the value is used downstream.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7'). Use 'N/A' if not applicable.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'. Use 'N/A' if not applicable.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9'). Use 'N/A' if not applicable.")
# --- 3. HELPER FUNCTIONS ---
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Any unrecognized MIME type or extraction failure yields an empty
    string; the caller treats '' as "skip this file".
    """
    mime = uploaded_file.type
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    try:
        if mime == "application/pdf":
            # PyMuPDF reads the raw bytes of the upload; concatenate the
            # text of every page.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        if mime == docx_mime:
            return docx2txt.process(uploaded_file)
        return ""
    except Exception as e:
        # Best-effort extraction: log to the server console and signal
        # failure with an empty string rather than crashing the UI.
        print(f"Error extracting text: {e}")
        return ""
def _extraction_failed() -> ResumeAnalysis:
    """Sentinel result returned when the Groq call or validation fails."""
    return ResumeAnalysis(
        name="Extraction Failed", email="", phone="", certifications=[],
        experience_summary="", education_summary="", communication_skills="N/A",
        technical_skills=[], aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A", autism_care_experience_score="N/A"
    )

@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Extract structured candidate data from resume text via Groq.

    Args:
        resume_text: Raw text pulled from the uploaded resume.
        job_role: Target role; 'Therapist' activates the specialized fields.

    Returns:
        A validated ResumeAnalysis, or the 'Extraction Failed' sentinel on
        any API or validation error (the caller checks ``analysis.name``).
    """
    # Role-specific guidance for the specialized therapist fields.
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields. "
            "If any specialized therapist field is not found, you MUST return **null** or **N/A** for that field."
        )
    else:
        # Explicitly ask for null/'N/A' so Optional[str] validates cleanly.
        therapist_instructions = (
            "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to **null** or **N/A**."
        )
    # The JSON schema is embedded directly in the prompt because Groq's JSON
    # mode (`response_format`) guarantees syntactically valid JSON but does
    # not accept a schema parameter.
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Return a JSON object that strictly adheres to the following JSON schema: {ResumeAnalysis.schema_json()}. "
        f"**CRITICAL:** Ensure 'communication_skills' is returned as a **STRING** value, even if it's a number (e.g., \"8\" NOT 8). "
        f"{therapist_instructions}"
    )
    try:
        chat_completion = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # FIX: the Groq SDK parameter is `response_format`; the previous
            # `response_model={...}` kwarg is not accepted by
            # chat.completions.create (it belongs to the `instructor`
            # wrapper) and made every call raise TypeError.
            response_format={"type": "json_object"},
            temperature=0.0
        )
        analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
        # Normalize Optional/None fields to plain strings so scoring and the
        # DataFrame code can treat every field uniformly.
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
        analysis.communication_skills = str(analysis.communication_skills)
        return analysis
    except ValidationError as ve:
        st.error(f"Groq API Validation Error: The model returned incompatible data. Details: {ve}")
        # Safe to reference: ValidationError can only arise from parse_raw,
        # which runs after chat_completion is assigned.
        print(f"Failed JSON: {chat_completion.choices[0].message.content}")
        return _extraction_failed()
    except Exception as e:
        st.error(f"Groq API Error: {e}")
        return _extraction_failed()
def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Compute a weighted 0-100 score for one analyzed candidate.

    Weights: experience 40, technical skills 30, communication 20,
    certifications 10, plus an up-to-10-point therapist-specialization
    bonus. The total is capped at 100 and rounded.
    """
    score = 0.0

    # Experience (max 40): scales with summary length, saturating at 100 chars.
    score += min(len(analysis.experience_summary) / 100.0, 1.0) * 40.0

    # Technical skills (max 30): one "unit" per listed skill, capped at 10.
    score += min(len(analysis.technical_skills) / 10.0, 1.0) * 30.0

    # Communication (max 20): take the leading number from values such as
    # "8 - articulate"; anything unparseable falls back to a neutral 5/10.
    leading = str(analysis.communication_skills).split('-')[0].strip()
    try:
        comm_rating = float(leading)
    except (ValueError, IndexError):
        comm_rating = 5.0
    score += (comm_rating / 10.0) * 20.0

    # Certifications (max 10): one point each, capped.
    score += float(min(len(analysis.certifications), 10))

    # Therapist-only bonus (max 10): average of the two specialized scores.
    if st.session_state.get('selected_role') == "Therapist":
        try:
            aba_raw = str(analysis.aba_therapy_skills)
            autism_raw = str(analysis.autism_care_experience_score)
            aba = 0.0 if aba_raw.upper() in ['N/A', 'NONE'] else float(aba_raw.split('-')[0].strip())
            autism = 0.0 if autism_raw.upper() in ['N/A', 'NONE'] else float(autism_raw.split('-')[0].strip())
            score += ((aba + autism) / 20.0) * 10.0
        except (ValueError, IndexError, TypeError):
            # Matching the original guard: if either specialized value is
            # corrupt, the whole bonus is skipped.
            pass

    return float(round(min(score, 100)))
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Format one analysis result and append it to the session-state table.

    Args:
        job_role: Role the candidate was evaluated against.
        analysis: Validated extraction result from Groq.
        score: Weighted 0-100 score from calculate_resume_score.

    Side effect: replaces ``st.session_state.analyzed_data`` with a new
    DataFrame containing the extra row.
    """
    data = analysis.dict()
    # FIX: removed dead writes -- the old code stuffed 'Job Role',
    # 'Resume Score' and 'Shortlisted' into `data` but the row below is
    # built from job_role/score directly and 'Resume Score' was never read.
    # List fields are flattened and possibly-None fields normalized to
    # strings so the DataFrame stays uniformly typed.
    row = {
        'Name': data['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data['email'],
        'Phone': data['phone'],
        'Shortlisted': 'No',  # default; toggled later in the admin editor
        'Experience Summary': data['experience_summary'],
        'Education Summary': data['education_summary'],
        'Communication Rating (1-10)': str(data['communication_skills'] or 'N/A'),
        'Skills/Technologies': ", ".join(data['technical_skills']),
        'Certifications': ", ".join(data['certifications']),
        'ABA Skills (1-10)': str(data['aba_therapy_skills'] or 'N/A'),
        'RBT/BCBA Cert': str(data['rbt_bcba_certification'] or 'N/A'),
        'Autism-Care Exp (1-10)': str(data['autism_care_experience_score'] or 'N/A'),
    }
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])], ignore_index=True
    )
# --- 4. APP LAYOUT AND LOGIC ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
# Two top-level tabs: the public uploader and the password-gated dashboard.
# NOTE(review): several emoji in these labels look mojibake'd (e.g. "πŸ‘€");
# they are kept byte-for-byte here -- confirm the source file's encoding.
tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    # key='selected_role' also exposes the choice to calculate_resume_score
    # via st.session_state.get('selected_role').
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )
    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )
    if st.button("πŸš€ Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)
            # Reset the per-run summary shown below the button.
            st.session_state.individual_analysis = []
            with st.status("Processing Resumes...", expanded=True) as status_box:
                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")
                    resume_text = extract_text_from_file(file)
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue
                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    # The sentinel name marks an API/validation failure.
                    if analysis.name == "Extraction Failed":
                        st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        continue
                    score = calculate_resume_score(analysis)
                    append_analysis_to_dataframe(selected_role, analysis, score)
                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })
                    progress_bar.progress((i + 1) / total_files)
                status_box.update(label="Analysis Complete!", state="complete", expanded=False)
            # NOTE(review): reports total_files even when some files were
            # skipped above -- confirm whether a skipped count is wanted.
            st.success(f"**βœ… Successfully analyzed {total_files} resumes.**")
    # NOTE(review): indentation reconstructed from a flattened paste -- this
    # summary is placed at tab level (rendered on every rerun), not only
    # immediately after the Analyze click; confirm the intended scope.
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")
# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:
    # Login gate: show the form and halt rendering until authenticated.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("πŸ”‘ Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # st.stop() keeps the dashboard below from rendering while logged
        # out. NOTE(review): indentation reconstructed from a flattened
        # paste -- confirm st.stop() sits inside the not-logged-in branch.
        st.stop()
    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")
    if st.button("πŸšͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()
    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")
        # Condensed, editable view; 'Shortlisted' is the only field meant to
        # be changed (Yes/No dropdown).
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        # Write the edited shortlist column back into the full table;
        # pandas aligns the assignment by index, which the editor preserves.
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']
        st.markdown("---")
        st.subheader("πŸ“₯ Download Data")
        df_export = st.session_state.analyzed_data.copy()
        # Build the .xlsx entirely in memory (requires the openpyxl engine).
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)
        st.download_button(
            label="πŸ’Ύ Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
# --- End of src/streamlit_app.py ---