meesamraza's picture
Update app.py
d915eee verified
raw
history blame
16.1 kB
# src/streamlit_app.py
# Standard library
import io
import json
import os

# Third-party
import docx2txt
import fitz
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from pydantic import BaseModel, Field
# --- 0. FIX: SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- 1. CONFIGURATION AND INITIALIZATION ---
# Load environment variables from the project root (one directory above src/),
# so a local .env works; on Hugging Face the values come from Secrets instead.
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))

# Read the Groq key from the environment (HF Secrets or local .env).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Admin password: overridable via the ADMIN_PASSWORD environment variable,
# falling back to the original default so existing deployments keep working.
# Hard-coding credentials in source is a security risk.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Initialize the Groq client; halt the app with a visible error when the key
# is missing or client construction fails.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
    st.stop()

# Session-state defaults: admin login flag plus the accumulated results table
# (seeded with the dashboard's display columns so downloads/editors work even
# before the first resume is analyzed).
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---
class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction.

    NOTE: the Field descriptions double as extraction instructions — they are
    part of the JSON schema shown to the Groq model, so their exact wording
    matters. Score-like fields are deliberately typed as STRINGS (e.g. '8' or
    'N/A') because the model is told to quote them; downstream scoring code
    parses them defensively.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # --- FIX 1: Explicitly describe required STRING output format ---
    communication_skills: str = Field(description="A score as a STRING (e.g., '8') or brief description of communication skills.")
    technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
    # The next three fields are Therapist-role specific; 'N/A' otherwise.
    aba_therapy_skills: str = Field(description="Specific mention or score as a STRING (e.g., '7') for ABA Therapy skills, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
    rbt_bcba_certification: str = Field(description="Indicate the STRING 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
    autism_care_experience_score: str = Field(description="A score as a STRING (e.g., '9') for Autism-Care Experience, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
# --- 3. HELPER FUNCTIONS ---
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX resume.

    Unsupported MIME types and extraction failures both yield an empty
    string, which the caller interprets as "skip this file".
    """
    mime = uploaded_file.type
    try:
        if mime == "application/pdf":
            # PyMuPDF reads the upload from an in-memory byte stream.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as pdf:
                return "".join(page.get_text() for page in pdf)
        if mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            return docx2txt.process(uploaded_file)
        return ""
    except Exception as e:
        print(f"Error extracting text: {e}")
        return ""
@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Extract structured resume data via Groq JSON mode + the Pydantic schema.

    Args:
        resume_text: Raw text extracted from the candidate's resume.
        job_role: Target role; "Therapist" activates the specialized fields.

    Returns:
        A validated ResumeAnalysis, or a sentinel instance whose name is
        "Extraction Failed" when the API call or validation fails.
    """
    # Custom instructions for Therapist role
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for: "
            "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "2. Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
            "3. If any specialized therapist field is not found, you MUST use the **STRING** 'N/A'. "
            "4. Set 'rbt_bcba_certification' to the **STRING** 'Yes' or 'No'."
        )
    else:
        # For non-therapist roles, explicitly instruct the model to use 'N/A'
        # for the therapist-only fields.
        therapist_instructions = (
            "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to the **STRING** 'N/A'."
        )

    # FIX: the Groq SDK has no `response_model` parameter (that kwarg belongs
    # to the `instructor` wrapper), so the original call raised TypeError.
    # Use Groq's native JSON mode instead, and embed the Pydantic JSON schema
    # directly in the system prompt so the model knows the exact shape.
    schema = (ResumeAnalysis.model_json_schema()
              if hasattr(ResumeAnalysis, "model_json_schema")  # Pydantic v2
              else ResumeAnalysis.schema())                    # Pydantic v1 fallback
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Follow the instructions precisely and return a JSON object that strictly adheres to this JSON schema:\n{json.dumps(schema)}\n"
        f"**IMPORTANT:** All values must be returned as the data type specified. Numerical scores must be enclosed in quotes to be treated as **STRING** types (e.g., \"8\"). "
        f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions}"
    )
    try:
        chat_completion = groq_client.chat.completions.create(
            # FIX: mixtral-8x7b-32768 was decommissioned by Groq; use a
            # currently supported fast model.
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            response_format={"type": "json_object"},  # native Groq JSON mode
            temperature=0.0
        )
        raw_json = chat_completion.choices[0].message.content
        # Pydantic v2 renamed parse_raw() -> model_validate_json().
        if hasattr(ResumeAnalysis, "model_validate_json"):
            return ResumeAnalysis.model_validate_json(raw_json)
        return ResumeAnalysis.parse_raw(raw_json)
    except Exception as e:
        # Catches API/transport errors and schema-validation failures alike.
        st.error(f"Groq API Error: {e}")
        # Return a sentinel analysis so callers can detect and skip the file.
        return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="N/A", technical_skills=[], aba_therapy_skills="N/A", rbt_bcba_certification="N/A", autism_care_experience_score="N/A")
def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Calculates the weighted score out of 100.

    Weights: experience 40, skills 30, communication 20, certifications 10,
    plus an optional Therapist-only bonus of up to 10 (result capped at 100).
    """
    # Experience (max 40): grows with summary length, saturating at 100 chars.
    score = min(len(analysis.experience_summary) / 100.0, 1.0) * 40.0

    # Skills (max 30): saturates at 10 listed technologies.
    score += min(len(analysis.technical_skills) / 10.0, 1.0) * 30.0

    # Communication (max 20): the rating arrives as a string like '8' or
    # '8-High'; take the part before any dash, defaulting to 5 if unparsable.
    try:
        comm_rating = float(analysis.communication_skills.split('-')[0].strip())
    except (ValueError, IndexError):
        comm_rating = 5.0
    score += (comm_rating / 10.0) * 20.0

    # Certifications (max 10): one point each, capped.
    score += float(min(len(analysis.certifications), 10))

    # Therapist-specific bonus (max 10): average of the two specialized
    # 1-10 scores; 'N/A' counts as zero, and a malformed value skips the
    # whole bonus silently.
    if st.session_state.get('selected_role') == "Therapist":
        try:
            aba = (0.0 if analysis.aba_therapy_skills == 'N/A'
                   else float(analysis.aba_therapy_skills.split('-')[0].strip()))
            autism = (0.0 if analysis.autism_care_experience_score == 'N/A'
                      else float(analysis.autism_care_experience_score.split('-')[0].strip()))
            score += ((aba + autism) / 20.0) * 10.0
        except (ValueError, IndexError):
            pass

    # Cap at 100, round to a whole number, and return as float.
    return float(round(min(score, 100)))
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Formats and appends the new analysis to the session-state DataFrame.

    Args:
        job_role: Role the candidate was analyzed against.
        analysis: Validated extraction result for one resume.
        score: Weighted 0-100 score from calculate_resume_score().

    Field values are read via plain attribute access, which works on both
    Pydantic v1 and v2 (the original `.dict()` call is a deprecated v1 API).
    Every new candidate starts with Shortlisted='No'; the admin flips it
    later in the dashboard editor.
    """
    row = {
        'Name': analysis.name,
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': analysis.email,
        'Phone': analysis.phone,
        'Shortlisted': 'No',
        'Experience Summary': analysis.experience_summary,
        'Education Summary': analysis.education_summary,
        'Communication Rating (1-10)': analysis.communication_skills,
        # Lists are flattened to comma-separated strings for display/export.
        'Skills/Technologies': ", ".join(analysis.technical_skills),
        'Certifications': ", ".join(analysis.certifications),
        'ABA Skills (1-10)': analysis.aba_therapy_skills,
        'RBT/BCBA Cert': analysis.rbt_bcba_certification,
        'Autism-Care Exp (1-10)': analysis.autism_care_experience_score,
    }
    new_df = pd.DataFrame([row])
    existing = st.session_state.analyzed_data
    # Concatenating onto the empty seeded frame triggers a pandas
    # FutureWarning (empty/all-NA entries); replace it outright instead.
    if existing.empty:
        st.session_state.analyzed_data = new_df
    else:
        st.session_state.analyzed_data = pd.concat([existing, new_df], ignore_index=True)
# --- 4. APP LAYOUT AND LOGIC ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

# Two top-level tabs: public uploader and password-gated admin dashboard.
tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])

# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    # key='selected_role' also exposes the choice via st.session_state, which
    # calculate_resume_score reads for the Therapist bonus.
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )
    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    if st.button("πŸš€ Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)
            # Per-run summary shown below; reset on every analysis click.
            st.session_state.individual_analysis = []
            with st.status("Processing Resumes...", expanded=True) as status_box:
                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")
                    resume_text = extract_text_from_file(file)
                    # Empty string means unsupported type or extraction error.
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue
                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    # Sentinel name signals an API/validation failure upstream.
                    if analysis.name == "Extraction Failed":
                        st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        continue
                    score = calculate_resume_score(analysis)
                    append_analysis_to_dataframe(selected_role, analysis, score)
                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })
                    progress_bar.progress((i + 1) / total_files)
                status_box.update(label="Analysis Complete!", state="complete", expanded=False)
            st.success(f"**βœ… Successfully analyzed {total_files} resumes.**")

    # Show the most recent batch's per-candidate scores, if any.
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")
# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:
    # Login gate: st.stop() below prevents rendering the dashboard until the
    # session is authenticated; st.rerun() refreshes after a state change.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("πŸ”‘ Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")
    if st.button("πŸšͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")
        # Only a subset of columns is shown in the editor; the full table is
        # still kept in session state and included in the Excel export.
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        # Persist the editor's Shortlisted choices back onto the full table
        # (row order/index is shared between df and the edited copy).
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")
        st.subheader("πŸ“₯ Download Data")
        df_export = st.session_state.analyzed_data.copy()
        # Build the .xlsx entirely in memory so nothing touches disk.
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)
        st.download_button(
            label="πŸ’Ύ Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
# --- End of src/streamlit_app.py ---