| | |
| |
|
import io
import json
import os

import docx2txt
import fitz
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from pydantic import BaseModel, Field
| |
|
| | |
| |
|
| | |
| | |
# Load environment variables from the project root's .env file
# (this script lives one directory below the root).
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast with a visible error if the API key is missing or the client
# cannot be constructed — the whole app depends on the Groq client.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the .env file is in the project root and contains your key.")
    st.stop()
| |
|
| | |
# NOTE(review): hard-coded admin password — move to an environment variable
# or st.secrets before deploying anywhere real.
ADMIN_PASSWORD = "admin"

# One-time session-state initialization (values survive Streamlit reruns).
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    # Column order here defines the layout of the admin table and the
    # Excel export; append_analysis_to_dataframe must produce these keys.
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
| |
|
| |
|
| | |
| |
|
class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction.

    Mirrors the JSON object the Groq completion is asked to return.
    The last three fields are only meaningful when the target job role
    is 'Therapist'; for other roles the model is told to skip them.
    """
    # Identity / contact fields
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    # General qualification fields
    certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # Free-form: may be a bare number ("8"), a range ("7-8"), or prose —
    # calculate_resume_score parses the leading number and falls back to 5.
    communication_skills: str = Field(description="A rating (1-10) or brief description of communication skills based on the resume language.")
    technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
    # Therapist-only fields (scores kept as strings; parsed downstream)
    aba_therapy_skills: str = Field(description="Specific mention or score (1-10) for ABA Therapy skills, ONLY if the role is 'Therapist'.")
    rbt_bcba_certification: str = Field(description="Indicate 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'.")
    autism_care_experience_score: str = Field(description="A score (1-10) for Autism-Care Experience, ONLY if the role is 'Therapist'.")
| |
|
| | |
| |
|
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Unsupported MIME types and extraction failures both yield "" so the
    caller can treat any falsy result as "skip this file"; failures are
    additionally surfaced to the UI via st.error.
    """
    mime = uploaded_file.type
    try:
        if mime == "application/pdf":
            # PyMuPDF reads the upload's raw bytes directly from memory.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        if mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            # docx2txt accepts the file-like object as-is.
            return docx2txt.process(uploaded_file)
        return ""
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return ""
| |
|
@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> "ResumeAnalysis":
    """Run a Groq chat completion that extracts structured resume data.

    The model is forced into JSON mode via ``response_format`` and the
    Pydantic JSON schema is embedded in the system prompt (the Groq API
    has no ``response_model`` parameter — passing one raises TypeError).
    The raw JSON reply is then validated into a ResumeAnalysis instance.

    Args:
        resume_text: Plain text extracted from the uploaded resume.
        job_role: Target role; 'Therapist' triggers extra specialized fields.

    Returns:
        A populated ResumeAnalysis, or a sentinel instance with
        name='Extraction Failed' when the API call or validation fails.
    """
    # Extra extraction instructions only for the therapist role.
    therapist_instructions = ""
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for: "
            "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "2. Provide a score from 1-10 for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
            "3. Set 'rbt_bcba_certification' to 'Yes' or 'No'."
        )

    # JSON mode only guarantees syntactically valid JSON; the schema must be
    # spelled out in the prompt for the model to emit the right keys.
    schema_json = json.dumps(ResumeAnalysis.model_json_schema())
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
        f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions} "
        f"JSON schema: {schema_json}"
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # BUG FIX: the original passed `response_model=...`, which is not a
            # Groq parameter and raised TypeError on every call (so every file
            # came back as "Extraction Failed"). `response_format` is the
            # supported way to enable JSON mode.
            response_format={"type": "json_object"},
            temperature=0.0
        )

        # Pydantic v2 replacement for the deprecated parse_raw().
        return ResumeAnalysis.model_validate_json(chat_completion.choices[0].message.content)

    except Exception as e:
        st.error(f"Groq API Error: {e}")
        # Sentinel result; the caller detects it via name == "Extraction Failed".
        return ResumeAnalysis(
            name="Extraction Failed", email="", phone="", certifications=[],
            experience_summary="", education_summary="", communication_skills="0",
            technical_skills=[], aba_therapy_skills="0",
            rbt_bcba_certification="No", autism_care_experience_score="0"
        )
| |
|
| |
|
def calculate_resume_score(analysis: "ResumeAnalysis", job_role: "str | None" = None) -> float:
    """Calculate a weighted resume score out of 100.

    Weights: experience 40, technical skills 30, communication 20,
    certifications up to 10, plus a bonus of up to 10 for the 'Therapist'
    role. The sum (max 110) is clamped to 100 and rounded.

    Args:
        analysis: Structured extraction result for one resume; only its
            public attributes are read, so any duck-typed object works.
        job_role: Target role. Defaults to the Streamlit session state's
            'selected_role' (the original implicit behavior), but passing
            it explicitly decouples scoring from the UI.

    Returns:
        The rounded score as a float in [0, 100].
    """
    if job_role is None:
        # Backward-compatible fallback: original code read the role from
        # the selectbox's session-state key.
        job_role = st.session_state.get('selected_role')

    total_score = 0.0

    # Experience (max 40): length of the summary as a crude proxy,
    # saturating at 100 characters.
    exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0)
    total_score += exp_factor * 40.0

    # Technical skills (max 30): saturates at 10 listed skills.
    skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # Communication (max 20): parse the leading number from e.g. "7" or
    # "7-8"; descriptive text falls back to a neutral 5/10.
    try:
        comm_rating = float(analysis.communication_skills.split('-')[0].strip())
    except (ValueError, IndexError):
        comm_rating = 5.0
    total_score += (comm_rating / 10.0) * 20.0

    # Certifications (max 10): one point each.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # Therapist-only bonus (max 10) from the two specialized 1-10 scores.
    if job_role == "Therapist":
        try:
            aba_score = float(analysis.aba_therapy_skills.split('-')[0].strip())
            autism_score = float(analysis.autism_care_experience_score.split('-')[0].strip())
            total_score += ((aba_score + autism_score) / 20.0) * 10.0
        except (ValueError, IndexError):
            # Missing or non-numeric specialized fields earn no bonus.
            pass

    # Clamp to 100 (base weights alone can reach 100; the bonus can push
    # the raw sum to 110) and round to a whole number.
    return float(round(min(total_score, 100)))
| |
|
| |
|
def append_analysis_to_dataframe(job_role: str, analysis: "ResumeAnalysis", score: float):
    """Format one analysis result as a row and append it to the session table.

    Mutates st.session_state.analyzed_data in place (concat + rebind).
    Row keys must match the initial_cols layout defined at startup.

    Args:
        job_role: Role the candidate was analyzed against.
        analysis: Structured extraction result for the resume.
        score: Weighted score from calculate_resume_score.
    """
    # Reads the model's attributes directly — the original round-tripped
    # through analysis.dict() (deprecated in Pydantic v2) and wrote
    # 'Job Role'/'Resume Score' keys into that dict without ever reading
    # them back.
    row = {
        'Name': analysis.name,
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': analysis.email,
        'Phone': analysis.phone,
        'Shortlisted': 'No',  # every new candidate starts un-shortlisted
        'Experience Summary': analysis.experience_summary,
        'Education Summary': analysis.education_summary,
        'Communication Rating (1-10)': analysis.communication_skills,
        'Skills/Technologies': ", ".join(analysis.technical_skills),
        'Certifications': ", ".join(analysis.certifications),
        'ABA Skills (1-10)': analysis.aba_therapy_skills,
        'RBT/BCBA Cert': analysis.rbt_bcba_certification,
        'Autism-Care Exp (1-10)': analysis.autism_care_experience_score,
    }

    new_df = pd.DataFrame([row])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
| |
|
| |
|
| | |
| |
|
# --- Streamlit page chrome (set_page_config must be the first st call
# --- after the startup error checks above) -------------------------------
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

st.title("π Quantum Scrutiny Platform: AI Resume Analysis")

# Two top-level views: candidate upload flow and password-gated admin view.
tab_user, tab_admin = st.tabs(["π€ Resume Uploader (User Panel)", "π Admin Dashboard (Password Protected)"])
| |
|
| | |
| | |
| | |
# =========================================================================
# User panel: upload resumes, run extraction + scoring, show a run summary.
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")

    # Role choice drives the extraction prompt (therapist-specific fields)
    # and the scoring bonus; key='selected_role' also exposes the value in
    # session state, which calculate_resume_score reads.
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )

    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    if st.button("π Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)

            # Reset the per-run summary shown under the upload widgets.
            st.session_state.individual_analysis = []

            with st.status("Processing Resumes...", expanded=True) as status_box:
                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")

                    # 1) Raw text extraction (PDF via PyMuPDF, DOCX via docx2txt).
                    resume_text = extract_text_from_file(file)

                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue

                    # 2) Structured extraction via Groq.
                    analysis = analyze_resume_with_groq(resume_text, selected_role)

                    # Sentinel returned by analyze_resume_with_groq on API failure.
                    if analysis.name == "Extraction Failed":
                        st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        continue

                    # 3) Weighted scoring, then 4) persist into the admin table.
                    score = calculate_resume_score(analysis)

                    append_analysis_to_dataframe(selected_role, analysis, score)

                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })

                    # NOTE(review): the `continue` branches above skip this
                    # update, so the bar can finish below 100% when files fail.
                    progress_bar.progress((i + 1) / total_files)

                status_box.update(label="Analysis Complete!", state="complete", expanded=False)

            st.success(f"**β Successfully analyzed {total_files} resumes.**")

    # Summary of the most recent run (persists across reruns via session state).
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")
| |
|
| | |
| | |
| | |
# =========================================================================
# Admin panel: password gate, editable candidate table, Excel export.
# =========================================================================
with tab_admin:

    # --- Login gate: everything below only renders once authenticated. ---
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("π Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()  # re-render immediately in the logged-in state
            else:
                st.error("Incorrect password.")
        # Halt rendering of the rest of this tab for unauthenticated users.
        st.stop()

    st.header("π― Recruitment Dashboard")
    st.markdown("---")

    if st.button("πͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()

        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Compact column subset for on-screen editing; the export below
        # keeps every column.
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        # Editable grid; only 'Shortlisted' is meant to be changed (dropdown).
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist shortlist edits back into session state.
        # NOTE(review): relies on index alignment between the edited view and
        # the stored DataFrame — confirm rows are never filtered/reordered.
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")

        st.subheader("π₯ Download Data")

        df_export = st.session_state.analyzed_data.copy()

        # Build the .xlsx entirely in memory so nothing touches disk.
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)  # rewind so download_button reads from the start

        st.download_button(
            label="πΎ Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
| |
|
| | |