# Hugging Face Spaces page residue (not code) — kept as comments so the file parses:
# meesamraza's picture
# Update app.py
# 80e6947 verified
# raw
# history blame
# 22.9 kB
# app.py
"""
Quantum Scrutiny Platform | Groq-Powered
Single-file Streamlit app (refactored & debugged)
"""
# --- 0. Always set page config as the first Streamlit command ---
import os
from dotenv import load_dotenv
load_dotenv() # load local .env if present (during local dev)
import io
import base64
import traceback
from typing import Optional, List
import streamlit as st
import pandas as pd
# resume parsing
import fitz # PyMuPDF
from docx import Document # python-docx
# Groq client (keep same import name as you used)
from groq import Groq
# Pydantic for schema validation
from pydantic import BaseModel, Field, ValidationError
# --- Streamlit UI config ---
# Must run before any other Streamlit call in the script.
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # set in environment or .env or deploy secrets
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")  # optional override via env; insecure default "admin"
# --- Initialize Groq client with safe error messaging ---
# groq_client stays None when the key is absent or init fails; downstream
# calls check for None and surface an error instead of crashing the UI.
groq_client = None
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY not found. Please set GROQ_API_KEY as an environment variable or in Hugging Face secrets.")
    # We won't stop here to allow UI to display, but analysis will error if used.
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
        groq_client = None
# --- Session state defaults ---
# is_admin_logged_in: gates rendering of the admin dashboard tab.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
# analyzed_data: accumulating table of every candidate analyzed this session.
if 'analyzed_data' not in st.session_state:
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
# individual_analysis: per-file summaries from the most recent analysis run.
if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []
# run_analysis: flag set by the Analyze button; processing happens after rerun.
if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False
# --- Pydantic schema for Groq output ---
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from a resume by the LLM.

    Score-like fields are deliberately strings (e.g. '8' or 'N/A') because the
    model is prompted to return them as strings; numeric parsing happens later
    in calculate_resume_score.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="Concise summary of experience.")
    education_summary: str = Field(default="", description="Concise summary of education.")
    communication_skills: str = Field(description="Communication score as a STRING ('8') or description.")
    technical_skills: List[str] = Field(default_factory=list, description="List of skills/technologies.")
    # Therapist-role-only fields; "N/A" for every other role.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="ABA Therapy score as STRING or 'N/A'.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="'Yes'/'No'/'N/A'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="Autism care experience score as STRING or 'N/A'.")
# --- Helper: File text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """
    Extract plain text from an uploaded resume file.

    Accepts a Streamlit ``UploadedFile`` (or any object exposing ``.name`` and
    ``.read()``) and returns the extracted text. PDFs are parsed with PyMuPDF,
    DOCX with python-docx; anything else is decoded leniently as UTF-8.
    Returns an empty string on any extraction failure.
    """
    try:
        # Rewind before reading: Streamlit reruns can hand us an
        # already-consumed buffer, in which case read() would return b""
        # and every resume would be silently skipped.
        try:
            uploaded_file.seek(0)
        except Exception:
            pass  # not seekable; fall through to a plain read
        content = uploaded_file.read()
        name_lower = uploaded_file.name.lower()
        # Detect PDFs by extension or by the %PDF- magic header.
        if name_lower.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "\n".join(page.get_text() for page in doc).strip()
            except Exception as e:
                st.warning(f"PDF extraction issue for {uploaded_file.name}: {e}")
                return ""
        elif name_lower.endswith(".docx"):
            try:
                # python-docx accepts a file-like object.
                doc = Document(io.BytesIO(content))
                paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(paragraphs).strip()
            except Exception as e:
                st.warning(f"DOCX extraction issue for {uploaded_file.name}: {e}")
                return ""
        else:
            # Fall back to a lenient UTF-8 decode for text-like files.
            try:
                return content.decode('utf-8', errors='ignore')
            except Exception:
                return ""
    except Exception as e:
        st.error(f"Unexpected file extraction error: {e}")
        return ""
# --- Helper: call Groq (safe wrapper) ---
def call_groq_chat_system(resume_text: str, job_role: str) -> Optional[str]:
    """
    Send the resume text to the Groq chat-completion endpoint.

    Returns the raw model response text, or None when the client is not
    configured or the API call fails (errors are surfaced via Streamlit).
    """
    if not groq_client:
        st.error("Groq client is not initialized. Set GROQ_API_KEY in environment or secrets.")
        return None

    # Role-specific guidance appended to the system prompt.
    if job_role == "Therapist":
        role_guidance = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide a score from 1-10 as a STRING "
            "(e.g., '7') for the specialized fields. If any specialized field is not present, return 'N/A'."
        )
    else:
        role_guidance = (
            "This is NOT a Therapist role. Set 'aba_therapy_skills', 'autism_care_experience_score', "
            "and 'rbt_bcba_certification' to 'N/A' if not applicable."
        )

    system_prompt = (
        "You are a professional Resume Analyzer. Extract the requested fields and return a strict JSON object "
        "matching the schema: name, email, phone, certifications (array), experience_summary, education_summary, "
        "communication_skills (AS A STRING, e.g., '8'), technical_skills (array), aba_therapy_skills, "
        "rbt_bcba_certification, autism_care_experience_score. " + role_guidance
    )
    user_prompt = f"Analyze the following resume text and return a JSON object:\n\n---\n{resume_text}\n---\nReturn only valid JSON."

    try:
        result = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768",  # keep your original model choice; adapt if needed
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.0,
            max_tokens=2000,
            # Not all Groq SDK versions support response_model in the same way; we parse manually below.
        )
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        st.exception(e)
        return None

    # SDK versions differ in response shape: attribute access first, then
    # mapping-style access, then fall back to the stringified response.
    try:
        return result.choices[0].message.content
    except Exception:
        try:
            return result["choices"][0]["message"]["content"]
        except Exception:
            return str(result)
# --- Cached wrapper for analysis (cache by resume_text + role) ---
def _failed_analysis() -> "ResumeAnalysis":
    """Build the sentinel ResumeAnalysis returned on any extraction failure."""
    return ResumeAnalysis(
        name="Extraction Failed",
        email="",
        phone="",
        certifications=[],
        experience_summary="",
        education_summary="",
        communication_skills="N/A",
        technical_skills=[],
        aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A",
        autism_care_experience_score="N/A"
    )


@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Call Groq on the resume text and return a validated ResumeAnalysis.

    Cached on (resume_text, job_role) to avoid repeated API calls for
    identical inputs. On any failure (API error, unparseable model output,
    schema validation error) a sentinel object with name "Extraction Failed"
    is returned instead of raising.
    """
    import json
    import re
    import ast

    raw_response = call_groq_chat_system(resume_text, job_role)
    if not raw_response:
        return _failed_analysis()

    # The model may wrap its JSON in commentary; grab the first {...} span.
    match = re.search(r"(\{.*\})", raw_response, re.DOTALL)
    json_text = match.group(1) if match else raw_response
    try:
        parsed = json.loads(json_text)
    except Exception:
        # Forgiving fallback: the model occasionally emits a Python-dict-like
        # literal instead of strict JSON. ast.literal_eval only evaluates
        # literals, so — unlike eval() — untrusted model output cannot
        # execute code.
        try:
            parsed = ast.literal_eval(json_text)
            if not isinstance(parsed, dict):
                raise ValueError("Parsed non-dict from model response fallback.")
        except Exception:
            st.warning("Failed to parse Groq output as JSON. Returning fallback extraction.")
            st.text_area("Raw model output (for debugging)", raw_response, height=200)
            return _failed_analysis()

    try:
        # Normalize before validation so minor schema drift doesn't fail.
        parsed.setdefault("certifications", [])
        parsed.setdefault("technical_skills", [])
        # communication_skills must be a string per the schema.
        if parsed.get("communication_skills") is not None:
            parsed["communication_skills"] = str(parsed["communication_skills"])
        else:
            parsed["communication_skills"] = "N/A"
        # Therapist-specific fields default to "N/A" when missing or null.
        for k in ("aba_therapy_skills", "rbt_bcba_certification", "autism_care_experience_score"):
            parsed[k] = "N/A" if parsed.get(k) is None else str(parsed[k])
        analysis = ResumeAnalysis.parse_obj(parsed)
        # Final coercions to guarantee string types on score-like fields.
        analysis.communication_skills = str(analysis.communication_skills or "N/A")
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or "N/A")
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or "N/A")
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or "N/A")
        return analysis
    except ValidationError as ve:
        st.error("Model output failed schema validation. Returning fallback object.")
        st.text_area("Model raw response (for debugging)", raw_response, height=200)
        st.exception(ve)
        return _failed_analysis()
    except Exception as e:
        st.error("Unexpected error while validating model output.")
        st.exception(e)
        return _failed_analysis()
# --- Scoring function ---
def calculate_resume_score(analysis: "ResumeAnalysis", role: str) -> float:
    """
    Compute a 0-100 resume score from the extracted analysis.

    Weighting: experience-summary length up to 40 points, skills count up to
    30, communication rating up to 20, certifications up to 10, plus a
    Therapist-specialization bonus of up to 10. The total is clamped to 100
    and rounded to a whole number (returned as float).
    """
    import re  # hoisted: previously buried in a try block but used in two places

    def _leading_number(value, default=0.0):
        """Extract the first numeric token from a free-form score string.

        Accepts forms like '8', '8/10', '8 - good'; returns `default` when no
        number can be found.
        """
        try:
            m = re.search(r"(\d+(\.\d+)?)", str(value))
            return float(m.group(1)) if m else default
        except Exception:
            return default

    total_score = 0.0
    # Experience summary length -> up to 40 points (100+ chars = full credit).
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0
    # Skills count -> up to 30 points (10+ skills = full credit).
    skills_count = len(analysis.technical_skills or [])
    total_score += min(skills_count / 10.0, 1.0) * 30.0
    # Communication -> up to 20 points; unparseable ratings score a neutral 5.
    comm_val = _leading_number(analysis.communication_skills, default=5.0)
    comm_val = max(0.0, min(10.0, comm_val))
    total_score += (comm_val / 10.0) * 20.0
    # Certifications -> 1 point each, capped at 10.
    total_score += float(min(len(analysis.certifications or []), 10))
    # Therapist-specific bonus: average of ABA + autism-care scores, scaled to 10.
    if role == "Therapist":
        aba = _leading_number(analysis.aba_therapy_skills)
        autism = _leading_number(analysis.autism_care_experience_score)
        total_score += ((aba + autism) / 20.0) * 10.0
    # Clamp to 100 and round to a whole-number score.
    return float(round(min(total_score, 100)))
# --- Append to session DataFrame helper ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Append one candidate's analysis as a new row of the session DataFrame."""
    payload = analysis.dict()

    def _text(key):
        # Missing/empty values render as "" in the table.
        return payload.get(key) or ""

    def _score_text(key):
        # Score-like fields are stored as strings; default to "N/A".
        return str(payload.get(key) or "N/A")

    row = {
        'Name': _text('name'),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': _text('email'),
        'Phone': _text('phone'),
        'Shortlisted': 'No',
        'Experience Summary': _text('experience_summary'),
        'Education Summary': _text('education_summary'),
        'Communication Rating (1-10)': _score_text('communication_skills'),
        'Skills/Technologies': ", ".join(payload.get('technical_skills') or []),
        'Certifications': ", ".join(payload.get('certifications') or []),
        'ABA Skills (1-10)': _score_text('aba_therapy_skills'),
        'RBT/BCBA Cert': _score_text('rbt_bcba_certification'),
        'Autism-Care Exp (1-10)': _score_text('autism_care_experience_score'),
    }
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])],
        ignore_index=True,
    )
# --- Utility: Excel download as BytesIO for st.download_button ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to in-memory .xlsx bytes for st.download_button."""
    buffer = io.BytesIO()
    # The context manager finalizes/saves the workbook on exit.
    with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return buffer.getvalue()
# --- App Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
# -------------------------
# User Panel
# -------------------------
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key fields.")
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
    uploaded_files = st.file_uploader(
        "2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True
    )
    # Analyze button sets a session_state flag and reruns; processing happens
    # on the rerun so widget state is settled.
    if st.button("πŸš€ Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()
    # If run_analysis flag is set, process uploads
    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            # The finally clause advances the progress bar.
                            continue
                        # Cached: identical text+role pairs skip the API call.
                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue
                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # Pass the exception object (not a formatted string) so
                        # Streamlit renders a proper traceback.
                        st.exception(e)
                    finally:
                        progress.progress(idx / total)
            st.success(f"βœ… Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False  # reset flag
    # Show last analysis summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
# -------------------------
# Admin Panel (Password Protected)
# -------------------------
with tab_admin:
    # Gate: render only the login form until the admin authenticates.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("πŸ”‘ Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # stop further admin rendering while not logged in
        st.stop()
    st.header("🎯 Recruitment Dashboard")
    if st.button("πŸšͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()
    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")
        # Only a subset of columns is shown/editable; the full table is kept
        # in session state and exported below.
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
        # data_editor with SelectboxColumn for 'Shortlisted'
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                )
            },
            key="dashboard_editor",
            hide_index=True
        )
        # propagate the 'Shortlisted' edits back to session dataframe
        # NOTE(review): positional assignment — assumes the editor preserves
        # row order relative to the session frame; confirm if sorting is added.
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            # fallback for indexing mismatches
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val
        st.markdown("---")
        st.subheader("πŸ“₯ Download Data")
        # Export the full session table (all columns), including shortlist edits.
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)
        st.download_button(
            label="πŸ’Ύ Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
# --- End of app.py ---