Spaces:

meesamraza
/

Programming_Developer_Advisor_Chatbot

Sleeping

App Files Files Community

Programming_Developer_Advisor_Chatbot / app.py

meesamraza

Update app.py

ce2a8d8 verified 3 months ago

raw

history blame

26.6 kB

	"""
	Quantum Scrutiny Platform | Groq-Powered
	Single-file Streamlit app (refactored, Groq streaming-compatible)
	"""

	import os
	import io
	import re
	import json
	import base64
	import traceback
	from typing import Optional, List

	from dotenv import load_dotenv
	load_dotenv()

	import streamlit as st
	import pandas as pd

	# File parsing
	import fitz # PyMuPDF
	from docx import Document # python-docx

	# Groq client
	from groq import Groq

	# Validation
	from pydantic import BaseModel, Field, ValidationError

	# --- Page config ---
	# The title uses a plain pipe: "\|" is not a valid escape sequence in a normal
	# string literal and would put a literal backslash into the browser tab title.
	st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

	# --- Config / Secrets ---
	# Both settings are read from the process environment.
	GROQ_API_KEY = os.getenv("GROQ_API_KEY")
	# NOTE(security): when ADMIN_PASSWORD is unset this falls back to the literal "admin".
	ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

	# The client stays None when no key is configured or construction fails, so the
	# page still renders and only the model calls are unavailable.
	groq_client = None
	if not GROQ_API_KEY:
	    st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
	else:
	    try:
	        groq_client = Groq(api_key=GROQ_API_KEY)
	    except Exception as init_error:
	        st.error(f"Failed to initialize Groq client: {init_error}")

	# --- Session state defaults ---
	# Maps each session key to a factory for its initial value. A key is seeded only
	# when it is absent, so values already stored in the session are left untouched.
	_session_defaults = {
	    'is_admin_logged_in': bool,  # bool() -> False
	    'analyzed_data': lambda: pd.DataFrame(columns=[
	        'Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Phone',
	        # Per-component scores
	        'Experience Score (40)', 'Skills Score (30)', 'Communication Score (20)', 'Certifications Score (10)',
	        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
	        'Skills/Technologies', 'Certifications',
	        # Therapist-specific fields
	        'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)',
	    ]),
	    'individual_analysis': list,  # list() -> []
	    'run_analysis': bool,
	}
	for _state_key, _make_default in _session_defaults.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _make_default()

	# --- Pydantic schema (component scores are derived later, not stored here) ---
	class ResumeAnalysis(BaseModel):
	    """Fields extracted from a single resume.

	    Every field declares a default, so an instance can be built from a partial
	    mapping. Ratings are kept as strings (for example "8" or "N/A").
	    """

	    # Candidate identity and contact details
	    name: str = "Unknown"
	    email: str = ""
	    phone: str = ""
	    certifications: List[str] = Field(default_factory=list)
	    # Free-text summaries
	    experience_summary: str = ""
	    education_summary: str = ""
	    # Communication rating as text; "N/A" when not provided
	    communication_skills: str = "N/A"
	    technical_skills: List[str] = Field(default_factory=list)
	    # Therapist-specific fields; "N/A" when not provided
	    aba_therapy_skills: Optional[str] = "N/A"
	    rbt_bcba_certification: Optional[str] = "N/A"
	    autism_care_experience_score: Optional[str] = "N/A"

	# --- Helpers: file text extraction ---
	def extract_text_from_file(uploaded_file) -> str:
	    """Extract plain text from an uploaded PDF, DOCX or text file.

	    Args:
	        uploaded_file: File-like object exposing ``name`` and either
	            ``getvalue()`` or ``read()`` returning the file's bytes.

	    Returns:
	        The extracted text. PDF and DOCX text is stripped of surrounding
	        whitespace; any other file is decoded as UTF-8 with undecodable bytes
	        dropped. Returns "" when the file cannot be read or parsed.
	    """
	    try:
	        # Prefer getvalue(): it returns the whole buffer regardless of the current
	        # read position, so a buffer that was already consumed by an earlier
	        # read() still yields its full content.
	        getvalue = getattr(uploaded_file, "getvalue", None)
	        content = getvalue() if callable(getvalue) else uploaded_file.read()
	        filename = uploaded_file.name.lower()

	        # The magic-number check catches PDFs uploaded under another extension.
	        if filename.endswith(".pdf") or content[:5] == b"%PDF-":
	            with fitz.open(stream=content, filetype="pdf") as pdf:
	                return "".join(page.get_text() for page in pdf).strip()

	        if filename.endswith(".docx"):
	            document = Document(io.BytesIO(content))
	            paragraphs = [p.text for p in document.paragraphs if p.text and p.text.strip()]
	            return "\n".join(paragraphs).strip()

	        # Anything else is treated as plain text.
	        return content.decode("utf-8", errors="ignore")
	    except Exception:
	        # Best-effort by contract: any read or parse problem is reported as "".
	        return ""

	# --- Groq call with streaming (collects chunks) ---
	def _chunk_text(chunk) -> str:
	    """Return the text carried by one streamed chunk, or "" when it carries none.

	    Handles both attribute-style chunks (``chunk.choices[0].delta.content``) and
	    dict-style chunks. Chunks with no choices contribute nothing.
	    """
	    if isinstance(chunk, dict):
	        choices = chunk.get("choices") or []
	        first = choices[0] if choices else None
	        if not isinstance(first, dict):
	            return ""
	        text = (first.get("delta") or {}).get("content") or (first.get("message") or {}).get("content")
	    else:
	        choices = getattr(chunk, "choices", None) or []
	        if not choices:
	            return ""
	        first = choices[0]
	        # Incremental text lives in delta.content; fall back to a full
	        # message.content for SDKs that send the final message that way.
	        text = (
	            getattr(getattr(first, "delta", None), "content", None)
	            or getattr(getattr(first, "message", None), "content", None)
	        )
	    return text if isinstance(text, str) else ""


	def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
	    """Call Groq with streaming enabled and return the concatenated output text.

	    Args:
	        prompt: User message sent after the fixed system message.
	        model_name: Groq model identifier.
	        temperature: Sampling temperature forwarded to the API.
	        max_completion_tokens: Completion length limit forwarded to the API.
	        top_p: Nucleus-sampling value forwarded to the API.

	    Returns:
	        The stripped model text, or None when the client is not initialized or
	        the API call fails (an error is shown in the UI in both cases).
	    """
	    if not groq_client:
	        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
	        return None

	    try:
	        completion = groq_client.chat.completions.create(
	            model=model_name,
	            messages=[
	                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
	                {"role": "user", "content": prompt},
	            ],
	            temperature=temperature,
	            max_completion_tokens=max_completion_tokens,
	            top_p=top_p,
	            stream=True,
	        )
	        # Only real text is collected. Chunks without text are skipped rather
	        # than stringified, so no object repr can leak into the model output.
	        return "".join(_chunk_text(chunk) for chunk in completion).strip()
	    except Exception as e:
	        st.error(f"Groq API call failed: {e}")
	        return None

	# --- Parsing model output safely to JSON ---
	def extract_first_json(text: str) -> Optional[dict]:
	    """Parse the JSON object that starts at the first ``{`` in *text*.

	    Text before the object (prose, code fences) and anything after it is
	    ignored, including trailing text that itself contains braces.

	    Args:
	        text: Raw model output.

	    Returns:
	        The decoded object as a dict, or None when *text* is empty, contains no
	        object, or the object cannot be decoded.
	    """
	    if not text:
	        return None

	    start = text.find("{")
	    if start == -1:
	        return None

	    # raw_decode stops at the end of the first complete JSON value, so nested
	    # braces are balanced by the real parser. Python's `re` has no recursive
	    # (?R) construct, and a greedy "{.*}" over-matches trailing text.
	    try:
	        candidate, _end = json.JSONDecoder().raw_decode(text, start)
	    except ValueError:
	        candidate = None
	    if isinstance(candidate, dict):
	        return candidate

	    # Last resort for Python-style output: swap single quotes for double quotes
	    # across the widest brace-delimited span and try again.
	    span = re.search(r"\{.*\}", text, re.DOTALL)
	    if span is None:
	        return None
	    try:
	        repaired = json.loads(span.group(0).replace("'", '"'))
	    except ValueError:
	        return None
	    return repaired if isinstance(repaired, dict) else None

	# --- Analyze with Groq (cached by resume text + role) ---
	class _ResumeExtractionError(Exception):
	    """Signals a failed extraction from inside the cached worker.

	    The worker raises instead of returning a placeholder so that the failure
	    is not stored by ``st.cache_data`` and the same resume can be retried.
	    """

	    def __init__(self, message: str = "", raw: str = "", cause: Optional[Exception] = None):
	        super().__init__(message)
	        self.message = message  # user-facing text; "" means nothing extra to show
	        self.raw = raw          # raw model output, for debugging
	        self.cause = cause      # underlying validation error, if any


	def _failed_analysis() -> ResumeAnalysis:
	    """Build the placeholder result; callers detect it by name == "Extraction Failed"."""
	    return ResumeAnalysis(name="Extraction Failed")


	def _as_text(value, default: str) -> str:
	    """Coerce a scalar from the model to str; None and "" become *default*."""
	    if value is None or value == "":
	        return default
	    return str(value)


	def _as_text_list(value):
	    """Normalize a list from the model to a list of str; None becomes [].

	    Non-list values are passed through unchanged so schema validation decides.
	    """
	    if value is None:
	        return []
	    if isinstance(value, (list, tuple)):
	        return [str(item) for item in value if item is not None]
	    return value


	@st.cache_data(show_spinner=False)
	def _analyze_resume_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
	    """Cached worker: prompt the model and validate its JSON answer.

	    Raises:
	        _ResumeExtractionError: when the model returns nothing, the output is
	            not a JSON object, or the object fails schema validation.
	    """
	    if job_role.lower() == "therapist":
	        therapist_instructions = (
	            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
	            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
	        )
	    else:
	        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

	    system_user_prompt = (
	        "Return a single JSON object with the following keys exactly: "
	        "name (string), email (string), phone (string), certifications (array of strings), "
	        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
	        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
	        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
	        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
	    )

	    raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
	    if not raw:
	        raise _ResumeExtractionError()

	    parsed = extract_first_json(raw)
	    if not parsed or not isinstance(parsed, dict):
	        raise _ResumeExtractionError(
	            "Failed to parse model JSON output. See raw output below for debugging.", raw
	        )

	    # Models frequently answer with null or bare numbers; normalize them so a
	    # missing phone number or a numeric rating does not fail validation.
	    # A rating of 0 is kept as "0" rather than being treated as missing.
	    fields = {
	        "name": _as_text(parsed.get("name"), "Unknown"),
	        "email": _as_text(parsed.get("email"), ""),
	        "phone": _as_text(parsed.get("phone"), ""),
	        "certifications": _as_text_list(parsed.get("certifications")),
	        "experience_summary": _as_text(parsed.get("experience_summary"), ""),
	        "education_summary": _as_text(parsed.get("education_summary"), ""),
	        "communication_skills": _as_text(parsed.get("communication_skills"), "N/A"),
	        "technical_skills": _as_text_list(parsed.get("technical_skills")),
	        "aba_therapy_skills": _as_text(parsed.get("aba_therapy_skills"), "N/A"),
	        "rbt_bcba_certification": _as_text(parsed.get("rbt_bcba_certification"), "N/A"),
	        "autism_care_experience_score": _as_text(parsed.get("autism_care_experience_score"), "N/A"),
	    }

	    try:
	        return ResumeAnalysis(**fields)
	    except ValidationError as ve:
	        raise _ResumeExtractionError("Model output failed schema validation.", raw, ve) from ve


	def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
	    """Analyze one resume with Groq, reusing cached results for repeated inputs.

	    Successful analyses are cached by (resume_text, job_role). Failures are
	    reported in the UI and are not cached, so a later attempt calls the model
	    again.

	    Args:
	        resume_text: Plain text of the resume.
	        job_role: Target role; "therapist" (case-insensitive) enables the
	            therapist-specific prompt instructions.

	    Returns:
	        The validated ResumeAnalysis, or a placeholder whose ``name`` is
	        "Extraction Failed" when extraction did not succeed.
	    """
	    try:
	        return _analyze_resume_cached(resume_text, job_role)
	    except _ResumeExtractionError as failure:
	        if failure.message:
	            if failure.cause is None:
	                st.warning(failure.message)
	            else:
	                st.error(failure.message)
	            # Display-only element: unlike an input widget it needs no unique
	            # key, so several failures can be shown in the same run.
	            st.code(failure.raw, language=None)
	            if failure.cause is not None:
	                st.exception(failure.cause)
	        return _failed_analysis()


	# Keep the cache-clearing handle reachable under the public name.
	analyze_resume_with_groq_cached.clear = _analyze_resume_cached.clear

	# --- Scoring logic ---
	_FIRST_NUMBER = re.compile(r"\d+(?:\.\d+)?")


	def _rating_0_to_10(value, default: float) -> float:
	    """Return the first number found in *value*, clamped to 0-10.

	    *default* is returned when *value* contains no number (for example "N/A").
	    """
	    found = _FIRST_NUMBER.search(str(value))
	    if found is None:
	        return default
	    return max(0.0, min(10.0, float(found.group())))


	def calculate_resume_score(analysis: "ResumeAnalysis", role: str) -> tuple[float, float, float, float, float]:
	    """Compute the overall resume score and its four component scores.

	    Components:
	        * Experience (0-40): length of the experience summary, full marks at 100+ characters.
	        * Skills (0-30): number of technical skills, full marks at 10+.
	        * Communication (0-20): the 0-10 rating doubled; 5 is assumed when no
	          number can be read from the rating.
	        * Certifications (0-10): one point per certification, capped at 10.
	        * Therapist bonus (0-10, role "therapist" only): mean of the ABA and
	          autism-care ratings. Each rating is clamped to 0-10 so an
	          out-of-range value cannot inflate the bonus.

	    Args:
	        analysis: Extracted resume fields.
	        role: Target job role (compared case-insensitively).

	    Returns:
	        ``(final_score, exp_score, skills_score, comm_score, certs_score)`` as
	        floats; ``final_score`` is capped at 100 and rounded.
	    """
	    # 1. Experience: up to 40 points
	    exp_factor = min(len(analysis.experience_summary or "") / 100.0, 1.0)
	    exp_score = round(exp_factor * 40.0)

	    # 2. Skills: up to 30 points
	    skills_factor = min(len(analysis.technical_skills or []) / 10.0, 1.0)
	    skills_score = round(skills_factor * 30.0)

	    # 3. Communication: up to 20 points
	    comm_val = _rating_0_to_10(analysis.communication_skills, default=5.0)
	    comm_score = round((comm_val / 10.0) * 20.0)

	    # 4. Certifications: up to 10 points
	    certs_score = float(min(len(analysis.certifications or []), 10))

	    # 5. Therapist bonus: up to 10 points, folded into the total only
	    spec_bonus = 0.0
	    if role.lower() == "therapist":
	        aba = _rating_0_to_10(analysis.aba_therapy_skills, default=0.0)
	        autism = _rating_0_to_10(analysis.autism_care_experience_score, default=0.0)
	        spec_bonus = ((aba + autism) / 20.0) * 10.0

	    total_score = exp_score + skills_score + comm_score + certs_score + spec_bonus
	    final_score = round(min(total_score, 100))

	    return (float(final_score), float(exp_score), float(skills_score), float(comm_score), float(certs_score))

	# --- Append to DataFrame ---
	def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, scores: tuple[float, float, float, float, float]):
	    """Append one analyzed resume as a new row of ``st.session_state.analyzed_data``.

	    Args:
	        job_role: Target role the resume was scored against.
	        analysis: Extracted resume fields.
	        scores: ``(final, experience, skills, communication, certifications)``
	            in the order returned by ``calculate_resume_score``.
	    """
	    final_score, exp_score, skills_score, comm_score, certs_score = scores

	    # Fields are read as attributes so the code does not depend on a particular
	    # pydantic serialization method.
	    row = {
	        'Name': analysis.name or "",
	        'Job Role': job_role,
	        'Resume Score (100)': final_score,
	        'Shortlisted': 'No',
	        'Email': analysis.email or "",
	        'Phone': analysis.phone or "",

	        # Per-component scores
	        'Experience Score (40)': exp_score,
	        'Skills Score (30)': skills_score,
	        'Communication Score (20)': comm_score,
	        'Certifications Score (10)': certs_score,

	        'Experience Summary': analysis.experience_summary or "",
	        'Education Summary': analysis.education_summary or "",
	        'Communication Rating (1-10)': str(analysis.communication_skills or "N/A"),
	        'Skills/Technologies': ", ".join(analysis.technical_skills or []),
	        'Certifications': ", ".join(analysis.certifications or []),
	        'ABA Skills (1-10)': str(analysis.aba_therapy_skills or "N/A"),
	        'RBT/BCBA Cert': str(analysis.rbt_bcba_certification or "N/A"),
	        'Autism-Care Exp (1-10)': str(analysis.autism_care_experience_score or "N/A"),
	    }
	    new_df = pd.DataFrame([row])

	    existing = st.session_state.analyzed_data
	    if existing.empty:
	        # Concatenating with an empty frame is deprecated in pandas and would
	        # let the empty frame's object dtype leak into the score columns.
	        # Start from the new row instead, keeping the established column order.
	        ordered = list(existing.columns) + [c for c in new_df.columns if c not in existing.columns]
	        st.session_state.analyzed_data = new_df.reindex(columns=ordered)
	    else:
	        st.session_state.analyzed_data = pd.concat([existing, new_df], ignore_index=True)

	# --- Excel export helper ---
	def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
	    """Serialize *df* to an in-memory .xlsx workbook and return its bytes.

	    The data is written without the index to a single sheet named
	    "Resume Analysis Data" using the openpyxl engine.
	    """
	    buffer = io.BytesIO()
	    with pd.ExcelWriter(buffer, engine="openpyxl") as workbook:
	        df.to_excel(workbook, sheet_name="Resume Analysis Data", index=False)
	    # Read the buffer only after the with-block has closed the writer.
	    return buffer.getvalue()

	# --- UI Layout ---
	st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

	# Two top-level tabs: the public upload panel and the password-gated dashboard.
	_tab_labels = [
	    "👤 Resume Uploader (User Panel)",
	    "🔒 Admin Dashboard (Password Protected)",
	]
	tab_user, tab_admin = st.tabs(_tab_labels)

	# --- User Panel ---
	with tab_user:
	    st.header("Upload Resumes for Analysis")
	    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

	    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
	    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

	    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

	    # The button only raises a flag and reruns; the work itself happens in the
	    # flagged branch below.
	    if st.button("🚀 Analyze All Uploaded Resumes"):
	        if not uploaded_files:
	            st.warning("Please upload one or more resume files to begin analysis.")
	        else:
	            st.session_state.run_analysis = True
	            st.rerun()

	    if st.session_state.get("run_analysis", False):
	        if not uploaded_files:
	            st.warning("No files found. Upload files and try again.")
	            st.session_state.run_analysis = False
	        else:
	            total = len(uploaded_files)
	            progress = st.progress(0)
	            st.session_state.individual_analysis = []
	            with st.spinner("Processing resumes..."):
	                for idx, f in enumerate(uploaded_files, start=1):
	                    try:
	                        st.write(f"Analyzing {f.name}...")
	                        resume_text = extract_text_from_file(f)
	                        if not resume_text:
	                            st.error(f"Could not extract text from {f.name}. Skipping.")
	                            continue

	                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
	                        if analysis.name == "Extraction Failed":
	                            st.error(f"Extraction failed for {f.name}. See debug output.")
	                            continue

	                        scores = calculate_resume_score(analysis, selected_role)
	                        append_analysis_to_dataframe(selected_role, analysis, scores)

	                        st.session_state.individual_analysis.append({
	                            'name': analysis.name,
	                            'score': scores[0],
	                            'role': selected_role,
	                            'file_name': f.name
	                        })
	                    except Exception as e:
	                        st.error(f"Error analyzing {f.name}: {e}")
	                        # st.exception takes the exception object, not a
	                        # formatted traceback string.
	                        st.exception(e)
	                    finally:
	                        # Runs on success, on `continue` and on error, so the
	                        # bar advances exactly once per file.
	                        progress.progress(idx / total)

	            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
	            st.session_state.run_analysis = False

	    # Summary of the most recent batch
	    if st.session_state.individual_analysis:
	        st.subheader("Last Analysis Summary")
	        for item in st.session_state.individual_analysis:
	            st.markdown(f"{item['name']} (for {item['role']}) - Score: {item['score']}/100")
	        st.markdown("---")
	        st.caption("All analyzed data is stored in the Admin Dashboard.")

	# --- Admin Panel ---
	with tab_admin:
	    # Login gate: until the session is authenticated only the login form is
	    # rendered, and the script stops before any dashboard content.
	    if not st.session_state.is_admin_logged_in:
	        st.header("Admin Login")
	        password = st.text_input("Enter Admin Password", type="password")
	        if st.button("🔑 Login"):
	            if password == ADMIN_PASSWORD:
	                st.session_state.is_admin_logged_in = True
	                st.rerun()
	            else:
	                st.error("Incorrect password.")
	        st.stop()

	    st.header("🎯 Recruitment Dashboard")
	    if st.button("🚪 Logout"):
	        st.session_state.is_admin_logged_in = False
	        st.rerun()

	    if st.session_state.analyzed_data.empty:
	        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
	    else:
	        df = st.session_state.analyzed_data.copy()
	        st.subheader("Candidate Data Table")
	        st.success(f"Total Candidates Analyzed: {len(df)}")

	        display_cols = [
	            'Name',
	            'Job Role',
	            'Resume Score (100)',
	            'Experience Score (40)',
	            'Skills Score (30)',
	            'Communication Score (20)',
	            'Certifications Score (10)',
	            'Shortlisted',
	            'Email',
	            'Skills/Technologies'
	        ]
	        # Safety check: show only the columns present in the current dataframe.
	        current_display_cols = [col for col in display_cols if col in df.columns]

	        # Score column -> (header label, maximum of the progress bar)
	        score_bars = {
	            "Resume Score (100)": ("Total Score", 100),
	            "Experience Score (40)": ("Experience (40)", 40),
	            "Skills Score (30)": ("Skills (30)", 30),
	            "Communication Score (20)": ("Comms (20)", 20),
	            "Certifications Score (10)": ("Certs (10)", 10),
	        }
	        column_config = {
	            "Shortlisted": st.column_config.SelectboxColumn(
	                "Shortlisted",
	                help="Mark the candidate as Shortlisted or Rejected.",
	                options=["No", "Yes"],
	                required=True
	            ),
	        }
	        for score_col, (label, max_value) in score_bars.items():
	            column_config[score_col] = st.column_config.ProgressColumn(
	                label,
	                # Scores are whole numbers; "%f" would render six decimals.
	                format="%.0f",
	                min_value=0, max_value=max_value,
	            )

	        edited_df = st.data_editor(
	            df[current_display_cols],
	            column_config=column_config,
	            # Only 'Shortlisted' is written back below, so every other column
	            # is locked instead of accepting edits that would be discarded.
	            disabled=[col for col in current_display_cols if col != "Shortlisted"],
	            key="dashboard_editor",
	            hide_index=True
	        )

	        # Write the edited 'Shortlisted' values back to the master table by
	        # position (the editor keeps the row count fixed).
	        if "Shortlisted" in edited_df.columns:
	            master = st.session_state.analyzed_data
	            new_flags = edited_df["Shortlisted"].tolist()
	            if len(new_flags) == len(master) and new_flags != master["Shortlisted"].tolist():
	                master["Shortlisted"] = new_flags

	        st.markdown("---")
	        st.subheader("📥 Download Data")
	        df_export = st.session_state.analyzed_data.copy()
	        excel_bytes = df_to_excel_bytes(df_export)

	        st.download_button(
	            label="💾 Download All Data as Excel (.xlsx)",
	            data=excel_bytes,
	            file_name="quantum_scrutiny_report.xlsx",
	            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	            help="Downloads the full table including all extracted fields and shortlist status."
	        )

	# --- End of file ---