meesamraza's picture
Update app.py
b9548de verified
raw
history blame
11.6 kB
# app.py
"""
Quantum Scrutiny Platform — Groq-Powered Resume Analyzer
Fully updated + cleaned single-file Streamlit application
"""
import os
import io
import re
import json
import base64
import traceback
from typing import Optional, List
# Env
from dotenv import load_dotenv
load_dotenv()
import streamlit as st
import pandas as pd
# File parsing
import fitz # PyMuPDF
from docx import Document
# Groq client
from groq import Groq
# Validation
from pydantic import BaseModel, Field, ValidationError
# ---------------------------------------------------------
# Page config
# ---------------------------------------------------------
st.set_page_config(
    page_title="Quantum Scrutiny Platform",
    layout="wide"
)

# ---------------------------------------------------------
# Secrets
# ---------------------------------------------------------
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): the "admin" fallback is a weak default suitable for demos
# only — set ADMIN_PASSWORD in the environment for any real deployment.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Initialized lazily: stays None when no key is configured so the rest of
# the app can degrade gracefully instead of crashing at import time.
groq_client = None
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
else:
    st.warning("GROQ_API_KEY not found — model calls disabled.")
# ---------------------------------------------------------
# Session State
# ---------------------------------------------------------
# Columns of the accumulated results table shown on the admin dashboard.
_RESULT_COLUMNS = [
    'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
    'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
    'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
    'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
]

# Seed session-state keys once per browser session; reruns keep the values.
for _key, _default in (
    ('is_admin_logged_in', False),
    ('run_analysis', False),
    ('individual_analysis', []),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
if 'analyzed_data' not in st.session_state:
    st.session_state.analyzed_data = pd.DataFrame(columns=_RESULT_COLUMNS)
# ---------------------------------------------------------
# Pydantic Schema
# ---------------------------------------------------------
class ResumeAnalysis(BaseModel):
    """Validated shape of the JSON object the Groq model is asked to return.

    Every field has a safe default so a partially-filled model response
    still validates instead of failing the whole analysis.
    """
    name: str = Field(default="Unknown")
    email: str = Field(default="")
    phone: str = Field(default="")
    certifications: List[str] = Field(default_factory=list)
    experience_summary: str = Field(default="")
    education_summary: str = Field(default="")
    communication_skills: str = Field(default="N/A")  # free-form; a rating is parsed out downstream
    technical_skills: List[str] = Field(default_factory=list)
    # Therapist-role-only fields; the prompt sets these to "N/A" otherwise.
    aba_therapy_skills: Optional[str] = Field(default="N/A")
    rbt_bcba_certification: Optional[str] = Field(default="N/A")
    autism_care_experience_score: Optional[str] = Field(default="N/A")
# ---------------------------------------------------------
# Text Extraction
# ---------------------------------------------------------
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or plain-text file.

    Detection order: PDF (by extension or the %PDF- magic bytes), DOCX
    (by extension), then a UTF-8 decode fallback with bad bytes ignored.
    Returns "" on any failure so callers can skip unreadable files
    instead of crashing mid-batch.
    """
    try:
        content = uploaded_file.read()
        name = uploaded_file.name.lower()

        # PDF: trust either the extension or the file's magic number.
        if name.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "".join(page.get_text() for page in doc).strip()
            except Exception:
                return ""

        # DOCX: paragraph text only (tables/headers are not extracted).
        if name.endswith(".docx"):
            try:
                doc = Document(io.BytesIO(content))
                return "\n".join(p.text for p in doc.paragraphs).strip()
            except Exception:
                return ""

        # Fallback: treat anything else as UTF-8 text.
        return content.decode("utf-8", errors="ignore")
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still propagate.
        return ""
# ---------------------------------------------------------
# Groq Streaming Wrapper
# ---------------------------------------------------------
def call_groq_stream_collect(prompt: str) -> Optional[str]:
    """Send *prompt* to the Groq chat API and collect the streamed reply.

    Returns the full concatenated completion text, or None when the client
    is unavailable or the API call fails (an error is surfaced in the UI).
    """
    if not groq_client:
        st.error("Groq client not initialized.")
        return None
    try:
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": "You are an AI resume analyzer."},
                {"role": "user", "content": prompt}
            ],
            stream=True,
            temperature=0.0,  # deterministic output helps JSON parsing & caching
            max_completion_tokens=2048
        )
        pieces = []
        for chunk in completion:
            # Narrowed from a bare `except:`; a malformed chunk is skipped,
            # not allowed to abort the whole stream.
            try:
                delta = getattr(chunk.choices[0].delta, "content", None)
                if delta:
                    pieces.append(delta)
            except Exception:
                pass
        return "".join(pieces)
    except Exception as e:
        st.error(f"Groq API error: {e}")
        return None
# ---------------------------------------------------------
# JSON Extraction
# ---------------------------------------------------------
def extract_first_json(text: str):
    """Extract and parse the first {...} span found in *text*.

    The model is asked to return only JSON, but may wrap it in prose or
    emit single-quoted pseudo-JSON. Returns the parsed object, or None
    when nothing parseable can be recovered.
    """
    if not text:
        return None
    # Greedy match from the first "{" to the last "}" in the response.
    match = re.search(r"\{[\s\S]*\}", text)
    if not match:
        return None
    raw_json = match.group(0)
    try:
        return json.loads(raw_json)
    except ValueError:  # json.JSONDecodeError subclasses ValueError
        # Last-ditch repair for single-quoted output; breaks on apostrophes
        # inside values, so it is strictly a fallback.
        try:
            return json.loads(raw_json.replace("'", '"'))
        except ValueError:
            return None
# ---------------------------------------------------------
# Cached Analysis
# ---------------------------------------------------------
@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Run the Groq extraction prompt for one resume and validate the result.

    Cached by (resume_text, job_role) so re-analyzing the same file is free.
    Returns a ResumeAnalysis with name="Extraction Failed" when the model
    output cannot be parsed or validated, rather than raising.
    """
    # Therapist roles get extra clinical fields; everything else gets "N/A".
    therapist_instruction = (
        "If role is Therapist, extract ABA skills, BCBA/RBT, and Autism-care scores."
        if job_role.lower() == "therapist" else
        "For non-therapist roles, set therapist fields to 'N/A'."
    )
    prompt = f"""
Return a JSON object with keys:
name, email, phone, certifications, experience_summary,
education_summary, communication_skills, technical_skills,
aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score.
{therapist_instruction}
Resume Text:
{resume_text}
Return only JSON.
"""
    raw = call_groq_stream_collect(prompt)
    parsed = extract_first_json(raw)
    if not parsed:
        return ResumeAnalysis(name="Extraction Failed")
    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError:
        # Narrowed from a bare `except:` — only schema failures are expected here.
        return ResumeAnalysis(name="Extraction Failed")
# ---------------------------------------------------------
# Scoring
# ---------------------------------------------------------
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    """Heuristic 0-100 resume score.

    Weights: experience-summary length 40, technical-skill count 30,
    communication rating 20, certifications up to 10, plus a therapist
    bonus of up to 10. The total is clamped to 100.
    """
    score = 0
    # Experience length (40): saturates at 100 characters of summary.
    score += min(len(analysis.experience_summary) / 100, 1) * 40
    # Skills count (30): saturates at 10 listed skills.
    score += min(len(analysis.technical_skills) / 10, 1) * 30
    # Communication (20): first number found in the free-form rating; 5 if none.
    try:
        c = float(re.findall(r"\d+", analysis.communication_skills)[0])
    except (IndexError, ValueError, TypeError):
        c = 5
    score += (min(c, 10) / 10) * 20
    # Certifications (10): one point each, capped at 10.
    score += min(len(analysis.certifications), 10)
    # Therapist bonus (10): fields may be "N/A" or None, hence the broad-ish guard.
    if role.lower() == "therapist":
        try:
            aba = float(re.findall(r"\d+", analysis.aba_therapy_skills)[0])
            autism = float(re.findall(r"\d+", analysis.autism_care_experience_score)[0])
            score += ((aba + autism) / 20) * 10
        except (IndexError, ValueError, TypeError):
            pass
    return float(round(min(score, 100)))
# ---------------------------------------------------------
# Add Row
# ---------------------------------------------------------
def append_analysis_to_dataframe(role, analysis: ResumeAnalysis, score: float):
    """Append one analyzed resume as a new row of the session results table."""
    row = [
        analysis.name,
        role,
        score,
        analysis.email,
        analysis.phone,
        "No",  # Shortlisted flag always starts unset
        analysis.experience_summary,
        analysis.education_summary,
        analysis.communication_skills,
        ", ".join(analysis.technical_skills),
        ", ".join(analysis.certifications),
        analysis.aba_therapy_skills,
        analysis.rbt_bcba_certification,
        analysis.autism_care_experience_score,
    ]
    df = st.session_state.analyzed_data
    df.loc[len(df)] = row
    st.session_state.analyzed_data = df
# ---------------------------------------------------------
# Excel Export
# ---------------------------------------------------------
def df_to_excel_bytes(df):
    """Serialize *df* to an in-memory .xlsx workbook and return its bytes."""
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis")
    return buffer.getvalue()
# ---------------------------------------------------------
# UI
# ---------------------------------------------------------
# Page title and top-level tabs. Labels previously contained mojibake
# (cp1252-decoded UTF-8); restored to the intended emoji/em-dash.
st.title("🌌 Quantum Scrutiny Platform — AI Resume Analyzer")
tab_user, tab_admin = st.tabs([
    "👀 User Resume Panel",
    "🔒 Admin Dashboard"
])
# ---------------------------------------------------------
# USER PANEL
# ---------------------------------------------------------
with tab_user:
    st.header("Upload Resumes")
    job_role = st.selectbox(
        "Select Job Role",
        ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    )
    files = st.file_uploader(
        "Upload PDF or DOCX",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )
    # Two-phase flow: the button only arms `run_analysis` and reruns; the
    # actual work happens on the next script run so the progress UI renders
    # in a clean pass. (Button label mojibake restored to the rocket emoji.)
    if st.button("🚀 Analyze All"):
        if not files:
            st.warning("Upload at least one file.")
        else:
            st.session_state.run_analysis = True
            st.rerun()
    if st.session_state.run_analysis:
        if not files:
            st.error("No files found.")
            st.session_state.run_analysis = False
        else:
            total = len(files)
            progress = st.progress(0)
            for i, f in enumerate(files, 1):
                st.write(f"Analyzing **{f.name}**...")
                text = extract_text_from_file(f)
                if not text:
                    # Unreadable file: report, advance the bar, move on.
                    st.error(f"Could not extract text from {f.name}. Skipped.")
                    progress.progress(i / total)
                    continue
                analysis = analyze_resume_with_groq_cached(text, job_role)
                score = calculate_resume_score(analysis, job_role)
                append_analysis_to_dataframe(job_role, analysis, score)
                progress.progress(i / total)
            st.success("All files processed!")
            st.session_state.run_analysis = False
# ---------------------------------------------------------
# ADMIN PANEL
# ---------------------------------------------------------
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        pwd = st.text_input("Admin Password", type="password")
        if st.button("Login"):
            if pwd == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
    else:
        st.subheader("Admin Dashboard — Analyzed Data")
        df = st.session_state.analyzed_data
        st.dataframe(df, use_container_width=True)
        # BUG FIX: st.download_button was previously nested inside an
        # st.button("Download Excel") branch. A widget rendered only inside
        # a button's one-rerun True branch disappears on the next rerun, so
        # the download could never complete. Render it directly instead.
        st.download_button(
            label="Download Excel",
            data=df_to_excel_bytes(df),
            file_name="resume_analysis.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
        if st.button("Clear Database"):
            # Keep the schema, drop all rows.
            st.session_state.analyzed_data = st.session_state.analyzed_data.iloc[0:0]
            st.success("Cleared.")