Spaces:

sharadrajore
/

Resume-Screening

Runtime error

App Files Files Community

Resume-Screening / app.py

sharadrajore

Update app.py

206e7ab verified over 1 year ago

raw

history blame contribute delete

27.4 kB

	import streamlit as st
	import os
	from dotenv import load_dotenv
	from langchain_aws import BedrockEmbeddings, BedrockLLM
	import boto3
	from langchain_core.prompts import PromptTemplate
	import docx
	import zipfile
	import PyPDF2
	import io
	from typing import List, Dict
	import pandas as pd
	from io import BytesIO
	from pathlib import Path

	def extract_text_from_file(file_content: bytes, file_extension: str) -> str:
	    """Extract plain text from the raw bytes of a resume file.

	    Args:
	        file_content: Raw bytes of the file.
	        file_extension: Extension including the leading dot (e.g. ``.pdf``).
	            Compared case-insensitively.

	    Returns:
	        The extracted text. If parsing fails part-way, whatever was
	        extracted before the failure is returned (possibly ``""``).
	        Unsupported extensions yield ``""``.
	    """
	    extension = file_extension.lower()
	    parts = []
	    try:
	        if extension == '.pdf':
	            pdf_reader = PyPDF2.PdfReader(BytesIO(file_content))
	            for page in pdf_reader.pages:
	                # Guard against a page yielding no text (None) so one
	                # image-only page does not abort the whole document.
	                parts.append((page.extract_text() or "") + "\n")
	        elif extension in ['.docx', '.doc']:
	            # NOTE(review): legacy binary .doc is handed to python-docx as-is;
	            # if it cannot be parsed the error is reported below.
	            doc = docx.Document(BytesIO(file_content))
	            for paragraph in doc.paragraphs:
	                parts.append(paragraph.text + "\n")
	        elif extension == '.txt':
	            # Replace undecodable bytes instead of discarding the whole file.
	            parts.append(file_content.decode('utf-8', errors='replace'))
	    except Exception as e:
	        print(f"Error extracting text from {file_extension} file: {str(e)}")
	    return "".join(parts)

	def process_zip_file(zip_content: bytes, blacklist: set) -> List[dict]:
	    """Extract resume text from every supported file inside a ZIP archive.

	    Args:
	        zip_content: Raw bytes of the ZIP archive.
	        blacklist: Lowercase company names; resumes mentioning any are skipped.

	    Returns:
	        One dict per accepted member with keys ``id``, ``name`` (the member
	        path inside the archive) and ``content`` (the extracted text).

	    Raises:
	        zipfile.BadZipFile: If ``zip_content`` is not a valid ZIP archive.
	    """
	    supported_extensions = ('.txt', '.docx', '.doc', '.pdf')
	    processed_files = []

	    with zipfile.ZipFile(BytesIO(zip_content)) as z:
	        for zip_filename in z.namelist():
	            # Compare the lowercased suffix so "CV.PDF" is not silently dropped.
	            file_extension = Path(zip_filename).suffix.lower()
	            if file_extension not in supported_extensions:
	                continue

	            try:
	                with z.open(zip_filename) as f:
	                    file_content = f.read()
	                text = extract_text_from_file(file_content, file_extension)

	                if not text:
	                    print(f"Skipping {zip_filename} from ZIP - no text could be extracted")
	                elif check_for_blacklisted_companies(text, blacklist):
	                    print(f"Skipping {zip_filename} from ZIP - contains blacklisted company")
	                else:
	                    processed_files.append({
	                        "id": f"{zip_filename}_{hash(text)}",
	                        "name": zip_filename,
	                        "content": text
	                    })
	            except Exception as e:
	                # One unreadable member must not abort the rest of the archive.
	                print(f"Error processing {zip_filename} from ZIP: {str(e)}")

	    return processed_files


	def load_blacklist() -> set:
	    """Read ``blacklist.txt`` from the working directory.

	    Returns:
	        The set of non-blank lines, stripped and lowercased. When the file
	        does not exist an empty one is created and an empty set is returned.
	    """
	    blacklist_path = 'blacklist.txt'
	    try:
	        with open(blacklist_path, 'r', encoding='utf-8') as handle:
	            entries = set()
	            for raw_line in handle:
	                cleaned = raw_line.strip()
	                if cleaned:
	                    entries.add(cleaned.lower())
	            return entries
	    except FileNotFoundError:
	        print("Warning: blacklist.txt not found. Creating empty blacklist.")
	        # Opening for writing is enough to leave an empty file behind.
	        with open(blacklist_path, 'w', encoding='utf-8'):
	            pass
	        return set()

	def check_for_blacklisted_companies(text: str, blacklist: set) -> bool:
	    """Report whether the text mentions any blacklisted company.

	    Matching is a plain substring test against the lowercased text, so
	    blacklist entries are expected to be lowercase already.

	    Args:
	        text: The text to scan.
	        blacklist: Set of blacklisted company names.

	    Returns:
	        True if at least one entry occurs in the text, False otherwise
	        (including when either argument is empty).
	    """
	    if not (text and blacklist):
	        return False

	    haystack = text.lower()
	    for company in blacklist:
	        if company in haystack:
	            return True
	    return False



	def _unique_destination(folder: Path, filename: str) -> Path:
	    """Return a path inside *folder* for *filename* that does not exist yet.

	    Collisions are resolved by appending ``_1``, ``_2``, ... to the original
	    stem (never to an already-suffixed name).
	    """
	    stem = Path(filename).stem
	    suffix = Path(filename).suffix
	    candidate = folder / filename
	    counter = 1
	    while candidate.exists():
	        candidate = folder / f"{stem}_{counter}{suffix}"
	        counter += 1
	    return candidate


	def save_uploaded_resumes(uploaded_files):
	    """Save uploaded resume files to the ``Docs`` folder.

	    Files whose text cannot be extracted or that mention a blacklisted
	    company are skipped. ZIP uploads are expanded: each accepted member is
	    stored as a ``.txt`` file holding its extracted text.

	    Args:
	        uploaded_files: Iterable of upload objects exposing ``name`` and
	            ``read()`` (as returned by ``st.file_uploader``).

	    Returns:
	        List of file names actually written to the ``Docs`` folder.
	    """
	    docs_folder = Path("Docs")
	    docs_folder.mkdir(exist_ok=True)
	    blacklist = load_blacklist()

	    saved_files = []
	    for uploaded_file in uploaded_files:
	        try:
	            content = uploaded_file.read()
	            file_extension = Path(uploaded_file.name).suffix.lower()

	            # Handle ZIP files
	            if file_extension == '.zip':
	                for processed_file in process_zip_file(content, blacklist):
	                    # Only the extracted text is kept, so store it as .txt;
	                    # keeping the .pdf/.docx name would make later parsing fail.
	                    # Using just the stem also drops any directory part of the
	                    # member path, so nothing can be written outside Docs.
	                    text_name = f"{Path(processed_file['name']).stem}.txt"
	                    file_path = _unique_destination(docs_folder, text_name)
	                    file_path.write_bytes(processed_file["content"].encode('utf-8'))
	                    saved_files.append(file_path.name)
	                continue

	            # Handle individual files
	            text = extract_text_from_file(content, file_extension)
	            if not text:
	                print(f"Skipping {uploaded_file.name} - no text could be extracted")
	            elif check_for_blacklisted_companies(text, blacklist):
	                print(f"Skipping {uploaded_file.name} - contains blacklisted company")
	            else:
	                file_path = _unique_destination(docs_folder, Path(uploaded_file.name).name)
	                # The bytes were already read above; no need to rewind the upload.
	                file_path.write_bytes(content)
	                saved_files.append(file_path.name)

	        except Exception as e:
	            # Best effort: one bad upload must not block the others.
	            print(f"Error processing {uploaded_file.name}: {str(e)}")

	    return saved_files

	def upload_section():
	    """Render the resume upload widget and save the files on request."""
	    st.subheader("Upload Resumes")
	    uploaded_files = st.file_uploader(
	        "Upload one or more resumes",
	        type=['pdf', 'docx', 'doc', 'txt', 'zip'],
	        accept_multiple_files=True
	    )

	    # The button is only shown once something has been uploaded.
	    if not uploaded_files:
	        return
	    if not st.button("Process Uploaded Resumes"):
	        return

	    saved_files = save_uploaded_resumes(uploaded_files)
	    if not saved_files:
	        st.warning("No files were saved. They may contain blacklisted content")
	        return

	    st.success(f"Successfully saved {len(saved_files)} files to Docs folder")
	    st.write("Saved files:", ", ".join(saved_files))
	    has_zip_upload = False
	    for upload in uploaded_files:
	        if upload.name.endswith('.zip'):
	            has_zip_upload = True
	            break
	    if has_zip_upload:
	        st.info("ZIP files were processed and their contents were extracted")


	def create_aws_client():
	    """Build a ``bedrock-runtime`` boto3 client from environment settings.

	    Reads ``ACCESS_KEY``, ``SECRET_ACCESS_KEY`` and ``REGION`` from the
	    environment; unset variables are passed through as ``None``.
	    """
	    connection_settings = {
	        'region_name': os.getenv('REGION'),
	        'aws_access_key_id': os.getenv('ACCESS_KEY'),
	        'aws_secret_access_key': os.getenv('SECRET_ACCESS_KEY'),
	    }
	    return boto3.client('bedrock-runtime', **connection_settings)

	def process_docs_folder(folder_path: str) -> List[dict]:
	    """Extract text from every document in the specified folder.

	    Sub-directories are ignored. ZIP archives are expanded via
	    ``process_zip_file``. Files with no extractable text or mentioning a
	    blacklisted company are skipped.

	    Args:
	        folder_path: Directory containing the resumes.

	    Returns:
	        One dict per accepted document with keys ``id``, ``name`` and
	        ``content``, in sorted file-name order.

	    Raises:
	        Exception: If the folder is missing or cannot be listed; the
	            underlying error is chained as the cause.
	    """
	    processed_files = []
	    blacklist = load_blacklist()

	    try:
	        if not os.path.exists(folder_path):
	            raise Exception(f"Folder not found: {folder_path}")

	        # os.listdir order is arbitrary; sort so repeated runs agree.
	        for filename in sorted(os.listdir(folder_path)):
	            file_path = os.path.join(folder_path, filename)

	            if not os.path.isfile(file_path):
	                continue

	            file_extension = Path(filename).suffix.lower()

	            try:
	                with open(file_path, 'rb') as file:
	                    content = file.read()

	                if file_extension == '.zip':
	                    processed_files.extend(process_zip_file(content, blacklist))
	                    continue

	                text = extract_text_from_file(content, file_extension)
	                if not text:
	                    print(f"Skipping {filename} - no text could be extracted")
	                elif check_for_blacklisted_companies(text, blacklist):
	                    print(f"Skipping {filename} - contains blacklisted company")
	                else:
	                    processed_files.append({
	                        "id": f"{filename}_{hash(text)}",
	                        "name": filename,
	                        "content": text
	                    })

	            except Exception as e:
	                # Keep going: one unreadable file must not hide the others.
	                print(f"Error processing {filename}: {str(e)}")

	    except Exception as e:
	        raise Exception(f"Error accessing docs folder: {str(e)}") from e

	    return processed_files

	def check_resume_relevance(job_desc: str, resume_content: str, required_skills: List[str], client) -> dict:
	    """Check which required skills a resume covers.

	    A case-insensitive substring search finds skills named verbatim in the
	    resume. If any are found, the score is based on those direct hits and
	    the model is only asked for a descriptive breakdown. If none are found,
	    the model is asked for related skills and any required skill named in
	    its answer counts as found.

	    Args:
	        job_desc: Additional requirements text. Currently not used here.
	        resume_content: Plain text of the resume.
	        required_skills: Skills to look for.
	        client: Bedrock runtime client passed to ``BedrockLLM``.

	    Returns:
	        Dict with keys ``is_relevant`` (bool), ``score`` (0-100),
	        ``found_skills`` (list), ``total_skills`` (int) and ``key_matches``
	        (model answer or a fallback description).
	    """
	    total_skills = len(required_skills)
	    if total_skills == 0:
	        # Nothing to match against: skip the model call entirely.
	        return {
	            "is_relevant": False,
	            "score": 0,
	            "found_skills": [],
	            "total_skills": 0,
	            "key_matches": "No required skills provided"
	        }

	    llm = BedrockLLM(
	        model_id="amazon.titan-text-lite-v1",
	        client=client
	    )
	    skills_bullets = "\n".join([f"- {skill}" for skill in required_skills])

	    # First, do a direct text search for skills
	    resume_lower = resume_content.lower()
	    found_skills_direct = [
	        skill for skill in required_skills if skill.lower() in resume_lower
	    ]

	    # If we found any skills directly, proceed with detailed analysis
	    if found_skills_direct:
	        relevance_prompt = PromptTemplate.from_template("""
	Analyze this resume for the following skills. Be lenient in matching skills.

	Required Skills to Check:
	{skills}

	Resume Content:
	{resume}

	For each skill, determine:
	1. If it's present (including variations and related technologies)
	2. The experience level with the skill
	3. How recently it was used

	Respond in this format:
	{{
	"skills_found": [
	List of skills found (including variations)
	],
	"match_percentage": Percentage of required skills found (0-100),
	"skill_details": {{
	"skill_name": {{
	"found": true/false,
	"experience": "description of experience",
	"evidence": "where found in resume"
	}}
	}}
	}}

	Be generous in skill matching. If you find related technologies or variations, count them as matches.
	""")

	        message = relevance_prompt.format(
	            skills=skills_bullets,
	            resume=resume_content
	        )
	        # The score comes from the direct hits; the model answer is kept
	        # only as descriptive text.
	        match_score = (len(found_skills_direct) / total_skills) * 100

	        try:
	            response = llm.invoke(message)
	        except Exception as e:
	            print(f"Error in LLM analysis: {e}")
	            # Fall back to direct matching results
	            return {
	                "is_relevant": True,  # at least one skill was found directly
	                "score": match_score,
	                "found_skills": found_skills_direct,
	                "total_skills": total_skills,
	                "key_matches": f"Skills found through direct matching: {', '.join(found_skills_direct)}"
	            }

	        return {
	            "is_relevant": match_score >= 50,  # lenient threshold for relevance
	            "score": match_score,
	            "found_skills": found_skills_direct,
	            "total_skills": total_skills,
	            "key_matches": response
	        }

	    # If no direct matches, do a more lenient check with LLM
	    lenient_prompt = PromptTemplate.from_template("""
	Analyze this resume for skills related to or equivalent to:
	{skills}

	Consider variations and related technologies.

	Resume Content:
	{resume}

	List any matches found, including:
	1. Direct matches
	2. Related technologies
	3. Equivalent skills

	Respond with found matches only.
	""")

	    message = lenient_prompt.format(
	        skills=skills_bullets,
	        resume=resume_content
	    )

	    try:
	        response = llm.invoke(message)
	        # A required skill counts as found when the answer mentions it.
	        response_lower = response.lower()
	        found_skills = [
	            skill for skill in required_skills if skill.lower() in response_lower
	        ]
	        match_score = (len(found_skills) / total_skills) * 100
	        return {
	            "is_relevant": len(found_skills) > 0,  # relevant if any skill found
	            "score": match_score,
	            "found_skills": found_skills,
	            "total_skills": total_skills,
	            "key_matches": response
	        }
	    except Exception as e:
	        print(f"Error in lenient LLM analysis: {e}")
	        return {
	            "is_relevant": False,
	            "score": 0,
	            "found_skills": [],
	            "total_skills": total_skills,
	            "key_matches": "Error in analysis"
	        }

	def get_summary_from_llm(job_desc: str, resume_content: str, required_skills: List[str], client) -> str:
	    """Ask the model for a structured write-up of a resume versus the requirements.

	    Args:
	        job_desc: Free-text additional requirements inserted into the prompt.
	        resume_content: Plain text of the resume.
	        required_skills: Skills listed in the prompt and in the per-skill
	            analysis skeleton.
	        client: Bedrock runtime client passed to ``BedrockLLM``.

	    Returns:
	        The model's answer, or ``"Error generating analysis: ..."`` if the
	        model call raises.
	    """
	    llm = BedrockLLM(
	        model_id="amazon.titan-text-lite-v1",
	        client=client
	    )

	    map_prompt_template = PromptTemplate.from_template("""
	Provide a detailed analysis of this resume against the job requirements.

	Required Skills:
	{skills}

	Additional Requirements:
	{job_desc}

	Resume Content:
	{resume_details}

	Provide analysis in this format:

	## Skills Analysis
	### Required Skills Match
	{skills_analysis}

	### Technical Proficiency
	- For each required skill:
	* Experience level
	* Years of usage
	* Recent projects

	### Additional Technical Skills
	- Only list relevant additional skills

	## Experience Analysis
	- Total years of relevant experience
	- Key projects using multiple required skills
	- Notable achievements with required technologies

	## Overall Assessment
	- Skills Match Score: X/Y required skills found
	- Technical Proficiency Score: (0-100)
	- Experience Level Match: (Junior/Mid/Senior)

	## Recommendation
	- Hiring Decision: (Strong Match/Potential Match/Not Recommended)
	- Key Strengths: (list top 3)
	- Areas to Verify: (list specific areas)

	Focus only on exact matches and verifiable experience.
	""")

	    # Build both bullet lists in one pass over the skills.
	    skill_lines = []
	    analysis_lines = []
	    for skill in required_skills:
	        skill_lines.append(f"- {skill}")
	        analysis_lines.append(f"- {skill}: Found/Not Found, Experience Level, Evidence")

	    message = map_prompt_template.format(
	        skills="\n".join(skill_lines),
	        job_desc=job_desc,
	        resume_details=resume_content,
	        skills_analysis="\n".join(analysis_lines)
	    )

	    try:
	        return llm.invoke(message)
	    except Exception as e:
	        return f"Error generating analysis: {str(e)}"

	def _excel_column_letter(index: int) -> str:
	    """Convert a zero-based column index to its sheet letter (0 -> A, 26 -> AA)."""
	    letters = ""
	    number = index + 1
	    while number > 0:
	        number, remainder = divmod(number - 1, 26)
	        letters = chr(65 + remainder) + letters
	    return letters


	def export_to_excel(matches: List[dict], required_skills: List[str]) -> BytesIO:
	    """Create an Excel report from the matches.

	    Each match becomes one row with summary columns followed by one
	    check/cross column per required skill.

	    Args:
	        matches: Match dicts; ``name`` is required, ``match_score`` and
	            ``found_skills`` are optional.
	        required_skills: Skills that were screened for.

	    Returns:
	        In-memory ``.xlsx`` workbook, positioned at the start of the buffer.
	    """
	    # Prepare data for Excel
	    excel_data = []

	    for match in matches:
	        found_skills = match.get('found_skills', [])

	        row_data = {
	            'Candidate Name': match['name'],
	            'Match Score': f"{match.get('match_score', 0):.1f}%",
	            'Skills Found': ', '.join(found_skills),
	            'Missing Skills': ', '.join([skill for skill in required_skills if skill not in found_skills]),
	            'Total Skills Found': len(found_skills),
	            'Total Required Skills': len(required_skills)
	        }

	        # Add individual skill columns
	        for skill in required_skills:
	            row_data[f'Skill - {skill}'] = '✓' if skill in found_skills else '✗'

	        excel_data.append(row_data)

	    df = pd.DataFrame(excel_data)

	    output = BytesIO()
	    with pd.ExcelWriter(output, engine='openpyxl') as writer:
	        df.to_excel(writer, index=False, sheet_name='Resume Matches')
	        worksheet = writer.sheets['Resume Matches']

	        # Size each column to its longest cell or header, capped at 50.
	        for idx, col in enumerate(df.columns):
	            max_length = max(
	                df[col].astype(str).apply(len).max(),
	                len(col)
	            )
	            # chr(65 + idx) breaks past column Z; use real sheet letters.
	            column_letter = _excel_column_letter(idx)
	            worksheet.column_dimensions[column_letter].width = min(int(max_length) + 2, 50)

	    # Rewind so callers can read() as well as getvalue().
	    output.seek(0)
	    return output

	_RESULTS_KEY = "screening_results"


	def _screen_resumes(processed_files, additional_reqs, required_skills, client):
	    """Score every resume and return those with any matched skill, best first."""
	    matches = []
	    progress_bar = st.progress(0)
	    total = len(processed_files)

	    for position, file_data in enumerate(processed_files, start=1):
	        progress_bar.progress(position / total)

	        relevance = check_resume_relevance(
	            additional_reqs,
	            file_data['content'],
	            required_skills,
	            client
	        )

	        if relevance['found_skills']:  # Keep if any skills found
	            matches.append({
	                **file_data,
	                "match_score": relevance['score'],
	                "found_skills": relevance['found_skills'],
	                "total_skills": relevance['total_skills'],
	                "key_matches": relevance['key_matches']
	            })

	    progress_bar.empty()
	    matches.sort(key=lambda m: m['match_score'], reverse=True)
	    return matches


	def _render_results(results, client):
	    """Render filters, match cards, export and statistics for stored results."""
	    matches = results["matches"]
	    required_skills = results["required_skills"]
	    additional_reqs = results["additional_reqs"]
	    analyses = results["analyses"]

	    if not matches:
	        st.warning(
	            "No profiles found matching the required skills. "
	            "Try adjusting the requirements or adding more resumes."
	        )
	        return

	    st.success(f"Found {len(matches)} profiles with matching skills")

	    # Create columns for filters
	    col1, col2 = st.columns(2)
	    with col1:
	        min_score = st.slider(
	            "Minimum Match Score",
	            min_value=0,
	            max_value=100,
	            value=50,
	            step=5
	        )
	    with col2:
	        min_skills = st.slider(
	            "Minimum Required Skills",
	            min_value=0,
	            max_value=len(required_skills),
	            value=1,
	            step=1
	        )

	    # Filter matches based on criteria
	    filtered_matches = [
	        match for match in matches
	        if match['match_score'] >= min_score and
	        len(match['found_skills']) >= min_skills
	    ]

	    st.subheader(f"Showing {len(filtered_matches)} matches meeting criteria")

	    if not filtered_matches:
	        # Averages below would divide by zero; there is nothing to export either.
	        st.info("No matches meet the current filters. Lower the thresholds to see results.")
	        return

	    # Display matches
	    for idx, match in enumerate(filtered_matches):
	        with st.container():
	            st.markdown("---")
	            col1, col2 = st.columns([1, 3])

	            with col1:
	                st.subheader(f"Match #{idx + 1}")
	                st.write(f"📄 {match['name']}")
	                st.write(f"Match Score: {match['match_score']:.1f}%")

	                # Display skills breakdown
	                st.write("Skills Found:")
	                found_skills = match.get('found_skills', [])
	                for skill in required_skills:
	                    if skill in found_skills:
	                        st.write(f"✅ {skill}")
	                    else:
	                        st.write(f"❌ {skill}")

	            with col2:
	                with st.expander("Show Detailed Analysis"):
	                    # Reuse an earlier answer so moving a slider does not
	                    # trigger a fresh model call for every match.
	                    cache_key = match.get('id', match['name'])
	                    analysis = analyses.get(cache_key)
	                    if analysis is None:
	                        analysis = get_summary_from_llm(
	                            additional_reqs,
	                            match['content'],
	                            required_skills,
	                            client
	                        )
	                        # Do not cache failures; retry them on the next render.
	                        if not str(analysis).startswith("Error generating analysis:"):
	                            analyses[cache_key] = analysis
	                    st.markdown(analysis)

	    # Add export section
	    st.markdown("---")
	    st.subheader("Export Results")

	    excel_output = export_to_excel(filtered_matches, required_skills)

	    st.download_button(
	        label=f"📥 Download Excel Report ({len(filtered_matches)} matches)",
	        data=excel_output.getvalue(),
	        file_name="resume_matches.xlsx",
	        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	    )

	    # Display summary statistics
	    st.markdown("---")
	    st.subheader("Summary Statistics")
	    col1, col2, col3, col4 = st.columns(4)
	    shown = len(filtered_matches)

	    with col1:
	        st.metric("Total Matches", shown)
	    with col2:
	        avg_score = sum(match['match_score'] for match in filtered_matches) / shown
	        st.metric("Average Match Score", f"{avg_score:.1f}%")
	    with col3:
	        perfect_matches = sum(1 for match in filtered_matches if match['match_score'] == 100)
	        st.metric("Perfect Matches", perfect_matches)
	    with col4:
	        avg_skills = sum(len(match['found_skills']) for match in filtered_matches) / shown
	        st.metric("Avg. Skills Found", f"{avg_skills:.1f}")

	    # Add skill distribution chart
	    st.subheader("Skill Distribution")
	    skill_counts = {skill: 0 for skill in required_skills}
	    for match in filtered_matches:
	        for skill in match['found_skills']:
	            if skill in skill_counts:
	                skill_counts[skill] += 1

	    chart_data = pd.DataFrame({
	        'Skill': list(skill_counts.keys()),
	        'Count': list(skill_counts.values())
	    })
	    st.bar_chart(chart_data.set_index('Skill'))


	def main():
	    """Run the Streamlit resume-screening page.

	    Renders the upload section and the skill inputs. When "Find Matching
	    Profiles" is pressed, every resume in the ``Docs`` folder next to this
	    file is scored and the results are stored in ``st.session_state``. The
	    stored results are rendered on every run, so moving a filter slider
	    (which reruns the script with the button no longer pressed) keeps the
	    results on screen.
	    """
	    try:
	        # Streamlit UI setup: kept first so later st.* calls, including
	        # the error handlers, run after the page is configured.
	        st.set_page_config(
	            page_title="Resume Screening Assistant",
	            layout="wide"
	        )

	        # Load environment variables and setup
	        load_dotenv()
	        client = create_aws_client()

	        st.title("Resume Screening AI Assistant")
	        st.subheader("Match resumes with required skills and experience")
	        upload_section()

	        # Skills input
	        st.write("Enter required skills (one per line):")
	        skills_input = st.text_area(
	            "Required Skills",
	            placeholder="Example:\nPython\nJava\nAWS\nDocker",
	            height=150
	        )

	        # Additional requirements
	        additional_reqs = st.text_area(
	            "Additional Requirements (optional)",
	            placeholder="Enter any additional requirements like:\n- Years of experience\n- Education\n- Specific domain knowledge",
	            height=100
	        )

	        # Process inputs
	        required_skills = [skill.strip() for skill in skills_input.split('\n') if skill.strip()]

	        analyze_button = st.button("Find Matching Profiles", use_container_width=True)

	        if analyze_button:
	            # A new run replaces whatever was shown before.
	            if _RESULTS_KEY in st.session_state:
	                del st.session_state[_RESULTS_KEY]

	            if not required_skills:
	                st.error("Please enter at least one required skill!")
	                return

	            # NOTE(review): uploads are saved to "Docs" relative to the working
	            # directory, while this reads "Docs" next to this file - confirm
	            # both resolve to the same folder in deployment.
	            docs_folder = os.path.join(os.path.dirname(__file__), 'Docs')

	            with st.spinner("Analyzing resumes..."):
	                try:
	                    processed_files = process_docs_folder(docs_folder)

	                    if not processed_files:
	                        st.error("No resumes found in the Docs folder!")
	                        return

	                    matches = _screen_resumes(
	                        processed_files, additional_reqs, required_skills, client
	                    )
	                except Exception as e:
	                    st.error(f"Error during analysis: {str(e)}")
	                    print(f"Error Details: {e}")
	                    return

	            # Store the inputs used for this run alongside the matches so the
	            # display stays consistent if the text areas are edited later.
	            st.session_state[_RESULTS_KEY] = {
	                "matches": matches,
	                "required_skills": required_skills,
	                "additional_reqs": additional_reqs,
	                "analyses": {}
	            }

	        results = st.session_state.get(_RESULTS_KEY)
	        if results is not None:
	            try:
	                _render_results(results, client)
	            except Exception as e:
	                st.error(f"Error during analysis: {str(e)}")
	                print(f"Error Details: {e}")

	    except Exception as error:
	        st.error(f"An error occurred: {str(error)}")
	        print(f"Error Details: {error}")

	# Entry point: call main() only when this file is executed as the main script.
	if __name__ == "__main__":
	main()