"""Streamlit resume-screening assistant.

Uploaded resumes (PDF, DOCX, TXT, or ZIP archives of those) are converted to
text, filtered against a company blacklist, stored in a ``Docs`` folder next
to this script, and then matched against a list of required skills using
direct text search plus an AWS Bedrock LLM.
"""

import hashlib
import io
import os
import zipfile
from io import BytesIO
from pathlib import Path
from typing import Dict, List

import boto3
import docx
import pandas as pd
import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain_aws import BedrockEmbeddings, BedrockLLM
from langchain_core.prompts import PromptTemplate

# Resume storage lives next to this script so that uploading and analysing
# always use the same folder, whatever the current working directory is.
DOCS_FOLDER = Path(__file__).resolve().parent / "Docs"

# File types that extract_text_from_file knows how to read.
SUPPORTED_EXTENSIONS = ('.txt', '.docx', '.doc', '.pdf')

MODEL_ID = "amazon.titan-text-lite-v1"

# Key under which the latest analysis is kept in st.session_state.
_RESULTS_KEY = "resume_screening_results"

# Prefix of the string get_summary_from_llm returns when the LLM call fails.
_ANALYSIS_ERROR_PREFIX = "Error generating analysis:"

RELEVANCE_PROMPT = """
Analyze this resume for the following skills. Be lenient in matching skills.

Required Skills to Check:
{skills}

Resume Content:
{resume}

For each skill, determine:
1. If it's present (including variations and related technologies)
2. The experience level with the skill
3. How recently it was used

Respond in this format:
{{
    "skills_found": [ List of skills found (including variations) ],
    "match_percentage": Percentage of required skills found (0-100),
    "skill_details": {{
        "skill_name": {{
            "found": true/false,
            "experience": "description of experience",
            "evidence": "where found in resume"
        }}
    }}
}}

Be generous in skill matching. If you find related technologies or variations, count them as matches.
"""

LENIENT_PROMPT = """
Analyze this resume for skills related to or equivalent to:
{skills}

Consider variations and related technologies.

Resume Content:
{resume}

List any matches found, including:
1. Direct matches
2. Related technologies
3. Equivalent skills

Respond with found matches only.
"""

SUMMARY_PROMPT = """
Provide a detailed analysis of this resume against the job requirements.

Required Skills:
{skills}

Additional Requirements:
{job_desc}

Resume Content:
{resume_details}

Provide analysis in this format:

## Skills Analysis
### Required Skills Match
{skills_analysis}

### Technical Proficiency
- For each required skill:
  * Experience level
  * Years of usage
  * Recent projects

### Additional Technical Skills
- Only list relevant additional skills

## Experience Analysis
- Total years of relevant experience
- Key projects using multiple required skills
- Notable achievements with required technologies

## Overall Assessment
- Skills Match Score: X/Y required skills found
- Technical Proficiency Score: (0-100)
- Experience Level Match: (Junior/Mid/Senior)

## Recommendation
- Hiring Decision: (Strong Match/Potential Match/Not Recommended)
- Key Strengths: (list top 3)
- Areas to Verify: (list specific areas)

Focus only on exact matches and verifiable experience.
"""


def _content_id(name: str, text: str) -> str:
    """Build an identifier from a file name and a digest of its text.

    The built-in hash() is salted per process for strings, so it cannot be
    used for ids that should be the same from one run to the next.
    """
    digest = hashlib.sha256(text.encode('utf-8', errors='replace')).hexdigest()[:16]
    return f"{name}_{digest}"


def _format_skill_list(required_skills: List[str]) -> str:
    """Render the skills as a bulleted list for insertion into a prompt."""
    return "\n".join(f"- {skill}" for skill in required_skills)


def extract_text_from_file(file_content: bytes, file_extension: str) -> str:
    """Extract text from different file types.

    Args:
        file_content: Raw bytes of the file.
        file_extension: Lower-case suffix including the dot ('.pdf', '.docx',
            '.doc' or '.txt'). Any other value yields an empty string.

    Returns:
        The extracted text. On a parsing error the text gathered so far is
        returned (possibly empty) and the error is printed.
    """
    parts = []
    try:
        if file_extension == '.pdf':
            pdf_reader = PyPDF2.PdfReader(BytesIO(file_content))
            for page in pdf_reader.pages:
                # extract_text() may yield None for pages without a text layer.
                parts.append((page.extract_text() or "") + "\n")
        elif file_extension in ('.docx', '.doc'):
            # NOTE(review): legacy binary .doc files are handed to python-docx
            # as well; if it cannot parse them the error is reported below.
            doc = docx.Document(BytesIO(file_content))
            for paragraph in doc.paragraphs:
                parts.append(paragraph.text + "\n")
        elif file_extension == '.txt':
            # Replace undecodable bytes instead of discarding the whole file.
            parts.append(file_content.decode('utf-8', errors='replace'))
    except Exception as e:
        print(f"Error extracting text from {file_extension} file: {str(e)}")
    return "".join(parts)


def process_zip_file(zip_content: bytes, blacklist: set) -> List[dict]:
    """Process contents of a ZIP file.

    Args:
        zip_content: Raw bytes of the archive.
        blacklist: Lower-cased company names that disqualify a document.

    Returns:
        One dict per accepted member with the keys "id", "name" (the member
        path inside the archive) and "content" (the extracted text).

    Raises:
        zipfile.BadZipFile: If zip_content is not a valid archive.
    """
    processed_files = []
    with zipfile.ZipFile(BytesIO(zip_content)) as z:
        for zip_filename in z.namelist():
            # Compare case-insensitively so "Resume.PDF" is not dropped.
            if not zip_filename.lower().endswith(SUPPORTED_EXTENSIONS):
                continue
            try:
                with z.open(zip_filename) as f:
                    file_content = f.read()
                file_extension = Path(zip_filename).suffix.lower()
                text = extract_text_from_file(file_content, file_extension)
                if not text:
                    print(f"Skipping {zip_filename} from ZIP - no text could be extracted")
                elif check_for_blacklisted_companies(text, blacklist):
                    print(f"Skipping {zip_filename} from ZIP - contains blacklisted company")
                else:
                    processed_files.append({
                        "id": _content_id(zip_filename, text),
                        "name": zip_filename,
                        "content": text,
                    })
            except Exception as e:
                print(f"Error processing {zip_filename} from ZIP: {str(e)}")
    return processed_files


def load_blacklist() -> set:
    """Load blacklisted company names from a file.

    Reads 'blacklist.txt' from the current working directory, one name per
    line. If the file does not exist an empty one is created.

    Returns:
        Set of lower-cased, stripped, non-empty company names.
    """
    try:
        with open('blacklist.txt', 'r', encoding='utf-8') as file:
            # Convert to lowercase and remove whitespace
            return {line.strip().lower() for line in file if line.strip()}
    except FileNotFoundError:
        print("Warning: blacklist.txt not found. Creating empty blacklist.")
        # Create empty blacklist file
        with open('blacklist.txt', 'w', encoding='utf-8'):
            pass
        return set()


def check_for_blacklisted_companies(text: str, blacklist: set) -> bool:
    """Check if any blacklisted company names appear in the text.

    Matching is a case-insensitive substring search; blacklist entries are
    expected to be lower-case already (as produced by load_blacklist).

    Args:
        text: The text to check
        blacklist: Set of blacklisted company names

    Returns:
        True if blacklisted company found, False otherwise
    """
    if not text or not blacklist:
        return False
    text_lower = text.lower()
    return any(company in text_lower for company in blacklist)


def _unique_path(folder: Path, filename: str) -> Path:
    """Return a path in folder for filename that does not exist yet.

    On a collision "_<n>" is appended to the original stem (name_1.ext,
    name_2.ext, ...), always starting from the original name.
    """
    stem = Path(filename).stem
    suffix = Path(filename).suffix
    candidate = folder / filename
    counter = 1
    while candidate.exists():
        candidate = folder / f"{stem}_{counter}{suffix}"
        counter += 1
    return candidate


def save_uploaded_resumes(uploaded_files):
    """Save uploaded resume files to the Docs folder.

    Individual files are stored unchanged. ZIP archives are unpacked in
    memory and each accepted member is stored as a '.txt' file holding its
    extracted text. Files without extractable text or mentioning a
    blacklisted company are skipped.

    Args:
        uploaded_files: Streamlit uploaded-file objects (need .name and
            .getvalue()).

    Returns:
        List of the file names that were written.
    """
    DOCS_FOLDER.mkdir(exist_ok=True)
    blacklist = load_blacklist()
    saved_files = []

    for uploaded_file in uploaded_files:
        try:
            # getvalue() returns the full buffer regardless of the current
            # read position of the upload.
            content = uploaded_file.getvalue()
            # Keep only the final path component of the client-supplied name.
            safe_name = Path(uploaded_file.name).name
            file_extension = Path(safe_name).suffix.lower()

            # Handle ZIP files
            if file_extension == '.zip':
                for processed_file in process_zip_file(content, blacklist):
                    # Member names may contain directories (or ".."); keep only
                    # the base name. The stored payload is extracted text, so it
                    # must carry a .txt suffix to be readable again later.
                    member_name = processed_file["name"].replace("\\", "/")
                    text_name = Path(member_name).with_suffix(".txt").name
                    target = _unique_path(DOCS_FOLDER, text_name)
                    target.write_bytes(
                        processed_file["content"].encode('utf-8', errors='replace')
                    )
                    saved_files.append(target.name)
                continue

            # Handle individual files
            text = extract_text_from_file(content, file_extension)
            if not text:
                print(f"Skipping {uploaded_file.name} - no text could be extracted")
                continue
            if check_for_blacklisted_companies(text, blacklist):
                print(f"Skipping {uploaded_file.name} - contains blacklisted company")
                continue

            target = _unique_path(DOCS_FOLDER, safe_name)
            target.write_bytes(content)
            saved_files.append(target.name)
        except Exception as e:
            print(f"Error processing {uploaded_file.name}: {str(e)}")

    return saved_files


def upload_section():
    """Render the upload widget and store the uploaded resumes on request."""
    st.subheader("Upload Resumes")
    uploaded_files = st.file_uploader(
        "Upload one or more resumes",
        type=['pdf', 'docx', 'doc', 'txt', 'zip'],
        accept_multiple_files=True
    )

    if not uploaded_files:
        return
    if not st.button("Process Uploaded Resumes"):
        return

    saved_files = save_uploaded_resumes(uploaded_files)
    if saved_files:
        st.success(f"Successfully saved {len(saved_files)} files to Docs folder")
        st.write("Saved files:", ", ".join(saved_files))
        if any(f.name.lower().endswith('.zip') for f in uploaded_files):
            st.info("ZIP files were processed and their contents were extracted")
    else:
        st.warning("No files were saved. They may contain blacklisted content")


def create_aws_client():
    """Create and return AWS Bedrock client.

    Credentials and region are read from the ACCESS_KEY, SECRET_ACCESS_KEY
    and REGION environment variables.
    """
    return boto3.client(
        'bedrock-runtime',
        region_name=os.getenv('REGION'),
        aws_access_key_id=os.getenv('ACCESS_KEY'),
        aws_secret_access_key=os.getenv('SECRET_ACCESS_KEY')
    )


def process_docs_folder(folder_path: str) -> List[dict]:
    """Process all documents in the specified folder.

    Args:
        folder_path: Directory containing resumes and/or ZIP archives.

    Returns:
        One dict per accepted document with the keys "id", "name" and
        "content" (the extracted text).

    Raises:
        Exception: If the folder is missing or cannot be listed.
    """
    processed_files = []
    blacklist = load_blacklist()

    try:
        if not os.path.exists(folder_path):
            raise Exception(f"Folder not found: {folder_path}")

        # Sorted so results do not depend on the file system's listing order.
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            if not os.path.isfile(file_path):
                continue

            file_extension = Path(filename).suffix.lower()
            try:
                with open(file_path, 'rb') as file:
                    content = file.read()

                if file_extension == '.zip':
                    processed_files.extend(process_zip_file(content, blacklist))
                    continue

                text = extract_text_from_file(content, file_extension)
                if not text:
                    print(f"Skipping {filename} - no text could be extracted")
                elif check_for_blacklisted_companies(text, blacklist):
                    print(f"Skipping {filename} - contains blacklisted company")
                else:
                    processed_files.append({
                        "id": _content_id(filename, text),
                        "name": filename,
                        "content": text,
                    })
            except Exception as e:
                print(f"Error processing {filename}: {str(e)}")
    except Exception as e:
        raise Exception(f"Error accessing docs folder: {str(e)}") from e

    return processed_files


def _relevance_result(is_relevant: bool, found_skills: List[str],
                      required_skills: List[str], key_matches: str) -> dict:
    """Assemble the result dict returned by check_resume_relevance."""
    total_skills = len(required_skills)
    score = (len(found_skills) / total_skills) * 100 if total_skills else 0
    return {
        "is_relevant": is_relevant,
        "score": score,
        "found_skills": found_skills,
        "total_skills": total_skills,
        "key_matches": key_matches,
    }


def check_resume_relevance(job_desc: str, resume_content: str, required_skills: List[str], client) -> dict:
    """Check if resume matches required skills and job description.

    Skills are first searched for directly (case-insensitive substring). If
    any are found, the score is based on those direct hits and the LLM output
    is only attached as commentary. If none are found, the LLM is asked for
    related skills and its answer is scanned for the skill names.

    Args:
        job_desc: Additional requirements; currently not used here and kept
            for interface compatibility.
        resume_content: Plain text of the resume.
        required_skills: Skill names to look for.
        client: Bedrock runtime client passed to BedrockLLM.

    Returns:
        Dict with "is_relevant", "score" (0-100), "found_skills",
        "total_skills" and "key_matches".
    """
    if not required_skills:
        # Nothing to look for; skip the LLM call entirely.
        return _relevance_result(False, [], required_skills, "No required skills provided")

    llm = BedrockLLM(model_id=MODEL_ID, client=client)
    skills_text = _format_skill_list(required_skills)

    # First, do a direct text search for skills
    resume_lower = resume_content.lower()
    found_skills_direct = [
        skill for skill in required_skills if skill.lower() in resume_lower
    ]

    # If we found any skills directly, proceed with detailed analysis
    if found_skills_direct:
        message = PromptTemplate.from_template(RELEVANCE_PROMPT).format(
            skills=skills_text,
            resume=resume_content
        )
        try:
            response = llm.invoke(message)
        except Exception as e:
            print(f"Error in LLM analysis: {e}")
            # Fall back to direct matching results; relevant if any skill found
            return _relevance_result(
                True,
                found_skills_direct,
                required_skills,
                f"Skills found through direct matching: {', '.join(found_skills_direct)}",
            )

        match_score = (len(found_skills_direct) / len(required_skills)) * 100
        # Relevant when at least half of the required skills were found
        return _relevance_result(
            match_score >= 50, found_skills_direct, required_skills, response
        )

    # If no direct matches, do a more lenient check with LLM
    message = PromptTemplate.from_template(LENIENT_PROMPT).format(
        skills=skills_text,
        resume=resume_content
    )
    try:
        response = llm.invoke(message)
    except Exception as e:
        print(f"Error in lenient LLM analysis: {e}")
        return _relevance_result(False, [], required_skills, "Error in analysis")

    # Check if any skills are mentioned in the response
    response_lower = response.lower()
    found_skills = [
        skill for skill in required_skills if skill.lower() in response_lower
    ]
    return _relevance_result(bool(found_skills), found_skills, required_skills, response)


def get_summary_from_llm(job_desc: str, resume_content: str, required_skills: List[str], client) -> str:
    """Generate detailed analysis of resume against requirements.

    Args:
        job_desc: Additional free-text requirements.
        resume_content: Plain text of the resume.
        required_skills: Skill names the analysis should cover.
        client: Bedrock runtime client passed to BedrockLLM.

    Returns:
        The LLM's analysis, or an "Error generating analysis: ..." message if
        the call fails.
    """
    llm = BedrockLLM(model_id=MODEL_ID, client=client)

    message = PromptTemplate.from_template(SUMMARY_PROMPT).format(
        skills=_format_skill_list(required_skills),
        job_desc=job_desc,
        resume_details=resume_content,
        skills_analysis="\n".join(
            f"- {skill}: Found/Not Found, Experience Level, Evidence"
            for skill in required_skills
        )
    )

    try:
        return llm.invoke(message)
    except Exception as e:
        return f"Error generating analysis: {str(e)}"


def _excel_column_letter(index: int) -> str:
    """Convert a zero-based column index to Excel letters (0 -> A, 26 -> AA)."""
    letters = ""
    number = index + 1
    while number > 0:
        number, remainder = divmod(number - 1, 26)
        letters = chr(65 + remainder) + letters
    return letters


def export_to_excel(matches: List[dict], required_skills: List[str]) -> BytesIO:
    """Create Excel report from matches.

    Args:
        matches: Match dicts with at least "name"; "match_score" and
            "found_skills" are optional.
        required_skills: Skills to report on; each gets its own column.

    Returns:
        In-memory .xlsx workbook, positioned at the start of the buffer.
    """
    # Prepare data for Excel
    excel_data = []
    for match in matches:
        found_skills = match.get('found_skills', [])
        found_lookup = set(found_skills)
        row_data = {
            'Candidate Name': match['name'],
            'Match Score': f"{match.get('match_score', 0):.1f}%",
            'Skills Found': ', '.join(found_skills),
            'Missing Skills': ', '.join(
                skill for skill in required_skills if skill not in found_lookup
            ),
            'Total Skills Found': len(found_skills),
            'Total Required Skills': len(required_skills)
        }
        # Add individual skill columns
        for skill in required_skills:
            row_data[f'Skill - {skill}'] = '✓' if skill in found_lookup else '✗'
        excel_data.append(row_data)

    # Create DataFrame
    df = pd.DataFrame(excel_data)

    # Create Excel file
    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Resume Matches')
        worksheet = writer.sheets['Resume Matches']

        # Size each column to its longest cell (header included), capped at 50
        for idx, col in enumerate(df.columns):
            max_length = max(
                int(df[col].astype(str).apply(len).max()),
                len(col)
            )
            # chr(65 + idx) is only valid up to column Z; the helper also
            # handles AA, AB, ... for reports with many skill columns.
            worksheet.column_dimensions[_excel_column_letter(idx)].width = min(max_length + 2, 50)

    output.seek(0)
    return output


def _find_matches(additional_reqs: str, required_skills: List[str], client):
    """Score every resume in the Docs folder.

    Returns None when the folder holds no usable resumes, otherwise the
    resumes with at least one found skill, sorted by score (best first).
    """
    processed_files = process_docs_folder(str(DOCS_FOLDER))
    if not processed_files:
        return None

    matches = []
    total = len(processed_files)
    progress_bar = st.progress(0)
    for position, file_data in enumerate(processed_files, start=1):
        progress_bar.progress(position / total)
        relevance = check_resume_relevance(
            additional_reqs,
            file_data['content'],
            required_skills,
            client
        )
        if relevance['found_skills']:  # Show if any skills found
            matches.append({
                **file_data,
                "match_score": relevance['score'],
                "found_skills": relevance['found_skills'],
                "total_skills": relevance['total_skills'],
                "key_matches": relevance['key_matches']
            })
    progress_bar.empty()

    # Sort matches by score
    matches.sort(key=lambda match: match['match_score'], reverse=True)
    return matches


def _detailed_analysis(match: dict, results: dict, client) -> str:
    """Return the LLM analysis for a match, reusing a stored one if present.

    Successful analyses are kept with the stored results so that Streamlit
    reruns (e.g. moving a slider) do not call the LLM again.
    """
    analyses = results["analyses"]
    match_id = match["id"]
    if match_id not in analyses:
        analysis = get_summary_from_llm(
            results["additional_reqs"],
            match['content'],
            results["required_skills"],
            client
        )
        if analysis.startswith(_ANALYSIS_ERROR_PREFIX):
            # Do not store failures, so the next rerun retries.
            return analysis
        analyses[match_id] = analysis
    return analyses[match_id]


def _render_summary_statistics(filtered_matches: List[dict], required_skills: List[str]) -> None:
    """Show aggregate metrics and the per-skill chart for a non-empty list."""
    st.markdown("---")
    st.subheader("Summary Statistics")

    count = len(filtered_matches)
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Matches", count)
    with col2:
        avg_score = sum(match['match_score'] for match in filtered_matches) / count
        st.metric("Average Match Score", f"{avg_score:.1f}%")
    with col3:
        perfect_matches = sum(1 for match in filtered_matches if match['match_score'] == 100)
        st.metric("Perfect Matches", perfect_matches)
    with col4:
        avg_skills = sum(len(match['found_skills']) for match in filtered_matches) / count
        st.metric("Avg. Skills Found", f"{avg_skills:.1f}")

    # Add skill distribution chart
    st.subheader("Skill Distribution")
    skill_counts = {skill: 0 for skill in required_skills}
    for match in filtered_matches:
        for skill in match['found_skills']:
            if skill in skill_counts:
                skill_counts[skill] += 1

    chart_data = pd.DataFrame({
        'Skill': list(skill_counts.keys()),
        'Count': list(skill_counts.values())
    })
    st.bar_chart(chart_data.set_index('Skill'))


def _render_results(results: dict, client) -> None:
    """Render filters, match cards, export and statistics for stored results."""
    matches = results["matches"]
    required_skills = results["required_skills"]

    if not matches:
        st.warning(
            "No profiles found matching the required skills. "
            "Try adjusting the requirements or adding more resumes."
        )
        return

    st.success(f"Found {len(matches)} profiles with matching skills")

    # Create columns for filters
    col1, col2 = st.columns(2)
    with col1:
        min_score = st.slider(
            "Minimum Match Score",
            min_value=0,
            max_value=100,
            value=50,
            step=5
        )
    with col2:
        min_skills = st.slider(
            "Minimum Required Skills",
            min_value=0,
            max_value=len(required_skills),
            value=1,
            step=1
        )

    # Filter matches based on criteria
    filtered_matches = [
        match for match in matches
        if match['match_score'] >= min_score
        and len(match['found_skills']) >= min_skills
    ]

    st.subheader(f"Showing {len(filtered_matches)} matches meeting criteria")
    if not filtered_matches:
        # Nothing to list, export or average; avoids dividing by zero below.
        st.info("No matches meet the current filter criteria. Try lowering the thresholds.")
        return

    # Display matches
    for position, match in enumerate(filtered_matches, start=1):
        with st.container():
            st.markdown("---")
            col1, col2 = st.columns([1, 3])
            with col1:
                st.subheader(f"Match #{position}")
                st.write(f"📄 {match['name']}")
                st.write(f"Match Score: {match['match_score']:.1f}%")

                # Display skills breakdown
                st.write("Skills Found:")
                found_skills = match.get('found_skills', [])
                for skill in required_skills:
                    if skill in found_skills:
                        st.write(f"✅ {skill}")
                    else:
                        st.write(f"❌ {skill}")
            with col2:
                with st.expander("Show Detailed Analysis"):
                    st.markdown(_detailed_analysis(match, results, client))

    # Add export section
    st.markdown("---")
    st.subheader("Export Results")
    excel_output = export_to_excel(filtered_matches, required_skills)
    st.download_button(
        label=f"📥 Download Excel Report ({len(filtered_matches)} matches)",
        data=excel_output.getvalue(),
        file_name="resume_matches.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

    _render_summary_statistics(filtered_matches, required_skills)


def main():
    """Run the Streamlit resume-screening page."""
    try:
        # Load environment variables and setup
        load_dotenv()

        # Streamlit UI setup (kept ahead of every other Streamlit call)
        st.set_page_config(
            page_title="Resume Screening Assistant",
            layout="wide"
        )

        client = create_aws_client()

        st.title("Resume Screening AI Assistant")
        st.subheader("Match resumes with required skills and experience")

        upload_section()

        # Skills input
        st.write("Enter required skills (one per line):")
        skills_input = st.text_area(
            "Required Skills",
            placeholder="Example:\nPython\nJava\nAWS\nDocker",
            height=150
        )

        # Additional requirements
        additional_reqs = st.text_area(
            "Additional Requirements (optional)",
            placeholder="Enter any additional requirements like:\n- Years of experience\n- Education\n- Specific domain knowledge",
            height=100
        )

        # Process inputs; drop repeated lines while keeping the entered order
        required_skills = list(dict.fromkeys(
            skill.strip() for skill in skills_input.split('\n') if skill.strip()
        ))

        analyze_button = st.button("Find Matching Profiles", use_container_width=True)

        if analyze_button:
            # A new run replaces whatever was shown before.
            st.session_state.pop(_RESULTS_KEY, None)

            if not required_skills:
                st.error("Please enter at least one required skill!")
                return

            with st.spinner("Analyzing resumes..."):
                try:
                    matches = _find_matches(additional_reqs, required_skills, client)
                except Exception as e:
                    st.error(f"Error during analysis: {str(e)}")
                    print(f"Error Details: {e}")
                    return

            if matches is None:
                st.error("No resumes found in the Docs folder!")
                return

            # The button is only True on the rerun triggered by the click, so
            # the results are stored in session state to stay visible when a
            # slider or another widget triggers the next rerun.
            st.session_state[_RESULTS_KEY] = {
                "matches": matches,
                "required_skills": required_skills,
                "additional_reqs": additional_reqs,
                "analyses": {},
            }

        results = st.session_state.get(_RESULTS_KEY)
        if results is not None:
            try:
                _render_results(results, client)
            except Exception as e:
                st.error(f"Error during analysis: {str(e)}")
                print(f"Error Details: {e}")

    except Exception as error:
        st.error(f"An error occurred: {str(error)}")
        print(f"Error Details: {error}")


if __name__ == "__main__":
    main()