# Hugging Face Spaces page banner captured with this file (status: "Runtime error").
# It is page residue, not part of the program.
import io
import os
import textwrap
import zipfile
from io import BytesIO
from pathlib import Path
from typing import Dict, List

import boto3
import docx
import pandas as pd
import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain_aws import BedrockEmbeddings, BedrockLLM
from langchain_core.prompts import PromptTemplate
def extract_text_from_file(file_content: bytes, file_extension: str) -> str:
    """Extract plain text from the raw bytes of a resume file.

    Args:
        file_content: Raw bytes of the file.
        file_extension: Extension including the leading dot (e.g. ``'.pdf'``).
            It is compared case-insensitively.

    Returns:
        The extracted text. An empty string is returned for unsupported
        extensions; when extraction fails the error is printed (not raised)
        and whatever text was collected before the failure is returned.
    """
    extension = file_extension.lower()
    parts = []
    try:
        if extension == '.pdf':
            pdf_reader = PyPDF2.PdfReader(BytesIO(file_content))
            for page in pdf_reader.pages:
                # extract_text() may give None/"" (e.g. image-only pages);
                # treat that as an empty page instead of failing on None + str.
                parts.append(f"{page.extract_text() or ''}\n")
        elif extension in ('.docx', '.doc'):
            # NOTE(review): legacy binary .doc files are presumably not readable
            # by docx.Document; if so they land in the except below - confirm.
            doc = docx.Document(BytesIO(file_content))
            for paragraph in doc.paragraphs:
                parts.append(f"{paragraph.text}\n")
        elif extension == '.txt':
            try:
                parts.append(file_content.decode('utf-8'))
            except UnicodeDecodeError:
                # Keep the readable part of non-UTF-8 files rather than
                # discarding the whole resume.
                parts.append(file_content.decode('utf-8', errors='replace'))
    except Exception as e:
        print(f"Error extracting text from {file_extension} file: {str(e)}")
    return "".join(parts)
def process_zip_file(zip_content: bytes, blacklist: set) -> List[dict]:
    """Extract the text of every supported document inside a ZIP archive.

    Args:
        zip_content: Raw bytes of the ZIP archive.
        blacklist: Company names; entries whose text mentions one are skipped.

    Returns:
        One dict per accepted entry with the keys ``"id"``, ``"name"`` (the
        entry name inside the archive) and ``"content"`` (the extracted text).

    Raises:
        zipfile.BadZipFile: If ``zip_content`` is not a valid ZIP archive.
    """
    supported_extensions = ('.txt', '.docx', '.doc', '.pdf')
    processed_files = []
    with zipfile.ZipFile(BytesIO(zip_content)) as z:
        for zip_filename in z.namelist():
            if zip_filename.endswith('/'):
                continue  # directory entry
            # Compare the lowercased suffix so 'CV.PDF' is not silently dropped.
            file_extension = Path(zip_filename).suffix.lower()
            if file_extension not in supported_extensions:
                continue
            try:
                with z.open(zip_filename) as f:
                    file_content = f.read()
                text = extract_text_from_file(file_content, file_extension)
                if not text:
                    print(f"Skipping {zip_filename} from ZIP - no text could be extracted")
                elif check_for_blacklisted_companies(text, blacklist):
                    print(f"Skipping {zip_filename} from ZIP - contains blacklisted company")
                else:
                    processed_files.append({
                        "id": f"{zip_filename}_{hash(text)}",
                        "name": zip_filename,
                        "content": text,
                    })
            except Exception as e:
                # One unreadable entry must not abort the rest of the archive.
                print(f"Error processing {zip_filename} from ZIP: {str(e)}")
    return processed_files
def load_blacklist() -> set:
    """Load blacklisted company names from ``blacklist.txt`` in the working directory.

    Each non-blank line is one company name; names are stripped and lowercased.
    If the file does not exist, an empty one is created on a best-effort basis.

    Returns:
        The set of lowercased company names (empty if the file is missing).
    """
    try:
        # 'utf-8-sig' also accepts plain UTF-8 but drops a leading BOM, which
        # would otherwise become part of the first company name.
        with open('blacklist.txt', 'r', encoding='utf-8-sig') as file:
            return {line.strip().lower() for line in file if line.strip()}
    except FileNotFoundError:
        print("Warning: blacklist.txt not found. Creating empty blacklist.")
        try:
            with open('blacklist.txt', 'w', encoding='utf-8'):
                pass
        except OSError as e:
            # A read-only working directory must not break resume processing.
            print(f"Warning: could not create blacklist.txt: {str(e)}")
        return set()
def check_for_blacklisted_companies(text: str, blacklist: set) -> bool:
    """Check whether any blacklisted company name appears in the text.

    Matching is a case-insensitive substring search.

    Args:
        text: The text to check.
        blacklist: Set of blacklisted company names (any letter case).

    Returns:
        True if a blacklisted company is found, False otherwise (including
        when ``text`` or ``blacklist`` is empty).
    """
    if not text or not blacklist:
        return False
    text_lower = text.lower()
    for company in blacklist:
        needle = company.strip().lower() if company else ""
        # A blank entry is a substring of every text; it must never match.
        if needle and needle in text_lower:
            return True
    return False
def _unique_filename(folder: Path, filename: str) -> str:
    """Return ``filename``, or ``<stem>_<n><suffix>`` with the first free n >= 1 in ``folder``."""
    stem = Path(filename).stem
    suffix = Path(filename).suffix
    candidate = filename
    counter = 1
    while (folder / candidate).exists():
        # Always derive from the original stem so names do not compound
        # into 'cv_1_2.pdf'.
        candidate = f"{stem}_{counter}{suffix}"
        counter += 1
    return candidate


def save_uploaded_resumes(uploaded_files):
    """Save uploaded resumes into the ``Docs`` folder of the working directory.

    Files whose text cannot be extracted or mentions a blacklisted company are
    skipped. Individual files are stored unchanged. For ZIP uploads only the
    extracted text of each accepted entry is available, so it is stored as a
    ``.txt`` file; keeping the entry's original ``.pdf``/``.docx`` name would
    make the file unreadable when the folder is processed later.

    Args:
        uploaded_files: Iterable of uploaded file objects exposing ``name`` and
            ``getvalue()`` (as Streamlit's uploaded files do).

    Returns:
        List of the file names written to the ``Docs`` folder.
    """
    # NOTE(review): this folder is relative to the working directory, while
    # main() reads 'Docs' next to the script file - confirm both are the same.
    docs_folder = Path("Docs")
    docs_folder.mkdir(exist_ok=True)
    blacklist = load_blacklist()
    saved_files = []
    for uploaded_file in uploaded_files:
        try:
            # getvalue() returns the full content regardless of the current
            # read position of the upload buffer.
            content = uploaded_file.getvalue()
            file_extension = Path(uploaded_file.name).suffix.lower()
            if file_extension == '.zip':
                for processed_file in process_zip_file(content, blacklist):
                    # Drop directory components of the archive entry: they may
                    # not exist under Docs and could point outside it ('../').
                    entry_name = Path(processed_file["name"]).name
                    text_name = Path(entry_name).with_suffix('.txt').name
                    new_filename = _unique_filename(docs_folder, text_name)
                    with open(docs_folder / new_filename, "wb") as f:
                        f.write(processed_file["content"].encode('utf-8'))
                    saved_files.append(new_filename)
                continue

            text = extract_text_from_file(content, file_extension)
            if not text:
                print(f"Skipping {uploaded_file.name} - no text could be extracted")
                continue
            if check_for_blacklisted_companies(text, blacklist):
                print(f"Skipping {uploaded_file.name} - contains blacklisted company")
                continue
            new_filename = _unique_filename(docs_folder, Path(uploaded_file.name).name)
            with open(docs_folder / new_filename, "wb") as f:
                f.write(content)
            saved_files.append(new_filename)
        except Exception as e:
            # One bad upload must not prevent the remaining files from saving.
            print(f"Error processing {uploaded_file.name}: {str(e)}")
    return saved_files
def upload_section():
    """Render the resume upload widget and save the files when requested.

    Shows a multi-file uploader and, once files are selected, a button that
    passes them to ``save_uploaded_resumes`` and reports the outcome.
    """
    st.subheader("Upload Resumes")
    uploaded_files = st.file_uploader(
        "Upload one or more resumes",
        type=['pdf', 'docx', 'doc', 'txt', 'zip'],
        accept_multiple_files=True
    )
    if not uploaded_files:
        return
    if not st.button("Process Uploaded Resumes"):
        return

    saved_files = save_uploaded_resumes(uploaded_files)
    if not saved_files:
        st.warning("No files were saved. They may contain blacklisted content")
        return

    st.success(f"Successfully saved {len(saved_files)} files to Docs folder")
    st.write("Saved files:", ", ".join(saved_files))
    # Case-insensitive so 'Resumes.ZIP' is recognised as an archive too.
    if any(uploaded.name.lower().endswith('.zip') for uploaded in uploaded_files):
        st.info("ZIP files were processed and their contents were extracted")
def create_aws_client():
    """Create and return an AWS Bedrock runtime client.

    Credentials and region are read from the environment variables
    ``ACCESS_KEY``, ``SECRET_ACCESS_KEY`` and ``REGION``; unset variables are
    passed to ``boto3.client`` as None.

    Returns:
        The ``bedrock-runtime`` client created by ``boto3.client``.
    """
    access_key = os.getenv('ACCESS_KEY')
    secret_access_key = os.getenv('SECRET_ACCESS_KEY')
    region_name = os.getenv('REGION')
    return boto3.client(
        'bedrock-runtime',
        region_name=region_name,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_access_key
    )
def process_docs_folder(folder_path: str) -> List[dict]:
    """Extract the text of every resume stored in a folder.

    Regular files are read one by one (in sorted name order); ZIP archives are
    expanded with ``process_zip_file``. Files with no extractable text or that
    mention a blacklisted company are skipped. A failure on one file is
    printed and does not stop the others.

    Args:
        folder_path: Path of the folder to scan (not recursive).

    Returns:
        One dict per accepted document with the keys ``"id"``, ``"name"`` and
        ``"content"``.

    Raises:
        Exception: If the folder does not exist or cannot be listed.
    """
    processed_files = []
    blacklist = load_blacklist()
    try:
        if not os.path.exists(folder_path):
            raise Exception(f"Folder not found: {folder_path}")
        # sorted() makes the processing order independent of the filesystem.
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            if not os.path.isfile(file_path):
                continue
            file_extension = Path(filename).suffix.lower()
            try:
                with open(file_path, 'rb') as file:
                    content = file.read()
                if file_extension == '.zip':
                    processed_files.extend(process_zip_file(content, blacklist))
                    continue
                text = extract_text_from_file(content, file_extension)
                if not text:
                    print(f"Skipping {filename} - no text could be extracted")
                elif check_for_blacklisted_companies(text, blacklist):
                    print(f"Skipping {filename} - contains blacklisted company")
                else:
                    processed_files.append({
                        "id": f"{filename}_{hash(text)}",
                        "name": filename,
                        "content": text,
                    })
            except Exception as e:
                print(f"Error processing {filename}: {str(e)}")
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"Error accessing docs folder: {str(e)}") from e
    return processed_files
def check_resume_relevance(job_desc: str, resume_content: str, required_skills: List[str], client) -> dict:
    """Check which required skills a resume covers.

    Skills are first searched directly in the resume text (case-insensitive
    substring match). If at least one is found, the score is based on those
    direct matches and the LLM is only asked for a descriptive breakdown. If
    none is found, the LLM is asked for related or equivalent skills and the
    skills named in its answer count as found.

    Args:
        job_desc: Additional requirements text (currently not used here).
        resume_content: Extracted resume text.
        required_skills: Skill names to look for.
        client: Bedrock runtime client passed to ``BedrockLLM``.

    Returns:
        Dict with ``"is_relevant"`` (bool), ``"score"`` (0-100),
        ``"found_skills"`` (list), ``"total_skills"`` (int) and
        ``"key_matches"`` (LLM answer or a fallback message).
    """
    llm = BedrockLLM(
        model_id="amazon.titan-text-lite-v1",
        client=client
    )
    total_skills = len(required_skills)
    skills_block = "\n".join(f"- {skill}" for skill in required_skills)

    # NOTE(review): substring matching is deliberately lenient but also lets
    # 'Java' match 'JavaScript' and 'C' match nearly any text - confirm intent.
    resume_lower = resume_content.lower()
    found_skills_direct = [
        skill for skill in required_skills
        # A blank skill is a substring of everything; never count it.
        if skill.strip() and skill.lower() in resume_lower
    ]

    if found_skills_direct:
        # total_skills > 0 here because at least one skill was found.
        match_score = (len(found_skills_direct) / total_skills) * 100
        relevance_prompt = PromptTemplate.from_template(textwrap.dedent("""
            Analyze this resume for the following skills. Be lenient in matching skills.
            Required Skills to Check:
            {skills}
            Resume Content:
            {resume}
            For each skill, determine:
            1. If it's present (including variations and related technologies)
            2. The experience level with the skill
            3. How recently it was used
            Respond in this format:
            {{
            "skills_found": [
            List of skills found (including variations)
            ],
            "match_percentage": Percentage of required skills found (0-100),
            "skill_details": {{
            "skill_name": {{
            "found": true/false,
            "experience": "description of experience",
            "evidence": "where found in resume"
            }}
            }}
            }}
            Be generous in skill matching. If you find related technologies or variations, count them as matches.
            """))
        message = relevance_prompt.format(skills=skills_block, resume=resume_content)
        try:
            response = llm.invoke(message)
        except Exception as e:
            print(f"Error in LLM analysis: {e}")
            # Fall back to the direct matching results.
            return {
                "is_relevant": True,  # at least one skill was found directly
                "score": match_score,
                "found_skills": found_skills_direct,
                "total_skills": total_skills,
                "key_matches": f"Skills found through direct matching: {', '.join(found_skills_direct)}"
            }
        return {
            "is_relevant": match_score >= 50,
            "score": match_score,
            "found_skills": found_skills_direct,
            "total_skills": total_skills,
            "key_matches": response
        }

    # No direct matches: ask the LLM for related or equivalent skills.
    lenient_prompt = PromptTemplate.from_template(textwrap.dedent("""
        Analyze this resume for skills related to or equivalent to:
        {skills}
        Consider variations and related technologies.
        Resume Content:
        {resume}
        List any matches found, including:
        1. Direct matches
        2. Related technologies
        3. Equivalent skills
        Respond with found matches only.
        """))
    message = lenient_prompt.format(skills=skills_block, resume=resume_content)
    try:
        response = llm.invoke(message)
        # NOTE(review): a skill the model merely repeats (e.g. "no Python
        # experience found") is counted as a match by this check.
        response_lower = response.lower()
        found_skills = [
            skill for skill in required_skills
            if skill.strip() and skill.lower() in response_lower
        ]
        match_score = (len(found_skills) / total_skills) * 100 if required_skills else 0
        return {
            "is_relevant": len(found_skills) > 0,
            "score": match_score,
            "found_skills": found_skills,
            "total_skills": total_skills,
            "key_matches": response
        }
    except Exception as e:
        print(f"Error in lenient LLM analysis: {e}")
        return {
            "is_relevant": False,
            "score": 0,
            "found_skills": [],
            "total_skills": total_skills,
            "key_matches": "Error in analysis"
        }
def get_summary_from_llm(job_desc: str, resume_content: str, required_skills: List[str], client) -> str:
    """Generate a detailed Markdown analysis of a resume against the requirements.

    Args:
        job_desc: Additional requirements entered by the user.
        resume_content: Extracted resume text.
        required_skills: Skill names the analysis should cover.
        client: Bedrock runtime client passed to ``BedrockLLM``.

    Returns:
        The LLM's answer, or a string starting with
        ``"Error generating analysis:"`` if the call fails (no exception is
        raised for LLM failures).
    """
    llm = BedrockLLM(
        model_id="amazon.titan-text-lite-v1",
        client=client
    )
    map_prompt_template = PromptTemplate.from_template(textwrap.dedent("""
        Provide a detailed analysis of this resume against the job requirements.
        Required Skills:
        {skills}
        Additional Requirements:
        {job_desc}
        Resume Content:
        {resume_details}
        Provide analysis in this format:
        ## Skills Analysis
        ### Required Skills Match
        {skills_analysis}
        ### Technical Proficiency
        - For each required skill:
        * Experience level
        * Years of usage
        * Recent projects
        ### Additional Technical Skills
        - Only list relevant additional skills
        ## Experience Analysis
        - Total years of relevant experience
        - Key projects using multiple required skills
        - Notable achievements with required technologies
        ## Overall Assessment
        - Skills Match Score: X/Y required skills found
        - Technical Proficiency Score: (0-100)
        - Experience Level Match: (Junior/Mid/Senior)
        ## Recommendation
        - Hiring Decision: (Strong Match/Potential Match/Not Recommended)
        - Key Strengths: (list top 3)
        - Areas to Verify: (list specific areas)
        Focus only on exact matches and verifiable experience.
        """))
    skills_list = "\n".join(f"- {skill}" for skill in required_skills)
    # Pre-filled per-skill lines the model is expected to complete.
    skills_checklist = "\n".join(
        f"- {skill}: Found/Not Found, Experience Level, Evidence"
        for skill in required_skills
    )
    message = map_prompt_template.format(
        skills=skills_list,
        job_desc=job_desc,
        resume_details=resume_content,
        skills_analysis=skills_checklist
    )
    try:
        return llm.invoke(message)
    except Exception as e:
        return f"Error generating analysis: {str(e)}"
def _excel_column_letter(index: int) -> str:
    """Convert a 0-based column index to its Excel label (0 -> 'A', 26 -> 'AA')."""
    letters = ""
    number = index + 1
    while number > 0:
        number, remainder = divmod(number - 1, 26)
        letters = chr(65 + remainder) + letters
    return letters


def export_to_excel(matches: List[dict], required_skills: List[str]) -> BytesIO:
    """Create an Excel report from the matched resumes.

    The sheet 'Resume Matches' has one row per match with summary columns
    followed by one check/cross column per required skill.

    Args:
        matches: Match dicts; ``'name'`` is required, ``'match_score'`` and
            ``'found_skills'`` are optional.
        required_skills: Skill names that define the per-skill columns.

    Returns:
        An in-memory buffer holding the .xlsx file, positioned at the start.
    """
    excel_data = []
    for match in matches:
        found_skills = match.get('found_skills', [])
        row_data = {
            'Candidate Name': match['name'],
            'Match Score': f"{match.get('match_score', 0):.1f}%",
            'Skills Found': ', '.join(found_skills),
            'Missing Skills': ', '.join(skill for skill in required_skills if skill not in found_skills),
            'Total Skills Found': len(found_skills),
            'Total Required Skills': len(required_skills)
        }
        for skill in required_skills:
            row_data[f'Skill - {skill}'] = '✓' if skill in found_skills else '✗'
        excel_data.append(row_data)

    df = pd.DataFrame(excel_data)
    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Resume Matches')
        worksheet = writer.sheets['Resume Matches']
        for idx, col in enumerate(df.columns):
            # Fit each column to its longest cell or header, capped at 50.
            longest_cell = int(df[col].astype(str).str.len().max())
            max_length = max(longest_cell, len(col))
            # chr(65 + idx) is only valid for columns A-Z; with more than 20
            # skills the sheet goes past column Z.
            column_letter = _excel_column_letter(idx)
            worksheet.column_dimensions[column_letter].width = min(max_length + 2, 50)
    # Rewind so callers can read() the buffer as well as getvalue().
    output.seek(0)
    return output
# Session-state key under which the latest analysis is kept between reruns.
_RESULTS_KEY = "screening_results"
# Prefix get_summary_from_llm uses for failed analyses; those are not cached.
_ANALYSIS_ERROR_PREFIX = "Error generating analysis:"


def _analyze_resumes(processed_files: List[dict], required_skills: List[str],
                     additional_reqs: str, client) -> List[dict]:
    """Score every resume and return those with at least one skill found, best first."""
    matches = []
    progress_bar = st.progress(0)
    total_files = len(processed_files)
    for position, file_data in enumerate(processed_files, start=1):
        progress_bar.progress(position / total_files)
        relevance = check_resume_relevance(
            additional_reqs,
            file_data['content'],
            required_skills,
            client
        )
        if relevance['found_skills']:
            matches.append({
                **file_data,
                "match_score": relevance['score'],
                "found_skills": relevance['found_skills'],
                "total_skills": relevance['total_skills'],
                "key_matches": relevance['key_matches']
            })
    progress_bar.empty()
    matches.sort(key=lambda match: match['match_score'], reverse=True)
    return matches


def _render_match(position: int, match: dict, results: dict, client) -> None:
    """Render one matched resume: skill checklist plus the detailed LLM analysis."""
    required_skills = results["required_skills"]
    analyses = results["analyses"]
    with st.container():
        st.markdown("---")
        col1, col2 = st.columns([1, 3])
        with col1:
            st.subheader(f"Match #{position}")
            st.write(f"📄 {match['name']}")
            st.write(f"Match Score: {match['match_score']:.1f}%")
            st.write("Skills Found:")
            found_skills = match.get('found_skills', [])
            for skill in required_skills:
                if skill in found_skills:
                    st.write(f"✅ {skill}")
                else:
                    st.write(f"❌ {skill}")
        with col2:
            with st.expander("Show Detailed Analysis"):
                # The expander body runs on every rerun, so reuse the stored
                # analysis instead of calling the LLM again per slider move.
                analysis = analyses.get(match['id'])
                if analysis is None:
                    analysis = get_summary_from_llm(
                        results["additional_reqs"],
                        match['content'],
                        required_skills,
                        client
                    )
                    if not analysis.startswith(_ANALYSIS_ERROR_PREFIX):
                        analyses[match['id']] = analysis
                st.markdown(analysis)


def _render_summary(filtered_matches: List[dict], required_skills: List[str]) -> None:
    """Render summary metrics and the skill distribution chart (needs >= 1 match)."""
    match_count = len(filtered_matches)
    st.markdown("---")
    st.subheader("Summary Statistics")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Matches", match_count)
    with col2:
        avg_score = sum(match['match_score'] for match in filtered_matches) / match_count
        st.metric("Average Match Score", f"{avg_score:.1f}%")
    with col3:
        perfect_matches = sum(1 for match in filtered_matches if match['match_score'] == 100)
        st.metric("Perfect Matches", perfect_matches)
    with col4:
        avg_skills = sum(len(match['found_skills']) for match in filtered_matches) / match_count
        st.metric("Avg. Skills Found", f"{avg_skills:.1f}")

    st.subheader("Skill Distribution")
    skill_counts = {skill: 0 for skill in required_skills}
    for match in filtered_matches:
        for skill in match['found_skills']:
            if skill in skill_counts:
                skill_counts[skill] += 1
    chart_data = pd.DataFrame({
        'Skill': list(skill_counts.keys()),
        'Count': list(skill_counts.values())
    })
    st.bar_chart(chart_data.set_index('Skill'))


def _render_results(results: dict, client) -> None:
    """Render filters, matches, export and statistics for a stored analysis."""
    matches = results["matches"]
    required_skills = results["required_skills"]
    if not matches:
        st.warning(
            "No profiles found matching the required skills. "
            "Try adjusting the requirements or adding more resumes."
        )
        return

    st.success(f"Found {len(matches)} profiles with matching skills")
    col1, col2 = st.columns(2)
    with col1:
        min_score = st.slider(
            "Minimum Match Score",
            min_value=0,
            max_value=100,
            value=50,
            step=5
        )
    with col2:
        min_skills = st.slider(
            "Minimum Required Skills",
            min_value=0,
            max_value=len(required_skills),
            value=1,
            step=1
        )
    filtered_matches = [
        match for match in matches
        if match['match_score'] >= min_score
        and len(match['found_skills']) >= min_skills
    ]
    st.subheader(f"Showing {len(filtered_matches)} matches meeting criteria")
    if not filtered_matches:
        # Without this guard the averages below divide by zero, which the
        # default 50% threshold triggers whenever every score is lower.
        st.info("No profiles meet the current filter criteria. Lower the thresholds to see more matches.")
        return

    for position, match in enumerate(filtered_matches, start=1):
        _render_match(position, match, results, client)

    st.markdown("---")
    st.subheader("Export Results")
    excel_output = export_to_excel(filtered_matches, required_skills)
    st.download_button(
        label=f"📥 Download Excel Report ({len(filtered_matches)} matches)",
        data=excel_output.getvalue(),
        file_name="resume_matches.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )
    _render_summary(filtered_matches, required_skills)


def main():
    """Run the Streamlit resume screening app.

    Renders the upload section and the requirement inputs. When "Find Matching
    Profiles" is clicked, the resumes in the ``Docs`` folder next to this
    script are analysed and the outcome is stored in ``st.session_state``, so
    the results stay visible when a filter slider triggers a rerun (the button
    is only true on the rerun right after the click). Errors are shown in the
    UI and printed rather than raised.
    """
    try:
        load_dotenv()
        client = create_aws_client()
        st.set_page_config(
            page_title="Resume Screening Assistant",
            layout="wide"
        )
        st.title("Resume Screening AI Assistant")
        st.subheader("Match resumes with required skills and experience")
        upload_section()

        st.write("Enter required skills (one per line):")
        skills_input = st.text_area(
            "Required Skills",
            placeholder="Example:\nPython\nJava\nAWS\nDocker",
            height=150
        )
        additional_reqs = st.text_area(
            "Additional Requirements (optional)",
            placeholder="Enter any additional requirements like:\n- Years of experience\n- Education\n- Specific domain knowledge",
            height=100
        )
        required_skills = [skill.strip() for skill in skills_input.split('\n') if skill.strip()]

        analyze_button = st.button("Find Matching Profiles", use_container_width=True)
        if analyze_button:
            # A new run replaces whatever was shown before, even if it fails.
            if _RESULTS_KEY in st.session_state:
                del st.session_state[_RESULTS_KEY]
            if not required_skills:
                st.error("Please enter at least one required skill!")
                return
            docs_folder = os.path.join(os.path.dirname(__file__), 'Docs')
            with st.spinner("Analyzing resumes..."):
                try:
                    processed_files = process_docs_folder(docs_folder)
                    if not processed_files:
                        st.error("No resumes found in the Docs folder!")
                        return
                    st.session_state[_RESULTS_KEY] = {
                        "matches": _analyze_resumes(
                            processed_files, required_skills, additional_reqs, client
                        ),
                        # Stored with the matches so edits to the inputs do
                        # not mix with results computed for older inputs.
                        "required_skills": required_skills,
                        "additional_reqs": additional_reqs,
                        "analyses": {},
                    }
                except Exception as e:
                    st.error(f"Error during analysis: {str(e)}")
                    print(f"Error Details: {e}")

        if _RESULTS_KEY in st.session_state:
            try:
                _render_results(st.session_state[_RESULTS_KEY], client)
            except Exception as e:
                st.error(f"Error during analysis: {str(e)}")
                print(f"Error Details: {e}")
    except Exception as error:
        st.error(f"An error occurred: {str(error)}")
        print(f"Error Details: {error}")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()