Spaces:
Build error
Build error
Upload 7 files
Browse files- app.py +85 -0
- gitattributes.txt +35 -0
- job_matcher.py +44 -0
- langflow_chain.py +189 -0
- requirements.txt +12 -0
- resume_parser.py +226 -0
- ui_components.py +99 -0
app.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
from resume_parser import ResumeParser
from job_matcher import JobMatcher
from langflow_chain import LangflowChain
from ui_components import (
    render_header,
    render_upload_section,
    render_results_section,
    render_footer
)
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file (OPENAI_API_KEY, etc.)
load_dotenv()

# Fail fast with a visible message if the API key is missing — every
# analysis run depends on it.
if not os.getenv("OPENAI_API_KEY"):
    st.error("Please set your OPENAI_API_KEY in the .env file")
    st.stop()

# Page chrome must be configured before any other st.* rendering call.
st.set_page_config(
    page_title="Resume Analyzer & Job Matcher",
    page_icon="📄",
    layout="wide"
)

# Initialize session state once per browser session so results survive
# Streamlit's script re-runs between interactions.
if "resume_data" not in st.session_state:
    st.session_state.resume_data = None
    st.session_state.job_matches = None
    st.session_state.skill_gaps = None
    st.session_state.improvement_tips = None
    st.session_state.processed = False

def main():
    """Drive the app: header, upload, analysis pipeline, results, footer."""
    render_header()

    # Upload widget; returns None until the user picks a file.
    uploaded_file = render_upload_section()

    # Run the full pipeline only when a file is present AND the user
    # explicitly clicks the button (avoids re-analyzing on every rerun).
    if uploaded_file is not None and st.button("Analyze Resume"):
        with st.spinner("Analyzing your resume..."):
            # 1. Extract structured data from the PDF/DOCX.
            resume_parser = ResumeParser()
            resume_data = resume_parser.parse(uploaded_file)
            st.session_state.resume_data = resume_data

            # 2. Ask the LLM for matches, gaps, and tips.
            langflow_chain = LangflowChain()
            analysis_results = langflow_chain.analyze_resume(resume_data)

            # 3. Post-process job matches through the matcher (previously its
            #    return value was computed and then discarded — fixed).
            job_matcher = JobMatcher()
            job_matches = job_matcher.find_matches(analysis_results)

            # Persist results so they survive subsequent reruns.
            st.session_state.job_matches = job_matches
            st.session_state.skill_gaps = analysis_results.get("skill_gaps", [])
            st.session_state.improvement_tips = analysis_results.get("improvement_tips", [])
            st.session_state.processed = True

            st.success("Resume analyzed successfully!")

    # Show results from session state (also after unrelated reruns).
    if st.session_state.processed:
        render_results_section(
            st.session_state.resume_data,
            st.session_state.job_matches,
            st.session_state.skill_gaps,
            st.session_state.improvement_tips
        )

    render_footer()

if __name__ == "__main__":
    main()
|
gitattributes.txt
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
job_matcher.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List, Any

class JobMatcher:
    """Post-processes the job matches produced by the LLM analysis step.

    In a real application this could connect to a job database or API and
    apply filters (location, seniority, ...). Here it mainly forwards the
    LLM-provided matches and offers a simple scoring helper.
    """

    def __init__(self):
        # No external resources yet — placeholder for a job DB/API client.
        pass

    def find_matches(self, analysis_results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return the job matches already generated by the LLM.

        Args:
            analysis_results: Dict produced by LangflowChain.analyze_resume;
                expected to contain a "job_matches" list.

        Returns:
            The list of job-match dicts, or an empty list when absent.
        """
        # Additional processing or filtering could be applied here; for now
        # the LLM-provided matches are returned unchanged.
        return analysis_results.get("job_matches", [])

    def calculate_match_score(self, job_requirements: List[str], candidate_skills: List[str]) -> float:
        """Calculate a match score between job requirements and candidate skills.

        Fix: the score now counts how many *requirements* are satisfied,
        not how many candidate skills match. The old formula let several
        overlapping skills (e.g. "Python" and "Python3") each count against
        a single requirement and inflate the score.

        Args:
            job_requirements: Skills required for the job.
            candidate_skills: Skills possessed by the candidate.

        Returns:
            A match score between 0 and 1 (fraction of requirements met).
        """
        if not job_requirements:
            # No requirements means nothing to match against.
            return 0.0

        # A requirement is satisfied when its (case-insensitive) text appears
        # in any candidate skill.
        matched = sum(
            1 for req in job_requirements
            if any(req.lower() in skill.lower() for skill in candidate_skills)
        )
        return min(matched / len(job_requirements), 1.0)
langflow_chain.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json
from typing import Dict, Any
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

class LangflowChain:
    """LLM pipeline that turns parsed resume data into career insights.

    Builds a single PromptTemplate -> ChatOpenAI chain that asks the model
    for job matches, skill gaps, and improvement tips as a JSON object.
    """

    def __init__(self):
        """Initialize the LangflowChain with the OpenAI LLM."""
        # Low temperature: we want stable, mostly-deterministic JSON output.
        self.llm = ChatOpenAI(
            model="gpt-3.5-turbo",
            temperature=0.2
        )

        # NOTE: doubled braces ({{ }}) escape literal braces for
        # PromptTemplate; only {resume_data} is a template variable.
        self.template = """
        You are an expert resume analyzer and career advisor. Your task is to analyze the resume details provided
        and generate actionable insights.

        Resume Details:
        {resume_data}

        Based on the resume details above, please provide the following:

        1. Suitable Job Roles: List the top 5 job roles that match this candidate's skills and experience.
        2. Skill Gap Analysis: Identify important skills that are missing for the suggested job roles and
        recommend courses or certifications to acquire these skills.
        3. Resume Improvement Tips: Provide specific, actionable recommendations for enhancing the resume.

        Format your response as a JSON object with the following structure:
        {{
            "job_matches": [
                {{
                    "title": "Job Title 1",
                    "match_score": 85,
                    "key_matching_skills": ["Skill 1", "Skill 2", "Skill 3"],
                    "description": "Brief description of why this role is suitable"
                }},
                // Other job matches...
            ],
            "skill_gaps": [
                {{
                    "skill": "Missing Skill 1",
                    "importance": "High",
                    "acquisition_recommendation": "Specific course, certification, or project to gain this skill"
                }},
                // Other skill gaps...
            ],
            "improvement_tips": [
                "Specific tip 1 for improving the resume",
                "Specific tip 2 for improving the resume",
                // Other tips...
            ]
        }}

        Ensure your response is properly formatted as valid JSON.
        """

        self.prompt = PromptTemplate(
            input_variables=["resume_data"],
            template=self.template
        )

        self.chain = LLMChain(
            llm=self.llm,
            prompt=self.prompt
        )

    def analyze_resume(self, resume_data: Any) -> Dict[str, Any]:
        """Analyze a resume and generate insights using the LLM chain.

        Args:
            resume_data: Structured resume data (Pydantic model or dict).

        Returns:
            Dictionary containing job matches, skill gaps, and improvement
            tips. Falls back to a canned mock response when the LLM call or
            JSON parsing fails, so the UI always has something to render.
        """
        # Convert resume data to a plain-text representation for the prompt.
        resume_str = self._format_resume_data(resume_data)

        try:
            result = self.chain.invoke({"resume_data": resume_str})

            # LLMChain returns a dict; the completion lives under "text".
            response_text = result.get("text", "{}")

            # Models frequently wrap JSON in markdown fences or prose.
            # Parse only the outermost {...} slice to tolerate that.
            start = response_text.find("{")
            end = response_text.rfind("}")
            if start != -1 and end > start:
                response_text = response_text[start:end + 1]

            try:
                return json.loads(response_text)
            except json.JSONDecodeError:
                # Fall back to a mock response if JSON parsing fails.
                return self._create_mock_response()

        except Exception as e:
            # Network/auth/rate-limit errors: degrade gracefully.
            print(f"Error in LLM chain: {e}")
            return self._create_mock_response()

    def _format_resume_data(self, resume_data: Any) -> str:
        """Format resume data into a human-readable string for the prompt."""
        if hasattr(resume_data, "dict"):
            # Pydantic model — convert to a plain dict.
            data_dict = resume_data.dict()
        else:
            # Assume it's already a dictionary (or dict-like).
            data_dict = resume_data

        formatted_str = ""

        # Personal Info
        personal_info = data_dict.get("personal_info", {})
        formatted_str += "Personal Information:\n"
        for key, value in personal_info.items():
            if value:
                formatted_str += f"- {key.replace('_', ' ').title()}: {value}\n"

        # Education
        education = data_dict.get("education", [])
        formatted_str += "\nEducation:\n"
        for edu in education:
            if isinstance(edu, dict):
                inst = edu.get("institution", "")
                degree = edu.get("degree", "")
                formatted_str += f"- {degree} at {inst}\n"
            else:
                formatted_str += f"- {str(edu)}\n"

        # Work Experience
        work_exp = data_dict.get("work_experience", [])
        formatted_str += "\nWork Experience:\n"
        for exp in work_exp:
            if isinstance(exp, dict):
                company = exp.get("company", "")
                position = exp.get("position", "")
                formatted_str += f"- {position} at {company}\n"
            else:
                formatted_str += f"- {str(exp)}\n"

        # Skills
        skills = data_dict.get("skills", [])
        formatted_str += "\nSkills:\n"
        for skill in skills:
            formatted_str += f"- {skill}\n"

        # Certifications
        certifications = data_dict.get("certifications", [])
        formatted_str += "\nCertifications:\n"
        for cert in certifications:
            formatted_str += f"- {cert}\n"

        return formatted_str

    def _create_mock_response(self) -> Dict[str, Any]:
        """Create a mock response used as a fallback when the LLM fails."""
        return {
            "job_matches": [
                {
                    "title": "Data Scientist",
                    "match_score": 85,
                    "key_matching_skills": ["Python", "Data Analysis", "Machine Learning"],
                    "description": "Your strong analytical skills and programming experience make you well-suited for this role."
                },
                {
                    "title": "Software Engineer",
                    "match_score": 80,
                    "key_matching_skills": ["Python", "JavaScript", "Git"],
                    "description": "Your technical skills and project experience align well with software engineering positions."
                }
            ],
            "skill_gaps": [
                {
                    "skill": "Cloud Computing (AWS/Azure)",
                    "importance": "High",
                    "acquisition_recommendation": "AWS Certified Solutions Architect or Azure Fundamentals certification"
                },
                {
                    "skill": "SQL and Database Management",
                    "importance": "Medium",
                    "acquisition_recommendation": "Take an online course on SQL and database design"
                }
            ],
            "improvement_tips": [
                "Quantify your achievements with specific metrics and results",
                "Add a professional summary section highlighting your key strengths",
                "Reorganize your skills section to prioritize the most relevant skills for your target roles"
            ]
        }
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
streamlit==1.30.0
|
| 3 |
+
langchain==0.1.0
|
| 4 |
+
langchain-openai==0.0.5
|
| 5 |
+
python-docx==0.8.11
|
| 6 |
+
PyPDF2==3.0.1
|
| 7 |
+
pydantic==2.5.2
|
| 8 |
+
python-dotenv==1.0.0
|
| 9 |
+
streamlit-extras==0.3.5
|
| 10 |
+
numpy==1.26.2
|
| 11 |
+
pandas==2.1.3
|
| 12 |
+
|
resume_parser.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import PyPDF2
|
| 2 |
+
import docx
|
| 3 |
+
import io
|
| 4 |
+
import re
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from typing import List, Optional, Dict
|
| 7 |
+
|
| 8 |
+
class Education(BaseModel):
    """One education entry extracted from a resume."""
    # institution may be empty when the parser can't split "degree, school".
    institution: str
    degree: str
    field_of_study: Optional[str] = None
    graduation_date: Optional[str] = None

class WorkExperience(BaseModel):
    """One employment entry extracted from a resume."""
    company: str
    position: str
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    description: Optional[str] = None

class PersonalInfo(BaseModel):
    """Contact details extracted from the resume header via regex."""
    name: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    location: Optional[str] = None
    # github/linkedin hold the captured username, not the full URL.
    github: Optional[str] = None
    linkedin: Optional[str] = None
    portfolio: Optional[str] = None

class ResumeData(BaseModel):
    """Full structured result of parsing a resume file."""
    personal_info: PersonalInfo
    education: List[Education]
    work_experience: List[WorkExperience]
    skills: List[str]
    certifications: List[str]
    # raw_text keeps the original extracted text for downstream LLM use.
    raw_text: str
|
| 38 |
+
class ResumeParser:
    """Extracts structured ResumeData from an uploaded PDF or DOCX file.

    All extraction is heuristic: regexes for contact details and simple
    keyword/section scanning for education, experience, skills, and
    certifications. Results are best-effort, not exhaustive.
    """

    def __init__(self):
        # Regex patterns for contact details; compiled lazily by re's cache.
        self.email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
        self.phone_pattern = r'(\+\d{1,3}[\s-]?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}'
        # These capture only the username segment of the profile URL.
        self.github_pattern = r'github\.com/([A-Za-z0-9_-]+)'
        self.linkedin_pattern = r'linkedin\.com/in/([A-Za-z0-9_-]+)'

    def parse(self, uploaded_file) -> ResumeData:
        """Parse the uploaded resume file and extract key information.

        Args:
            uploaded_file: A Streamlit UploadedFile (has .name and .getvalue()).

        Returns:
            ResumeData with personal info, education, experience, skills,
            certifications, and the raw extracted text.

        Raises:
            ValueError: If the file extension is not pdf/docx/doc.
        """
        # Dispatch on the file extension (lowercased, last dot segment).
        file_extension = uploaded_file.name.split('.')[-1].lower()

        if file_extension == 'pdf':
            raw_text = self._extract_text_from_pdf(uploaded_file)
        elif file_extension in ['docx', 'doc']:
            # NOTE(review): python-docx cannot read legacy binary .doc files;
            # a .doc upload will likely raise here — confirm and handle.
            raw_text = self._extract_text_from_docx(uploaded_file)
        else:
            raise ValueError("Unsupported file format. Please upload a PDF or DOCX file.")

        # Run each heuristic extractor over the full raw text.
        personal_info = self._extract_personal_info(raw_text)
        education = self._extract_education(raw_text)
        work_experience = self._extract_work_experience(raw_text)
        skills = self._extract_skills(raw_text)
        certifications = self._extract_certifications(raw_text)

        # Assemble the validated Pydantic result object.
        resume_data = ResumeData(
            personal_info=personal_info,
            education=education,
            work_experience=work_experience,
            skills=skills,
            certifications=certifications,
            raw_text=raw_text
        )

        return resume_data

    def _extract_text_from_pdf(self, file) -> str:
        """Extract text from a PDF file (concatenates all pages)."""
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file.getvalue()))
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text()
        return text

    def _extract_text_from_docx(self, file) -> str:
        """Extract text from a DOCX file (one line per paragraph)."""
        doc = docx.Document(io.BytesIO(file.getvalue()))
        text = ""
        for paragraph in doc.paragraphs:
            text += paragraph.text + "\n"
        return text

    def _extract_personal_info(self, text) -> PersonalInfo:
        """Extract personal information (name/email/phone/profiles) from text."""
        # Basic extraction with regex; first match wins for each field.
        email = re.search(self.email_pattern, text)
        phone = re.search(self.phone_pattern, text)
        github = re.search(self.github_pattern, text)
        linkedin = re.search(self.linkedin_pattern, text)

        # Heuristic: the first line of a resume usually contains the name.
        lines = text.split('\n')
        name = lines[0].strip() if lines else None

        return PersonalInfo(
            name=name,
            email=email.group(0) if email else None,
            phone=phone.group(0) if phone else None,
            github=github.group(1) if github else None,
            linkedin=linkedin.group(1) if linkedin else None
        )

    def _extract_education(self, text) -> List[Education]:
        """Extract education entries by scanning the EDUCATION section."""
        # Simple implementation - in a real system this would be more sophisticated
        education_section = self._extract_section(text, ["EDUCATION", "Education", "ACADEMIC BACKGROUND"])
        if not education_section:
            return []

        # Very basic parsing: a line mentioning a degree keyword starts a new
        # entry; "degree, institution" is split on the first comma.
        educations = []
        lines = education_section.split('\n')
        current_education = None

        for line in lines:
            if not line.strip():
                continue

            if any(degree in line for degree in ["Bachelor", "Master", "PhD", "B.S.", "M.S.", "Ph.D"]):
                # Starting a new entry — flush the previous one first.
                if current_education:
                    educations.append(current_education)

                parts = line.split(',')
                degree = parts[0].strip() if parts else line.strip()
                institution = parts[1].strip() if len(parts) > 1 else ""

                current_education = Education(
                    institution=institution,
                    degree=degree
                )

        # Flush the trailing entry, if any.
        if current_education:
            educations.append(current_education)

        return educations

    def _extract_work_experience(self, text) -> List[WorkExperience]:
        """Extract work experience entries by scanning the experience section."""
        experience_section = self._extract_section(text, ["EXPERIENCE", "Experience", "WORK EXPERIENCE", "EMPLOYMENT"])
        if not experience_section:
            return []

        # Simple implementation: a line containing "Month YYYY" starts a new
        # entry.
        experiences = []
        lines = experience_section.split('\n')
        current_experience = None

        for line in lines:
            if not line.strip():
                continue

            if re.search(r'\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4}\b', line):
                if current_experience:
                    experiences.append(current_experience)

                # NOTE(review): company and position use nearly identical
                # regexes over the same line, so both will usually capture the
                # same leading text — this looks unintended; confirm and fix.
                company_match = re.search(r'([A-Za-z0-9\s]+)', line)
                company = company_match.group(1).strip() if company_match else "Unknown Company"

                position_match = re.search(r'([A-Za-z\s]+)', line)
                position = position_match.group(1).strip() if position_match else "Unknown Position"

                current_experience = WorkExperience(
                    company=company,
                    position=position
                )

        # Flush the trailing entry, if any.
        if current_experience:
            experiences.append(current_experience)

        return experiences

    def _extract_skills(self, text) -> List[str]:
        """Extract skills as a flat list from the SKILLS section."""
        skills_section = self._extract_section(text, ["SKILLS", "Skills", "TECHNICAL SKILLS"])
        if not skills_section:
            return []

        # Simple split by commas/bullets after flattening newlines, then trim.
        skills_text = skills_section.replace('\n', ' ')
        skills = [skill.strip() for skill in re.split(r'[,•]', skills_text) if skill.strip()]

        return skills

    def _extract_certifications(self, text) -> List[str]:
        """Extract certifications, one per non-empty line of the section."""
        cert_section = self._extract_section(text, ["CERTIFICATIONS", "Certifications", "CERTIFICATES"])
        if not cert_section:
            return []

        # Simple split by newlines and cleanup.
        certifications = [cert.strip() for cert in cert_section.split('\n') if cert.strip()]

        return certifications

    def _extract_section(self, text, section_headers) -> str:
        """Extract a section from the resume text based on header keywords.

        Collects lines after a line containing any of *section_headers*,
        stopping when the next all-uppercase line (assumed to be the next
        section header) is near.

        NOTE(review): the stop condition inspects lines[i+1] but the current
        line is appended before the loop breaks, and mixed-case headers won't
        stop collection — verify against real resumes.
        """
        lines = text.split('\n')
        section_text = ""
        in_section = False

        for i, line in enumerate(lines):
            # Check if this line contains a section header
            if any(header in line for header in section_headers):
                in_section = True
                continue

            # Check if we've reached the next section
            if in_section and i < len(lines) - 1:
                next_line = lines[i+1]
                if next_line.isupper() and len(next_line.strip()) > 0:
                    break

            if in_section:
                section_text += line + "\n"

        return section_text.strip()
|
ui_components.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from typing import Dict, List, Any, Optional
|
| 3 |
+
|
| 4 |
+
def render_header():
    """Render the application title, intro blurb, and a divider."""
    st.title("📄 Resume Analyzer & Job Matcher")
    st.markdown("""
    Upload your resume to get personalized job matches, identify skill gaps,
    and receive recommendations for improvement.
    """)
    st.divider()
+
|
| 13 |
+
def render_upload_section():
    """Render the file upload widget and, once a file is chosen, its details.

    Returns:
        The Streamlit UploadedFile, or None when nothing is uploaded yet.
    """
    st.subheader("Upload Your Resume")
    st.markdown("Supported formats: PDF, DOCX")

    # Streamlit restricts selectable files to these extensions client-side.
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])

    if uploaded_file is not None:
        st.success(f"File uploaded: {uploaded_file.name}")

        # Summary metadata shown in the collapsible details panel below.
        file_details = {
            "Filename": uploaded_file.name,
            "File size": f"{uploaded_file.size / 1024:.2f} KB",
            "File type": uploaded_file.type
        }

        with st.expander("File Details"):
            for key, value in file_details.items():
                st.write(f"**{key}:** {value}")

    return uploaded_file
| 35 |
+
def render_results_section(
    resume_data: Any,
    job_matches: List[Dict[str, Any]],
    skill_gaps: List[Dict[str, Any]],
    improvement_tips: List[str]
):
    """Render the analysis output in three tabs: matches, gaps, tips.

    Args:
        resume_data: Parsed resume (currently unused here; kept for
            interface stability with the caller).
        job_matches: LLM job-match dicts (title, match_score,
            key_matching_skills, description).
        skill_gaps: LLM skill-gap dicts (skill, importance,
            acquisition_recommendation).
        improvement_tips: Plain-text resume improvement suggestions.
    """
    st.divider()
    st.header("Analysis Results")

    # Create tabs for different result categories
    tab1, tab2, tab3 = st.tabs(["Job Matches", "Skill Gaps", "Resume Improvement"])

    # Tab 1: Job Matches — one container per role, details left, skills right.
    with tab1:
        st.subheader("Recommended Job Roles")
        if not job_matches:
            st.info("No job matches found. Please try uploading a different resume.")
        else:
            for i, job in enumerate(job_matches):
                with st.container():
                    col1, col2 = st.columns([3, 1])
                    with col1:
                        st.markdown(f"### {i+1}. {job.get('title', 'Unknown Job')}")
                        st.markdown(f"**Match Score:** {job.get('match_score', 'N/A')}%")
                        st.markdown(f"**Description:** {job.get('description', 'No description available')}")
                    with col2:
                        st.markdown("**Matching Skills:**")
                        for skill in job.get('key_matching_skills', []):
                            st.markdown(f"- {skill}")
                    st.divider()

    # Tab 2: Skill Gaps — skill/importance left, acquisition advice right.
    with tab2:
        st.subheader("Skill Gap Analysis")
        if not skill_gaps:
            st.info("No skill gaps identified.")
        else:
            for skill_gap in skill_gaps:
                with st.container():
                    col1, col2 = st.columns([1, 2])
                    with col1:
                        st.markdown(f"### {skill_gap.get('skill', 'Unknown Skill')}")
                        st.markdown(f"**Importance:** {skill_gap.get('importance', 'Medium')}")
                    with col2:
                        st.markdown("**How to acquire this skill:**")
                        st.markdown(skill_gap.get('acquisition_recommendation', 'No recommendation available'))
                    st.divider()

    # Tab 3: Resume Improvement — numbered list of tips.
    with tab3:
        st.subheader("Resume Improvement Tips")
        if not improvement_tips:
            st.info("No improvement tips available.")
        else:
            for i, tip in enumerate(improvement_tips):
                st.markdown(f"**{i+1}.** {tip}")
| 93 |
+
def render_footer():
    """Render the application footer with an AI-accuracy disclaimer."""
    st.divider()
    st.markdown("""
    **Note:** This application uses AI to analyze your resume and provide recommendations.
    The results should be considered as suggestions and may not be 100% accurate.
    """)