""" Advanced Gap Analysis - Provides true skills matching with semantic similarity """ from typing import Dict, Any, List, Optional from dataclasses import dataclass from difflib import SequenceMatcher from metrics import log_metric from llm_client import LLMClient @dataclass class SkillMatch: """Detailed skill matching result""" job_requirement: str resume_skill: str match_score: float match_type: str # "strong", "partial", "weak", "missing" importance: str # "required", "preferred", "nice_to_have" recommendation: str category: str @dataclass class GapAnalysisResult: """Complete gap analysis result""" overall_match_score: float strong_matches: List[SkillMatch] partial_matches: List[SkillMatch] gaps: List[SkillMatch] strengths_summary: str gaps_summary: str competitive_advantages: List[str] preparation_priority: List[str] interview_focus_areas: List[str] skill_categories_analysis: Dict[str, float] class AdvancedSkillMatcher: """Advanced skill matching with semantic similarity""" def __init__(self): self.skill_synonyms = { # Programming Languages "python": ["python3", "py", "python programming"], "javascript": ["js", "ecmascript", "node.js", "nodejs"], "typescript": ["ts"], "react": ["reactjs", "react.js"], "vue": ["vue.js", "vuejs"], "angular": ["angularjs"], # Frameworks & Libraries "express": ["express.js", "expressjs"], "django": ["django framework"], "flask": ["flask framework"], "spring": ["spring boot", "spring framework"], "laravel": ["laravel framework"], # Databases "postgresql": ["postgres", "psql"], "mongodb": ["mongo"], "mysql": ["my sql"], # Cloud Platforms "aws": ["amazon web services"], "gcp": ["google cloud platform", "google cloud"], "azure": ["microsoft azure"], # DevOps & Tools "kubernetes": ["k8s"], "docker": ["containers", "containerization"], "jenkins": ["ci/cd"], "git": ["version control", "github", "gitlab"], # Data & ML "machine learning": ["ml", "artificial intelligence", "ai"], "deep learning": ["dl", "neural networks"], "tensorflow": ["tf"], "pytorch": ["torch"], "pandas": ["data analysis"], "numpy": ["numerical computing"], # Other "agile": ["scrum", "kanban"], "restful": ["rest api", "rest apis", "api development"], "microservices": ["micro services", "service oriented architecture"] } def normalize_skill(self, skill: str) -> str: """Normalize skill name using synonyms""" skill_lower = skill.lower().strip() # Direct match if skill_lower in self.skill_synonyms: return skill_lower # Check if it's a synonym for main_skill, synonyms in self.skill_synonyms.items(): if skill_lower in synonyms: return main_skill return skill_lower def calculate_similarity(self, skill1: str, skill2: str) -> float: """Calculate semantic similarity between two skills""" norm1 = self.normalize_skill(skill1) norm2 = self.normalize_skill(skill2) # Direct match after normalization if norm1 == norm2: return 1.0 # Partial matching using SequenceMatcher similarity = SequenceMatcher(None, norm1, norm2).ratio() # Boost for containing relationships if norm1 in norm2 or norm2 in norm1: similarity = max(similarity, 0.8) return similarity def find_best_match(self, job_requirement: str, resume_skills: List[str]) -> tuple[str, float]: """Find the best matching resume skill for a job requirement""" best_skill = "" best_score = 0.0 for resume_skill in resume_skills: score = self.calculate_similarity(job_requirement, resume_skill) if score > best_score: best_score = score best_skill = resume_skill return best_skill, best_score class AdvancedGapAnalysis: """Advanced gap analysis with true skills matching""" def __init__(self): self.matcher = AdvancedSkillMatcher() self.llm_client = LLMClient() async def run(self, data: Dict[str, Any]) -> Dict[str, Any]: """Main entry point for gap analysis""" try: # Extract resume and job data resume_data = data.get("resume_data_enhanced", {}) job_data = data.get("job_data_enhanced", {}) if not resume_data or not job_data: return {**data, "gap_analysis_advanced": { "error": "Missing resume or job data"}} # Perform advanced gap analysis analysis_result = await self._analyze_comprehensive_fit( resume_data, job_data) log_metric("gap_analysis_advanced_success", { "match_score": analysis_result.overall_match_score, "strong_matches": len(analysis_result.strong_matches), "gaps": len(analysis_result.gaps) }) return {**data, "gap_analysis_advanced": self._format_result( analysis_result)} except Exception as e: log_metric("gap_analysis_advanced_error", {"error": str(e)}) return {**data, "gap_analysis_advanced": { "error": f"Advanced gap analysis failed: {e}"}} async def _analyze_comprehensive_fit( self, resume_data: Dict[str, Any], job_data: Dict[str, Any] ) -> GapAnalysisResult: """Perform comprehensive fit analysis""" # Extract skills from resume resume_skills = self._extract_resume_skills(resume_data) # Extract requirements from job job_requirements = self._extract_job_requirements(job_data) # Perform detailed matching skill_matches = self._match_skills_detailed( job_requirements, resume_skills) # Calculate overall score overall_score = self._calculate_overall_score(skill_matches) # Categorize matches strong_matches = [m for m in skill_matches if m.match_score >= 0.8] partial_matches = [m for m in skill_matches if 0.4 <= m.match_score < 0.8] gaps = [m for m in skill_matches if m.match_score < 0.4] # Generate AI-powered analysis strengths_summary = await self._generate_strengths_summary( strong_matches, resume_data) gaps_summary = await self._generate_gaps_summary(gaps, job_data) # Extract competitive advantages competitive_advantages = self._identify_competitive_advantages( resume_data, job_data, strong_matches) # Generate preparation priorities preparation_priority = self._generate_preparation_priority( gaps, partial_matches) # Identify interview focus areas interview_focus = self._identify_interview_focus_areas( strong_matches, gaps) # Analyze by skill categories categories_analysis = self._analyze_skill_categories(skill_matches) return GapAnalysisResult( overall_match_score=overall_score, strong_matches=strong_matches, partial_matches=partial_matches, gaps=gaps, strengths_summary=strengths_summary, gaps_summary=gaps_summary, competitive_advantages=competitive_advantages, preparation_priority=preparation_priority, interview_focus_areas=interview_focus, skill_categories_analysis=categories_analysis ) def _extract_resume_skills(self, resume_data: Dict[str, Any]) -> List[str]: """Extract all skills from resume data""" all_skills = [] # Get skills from structured skills section skills_obj = resume_data.get("skills", {}) if isinstance(skills_obj, dict): for category, skills_list in skills_obj.items(): if isinstance(skills_list, list): all_skills.extend(skills_list) # Get skills from experience experience = resume_data.get("experience", []) for exp in experience: if isinstance(exp, dict): tech_skills = exp.get("technologies", []) if isinstance(tech_skills, list): all_skills.extend(tech_skills) # Get skills from projects projects = resume_data.get("projects", []) for project in projects: if isinstance(project, dict): tech_skills = project.get("technologies", []) if isinstance(tech_skills, list): all_skills.extend(tech_skills) # Deduplicate and clean return list(set([skill.strip() for skill in all_skills if skill])) def _extract_job_requirements(self, job_data: Dict[str, Any]) -> List[Dict[str, Any]]: """Extract requirements from job data""" requirements = [] # Handle enhanced job parser structure for req_type in ["tech_requirements", "experience_requirements", "education_requirements", "soft_skill_requirements"]: structured_reqs = job_data.get(req_type, []) if isinstance(structured_reqs, list): # Convert JobRequirement objects to dicts if needed for req in structured_reqs: if isinstance(req, dict): requirements.append(req) else: # Handle dataclass objects requirements.append({ "skill": getattr(req, 'skill', str(req)), "importance": getattr(req, 'importance', 'required'), "category": getattr(req, 'category', 'technical') }) # Fallback: legacy requirements key if not requirements: structured_reqs = job_data.get("requirements", []) if isinstance(structured_reqs, list): requirements.extend(structured_reqs) # Last fallback: extract from text fields if not requirements: requirements = self._extract_requirements_from_text(job_data) return requirements def _extract_requirements_from_text(self, job_data: Dict[str, Any]) -> List[Dict[str, Any]]: """Extract requirements from job description text""" requirements = [] # Common skill patterns common_skills = [ "python", "javascript", "react", "node.js", "sql", "aws", "docker", "kubernetes", "git", "machine learning", "tensorflow", "django", "flask", "express", "mongodb", "postgresql" ] # Extract from various text fields text_content = "" for field in ["description", "content", "scraped"]: if field in job_data: if isinstance(job_data[field], str): text_content += job_data[field] elif isinstance(job_data[field], dict): text_content += str(job_data[field]) text_lower = text_content.lower() for skill in common_skills: if skill.lower() in text_lower: requirements.append({ "skill": skill, "importance": "required", "category": "technical" }) return requirements def _match_skills_detailed(self, job_requirements: List[Dict[str, Any]], resume_skills: List[str]) -> List[SkillMatch]: """Perform detailed skill matching""" matches = [] for req in job_requirements: req_skill = req.get("skill", "") importance = req.get("importance", "required") category = req.get("category", "technical") # Find best match best_match, score = self.matcher.find_best_match( req_skill, resume_skills) # Determine match type if score >= 0.8: match_type = "strong" elif score >= 0.4: match_type = "partial" elif score > 0: match_type = "weak" else: match_type = "missing" # Generate recommendation recommendation = self._generate_skill_recommendation( req_skill, best_match, score, importance) matches.append(SkillMatch( job_requirement=req_skill, resume_skill=best_match if score > 0 else "Not Found", match_score=score, match_type=match_type, importance=importance, recommendation=recommendation, category=category )) return matches def _calculate_overall_score(self, matches: List[SkillMatch]) -> float: """Calculate weighted overall match score""" if not matches: return 0.0 # Weight by importance importance_weights = { "required": 1.0, "preferred": 0.7, "nice_to_have": 0.3 } total_weighted_score = 0.0 total_weight = 0.0 for match in matches: weight = importance_weights.get(match.importance, 0.5) total_weighted_score += match.match_score * weight total_weight += weight return (total_weighted_score / total_weight * 100) if total_weight > 0 else 0 def _generate_skill_recommendation(self, job_skill: str, resume_skill: str, score: float, importance: str) -> str: """Generate actionable recommendation for skill match""" if score >= 0.8: return f"Highlight your {resume_skill} experience" elif score >= 0.4: return f"Connect your {resume_skill} to {job_skill} requirements" elif importance == "required": return f"Critical: Learn {job_skill} before applying" elif importance == "preferred": return f"Important: Gain experience with {job_skill}" else: return f"Nice-to-have: Consider learning {job_skill}" async def _generate_strengths_summary(self, strong_matches: List[SkillMatch], resume_data: Dict[str, Any]) -> str: """Generate AI-powered strengths summary""" if not strong_matches: return "No strong technical matches found." skills_list = [match.job_requirement for match in strong_matches[:5]] experience_years = resume_data.get("years_of_experience", 0) prompt = f""" Based on these strong skill matches: {', '.join(skills_list)} and {experience_years} years of experience, write a 2-sentence summary of the candidate's key strengths for this role. Focus on practical value and competitive advantages. """ try: response = self.llm_client.call_llm( prompt, temperature=0.3, max_tokens=150) return response.strip() except Exception: return (f"Strong technical foundation in {', '.join(skills_list[:3])} " f"with {experience_years} years of experience.") async def _generate_gaps_summary(self, gaps: List[SkillMatch], job_data: Dict[str, Any]) -> str: """Generate AI-powered gaps summary""" if not gaps: return "No significant skill gaps identified." critical_gaps = [gap.job_requirement for gap in gaps if gap.importance == "required"] if not critical_gaps: return "No critical skill gaps. Focus on strengthening preferences." prompt = f""" The candidate is missing these required skills: {', '.join(critical_gaps[:3])}. Write a 2-sentence summary of the main gaps and preparation strategy. Be constructive and actionable. """ try: response = self.llm_client.call_llm( prompt, temperature=0.3, max_tokens=150) return response.strip() except Exception: return (f"Key gaps in {', '.join(critical_gaps[:2])}. " "Focus preparation on these critical areas.") def _identify_competitive_advantages(self, resume_data: Dict[str, Any], job_data: Dict[str, Any], strong_matches: List[SkillMatch]) -> List[str]: """Identify unique competitive advantages""" advantages = [] # Experience level advantage years_exp = resume_data.get("years_of_experience", 0) if years_exp > 5: advantages.append(f"{years_exp}+ years of proven experience") # Education advantage education = resume_data.get("education", []) for edu in education: if isinstance(edu, dict) and "degree" in edu: degree = edu["degree"] if "master" in degree.lower() or "phd" in degree.lower(): advantages.append(f"Advanced degree: {degree}") break # Skill combination advantages strong_skills = [match.job_requirement for match in strong_matches] if len(strong_skills) >= 3: advantages.append( f"Strong combination: {', '.join(strong_skills[:3])}") # Project portfolio projects = resume_data.get("projects", []) if len(projects) >= 2: advantages.append(f"Proven track record: {len(projects)} projects") return advantages[:4] # Limit to top 4 def _generate_preparation_priority(self, gaps: List[SkillMatch], partial_matches: List[SkillMatch]) -> List[str]: """Generate preparation priority list""" priorities = [] # Critical gaps first critical_gaps = [gap.job_requirement for gap in gaps if gap.importance == "required"] priorities.extend(critical_gaps[:3]) # Important partial matches to strengthen important_partials = [match.job_requirement for match in partial_matches if match.importance in ["required", "preferred"]] priorities.extend(important_partials[:2]) return priorities[:5] def _identify_interview_focus_areas(self, strong_matches: List[SkillMatch], gaps: List[SkillMatch]) -> List[str]: """Identify areas to focus on during interview""" focus_areas = [] # Highlight strengths if strong_matches: top_strengths = [match.job_requirement for match in strong_matches[:2]] focus_areas.extend([f"Demonstrate {skill} expertise" for skill in top_strengths]) # Address concerns proactively critical_gaps = [gap.job_requirement for gap in gaps if gap.importance == "required"] if critical_gaps: focus_areas.append( f"Address learning plan for {critical_gaps[0]}") # General advice focus_areas.extend([ "Emphasize problem-solving approach", "Show enthusiasm for learning" ]) return focus_areas[:5] def _analyze_skill_categories(self, matches: List[SkillMatch]) -> Dict[str, float]: """Analyze performance by skill category""" categories = {} for match in matches: category = match.category if category not in categories: categories[category] = [] categories[category].append(match.match_score) # Calculate averages category_scores = {} for category, scores in categories.items(): if scores: category_scores[category] = sum(scores) / len(scores) * 100 return category_scores def _format_result(self, result: GapAnalysisResult) -> Dict[str, Any]: """Format result for output""" return { "overall_match_score": result.overall_match_score, "strong_matches": [self._format_skill_match(m) for m in result.strong_matches], "partial_matches": [self._format_skill_match(m) for m in result.partial_matches], "gaps": [self._format_skill_match(m) for m in result.gaps], "strengths_summary": result.strengths_summary, "gaps_summary": result.gaps_summary, "competitive_advantages": result.competitive_advantages, "preparation_priority": result.preparation_priority, "interview_focus_areas": result.interview_focus_areas, "skill_categories_analysis": result.skill_categories_analysis, "detailed_matches": [self._format_skill_match(m) for m in result.strong_matches + result.partial_matches + result.gaps] } def _format_skill_match(self, match: SkillMatch) -> Dict[str, Any]: """Format individual skill match""" return { "job_requirement": match.job_requirement, "resume_skill": match.resume_skill, "match_score": round(match.match_score, 2), "match_type": match.match_type, "importance": match.importance, "recommendation": match.recommendation, "category": match.category }