File size: 10,465 Bytes
16a9080
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
from typing import Any, Dict, List, Optional, Set
import re
from llm_client import llm_client
from prompt_loader import prompt_loader
from metrics import log_metric

class GapAnalysisMicroFunction:
    """Score how well a parsed resume covers the requirements of a job.

    ``run`` is the entry point: it reads ``resume_data`` and ``enriched`` from
    the incoming payload and returns the payload with a ``gap_analysis`` entry
    added (either the analysis result or an ``{"error": ...}`` dict).
    """

    # Score reported when the job data yields no requirements to compare.
    _DEFAULT_MATCH_SCORE = 50
    # Contribution of each match type to the overall score.
    _STRONG_WEIGHT = 1.0
    _PARTIAL_WEIGHT = 0.5

    # Delimiters for free-text requirement strings. A hyphen only separates
    # items when it is not embedded in a token (bullet "- x" or "a - b"), so
    # hyphenated names such as "scikit-learn" stay in one piece.
    _DELIMITER_RE = re.compile(r'[,;\n•]|(?<!\S)-|-(?!\S)')

    # Compiled once instead of on every call. The first pattern ends with
    # ``(?:(?<=\W)|\b)`` rather than ``\b`` because ``\b`` can never match
    # after "C++" / "C#" when whitespace, punctuation or end-of-text follows.
    _TECH_TERM_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'\b(Python|JavaScript|Java|C\+\+|C#|Ruby|Go|Rust|Swift|Kotlin)(?:(?<=\W)|\b)',
            r'\b(React|Angular|Vue|Django|Flask|Spring|Rails|Laravel)\b',
            r'\b(AWS|Azure|GCP|Docker|Kubernetes|Git|SQL|NoSQL)\b',
            r'\b(Machine Learning|ML|AI|Deep Learning|TensorFlow|PyTorch)\b',
            r'\b(Data Science|Analytics|Statistics|Pandas|NumPy)\b',
            r'\b(API|REST|GraphQL|Microservices|DevOps|CI/CD)\b',
        )
    )

    # Groups of names treated as the same skill for matching purposes.
    _SIMILAR_TECH_GROUPS = (
        frozenset({"python", "python3", "python2"}),
        frozenset({"javascript", "js", "node.js", "nodejs"}),
        frozenset({"react", "reactjs", "react.js"}),
        frozenset({"angular", "angularjs"}),
        frozenset({"vue", "vue.js", "vuejs"}),
        frozenset({"docker", "containerization"}),
        frozenset({"kubernetes", "k8s"}),
        frozenset({"aws", "amazon web services"}),
        frozenset({"gcp", "google cloud platform", "google cloud"}),
        frozenset({"azure", "microsoft azure"}),
        frozenset({"sql", "mysql", "postgresql", "postgres"}),
        frozenset({"nosql", "mongodb", "cassandra"}),
        frozenset({"machine learning", "ml", "artificial intelligence", "ai"}),
        frozenset({"tensorflow", "tf"}),
        frozenset({"pytorch", "torch"}),
    )

    def run(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run the gap analysis and attach the result to the payload.

        Args:
            data: Payload expected to carry ``resume_data`` and ``enriched``
                (job data) dictionaries.

        Returns:
            A shallow copy of ``data`` with a ``gap_analysis`` key. The value
            is the analysis dict on success, or ``{"error": <message>}`` when
            an input is missing/invalid or the analysis raises.
        """
        resume_data = data.get("resume_data", {})
        enriched_data = data.get("enriched", {})

        # The isinstance guards keep a malformed (non-dict) payload on the
        # regular error path instead of raising outside the try block below.
        if not isinstance(resume_data, dict) or not resume_data or "error" in resume_data:
            return {**data, "gap_analysis": {"error": "No resume data available"}}

        if not isinstance(enriched_data, dict) or not enriched_data or enriched_data.get("error"):
            return {**data, "gap_analysis": {"error": "No job data available"}}

        try:
            gap_analysis = self._analyze_gaps(resume_data, enriched_data)

            log_metric("gap_analysis_success", {
                "match_score": gap_analysis.get("match_score", 0),
                "strong_matches": len(gap_analysis.get("strong_matches", [])),
                "gaps": len(gap_analysis.get("gaps", []))
            })

            return {**data, "gap_analysis": gap_analysis}

        except Exception as e:
            # Boundary handler: report the failure in the payload rather than
            # propagating it to the caller.
            log_metric("gap_analysis_error", {"error": str(e)})
            return {**data, "gap_analysis": {"error": f"Gap analysis failed: {e}"}}

    def _analyze_gaps(self, resume_data: Dict[str, Any], job_data: Dict[str, Any]) -> Dict[str, Any]:
        """Classify every job requirement and compute the overall match score.

        Args:
            resume_data: Parsed resume dictionary.
            job_data: Job dictionary (requirements, tech stack, responsibilities).

        Returns:
            Dict with ``match_score`` (0-100), ``strong_matches``,
            ``partial_matches``, ``gaps``, ``summary``, ``skills_map``,
            ``resume_skills_count`` and ``job_requirements_count``.
        """
        resume_skills = self._extract_resume_skills(resume_data)
        job_requirements = self._extract_job_requirements(job_data)

        strong_matches: List[str] = []
        partial_matches: List[str] = []
        gaps: List[str] = []
        buckets = {"strong": strong_matches, "partial": partial_matches}

        for req in job_requirements:
            match_type = self._find_skill_match(req, resume_skills)
            # Anything that is neither strong nor partial counts as a gap.
            buckets.get(match_type, gaps).append(req)

        total_requirements = len(job_requirements)
        if total_requirements == 0:
            match_score = self._DEFAULT_MATCH_SCORE
        else:
            weighted = (len(strong_matches) * self._STRONG_WEIGHT
                        + len(partial_matches) * self._PARTIAL_WEIGHT)
            match_score = min(100, max(0, round(weighted / total_requirements * 100)))

        return {
            "match_score": match_score,
            "strong_matches": strong_matches,
            "partial_matches": partial_matches,
            "gaps": gaps,
            "summary": self._generate_summary(strong_matches, partial_matches, gaps, match_score),
            "skills_map": self._create_skills_map(strong_matches, partial_matches, gaps),
            "resume_skills_count": len(resume_skills),
            "job_requirements_count": total_requirements
        }

    @staticmethod
    def _normalize_skill_names(values: Any) -> Set[str]:
        """Return the stripped, lower-cased string entries of a list-like value.

        Non-sequence values and non-string or blank entries yield nothing, so
        malformed resume sections are skipped instead of raising.
        """
        if not isinstance(values, (list, tuple, set)):
            return set()
        return {
            value.strip().lower()
            for value in values
            if isinstance(value, str) and value.strip()
        }

    def _extract_resume_skills(self, resume_data: Dict[str, Any]) -> Set[str]:
        """Collect the lower-cased skill names mentioned in the resume.

        Reads ``skills`` (either a dict of category -> list or a flat list) and
        the ``technologies`` list of each ``experience`` / ``projects`` entry.

        Args:
            resume_data: Parsed resume dictionary.

        Returns:
            Set of normalised skill names.
        """
        skills: Set[str] = set()

        skills_section = resume_data.get("skills")
        if isinstance(skills_section, dict):
            for skill_category in skills_section.values():
                skills |= self._normalize_skill_names(skill_category)
        else:
            # Flat list of skills; any other type contributes nothing.
            skills |= self._normalize_skill_names(skills_section)

        for section in ("experience", "projects"):
            entries = resume_data.get(section)
            if not isinstance(entries, (list, tuple)):
                continue  # Missing or None section
            for entry in entries:
                if isinstance(entry, dict):
                    skills |= self._normalize_skill_names(entry.get("technologies"))

        return skills

    def _extract_job_requirements(self, job_data: Dict[str, Any]) -> List[str]:
        """Build the list of unique requirements for a job.

        Sources, in order: ``requirements`` and ``tech_stack`` (list, or a
        delimited string) followed by technical terms found in the
        ``responsibilities`` strings.

        Args:
            job_data: Job dictionary.

        Returns:
            Cleaned requirements in first-seen order, de-duplicated
            case-insensitively (the first spelling encountered is kept).
        """
        candidates: List[Any] = []

        for field in ("requirements", "tech_stack"):
            value = job_data.get(field, [])
            if isinstance(value, list):
                candidates.extend(value)
            elif isinstance(value, str):
                candidates.extend(self._DELIMITER_RE.split(value))

        responsibilities = job_data.get("responsibilities", [])
        if isinstance(responsibilities, list):
            for resp in responsibilities:
                if isinstance(resp, str):
                    candidates.extend(self._extract_tech_terms(resp))

        # Order-preserving, case-insensitive de-duplication: the same skill
        # spelled "Python" and "python" must not be counted twice in the score,
        # and a stable order keeps the summary reproducible between runs.
        seen: Set[str] = set()
        cleaned_requirements: List[str] = []
        for candidate in candidates:
            if not isinstance(candidate, str):
                continue
            cleaned = candidate.strip().strip('•-').strip()
            # NOTE: entries of one or two characters are discarded, which also
            # drops short names such as "Go" or "C#".
            if len(cleaned) <= 2:
                continue
            key = cleaned.lower()
            if key not in seen:
                seen.add(key)
                cleaned_requirements.append(cleaned)

        return cleaned_requirements

    def _extract_tech_terms(self, text: str) -> List[str]:
        """Find known technology names in free text.

        Args:
            text: Text to scan (matching is case-insensitive).

        Returns:
            Lower-cased matches, grouped by pattern and in text order within
            each pattern. Repeated mentions are returned repeatedly.
        """
        terms: List[str] = []
        for pattern in self._TECH_TERM_PATTERNS:
            terms.extend(match.lower() for match in pattern.findall(text))
        return terms

    def _find_skill_match(self, requirement: str, resume_skills: Set[str]) -> str:
        """Classify how well the resume covers a single requirement.

        Args:
            requirement: Requirement text (case and surrounding space ignored).
            resume_skills: Lower-cased skill names from the resume.

        Returns:
            ``"strong"`` for an exact match or a known alias of a resume skill,
            ``"partial"`` when the requirement and a skill contain one another,
            otherwise ``"none"``.
        """
        req_clean = requirement.lower().strip()

        if req_clean in resume_skills:
            return "strong"

        # Resolve every possible strong match before looking for partial ones;
        # otherwise an alias like "react.js" vs "react" is reported as partial
        # (it is also a substring) and the result depends on set iteration order.
        if any(self._are_similar_technologies(req_clean, skill) for skill in resume_skills):
            return "strong"

        # Substring match in either direction; the shorter side must have more
        # than two characters. NOTE: this is deliberately loose, e.g. "java" is
        # a partial match for "javascript".
        for skill in resume_skills:
            if (len(req_clean) > 2 and req_clean in skill) or (len(skill) > 2 and skill in req_clean):
                return "partial"

        return "none"

    def _are_similar_technologies(self, tech1: str, tech2: str) -> bool:
        """Return True when both names belong to the same alias group.

        Args:
            tech1: First lower-cased technology name.
            tech2: Second lower-cased technology name.
        """
        return any(
            tech1 in group and tech2 in group
            for group in self._SIMILAR_TECH_GROUPS
        )

    def _generate_summary(self, strong_matches: List[str], partial_matches: List[str],
                         gaps: List[str], match_score: int) -> str:
        """Build a short narrative describing the analysis outcome.

        Args:
            strong_matches: Requirements fully covered by the resume.
            partial_matches: Requirements partially covered (accepted for
                interface symmetry; not mentioned in the text).
            gaps: Requirements not covered.
            match_score: Overall score from 0 to 100.

        Returns:
            One to three sentences joined by single spaces: an overall
            assessment, then up to three strengths and up to three gaps.
        """
        if match_score >= 80:
            headline = f"Excellent match ({match_score}% compatibility)!"
        elif match_score >= 60:
            headline = f"Good match ({match_score}% compatibility) with some areas for growth."
        elif match_score >= 40:
            headline = f"Moderate match ({match_score}% compatibility) requiring focused preparation."
        else:
            headline = f"Challenging match ({match_score}% compatibility) needing significant upskilling."

        summary_parts = [headline]

        if strong_matches:
            summary_parts.append(f"Your strongest assets are {', '.join(strong_matches[:3])}.")

        if gaps:
            summary_parts.append(f"Focus your preparation on {', '.join(gaps[:3])}.")

        return " ".join(summary_parts)

    def _create_skills_map(self, strong_matches: List[str], partial_matches: List[str],
                          gaps: List[str]) -> Dict[str, List[str]]:
        """Return at most ten entries per match category for display.

        Args:
            strong_matches: Requirements fully covered by the resume.
            partial_matches: Requirements partially covered.
            gaps: Requirements not covered.
        """
        return {
            "strong": strong_matches[:10],  # Limit for display
            "partial": partial_matches[:10],
            "gaps": gaps[:10]
        }