ananttripathiak committed on
Commit
fc82aa2
·
verified ·
1 Parent(s): b95c296

Create ats_scorer.py

Browse files
Files changed (1) hide show
  1. src/ats_scorer.py +312 -0
src/ats_scorer.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ATS Scorer Module
3
+ Calculates Applicant Tracking System compatibility scores.
4
+ """
5
+
6
+ import re
7
+ from typing import Dict, List
8
+ import logging
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ATSScorer:
    """
    Calculates ATS compatibility score for resumes.

    The overall score is a weighted blend of five heuristic category scores
    (format, sections, keywords, content, contact), each on a 0-100 scale.
    The scorer keeps no per-instance state; all configuration lives in the
    class-level lists below.
    """

    # Essential resume sections
    ESSENTIAL_SECTIONS = [
        'experience', 'education', 'skills', 'summary', 'contact'
    ]

    # ATS-friendly keywords
    COMMON_ATS_KEYWORDS = [
        'experience', 'education', 'skills', 'professional', 'summary',
        'objective', 'achievements', 'responsibilities', 'projects'
    ]

    def __init__(self):
        """Initialize ATS scorer."""
        logger.info("ATSScorer initialized")

    def calculate_score(
        self,
        resume_text: str,
        job_description: str = None,
        parsed_data: Dict = None
    ) -> Dict:
        """
        Calculate comprehensive ATS score.

        Args:
            resume_text: Resume content. ``None`` is treated as empty text.
            job_description: Optional job description for keyword matching
            parsed_data: Optional pre-parsed resume data (accepted for
                interface compatibility; not read by this method)

        Returns:
            Dictionary with keys ``overall_score`` (float rounded to one
            decimal), ``category_scores`` (per-category result dicts),
            ``feedback`` (list of messages) and ``grade`` (letter grade).
        """
        # A missing resume is scored as an empty one instead of raising
        # inside the regex/str calls of the sub-scorers.
        resume_text = resume_text or ''

        # Insertion order matters: feedback is generated in this order.
        scores = {
            # 1. Format Score (30%)
            'format': self._calculate_format_score(resume_text),
            # 2. Section Completeness (25%)
            'sections': self._calculate_section_score(resume_text),
            # 3. Keyword Density (20%)
            'keywords': self._calculate_keyword_score(resume_text, job_description),
            # 4. Content Quality (15%)
            'content': self._calculate_content_score(resume_text),
            # 5. Contact Information (10%)
            'contact': self._calculate_contact_score(resume_text),
        }

        # Calculate weighted overall score
        weights = {
            'format': 0.30,
            'sections': 0.25,
            'keywords': 0.20,
            'content': 0.15,
            'contact': 0.10
        }

        weighted_total = sum(
            scores[category]['score'] * weights[category]
            for category in weights
        )

        # Round once and grade the rounded value so the reported score and
        # the letter grade can never disagree at a grade boundary.
        overall_score = round(weighted_total, 1)

        # Generate feedback
        feedback = self._generate_feedback(scores)

        return {
            'overall_score': overall_score,
            'category_scores': scores,
            'feedback': feedback,
            'grade': self._get_grade(overall_score)
        }

    def _calculate_format_score(self, text: str) -> Dict:
        """
        Calculate formatting score.

        Starts at 100 and subtracts a fixed penalty for each formatting
        heuristic that fires.

        Args:
            text: Resume content

        Returns:
            Dictionary with ``score`` (never below 0) and ``issues``.
        """
        score = 100
        issues = []

        # Check for special characters that confuse ATS: anything other than
        # word characters, whitespace and a small set of common punctuation.
        special_chars = len(re.findall(r'[^\w\s\.,;:()\-@/]', text))
        if special_chars > 50:
            score -= 15
            issues.append("Too many special characters")

        # Check for tables/columns (hard for ATS)
        if '\t' in text:
            score -= 10
            issues.append("Contains tabs (may indicate columns)")

        # Check line length consistency (blank lines count toward the average)
        lines = text.split('\n')
        avg_line_length = sum(len(line) for line in lines) / max(len(lines), 1)
        if avg_line_length < 20:
            score -= 10
            issues.append("Inconsistent line formatting")

        # Positive: Clean structure
        if score > 80:
            issues.append("Clean, ATS-friendly formatting")

        return {
            'score': max(score, 0),
            'issues': issues
        }

    def _calculate_section_score(self, text: str) -> Dict:
        """
        Calculate section completeness score.

        A section counts as present when its name starts a line (optionally
        indented, including the very first line of the text) or appears
        anywhere followed directly by a colon. Matching is case-insensitive.

        Args:
            text: Resume content

        Returns:
            Dictionary with ``score`` (percentage of essential sections
            found), ``found_sections``, ``missing_sections`` and ``issues``.
        """
        found_sections = []
        missing_sections = []

        text_lower = text.lower()

        for section in self.ESSENTIAL_SECTIONS:
            # MULTILINE makes '^' match at the start of the text as well as
            # after every newline, so a header on line one is not missed.
            starts_a_line = re.search(
                rf'^[ \t]*{re.escape(section)}', text_lower, re.MULTILINE
            )
            labelled_inline = f'{section}:' in text_lower

            if starts_a_line or labelled_inline:
                found_sections.append(section)
            else:
                missing_sections.append(section)

        score = (len(found_sections) / len(self.ESSENTIAL_SECTIONS)) * 100

        issues = []
        if missing_sections:
            issues.append(f"Missing sections: {', '.join(missing_sections)}")
        if len(found_sections) == len(self.ESSENTIAL_SECTIONS):
            issues.append("All essential sections present")

        return {
            'score': score,
            'found_sections': found_sections,
            'missing_sections': missing_sections,
            'issues': issues
        }

    def _calculate_keyword_score(self, text: str, job_description: str = None) -> Dict:
        """
        Calculate keyword relevance score.

        The base score is the percentage of ``COMMON_ATS_KEYWORDS`` that
        occur in the resume. When the job description yields at least one
        term longer than four characters, the result is blended as 40% base
        score and 60% job-description term overlap.

        Args:
            text: Resume content
            job_description: Optional job description for keyword matching

        Returns:
            Dictionary with ``score`` (capped at 100), ``found_keywords``
            and ``issues``.
        """
        text_lower = text.lower()

        # Check for common ATS keywords
        found_keywords = [
            keyword for keyword in self.COMMON_ATS_KEYWORDS
            if keyword in text_lower
        ]

        base_score = (len(found_keywords) / len(self.COMMON_ATS_KEYWORDS)) * 100

        issues = []

        # Collect the job-description terms worth matching (short words are
        # filtered out). An empty set means there is nothing to compare.
        jd_words = set()
        if job_description:
            jd_words = {
                word
                for word in re.findall(r'\b\w+\b', job_description.lower())
                if len(word) > 4
            }

        if jd_words:
            resume_words = set(re.findall(r'\b\w+\b', text_lower))

            matching_keywords = jd_words & resume_words
            match_ratio = len(matching_keywords) / len(jd_words)

            # Adjust score based on JD match
            base_score = (base_score * 0.4) + (match_ratio * 100 * 0.6)

            if match_ratio < 0.3:
                issues.append("Low keyword match with job description")
            else:
                issues.append(f"Good keyword match: {len(matching_keywords)} relevant terms")
        else:
            # Also reached when the job description has no usable terms
            # (e.g. whitespace only), so such input is not penalised.
            issues.append("Using general ATS keywords (no job description provided)")

        return {
            'score': min(base_score, 100),
            'found_keywords': found_keywords,
            'issues': issues
        }

    def _calculate_content_score(self, text: str) -> Dict:
        """
        Calculate content quality score.

        Starts at 100 and subtracts penalties for an out-of-range word
        count, fewer than five numbers, and fewer than three distinct
        action verbs.

        Args:
            text: Resume content

        Returns:
            Dictionary with ``score`` (never below 0), ``word_count`` and
            ``issues``.
        """
        score = 100
        issues = []

        word_count = len(text.split())

        # Check word count
        if word_count < 200:
            score -= 30
            issues.append("Resume too short (< 200 words)")
        elif word_count > 1000:
            score -= 15
            issues.append("Resume too long (> 1000 words)")
        else:
            issues.append("Appropriate length")

        # Check for numbers (quantifiable achievements)
        numbers = re.findall(r'\d+', text)
        if len(numbers) < 5:
            score -= 20
            issues.append("Add more quantifiable achievements")
        else:
            issues.append("Good use of metrics")

        # Check for action verbs
        action_verbs = [
            'led', 'managed', 'developed', 'created', 'implemented',
            'designed', 'achieved', 'improved', 'increased', 'built'
        ]
        # Compare against whole words: a substring test would find 'led'
        # inside "knowledge" or "scheduled" and inflate the count.
        resume_words = set(re.findall(r'\b\w+\b', text.lower()))
        verb_count = sum(1 for verb in action_verbs if verb in resume_words)

        if verb_count < 3:
            score -= 15
            issues.append("Use more action verbs")
        else:
            issues.append("Strong action verbs present")

        return {
            'score': max(score, 0),
            'word_count': word_count,
            'issues': issues
        }

    def _calculate_contact_score(self, text: str) -> Dict:
        """
        Calculate contact information completeness.

        Points: email 40, phone 30, LinkedIn profile URL 20, location 10.
        Only email and phone are reported as missing when absent.

        Args:
            text: Resume content

        Returns:
            Dictionary with ``score``, ``found_contact``,
            ``missing_contact`` and ``issues``.
        """
        score = 0
        found_contact = []
        missing_contact = []

        # Email (the TLD class is letters only; a '|' inside a character
        # class is a literal pipe, not alternation)
        if re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text):
            score += 40
            found_contact.append('email')
        else:
            missing_contact.append('email')

        # Phone
        if re.search(r'(\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', text):
            score += 30
            found_contact.append('phone')
        else:
            missing_contact.append('phone')

        # LinkedIn
        if re.search(r'linkedin\.com/in/', text.lower()):
            score += 20
            found_contact.append('linkedin')

        # Location (City, State): a comma followed by two capital letters
        if re.search(r',\s*[A-Z]{2}\b', text):
            score += 10
            found_contact.append('location')

        issues = []
        if missing_contact:
            issues.append(f"Missing: {', '.join(missing_contact)}")
        if score >= 70:
            issues.append("Complete contact information")

        return {
            'score': score,
            'found_contact': found_contact,
            'missing_contact': missing_contact,
            'issues': issues
        }

    def _generate_feedback(self, scores: Dict) -> List[str]:
        """
        Generate actionable feedback based on scores.

        Categories scoring below 60 produce a warning carrying their first
        recorded issue; categories scoring 80 or more are marked excellent;
        anything in between produces no message.

        Args:
            scores: Mapping of category name to a result dict containing
                ``score`` and ``issues``

        Returns:
            List of feedback messages, or a single generic message when no
            category triggered one.
        """
        feedback = []

        for category, data in scores.items():
            if data['score'] < 60:
                feedback.append(f"⚠️ {category.upper()}: {data['issues'][0] if data['issues'] else 'Needs improvement'}")
            elif data['score'] >= 80:
                feedback.append(f"✅ {category.upper()}: Excellent")

        return feedback if feedback else ["Overall good ATS compatibility"]

    def _get_grade(self, score: float) -> str:
        """
        Get letter grade for score.

        Args:
            score: Overall score on a 0-100 scale

        Returns:
            'A+' (>= 90), 'A' (>= 80), 'B' (>= 70), 'C' (>= 60), else 'D'.
        """
        if score >= 90:
            return 'A+'
        elif score >= 80:
            return 'A'
        elif score >= 70:
            return 'B'
        elif score >= 60:
            return 'C'
        else:
            return 'D'
312
+