Sumit404 committed on
Commit
f5b8ae8
·
verified ·
1 Parent(s): 324db65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -210
app.py CHANGED
@@ -13,10 +13,8 @@ from typing import List, Dict, Tuple, Set
13
  import whisper
14
  import librosa
15
  import soundfile as sf
16
- from textblob import TextBlob
17
- from readability import Readability
18
 
19
- # Configure logging
20
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
21
  logger = logging.getLogger(__name__)
22
 
@@ -36,6 +34,7 @@ except Exception as e:
36
  logger.error(f"Failed to load SentenceTransformer model: {str(e)}")
37
  raise e
38
 
 
39
  try:
40
  whisper_model = whisper.load_model("base")
41
  except Exception as e:
@@ -54,7 +53,7 @@ SECTION_KEYWORDS = {
54
  }
55
  SECTION_WEIGHTS = {'experience': 0.4, 'education': 0.2, 'skills': 0.25, 'projects': 0.1, 'certifications': 0.05}
56
 
57
- # Expanded skill set for MAANG/FAANG
58
  KEY_SKILLS = {
59
  'python', 'javascript', 'java', 'sql', 'aws', 'docker', 'react', 'nodejs', 'node.js', 'node js', 'machine learning',
60
  'data analysis', 'git', 'html', 'css', 'tensorflow', 'pytorch', 'cloud', 'api', 'devops',
@@ -64,7 +63,7 @@ KEY_SKILLS = {
64
  'pandas', 'numpy', 'scikit-learn', 'react.js', 'next.js', 'nextjs', 'etl'
65
  }
66
 
67
- # Common interview questions
68
  INTERVIEW_QUESTIONS = {
69
  "Tell me about yourself": ["background", "experience", "skills", "achievements", "goals"],
70
  "What are your strengths?": ["strengths", "skills", "abilities", "teamwork", "problem-solving"],
@@ -80,94 +79,7 @@ class ATSInterviewAnalyzer:
80
  self.tfidf_vectorizer = TfidfVectorizer()
81
  self.whisper_model = whisper_model
82
 
83
- # New Feature: Resume Formatting Analysis
84
- def analyze_formatting(self, pdf_path: str) -> Tuple[bool, List[str]]:
85
- try:
86
- doc = fitz.open(pdf_path)
87
- fonts = set()
88
- font_sizes = []
89
- page_count = len(doc)
90
-
91
- for page in doc:
92
- text_dict = page.get_text("dict")
93
- for block in text_dict.get("blocks", []):
94
- for line in block.get("lines", []):
95
- for span in line.get("spans", []):
96
- fonts.add(span["font"])
97
- font_sizes.append(span["size"])
98
-
99
- issues = []
100
- is_ats_friendly = True
101
-
102
- # Check font consistency (max 2 different fonts recommended)
103
- if len(fonts) > 2:
104
- issues.append("Use no more than 2 different fonts for ATS compatibility (e.g., Arial, Times New Roman).")
105
- is_ats_friendly = False
106
-
107
- # Check font size (between 10 and 12 recommended)
108
- if font_sizes:
109
- avg_font_size = np.mean(font_sizes)
110
- if avg_font_size < 10 or avg_font_size > 12:
111
- issues.append("Ensure font size is between 10 and 12 points for readability.")
112
- is_ats_friendly = False
113
-
114
- # Check page length (1-2 pages recommended)
115
- if page_count > 2:
116
- issues.append("Keep resume to 1-2 pages for ATS compatibility and recruiter preference.")
117
- is_ats_friendly = False
118
-
119
- return is_ats_friendly, issues
120
- except Exception as e:
121
- logger.error(f"Error analyzing formatting: {str(e)}")
122
- return False, ["Unable to analyze formatting due to an error."]
123
-
124
- # New Feature: Keyword Density Analysis
125
- def analyze_keyword_density(self, resume_text: str, jd_keywords: Set[str]) -> Tuple[float, List[str]]:
126
- if not resume_text or not jd_keywords:
127
- return 0.0, ["No keywords provided for density analysis."]
128
-
129
- words = resume_text.lower().split()
130
- total_words = len(words)
131
- keyword_counts = {kw: words.count(kw.lower()) for kw in jd_keywords}
132
- keyword_density = sum(keyword_counts.values()) / max(total_words, 1) * 100
133
-
134
- suggestions = []
135
- if keyword_density < 1.0:
136
- suggestions.append("Increase keyword usage (aim for 1-3% density) by incorporating job description terms naturally.")
137
- elif keyword_density > 3.0:
138
- suggestions.append("Reduce keyword usage (aim for 1-3% density) to avoid appearing as keyword stuffing.")
139
-
140
- return keyword_density, suggestions
141
-
142
- # New Feature: Sentiment Analysis for Interview
143
- def analyze_sentiment(self, transcription: str) -> str:
144
- if not transcription:
145
- return "No transcription available for sentiment analysis."
146
-
147
- blob = TextBlob(transcription)
148
- sentiment_score = blob.sentiment.polarity
149
- if sentiment_score > 0.1:
150
- return "Positive: Your responses sound optimistic, which is great for engaging interviewers."
151
- elif sentiment_score < -0.1:
152
- return "Negative: Your responses may sound pessimistic. Try using more positive language (e.g., focus on solutions and achievements)."
153
- else:
154
- return "Neutral: Your responses are balanced. Consider adding more enthusiastic language to stand out."
155
-
156
- # New Feature: Job Description Complexity Analysis
157
- def analyze_jd_complexity(self, jd_text: str) -> str:
158
- if not jd_text:
159
- return "No job description provided for complexity analysis."
160
-
161
- try:
162
- r = Readability(jd_text)
163
- fk = r.flesch_kincaid()
164
- grade_level = round(fk.score)
165
- return f"Job Description Readability: Flesch-Kincaid Grade {grade_level}. Tailor your resume to match this complexity (e.g., use {grade_level}-grade level language)."
166
- except Exception as e:
167
- logger.error(f"Error analyzing JD complexity: {str(e)}")
168
- return "Unable to analyze job description complexity."
169
-
170
- # Existing Methods (unchanged except for integration with new features)
171
  def extract_text(self, pdf_path: str) -> str:
172
  if not pdf_path or not os.path.exists(pdf_path):
173
  logger.error(f"PDF file not found: {pdf_path}")
@@ -248,16 +160,16 @@ class ATSInterviewAnalyzer:
248
  text_lower = text.lower()
249
  return any(re.search(pattern, text_lower) for pattern in patterns)
250
 
251
- def analyze_resume(self, resume_path: str, job_description: str) -> Tuple[str, str, str, str, str]:
252
  logger.info("Starting resume analysis...")
253
  if not resume_path:
254
- return "Error: No resume file uploaded", "", "", "", ""
255
  if not job_description.strip():
256
- return "Error: Job description is empty", "", "", "", ""
257
 
258
  resume_text = self.extract_text(resume_path)
259
  if not resume_text:
260
- return "Error: Could not extract text from resume PDF", "", "", "", ""
261
 
262
  cleaned_resume = self.preprocess_text(resume_text)
263
  cleaned_jd = self.preprocess_text(job_description)
@@ -275,19 +187,9 @@ class ATSInterviewAnalyzer:
275
 
276
  achievement_bonus = 5 if self.detect_achievements(resume_text) else 0
277
 
278
- # New: Formatting Analysis
279
- is_ats_friendly, formatting_issues = self.analyze_formatting(resume_path)
280
-
281
- # New: Keyword Density Analysis
282
- jd_keywords = self.extract_keywords(job_description)
283
- keyword_density, density_suggestions = self.analyze_keyword_density(resume_text, jd_keywords)
284
-
285
- # New: JD Complexity Analysis
286
- jd_complexity = self.analyze_jd_complexity(job_description)
287
 
288
- ats_score = np.clip(0.35 * keyword_score + 0.35 * skills_score + 0.2 * section_score + achievement_bonus + (5 if is_ats_friendly else 0), 0, 100)
289
-
290
- skills_match = f"Matched Skills: {', '.join(sorted(matched_skills)) or 'None'}\nMissing Skills: {', '.join(sorted(missing_skills)) or 'None'}\nKeyword Density: {keyword_density:.2f}%"
291
 
292
  jd_keywords = self.extract_keywords(job_description)
293
  resume_keywords = self.extract_keywords(resume_text)
@@ -308,21 +210,15 @@ class ATSInterviewAnalyzer:
308
  improvements.append(f"Include these missing skills in your skills or experience section: {', '.join(sorted(missing_skills))}")
309
  if not achievement_bonus:
310
  improvements.append("Add measurable achievements to boost your score (e.g., 'increased efficiency by 20%', 'reduced processing time by 5 hours')")
311
- if formatting_issues:
312
- improvements.extend(formatting_issues)
313
- improvements.extend(density_suggestions)
314
  improvement_text = "\n".join(improvements) or "Your resume is well-aligned with the job description!"
315
 
316
  breakdown = f"Keyword Match: {keyword_score:.1f}%\nSection Score: {section_score:.1f}%\nSkills Match: {skills_score:.1f}%"
317
  if achievement_bonus:
318
  breakdown += f"\nAchievement Bonus: +{achievement_bonus}%"
319
- if is_ats_friendly:
320
- breakdown += "\nFormatting Bonus: +5%"
321
- breakdown += f"\nKeyword Density: {keyword_density:.2f}%"
322
-
323
  logger.info("Resume analysis completed.")
324
- return f"ATS Score: {ats_score:.1f}%", skills_match, improvement_text, breakdown, jd_complexity
325
 
 
326
  def transcribe_audio(self, audio_path: str) -> str:
327
  if not self.whisper_model:
328
  logger.error("Whisper model is not available. Cannot transcribe audio.")
@@ -379,18 +275,15 @@ class ATSInterviewAnalyzer:
379
  transcription_text = transcription if transcription else "No transcription available"
380
  return transcription_text, "\n".join(response_feedback)
381
 
382
- def analyze_interview(self, audio_path: str = None) -> Tuple[str, str, str, str]:
383
  logger.info("Starting interview analysis...")
384
  if not audio_path:
385
- return "Error: No audio file uploaded", "", "", ""
386
 
387
  transcription = self.transcribe_audio(audio_path)
388
  tone = self.analyze_tone(audio_path)
389
  transcription_text, response_feedback = self.evaluate_response_correctness(transcription)
390
 
391
- # New: Sentiment Analysis
392
- sentiment_feedback = self.analyze_sentiment(transcription)
393
-
394
  tone_feedback = f"Detected Tone: {tone}\n"
395
  if tone == "Confident":
396
  tone_feedback += "Your tone sounds confident, which is great for making a strong impression."
@@ -400,101 +293,53 @@ class ATSInterviewAnalyzer:
400
  tone_feedback += "Your tone is neutral. Consider adding more enthusiasm to engage the interviewer."
401
 
402
  logger.info("Interview analysis completed.")
403
- return transcription_text, tone_feedback, response_feedback, sentiment_feedback
404
 
405
  # Combined Analysis Function
406
  def process_combined(resume_file, job_description, interview_audio):
407
  analyzer = ATSInterviewAnalyzer()
408
 
409
- with gr.Blocks() as progress:
410
- gr.Markdown("### Analysis Progress")
411
- progress_bar = gr.Slider(minimum=0, maximum=100, value=0, interactive=False)
412
-
413
- if resume_file and job_description:
414
- progress_bar.value = 20
415
- ats_score, skills_match, ats_improvements, ats_breakdown, jd_complexity = analyzer.analyze_resume(resume_file, job_description)
416
- else:
417
- ats_score = "Not provided"
418
- skills_match = "Not provided"
419
- ats_improvements = "Not provided"
420
- ats_breakdown = "Not provided"
421
- jd_complexity = "Not provided"
422
-
423
- progress_bar.value = 60
424
- if interview_audio:
425
- transcription, tone_feedback, response_feedback, sentiment_feedback = analyzer.analyze_interview(audio_path=interview_audio)
426
- else:
427
- transcription = "Not provided"
428
- tone_feedback = "Not provided"
429
- response_feedback = "Not provided"
430
- sentiment_feedback = "Not provided"
431
-
432
- progress_bar.value = 100
433
-
434
- # New: Generate downloadable report
435
- report_content = f"""
436
- Job Application Analysis Report
437
- =============================
438
- ATS Analysis
439
- ------------
440
- ATS Score: {ats_score}
441
- Skills Analysis: {skills_match}
442
- ATS Suggestions: {ats_improvements}
443
- ATS Breakdown: {ats_breakdown}
444
- JD Complexity: {jd_complexity}
445
-
446
- Interview Analysis
447
- ------------------
448
- Transcription: {transcription}
449
- Tone Analysis: {tone_feedback}
450
- Response Feedback: {response_feedback}
451
- Sentiment Analysis: {sentiment_feedback}
452
- """
453
- report_file = "job_application_report.txt"
454
- with open(report_file, "w") as f:
455
- f.write(report_content)
456
-
457
- return (
458
- ats_score, skills_match, ats_improvements, ats_breakdown, jd_complexity,
459
- transcription, tone_feedback, response_feedback, sentiment_feedback,
460
- report_file
461
- )
462
 
463
  # Gradio Interface
464
- with gr.Blocks(theme=gr.themes.Soft()) as interface:
465
- gr.Markdown("# Ultimate Job Application Analyzer")
466
- gr.Markdown("Upload your resume and job description for ATS scoring, and/or upload an interview audio for performance analysis. Get detailed feedback to optimize your job application.")
467
-
468
- with gr Tabs():
469
- with gr.TabItem("ATS Analysis"):
470
- resume_file = gr.File(label="Upload Your Resume (PDF)", file_types=[".pdf"])
471
- job_description = gr.Textbox(label="Paste Job Description Here", lines=10, placeholder="Enter the job description...")
472
- ats_score = gr.Textbox(label="ATS Score")
473
- skills_match = gr.Textbox(label="Skills Analysis")
474
- ats_improvements = gr.Textbox(label="ATS Suggestions for Improvement")
475
- ats_breakdown = gr.Textbox(label="ATS Score Breakdown")
476
- jd_complexity = gr.Textbox(label="Job Description Complexity")
477
-
478
- with gr.TabItem("Interview Analysis"):
479
- interview_audio = gr.Audio(label="Upload Interview Audio (1-5 minutes)", type="filepath")
480
- transcription = gr.Textbox(label="Interview Transcription")
481
- tone_feedback = gr.Textbox(label="Tone Analysis")
482
- response_feedback = gr.Textbox(label="Response Correctness Feedback")
483
- sentiment_feedback = gr.Textbox(label="Sentiment Analysis")
484
-
485
- with gr.TabItem("Report"):
486
- report_download = gr.File(label="Download Analysis Report")
487
-
488
- submit_button = gr.Button("Analyze")
489
- submit_button.click(
490
- fn=process_combined,
491
- inputs=[resume_file, job_description, interview_audio],
492
- outputs=[
493
- ats_score, skills_match, ats_improvements, ats_breakdown, jd_complexity,
494
- transcription, tone_feedback, response_feedback, sentiment_feedback,
495
- report_download
496
- ]
497
- )
498
 
499
  if __name__ == "__main__":
500
  logger.info("Launching Gradio app...")
 
13
  import whisper
14
  import librosa
15
  import soundfile as sf
 
 
16
 
17
+ # Configure logging at the top
18
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
19
  logger = logging.getLogger(__name__)
20
 
 
34
  logger.error(f"Failed to load SentenceTransformer model: {str(e)}")
35
  raise e
36
 
37
+ # Load Whisper model
38
  try:
39
  whisper_model = whisper.load_model("base")
40
  except Exception as e:
 
53
  }
54
  SECTION_WEIGHTS = {'experience': 0.4, 'education': 0.2, 'skills': 0.25, 'projects': 0.1, 'certifications': 0.05}
55
 
56
+ # Expanded skill set for MAANG/FAANG with variations
57
  KEY_SKILLS = {
58
  'python', 'javascript', 'java', 'sql', 'aws', 'docker', 'react', 'nodejs', 'node.js', 'node js', 'machine learning',
59
  'data analysis', 'git', 'html', 'css', 'tensorflow', 'pytorch', 'cloud', 'api', 'devops',
 
63
  'pandas', 'numpy', 'scikit-learn', 'react.js', 'next.js', 'nextjs', 'etl'
64
  }
65
 
66
+ # Common interview questions and expected keywords
67
  INTERVIEW_QUESTIONS = {
68
  "Tell me about yourself": ["background", "experience", "skills", "achievements", "goals"],
69
  "What are your strengths?": ["strengths", "skills", "abilities", "teamwork", "problem-solving"],
 
79
  self.tfidf_vectorizer = TfidfVectorizer()
80
  self.whisper_model = whisper_model
81
 
82
+ # ATS Resume Analysis Methods
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  def extract_text(self, pdf_path: str) -> str:
84
  if not pdf_path or not os.path.exists(pdf_path):
85
  logger.error(f"PDF file not found: {pdf_path}")
 
160
  text_lower = text.lower()
161
  return any(re.search(pattern, text_lower) for pattern in patterns)
162
 
163
+ def analyze_resume(self, resume_path: str, job_description: str) -> Tuple[str, str, str, str]:
164
  logger.info("Starting resume analysis...")
165
  if not resume_path:
166
+ return "Error: No resume file uploaded", "", "", ""
167
  if not job_description.strip():
168
+ return "Error: Job description is empty", "", "", ""
169
 
170
  resume_text = self.extract_text(resume_path)
171
  if not resume_text:
172
+ return "Error: Could not extract text from resume PDF", "", "", ""
173
 
174
  cleaned_resume = self.preprocess_text(resume_text)
175
  cleaned_jd = self.preprocess_text(job_description)
 
187
 
188
  achievement_bonus = 5 if self.detect_achievements(resume_text) else 0
189
 
190
+ ats_score = np.clip(0.4 * keyword_score + 0.4 * skills_score + 0.2 * section_score + achievement_bonus, 0, 100)
 
 
 
 
 
 
 
 
191
 
192
+ skills_match = f"Matched Skills: {', '.join(sorted(matched_skills)) or 'None'}\nMissing Skills: {', '.join(sorted(missing_skills)) or 'None'}"
 
 
193
 
194
  jd_keywords = self.extract_keywords(job_description)
195
  resume_keywords = self.extract_keywords(resume_text)
 
210
  improvements.append(f"Include these missing skills in your skills or experience section: {', '.join(sorted(missing_skills))}")
211
  if not achievement_bonus:
212
  improvements.append("Add measurable achievements to boost your score (e.g., 'increased efficiency by 20%', 'reduced processing time by 5 hours')")
 
 
 
213
  improvement_text = "\n".join(improvements) or "Your resume is well-aligned with the job description!"
214
 
215
  breakdown = f"Keyword Match: {keyword_score:.1f}%\nSection Score: {section_score:.1f}%\nSkills Match: {skills_score:.1f}%"
216
  if achievement_bonus:
217
  breakdown += f"\nAchievement Bonus: +{achievement_bonus}%"
 
 
 
 
218
  logger.info("Resume analysis completed.")
219
+ return f"ATS Score: {ats_score:.1f}%", skills_match, improvement_text, breakdown
220
 
221
+ # Interview Analysis Methods (Audio Only)
222
  def transcribe_audio(self, audio_path: str) -> str:
223
  if not self.whisper_model:
224
  logger.error("Whisper model is not available. Cannot transcribe audio.")
 
275
  transcription_text = transcription if transcription else "No transcription available"
276
  return transcription_text, "\n".join(response_feedback)
277
 
278
+ def analyze_interview(self, audio_path: str = None) -> Tuple[str, str, str]:
279
  logger.info("Starting interview analysis...")
280
  if not audio_path:
281
+ return "Error: No audio file uploaded", "", ""
282
 
283
  transcription = self.transcribe_audio(audio_path)
284
  tone = self.analyze_tone(audio_path)
285
  transcription_text, response_feedback = self.evaluate_response_correctness(transcription)
286
 
 
 
 
287
  tone_feedback = f"Detected Tone: {tone}\n"
288
  if tone == "Confident":
289
  tone_feedback += "Your tone sounds confident, which is great for making a strong impression."
 
293
  tone_feedback += "Your tone is neutral. Consider adding more enthusiasm to engage the interviewer."
294
 
295
  logger.info("Interview analysis completed.")
296
+ return transcription_text, tone_feedback, response_feedback
297
 
298
  # Combined Analysis Function
299
  def process_combined(resume_file, job_description, interview_audio):
300
  analyzer = ATSInterviewAnalyzer()
301
 
302
+ if resume_file and job_description:
303
+ ats_score, skills_match, ats_improvements, ats_breakdown = analyzer.analyze_resume(resume_file, job_description)
304
+ else:
305
+ ats_score = "Not provided"
306
+ skills_match = "Not provided"
307
+ ats_improvements = "Not provided"
308
+ ats_breakdown = "Not provided"
309
+
310
+ if interview_audio:
311
+ transcription, tone_feedback, response_feedback = analyzer.analyze_interview(audio_path=interview_audio)
312
+ else:
313
+ transcription = "Not provided"
314
+ tone_feedback = "Not provided"
315
+ response_feedback = "Not provided"
316
+
317
+ return (
318
+ ats_score, skills_match, ats_improvements, ats_breakdown,
319
+ transcription, tone_feedback, response_feedback
320
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  # Gradio Interface
323
+ interface = gr.Interface(
324
+ fn=process_combined,
325
+ inputs=[
326
+ gr.File(label="Upload Your Resume (PDF)", file_types=[".pdf"]),
327
+ gr.Textbox(label="Paste Job Description Here", lines=10, placeholder="Enter the job description..."),
328
+ gr.Audio(label="Upload Interview Audio (1-5 minutes)", type="filepath")
329
+ ],
330
+ outputs=[
331
+ gr.Textbox(label="ATS Score"),
332
+ gr.Textbox(label="Skills Analysis"),
333
+ gr.Textbox(label="ATS Suggestions for Improvement"),
334
+ gr.Textbox(label="ATS Score Breakdown"),
335
+ gr.Textbox(label="Interview Transcription"),
336
+ gr.Textbox(label="Tone Analysis"),
337
+ gr.Textbox(label="Response Correctness Feedback")
338
+ ],
339
+ title="Ultimate Job Application Analyzer (MAANG/FAANG Edition)",
340
+ description="Upload your resume and job description for ATS scoring, and/or upload an interview audio for performance analysis. Get detailed feedback to optimize your job application.",
341
+ theme=gr.themes.Soft()
342
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
  if __name__ == "__main__":
345
  logger.info("Launching Gradio app...")