Spaces:

Sumit404
/

ats-resume-matcher

Runtime error

App Files Community

Sumit404 committed on Apr 28, 2025

Commit

f5b8ae8

verified ·

1 Parent(s): 324db65

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -210

app.py CHANGED Viewed

@@ -13,10 +13,8 @@ from typing import List, Dict, Tuple, Set
 import whisper
 import librosa
 import soundfile as sf
-from textblob import TextBlob
-from readability import Readability
-# Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
@@ -36,6 +34,7 @@ except Exception as e:
     logger.error(f"Failed to load SentenceTransformer model: {str(e)}")
     raise e
 try:
     whisper_model = whisper.load_model("base")
 except Exception as e:
@@ -54,7 +53,7 @@ SECTION_KEYWORDS = {
 }
 SECTION_WEIGHTS = {'experience': 0.4, 'education': 0.2, 'skills': 0.25, 'projects': 0.1, 'certifications': 0.05}
-# Expanded skill set for MAANG/FAANG
 KEY_SKILLS = {
     'python', 'javascript', 'java', 'sql', 'aws', 'docker', 'react', 'nodejs', 'node.js', 'node js', 'machine learning',
     'data analysis', 'git', 'html', 'css', 'tensorflow', 'pytorch', 'cloud', 'api', 'devops',
@@ -64,7 +63,7 @@ KEY_SKILLS = {
     'pandas', 'numpy', 'scikit-learn', 'react.js', 'next.js', 'nextjs', 'etl'
 }
-# Common interview questions
 INTERVIEW_QUESTIONS = {
     "Tell me about yourself": ["background", "experience", "skills", "achievements", "goals"],
     "What are your strengths?": ["strengths", "skills", "abilities", "teamwork", "problem-solving"],
@@ -80,94 +79,7 @@ class ATSInterviewAnalyzer:
         self.tfidf_vectorizer = TfidfVectorizer()
         self.whisper_model = whisper_model
-    # New Feature: Resume Formatting Analysis
-    def analyze_formatting(self, pdf_path: str) -> Tuple[bool, List[str]]:
-        try:
-            doc = fitz.open(pdf_path)
-            fonts = set()
-            font_sizes = []
-            page_count = len(doc)
-            for page in doc:
-                text_dict = page.get_text("dict")
-                for block in text_dict.get("blocks", []):
-                    for line in block.get("lines", []):
-                        for span in line.get("spans", []):
-                            fonts.add(span["font"])
-                            font_sizes.append(span["size"])
-            issues = []
-            is_ats_friendly = True
-            # Check font consistency (max 2 different fonts recommended)
-            if len(fonts) > 2:
-                issues.append("Use no more than 2 different fonts for ATS compatibility (e.g., Arial, Times New Roman).")
-                is_ats_friendly = False
-            # Check font size (between 10 and 12 recommended)
-            if font_sizes:
-                avg_font_size = np.mean(font_sizes)
-                if avg_font_size < 10 or avg_font_size > 12:
-                    issues.append("Ensure font size is between 10 and 12 points for readability.")
-                    is_ats_friendly = False
-            # Check page length (1-2 pages recommended)
-            if page_count > 2:
-                issues.append("Keep resume to 1-2 pages for ATS compatibility and recruiter preference.")
-                is_ats_friendly = False
-            return is_ats_friendly, issues
-        except Exception as e:
-            logger.error(f"Error analyzing formatting: {str(e)}")
-            return False, ["Unable to analyze formatting due to an error."]
-    # New Feature: Keyword Density Analysis
-    def analyze_keyword_density(self, resume_text: str, jd_keywords: Set[str]) -> Tuple[float, List[str]]:
-        if not resume_text or not jd_keywords:
-            return 0.0, ["No keywords provided for density analysis."]
-        words = resume_text.lower().split()
-        total_words = len(words)
-        keyword_counts = {kw: words.count(kw.lower()) for kw in jd_keywords}
-        keyword_density = sum(keyword_counts.values()) / max(total_words, 1) * 100
-        suggestions = []
-        if keyword_density < 1.0:
-            suggestions.append("Increase keyword usage (aim for 1-3% density) by incorporating job description terms naturally.")
-        elif keyword_density > 3.0:
-            suggestions.append("Reduce keyword usage (aim for 1-3% density) to avoid appearing as keyword stuffing.")
-        return keyword_density, suggestions
-    # New Feature: Sentiment Analysis for Interview
-    def analyze_sentiment(self, transcription: str) -> str:
-        if not transcription:
-            return "No transcription available for sentiment analysis."
-        blob = TextBlob(transcription)
-        sentiment_score = blob.sentiment.polarity
-        if sentiment_score > 0.1:
-            return "Positive: Your responses sound optimistic, which is great for engaging interviewers."
-        elif sentiment_score < -0.1:
-            return "Negative: Your responses may sound pessimistic. Try using more positive language (e.g., focus on solutions and achievements)."
-        else:
-            return "Neutral: Your responses are balanced. Consider adding more enthusiastic language to stand out."
-    # New Feature: Job Description Complexity Analysis
-    def analyze_jd_complexity(self, jd_text: str) -> str:
-        if not jd_text:
-            return "No job description provided for complexity analysis."
-        try:
-            r = Readability(jd_text)
-            fk = r.flesch_kincaid()
-            grade_level = round(fk.score)
-            return f"Job Description Readability: Flesch-Kincaid Grade {grade_level}. Tailor your resume to match this complexity (e.g., use {grade_level}-grade level language)."
-        except Exception as e:
-            logger.error(f"Error analyzing JD complexity: {str(e)}")
-            return "Unable to analyze job description complexity."
-    # Existing Methods (unchanged except for integration with new features)
     def extract_text(self, pdf_path: str) -> str:
         if not pdf_path or not os.path.exists(pdf_path):
             logger.error(f"PDF file not found: {pdf_path}")
@@ -248,16 +160,16 @@ class ATSInterviewAnalyzer:
         text_lower = text.lower()
         return any(re.search(pattern, text_lower) for pattern in patterns)
-    def analyze_resume(self, resume_path: str, job_description: str) -> Tuple[str, str, str, str, str]:
         logger.info("Starting resume analysis...")
         if not resume_path:
-            return "Error: No resume file uploaded", "", "", "", ""
         if not job_description.strip():
-            return "Error: Job description is empty", "", "", "", ""
         resume_text = self.extract_text(resume_path)
         if not resume_text:
-            return "Error: Could not extract text from resume PDF", "", "", "", ""
         cleaned_resume = self.preprocess_text(resume_text)
         cleaned_jd = self.preprocess_text(job_description)
@@ -275,19 +187,9 @@ class ATSInterviewAnalyzer:
         achievement_bonus = 5 if self.detect_achievements(resume_text) else 0
-        # New: Formatting Analysis
-        is_ats_friendly, formatting_issues = self.analyze_formatting(resume_path)
-        # New: Keyword Density Analysis
-        jd_keywords = self.extract_keywords(job_description)
-        keyword_density, density_suggestions = self.analyze_keyword_density(resume_text, jd_keywords)
-        # New: JD Complexity Analysis
-        jd_complexity = self.analyze_jd_complexity(job_description)
-        ats_score = np.clip(0.35 * keyword_score + 0.35 * skills_score + 0.2 * section_score + achievement_bonus + (5 if is_ats_friendly else 0), 0, 100)
-        skills_match = f"Matched Skills: {', '.join(sorted(matched_skills)) or 'None'}\nMissing Skills: {', '.join(sorted(missing_skills)) or 'None'}\nKeyword Density: {keyword_density:.2f}%"
         jd_keywords = self.extract_keywords(job_description)
         resume_keywords = self.extract_keywords(resume_text)
@@ -308,21 +210,15 @@ class ATSInterviewAnalyzer:
             improvements.append(f"Include these missing skills in your skills or experience section: {', '.join(sorted(missing_skills))}")
         if not achievement_bonus:
             improvements.append("Add measurable achievements to boost your score (e.g., 'increased efficiency by 20%', 'reduced processing time by 5 hours')")
-        if formatting_issues:
-            improvements.extend(formatting_issues)
-        improvements.extend(density_suggestions)
         improvement_text = "\n".join(improvements) or "Your resume is well-aligned with the job description!"
         breakdown = f"Keyword Match: {keyword_score:.1f}%\nSection Score: {section_score:.1f}%\nSkills Match: {skills_score:.1f}%"
         if achievement_bonus:
             breakdown += f"\nAchievement Bonus: +{achievement_bonus}%"
-        if is_ats_friendly:
-            breakdown += "\nFormatting Bonus: +5%"
-        breakdown += f"\nKeyword Density: {keyword_density:.2f}%"
         logger.info("Resume analysis completed.")
-        return f"ATS Score: {ats_score:.1f}%", skills_match, improvement_text, breakdown, jd_complexity
     def transcribe_audio(self, audio_path: str) -> str:
         if not self.whisper_model:
             logger.error("Whisper model is not available. Cannot transcribe audio.")
@@ -379,18 +275,15 @@ class ATSInterviewAnalyzer:
         transcription_text = transcription if transcription else "No transcription available"
         return transcription_text, "\n".join(response_feedback)
-    def analyze_interview(self, audio_path: str = None) -> Tuple[str, str, str, str]:
         logger.info("Starting interview analysis...")
         if not audio_path:
-            return "Error: No audio file uploaded", "", "", ""
         transcription = self.transcribe_audio(audio_path)
         tone = self.analyze_tone(audio_path)
         transcription_text, response_feedback = self.evaluate_response_correctness(transcription)
-        # New: Sentiment Analysis
-        sentiment_feedback = self.analyze_sentiment(transcription)
         tone_feedback = f"Detected Tone: {tone}\n"
         if tone == "Confident":
             tone_feedback += "Your tone sounds confident, which is great for making a strong impression."
@@ -400,101 +293,53 @@ class ATSInterviewAnalyzer:
             tone_feedback += "Your tone is neutral. Consider adding more enthusiasm to engage the interviewer."
         logger.info("Interview analysis completed.")
-        return transcription_text, tone_feedback, response_feedback, sentiment_feedback
 # Combined Analysis Function
 def process_combined(resume_file, job_description, interview_audio):
     analyzer = ATSInterviewAnalyzer()
-    with gr.Blocks() as progress:
-        gr.Markdown("### Analysis Progress")
-        progress_bar = gr.Slider(minimum=0, maximum=100, value=0, interactive=False)
-        if resume_file and job_description:
-            progress_bar.value = 20
-            ats_score, skills_match, ats_improvements, ats_breakdown, jd_complexity = analyzer.analyze_resume(resume_file, job_description)
-        else:
-            ats_score = "Not provided"
-            skills_match = "Not provided"
-            ats_improvements = "Not provided"
-            ats_breakdown = "Not provided"
-            jd_complexity = "Not provided"
-        progress_bar.value = 60
-        if interview_audio:
-            transcription, tone_feedback, response_feedback, sentiment_feedback = analyzer.analyze_interview(audio_path=interview_audio)
-        else:
-            transcription = "Not provided"
-            tone_feedback = "Not provided"
-            response_feedback = "Not provided"
-            sentiment_feedback = "Not provided"
-        progress_bar.value = 100
-        # New: Generate downloadable report
-        report_content = f"""
-        Job Application Analysis Report
-        =============================
-        ATS Analysis
-        ------------
-        ATS Score: {ats_score}
-        Skills Analysis: {skills_match}
-        ATS Suggestions: {ats_improvements}
-        ATS Breakdown: {ats_breakdown}
-        JD Complexity: {jd_complexity}
-        Interview Analysis
-        ------------------
-        Transcription: {transcription}
-        Tone Analysis: {tone_feedback}
-        Response Feedback: {response_feedback}
-        Sentiment Analysis: {sentiment_feedback}
-        """
-        report_file = "job_application_report.txt"
-        with open(report_file, "w") as f:
-            f.write(report_content)
-        return (
-            ats_score, skills_match, ats_improvements, ats_breakdown, jd_complexity,
-            transcription, tone_feedback, response_feedback, sentiment_feedback,
-            report_file
-        )
 # Gradio Interface
-with gr.Blocks(theme=gr.themes.Soft()) as interface:
-    gr.Markdown("# Ultimate Job Application Analyzer")
-    gr.Markdown("Upload your resume and job description for ATS scoring, and/or upload an interview audio for performance analysis. Get detailed feedback to optimize your job application.")
-    with gr Tabs():
-        with gr.TabItem("ATS Analysis"):
-            resume_file = gr.File(label="Upload Your Resume (PDF)", file_types=[".pdf"])
-            job_description = gr.Textbox(label="Paste Job Description Here", lines=10, placeholder="Enter the job description...")
-            ats_score = gr.Textbox(label="ATS Score")
-            skills_match = gr.Textbox(label="Skills Analysis")
-            ats_improvements = gr.Textbox(label="ATS Suggestions for Improvement")
-            ats_breakdown = gr.Textbox(label="ATS Score Breakdown")
-            jd_complexity = gr.Textbox(label="Job Description Complexity")
-        with gr.TabItem("Interview Analysis"):
-            interview_audio = gr.Audio(label="Upload Interview Audio (1-5 minutes)", type="filepath")
-            transcription = gr.Textbox(label="Interview Transcription")
-            tone_feedback = gr.Textbox(label="Tone Analysis")
-            response_feedback = gr.Textbox(label="Response Correctness Feedback")
-            sentiment_feedback = gr.Textbox(label="Sentiment Analysis")
-        with gr.TabItem("Report"):
-            report_download = gr.File(label="Download Analysis Report")
-    submit_button = gr.Button("Analyze")
-    submit_button.click(
-        fn=process_combined,
-        inputs=[resume_file, job_description, interview_audio],
-        outputs=[
-            ats_score, skills_match, ats_improvements, ats_breakdown, jd_complexity,
-            transcription, tone_feedback, response_feedback, sentiment_feedback,
-            report_download
-        ]
-    )
 if __name__ == "__main__":
     logger.info("Launching Gradio app...")

 import whisper
 import librosa
 import soundfile as sf
+# Configure logging at the top
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
     logger.error(f"Failed to load SentenceTransformer model: {str(e)}")
     raise e
+# Load Whisper model
 try:
     whisper_model = whisper.load_model("base")
 except Exception as e:
 }
 SECTION_WEIGHTS = {'experience': 0.4, 'education': 0.2, 'skills': 0.25, 'projects': 0.1, 'certifications': 0.05}
+# Expanded skill set for MAANG/FAANG with variations
 KEY_SKILLS = {
     'python', 'javascript', 'java', 'sql', 'aws', 'docker', 'react', 'nodejs', 'node.js', 'node js', 'machine learning',
     'data analysis', 'git', 'html', 'css', 'tensorflow', 'pytorch', 'cloud', 'api', 'devops',
     'pandas', 'numpy', 'scikit-learn', 'react.js', 'next.js', 'nextjs', 'etl'
 }
+# Common interview questions and expected keywords
 INTERVIEW_QUESTIONS = {
     "Tell me about yourself": ["background", "experience", "skills", "achievements", "goals"],
     "What are your strengths?": ["strengths", "skills", "abilities", "teamwork", "problem-solving"],
         self.tfidf_vectorizer = TfidfVectorizer()
         self.whisper_model = whisper_model
+    # ATS Resume Analysis Methods
     def extract_text(self, pdf_path: str) -> str:
         if not pdf_path or not os.path.exists(pdf_path):
             logger.error(f"PDF file not found: {pdf_path}")
         text_lower = text.lower()
         return any(re.search(pattern, text_lower) for pattern in patterns)
+    def analyze_resume(self, resume_path: str, job_description: str) -> Tuple[str, str, str, str]:
         logger.info("Starting resume analysis...")
         if not resume_path:
+            return "Error: No resume file uploaded", "", "", ""
         if not job_description.strip():
+            return "Error: Job description is empty", "", "", ""
         resume_text = self.extract_text(resume_path)
         if not resume_text:
+            return "Error: Could not extract text from resume PDF", "", "", ""
         cleaned_resume = self.preprocess_text(resume_text)
         cleaned_jd = self.preprocess_text(job_description)
         achievement_bonus = 5 if self.detect_achievements(resume_text) else 0
+        ats_score = np.clip(0.4 * keyword_score + 0.4 * skills_score + 0.2 * section_score + achievement_bonus, 0, 100)
+        skills_match = f"Matched Skills: {', '.join(sorted(matched_skills)) or 'None'}\nMissing Skills: {', '.join(sorted(missing_skills)) or 'None'}"
         jd_keywords = self.extract_keywords(job_description)
         resume_keywords = self.extract_keywords(resume_text)
             improvements.append(f"Include these missing skills in your skills or experience section: {', '.join(sorted(missing_skills))}")
         if not achievement_bonus:
             improvements.append("Add measurable achievements to boost your score (e.g., 'increased efficiency by 20%', 'reduced processing time by 5 hours')")
         improvement_text = "\n".join(improvements) or "Your resume is well-aligned with the job description!"
         breakdown = f"Keyword Match: {keyword_score:.1f}%\nSection Score: {section_score:.1f}%\nSkills Match: {skills_score:.1f}%"
         if achievement_bonus:
             breakdown += f"\nAchievement Bonus: +{achievement_bonus}%"
         logger.info("Resume analysis completed.")
+        return f"ATS Score: {ats_score:.1f}%", skills_match, improvement_text, breakdown
+    # Interview Analysis Methods (Audio Only)
     def transcribe_audio(self, audio_path: str) -> str:
         if not self.whisper_model:
             logger.error("Whisper model is not available. Cannot transcribe audio.")
         transcription_text = transcription if transcription else "No transcription available"
         return transcription_text, "\n".join(response_feedback)
+    def analyze_interview(self, audio_path: str = None) -> Tuple[str, str, str]:
         logger.info("Starting interview analysis...")
         if not audio_path:
+            return "Error: No audio file uploaded", "", ""
         transcription = self.transcribe_audio(audio_path)
         tone = self.analyze_tone(audio_path)
         transcription_text, response_feedback = self.evaluate_response_correctness(transcription)
         tone_feedback = f"Detected Tone: {tone}\n"
         if tone == "Confident":
             tone_feedback += "Your tone sounds confident, which is great for making a strong impression."
             tone_feedback += "Your tone is neutral. Consider adding more enthusiasm to engage the interviewer."
         logger.info("Interview analysis completed.")
+        return transcription_text, tone_feedback, response_feedback
 # Combined Analysis Function
 def process_combined(resume_file, job_description, interview_audio):
     analyzer = ATSInterviewAnalyzer()
+    if resume_file and job_description:
+        ats_score, skills_match, ats_improvements, ats_breakdown = analyzer.analyze_resume(resume_file, job_description)
+    else:
+        ats_score = "Not provided"
+        skills_match = "Not provided"
+        ats_improvements = "Not provided"
+        ats_breakdown = "Not provided"
+    if interview_audio:
+        transcription, tone_feedback, response_feedback = analyzer.analyze_interview(audio_path=interview_audio)
+    else:
+        transcription = "Not provided"
+        tone_feedback = "Not provided"
+        response_feedback = "Not provided"
+    return (
+        ats_score, skills_match, ats_improvements, ats_breakdown,
+        transcription, tone_feedback, response_feedback
+    )
 # Gradio Interface
+interface = gr.Interface(
+    fn=process_combined,
+    inputs=[
+        gr.File(label="Upload Your Resume (PDF)", file_types=[".pdf"]),
+        gr.Textbox(label="Paste Job Description Here", lines=10, placeholder="Enter the job description..."),
+        gr.Audio(label="Upload Interview Audio (1-5 minutes)", type="filepath")
+    ],
+    outputs=[
+        gr.Textbox(label="ATS Score"),
+        gr.Textbox(label="Skills Analysis"),
+        gr.Textbox(label="ATS Suggestions for Improvement"),
+        gr.Textbox(label="ATS Score Breakdown"),
+        gr.Textbox(label="Interview Transcription"),
+        gr.Textbox(label="Tone Analysis"),
+        gr.Textbox(label="Response Correctness Feedback")
+    ],
+    title="Ultimate Job Application Analyzer (MAANG/FAANG Edition)",
+    description="Upload your resume and job description for ATS scoring, and/or upload an interview audio for performance analysis. Get detailed feedback to optimize your job application.",
+    theme=gr.themes.Soft()
+)
 if __name__ == "__main__":
     logger.info("Launching Gradio app...")