Spaces:

riteshkokam
/

ResAI

Sleeping

App Files Files Community

riteshkokam committed on Jun 19, 2025

Commit

882795b

verified ·

1 Parent(s): c809e4a

Create app.py

Browse files

Files changed (1) hide show

app.py +327 -0

app.py ADDED Viewed

	@@ -0,0 +1,327 @@

+import gradio as gr
+import torch
+from sentence_transformers import SentenceTransformer
+import PyPDF2
+import docx
+import io
+import re
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+import nltk
+from collections import Counter
+import warnings
+warnings.filterwarnings("ignore")
+# Download required NLTK data on first run.
+# 'punkt' covers older NLTK releases; newer releases load the 'punkt_tab'
+# tables for word_tokenize/sent_tokenize instead, so both are fetched.
+# 'stopwords' backs nltk.corpus.stopwords used by ResumeJobMatcher.
+for _resource_path, _package in (
+    ('tokenizers/punkt', 'punkt'),
+    ('tokenizers/punkt_tab', 'punkt_tab'),
+    ('corpora/stopwords', 'stopwords'),
+):
+    try:
+        # find() raises LookupError when the resource is not installed locally.
+        nltk.data.find(_resource_path)
+    except LookupError:
+        nltk.download(_package)
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize, sent_tokenize
+class ResumeJobMatcher:
+    """Score a resume against a job description and suggest improvements.
+    The overall score blends sentence-embedding similarity, keyword overlap
+    and per-section similarity; ``process_files`` holds the weights and is
+    the entry point wired to the UI.
+    """
+    def __init__(self):
+        """Load the sentence-embedding model and the English stop-word set."""
+        # Use a lightweight but effective sentence transformer model
+        # all-MiniLM-L6-v2 is optimized for CPU and works well on limited resources
+        self.model = SentenceTransformer('all-MiniLM-L6-v2')
+        self.stop_words = set(stopwords.words('english'))
+    def extract_text_from_pdf(self, pdf_file):
+        """Extract text from a PDF document.
+        Args:
+            pdf_file: Raw bytes of the PDF.
+        Returns:
+            The text of every page, each followed by a newline. If the
+            document cannot be parsed, a string starting with
+            "Error reading PDF:" is returned instead of raising.
+        """
+        try:
+            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
+            # `or ""` keeps a page that yields no text (e.g. a scanned image)
+            # from aborting the extraction of the remaining pages.
+            return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
+        except Exception as e:
+            return f"Error reading PDF: {str(e)}"
+    def extract_text_from_docx(self, docx_file):
+        """Extract text from a DOCX document.
+        Args:
+            docx_file: Raw bytes of the DOCX file.
+        Returns:
+            The text of every paragraph, each followed by a newline. If the
+            document cannot be parsed, a string starting with
+            "Error reading DOCX:" is returned instead of raising.
+        """
+        try:
+            doc = docx.Document(io.BytesIO(docx_file))
+            return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
+        except Exception as e:
+            return f"Error reading DOCX: {str(e)}"
+    def preprocess_text(self, text):
+        """Lower-case ``text`` and reduce it to words separated by single spaces.
+        Args:
+            text: Arbitrary text.
+        Returns:
+            The text with punctuation removed, whitespace runs collapsed to
+            one space, lower-cased and stripped.
+        """
+        # Replace punctuation first so the whitespace collapse below also
+        # removes the gaps it leaves behind ("a, b" -> "a b", not "a  b").
+        text = re.sub(r'[^\w\s]', ' ', text)
+        text = re.sub(r'\s+', ' ', text)
+        return text.lower().strip()
+    def extract_keywords(self, text, top_n=20):
+        """Return the ``top_n`` most frequent meaningful words in ``text``.
+        Words are lower-cased; tokens that are not purely alphabetic, are
+        stop words, or have fewer than three characters are ignored.
+        Args:
+            text: Text to analyse.
+            top_n: Maximum number of keywords to return.
+        Returns:
+            Keywords ordered from most to least frequent.
+        """
+        words = word_tokenize(text.lower())
+        words = [word for word in words if word.isalpha() and word not in self.stop_words and len(word) > 2]
+        # Get most common words
+        word_freq = Counter(words)
+        return [word for word, _freq in word_freq.most_common(top_n)]
+    def calculate_keyword_match(self, resume_keywords, job_keywords):
+        """Return the percentage of job keywords that also occur in the resume.
+        Args:
+            resume_keywords: Keywords extracted from the resume.
+            job_keywords: Keywords extracted from the job description.
+        Returns:
+            A value in [0, 100]; 0 when ``job_keywords`` is empty.
+        """
+        job_set = set(job_keywords)
+        if not job_set:
+            return 0.0
+        shared = job_set.intersection(resume_keywords)
+        return len(shared) / len(job_set) * 100
+    def get_semantic_similarity(self, resume_text, job_text):
+        """Return the embedding similarity of two texts on a 0-100 scale.
+        Args:
+            resume_text: First text (normally the resume or a part of it).
+            job_text: Second text (normally the job description).
+        Returns:
+            Cosine similarity of the two sentence embeddings times 100, as a
+            plain float clamped to [0, 100].
+        """
+        # Encode texts
+        resume_embedding = self.model.encode(resume_text)
+        job_embedding = self.model.encode(job_text)
+        # Calculate cosine similarity
+        similarity = cosine_similarity([resume_embedding], [job_embedding])[0][0]
+        # Cosine similarity can be negative; clamp so the displayed score
+        # never leaves the advertised 0-100 range.
+        return float(np.clip(similarity, 0.0, 1.0)) * 100
+    def analyze_sections(self, resume_text, job_text):
+        """Score resume sections against the job description.
+        Args:
+            resume_text: Resume text. Pass text that still has its line
+                breaks and punctuation so it can be split into sentences.
+            job_text: Job description text.
+        Returns:
+            Dict mapping 'experience', 'skills', 'education' and 'projects'
+            to a 0-100 score; 0 when the resume has no text for a section.
+        """
+        # Common resume sections patterns
+        sections = {
+            'experience': r'(experience|work history|employment|career|professional)',
+            'skills': r'(skills|competencies|technical|technologies|tools)',
+            'education': r'(education|degree|university|college|academic)',
+            'projects': r'(projects|portfolio|achievements|accomplishments)'
+        }
+        job_clean = self.preprocess_text(job_text)
+        section_scores = {}
+        for section, pattern in sections.items():
+            # Extract relevant text from resume
+            resume_section = self.extract_section_text(resume_text, pattern)
+            if resume_section:
+                # Normalise both sides the same way before comparing them.
+                score = self.get_semantic_similarity(self.preprocess_text(resume_section), job_clean)
+                section_scores[section] = min(score, 100)
+            else:
+                section_scores[section] = 0
+        return section_scores
+    def extract_section_text(self, text, pattern):
+        """Collect the sentences matching ``pattern`` plus their neighbours.
+        The text is split into lines and each line into sentences. Every
+        sentence matching ``pattern`` (case-insensitive) is kept together
+        with the sentence before and after it for context.
+        Args:
+            text: Text to search.
+            pattern: Regular expression identifying a section.
+        Returns:
+            The selected sentences joined by single spaces, in their original
+            order and without repeats; '' when nothing matches.
+        """
+        # Resume headings and bullet lines rarely end with a full stop, so
+        # line breaks are treated as sentence boundaries too.
+        segments = []
+        for line in text.splitlines():
+            line = line.strip()
+            if line:
+                segments.extend(sent_tokenize(line))
+        section_re = re.compile(pattern, re.IGNORECASE)
+        keep = set()
+        for i, segment in enumerate(segments):
+            if section_re.search(segment):
+                keep.update(range(max(i - 1, 0), min(i + 2, len(segments))))
+        return ' '.join(segments[i] for i in sorted(keep))
+    def generate_suggestions(self, resume_text, job_text, overall_score, section_scores, keyword_match_score):
+        """Build the list of Markdown improvement suggestions.
+        Args:
+            resume_text: Resume text used to look up missing keywords.
+            job_text: Job description text.
+            overall_score: Combined 0-100 match score.
+            section_scores: Mapping of section name to 0-100 score.
+            keyword_match_score: Keyword overlap percentage.
+        Returns:
+            A list of Markdown strings; the last item is always the general
+            "Pro Tip".
+        """
+        suggestions = []
+        # The three overall tiers are mutually exclusive so a score never
+        # receives both a "needs significant improvement" and a "good" verdict.
+        if overall_score <= 60:
+            suggestions.append("📈 **Overall Score Enhancement**: Your resume needs significant improvement to match this job. Consider tailoring your resume more specifically to the job requirements.")
+        if keyword_match_score < 40:
+            job_keywords = self.extract_keywords(job_text, 15)
+            resume_keywords = set(self.extract_keywords(resume_text, 15))
+            # Keep the job description's frequency order so the most important
+            # missing keywords come first and the output is deterministic.
+            missing_keywords = [word for word in job_keywords if word not in resume_keywords]
+            if missing_keywords:
+                suggestions.append(f"🔑 **Missing Keywords**: Consider incorporating these relevant keywords: {', '.join(missing_keywords[:8])}")
+        # Section-specific suggestions
+        if section_scores.get('skills', 0) < 50:
+            suggestions.append("🛠️ **Skills Section**: Enhance your skills section to better match the job requirements. Include both technical and soft skills mentioned in the job description.")
+        if section_scores.get('experience', 0) < 50:
+            suggestions.append("💼 **Experience Section**: Better highlight your relevant work experience. Use action verbs and quantify your achievements where possible.")
+        if section_scores.get('education', 0) < 30 and 'education' in job_text.lower():
+            suggestions.append("🎓 **Education Section**: If you have relevant educational background, make sure it's prominently featured and matches job requirements.")
+        if overall_score > 80:
+            suggestions.append("✅ **Great Match**: Your resume shows strong alignment with this job! Consider minor tweaks to optimize further.")
+        elif overall_score > 60:
+            suggestions.append("👍 **Good Foundation**: You have a solid foundation. Focus on highlighting the most relevant experiences and skills.")
+        # Always add a general suggestion
+        suggestions.append("💡 **Pro Tip**: Customize your resume for each application by emphasizing the most relevant experiences and using similar language to the job description.")
+        return suggestions
+    @staticmethod
+    def _read_upload(resume_file):
+        """Return ``(data, filename)`` for the supported upload shapes.
+        Accepts raw bytes (filename is then ''), a filesystem path, or an
+        object whose ``name`` attribute is a filesystem path.
+        Raises:
+            ValueError: If ``resume_file`` is none of the above.
+            OSError: If the referenced file cannot be read.
+        """
+        if isinstance(resume_file, (bytes, bytearray)):
+            return bytes(resume_file), ""
+        path = resume_file if isinstance(resume_file, str) else getattr(resume_file, 'name', '')
+        if not path or not isinstance(path, str):
+            raise ValueError("unsupported upload object: " + type(resume_file).__name__)
+        with open(path, 'rb') as handle:
+            return handle.read(), path.lower()
+    @staticmethod
+    def _detect_format(data, filename):
+        """Return 'pdf', 'docx' or None for an uploaded document.
+        The lower-cased ``filename`` extension decides when present;
+        otherwise the leading bytes of ``data`` are inspected.
+        """
+        if filename.endswith('.pdf'):
+            return 'pdf'
+        if filename.endswith('.docx'):
+            return 'docx'
+        head = data[:1024]
+        if b'%PDF-' in head:
+            return 'pdf'
+        # DOCX files are ZIP archives; a non-DOCX archive is rejected later by
+        # extract_text_from_docx with an "Error reading DOCX" message.
+        if head.startswith(b'PK\x03\x04'):
+            return 'docx'
+        return None
+    def process_files(self, resume_file, job_description):
+        """Analyse an uploaded resume against a job description.
+        Args:
+            resume_file: The upload as raw bytes, a file path, or an object
+                whose ``name`` is a file path; None when nothing was uploaded.
+            job_description: Job description text (may be None or blank).
+        Returns:
+            A 4-tuple of Markdown strings: score headline, detailed analysis,
+            suggestions, keyword analysis. On any failure the first element
+            holds the message and the other three are empty strings.
+        """
+        try:
+            # Extract text from resume file
+            if resume_file is None:
+                return "Please upload a resume file.", "", "", ""
+            if not (job_description or "").strip():
+                return "Please provide a job description.", "", "", ""
+            # Determine file type and extract text. A bytes upload carries no
+            # filename, so the type is detected from the content as well.
+            file_content, filename = self._read_upload(resume_file)
+            file_format = self._detect_format(file_content, filename)
+            if file_format == 'pdf':
+                resume_text = self.extract_text_from_pdf(file_content)
+            elif file_format == 'docx':
+                resume_text = self.extract_text_from_docx(file_content)
+            else:
+                return "Unsupported file format. Please upload PDF or DOCX files.", "", "", ""
+            # The extractors report failure with this prefix; a prefix test
+            # avoids rejecting a resume that merely contains the phrase.
+            if resume_text.startswith("Error reading"):
+                return resume_text, "", "", ""
+            # Preprocess texts
+            resume_clean = self.preprocess_text(resume_text)
+            job_clean = self.preprocess_text(job_description)
+            if len(resume_clean) < 50:
+                return "Resume text is too short or couldn't be extracted properly.", "", "", ""
+            # Calculate different scores
+            semantic_score = self.get_semantic_similarity(resume_clean, job_clean)
+            # Keyword matching
+            resume_keywords = self.extract_keywords(resume_clean)
+            job_keywords = self.extract_keywords(job_clean)
+            keyword_score = self.calculate_keyword_match(resume_keywords, job_keywords)
+            # Section analysis works on the raw resume: the cleaned text has
+            # lost the line breaks and punctuation that separate sentences.
+            section_scores = self.analyze_sections(resume_text, job_clean)
+            # Calculate overall score (weighted average)
+            overall_score = (
+                semantic_score * 0.4 +  # Semantic similarity (40%)
+                keyword_score * 0.3 +   # Keyword matching (30%)
+                np.mean(list(section_scores.values())) * 0.3  # Section scores (30%)
+            )
+            overall_score = max(0, min(round(float(overall_score)), 100))  # Keep within 0-100
+            # Generate suggestions
+            suggestions = self.generate_suggestions(
+                resume_clean, job_clean, overall_score, section_scores, keyword_score
+            )
+            # Format results
+            score_text = f"# 🎯 Resume Match Score: {overall_score}/100\n\n"
+            details = f"""## 📊 Detailed Analysis
+**Semantic Similarity**: {semantic_score:.1f}/100
+**Keyword Match**: {keyword_score:.1f}/100
+### Section Scores:
+- **Experience**: {section_scores.get('experience', 0):.1f}/100
+- **Skills**: {section_scores.get('skills', 0):.1f}/100
+- **Education**: {section_scores.get('education', 0):.1f}/100
+- **Projects**: {section_scores.get('projects', 0):.1f}/100
+"""
+            suggestions_text = "## 💡 Improvement Suggestions\n\n" + "\n\n".join(suggestions)
+            # Keywords comparison, listed in resume frequency order so the
+            # output is stable between runs.
+            top_job_keywords = set(job_keywords[:10])
+            common_keywords = [word for word in resume_keywords[:10] if word in top_job_keywords]
+            keywords_text = f"""## 🔍 Keyword Analysis
+**Top Resume Keywords**: {', '.join(resume_keywords[:10])}
+**Top Job Keywords**: {', '.join(job_keywords[:10])}
+**Matching Keywords**: {', '.join(common_keywords) if common_keywords else 'None found'}
+"""
+            return score_text, details, suggestions_text, keywords_text
+        except Exception as e:
+            # UI boundary: report the problem in the score panel rather than
+            # letting the request fail.
+            return f"An error occurred: {str(e)}", "", "", ""
+# Initialize the matcher
+# A single module-level instance: constructing ResumeJobMatcher loads the
+# sentence-transformer model, and create_interface() binds this instance's
+# process_files method to the Analyze button.
+matcher = ResumeJobMatcher()
+# Create Gradio interface
+def create_interface():
+    """Assemble the Gradio Blocks UI and return it without launching.
+    The page has a banner, a left column with the resume upload, the job
+    description box and the analyze button, a right column with four
+    Markdown result panels, and a footer. The button is wired to
+    ``matcher.process_files``.
+    """
+    banner_html = """
+        <div style='text-align: center; padding: 20px;'>
+            <h1>🎯 AI Resume Job Matcher</h1>
+            <p>Upload your resume and paste the job description to get a compatibility score and improvement suggestions!</p>
+        </div>
+        """
+    footer_html = """
+        <div style='text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd;'>
+            <p><strong>How it works:</strong> This tool uses advanced AI to analyze semantic similarity between your resume and job description,
+            performs keyword matching, and provides personalized suggestions for improvement.</p>
+            <p><em>Supported formats: PDF, DOCX</em></p>
+        </div>
+        """
+    with gr.Blocks(title="Resume Job Matcher", theme=gr.themes.Soft()) as demo:
+        gr.HTML(banner_html)
+        with gr.Row():
+            # Left column: inputs and the trigger button.
+            with gr.Column(scale=1):
+                gr.HTML("<h3>📄 Upload Resume</h3>")
+                resume_upload = gr.File(
+                    type="binary",
+                    file_types=[".pdf", ".docx"],
+                    label="Upload Resume (PDF/DOCX)",
+                )
+                gr.HTML("<h3>📋 Job Description</h3>")
+                job_box = gr.Textbox(
+                    lines=10,
+                    max_lines=15,
+                    label="Paste Job Description",
+                    placeholder="Paste the complete job description here...",
+                )
+                run_button = gr.Button("🔍 Analyze Match", variant="primary", size="lg")
+            # Right column: one Markdown panel per value returned by process_files.
+            with gr.Column(scale=1):
+                score_panel = gr.Markdown(label="Match Score")
+                details_panel = gr.Markdown(label="Detailed Analysis")
+                suggestions_panel = gr.Markdown(label="Suggestions")
+                keywords_panel = gr.Markdown(label="Keywords Analysis")
+        # Set up the event handler
+        result_panels = [score_panel, details_panel, suggestions_panel, keywords_panel]
+        run_button.click(
+            fn=matcher.process_files,
+            inputs=[resume_upload, job_box],
+            outputs=result_panels,
+        )
+        gr.HTML(footer_html)
+    return demo
+# Launch the app
+# Runs only when the file is executed as a script: builds the UI and serves
+# it on all network interfaces (0.0.0.0) at port 7860.
+# NOTE(review): share=True presumably asks Gradio for a public share link in
+# addition to the local server — confirm this is wanted for an app that
+# receives uploaded resumes.
+if __name__ == "__main__":
+    app = create_interface()
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True
+    )