riteshkokam committed on
Commit
882795b
·
verified ·
1 Parent(s): c809e4a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +327 -0
app.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from sentence_transformers import SentenceTransformer
4
+ import PyPDF2
5
+ import docx
6
+ import io
7
+ import re
8
+ import numpy as np
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+ import nltk
11
+ from collections import Counter
12
+ import warnings
13
+ warnings.filterwarnings("ignore")
14
+
15
# Download required NLTK data on first run.
# Recent NLTK releases load the Punkt sentence/word tokenizer from the
# 'punkt_tab' package while older ones use 'punkt'; both are requested so
# word_tokenize / sent_tokenize work with whichever version is installed.
for _resource_path, _package in (
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/stopwords', 'stopwords'),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # Resource is not present locally yet: fetch it.
        nltk.download(_package)
25
+
26
+ from nltk.corpus import stopwords
27
+ from nltk.tokenize import word_tokenize, sent_tokenize
28
+
29
class ResumeJobMatcher:
    """Score a resume against a job description and suggest improvements.

    The overall score blends three 0-100 signals: embedding cosine similarity
    (40%), overlap of the most frequent keywords (30%) and the mean of the
    per-section similarities (30%).
    """

    # Case-insensitive regex alternations; a sentence matching one of these is
    # treated as belonging to that resume section.
    SECTION_PATTERNS = {
        'experience': r'(experience|work history|employment|career|professional)',
        'skills': r'(skills|competencies|technical|technologies|tools)',
        'education': r'(education|degree|university|college|academic)',
        'projects': r'(projects|portfolio|achievements|accomplishments)',
    }

    # Leading bytes used to recognise an upload when no filename is available.
    _PDF_MAGIC = b'%PDF'
    _ZIP_MAGIC = b'PK\x03\x04'  # DOCX files are ZIP containers

    def __init__(self):
        """Load the sentence-embedding model and the English stop-word list."""
        # Lightweight sentence-transformer model, chosen so the app can run on
        # CPU-only hosts with limited resources.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.stop_words = set(stopwords.words('english'))

    def extract_text_from_pdf(self, pdf_file):
        """Extract text from the raw bytes of a PDF file.

        Returns the page texts, each followed by a newline, or a string
        starting with ``"Error reading PDF:"`` if the file cannot be parsed.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
            # A page without extractable text must not abort the whole file.
            return "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    def extract_text_from_docx(self, docx_file):
        """Extract text from the raw bytes of a DOCX file.

        Returns the paragraph texts, each followed by a newline, or a string
        starting with ``"Error reading DOCX:"`` if the file cannot be parsed.
        """
        try:
            doc = docx.Document(io.BytesIO(docx_file))
            return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
        except Exception as e:
            return f"Error reading DOCX: {str(e)}"

    def preprocess_text(self, text):
        """Lower-case *text*, drop punctuation and collapse whitespace runs."""
        # Replace punctuation first: doing it after the whitespace collapse
        # would re-introduce runs of spaces.
        text = re.sub(r'[^\w\s]', ' ', text)
        text = re.sub(r'\s+', ' ', text)
        return text.lower().strip()

    def extract_keywords(self, text, top_n=20):
        """Return the *top_n* most frequent meaningful words in *text*.

        Words must be alphabetic, longer than two characters and not an
        English stop word. The result is ordered by descending frequency.
        """
        words = [
            word
            for word in word_tokenize(text.lower())
            if word.isalpha() and word not in self.stop_words and len(word) > 2
        ]
        return [word for word, _ in Counter(words).most_common(top_n)]

    def calculate_keyword_match(self, resume_keywords, job_keywords):
        """Return the percentage (0-100) of job keywords found in the resume."""
        job_set = set(job_keywords)
        if not job_set:
            return 0.0
        shared = job_set.intersection(resume_keywords)
        return len(shared) / len(job_set) * 100

    def get_semantic_similarity(self, resume_text, job_text):
        """Return the embedding cosine similarity of two texts on a 0-100 scale."""
        resume_embedding = self.model.encode(resume_text)
        job_embedding = self.model.encode(job_text)
        similarity = cosine_similarity([resume_embedding], [job_embedding])[0][0]
        # Cosine similarity can be negative; keep the score inside 0-100.
        return max(0.0, min(100.0, float(similarity) * 100))

    def analyze_sections(self, resume_text, job_text):
        """Score each resume section against the job text.

        Returns a dict mapping each key of ``SECTION_PATTERNS`` to a 0-100
        similarity score, or 0 when no sentence of the resume matches the
        section's pattern.
        """
        section_scores = {}
        for section, pattern in self.SECTION_PATTERNS.items():
            resume_section = self.extract_section_text(resume_text, pattern)
            if resume_section:
                score = self.get_semantic_similarity(resume_section, job_text)
                section_scores[section] = min(score, 100)
            else:
                section_scores[section] = 0
        return section_scores

    def extract_section_text(self, text, pattern):
        """Return the sentences of *text* matching *pattern*, plus neighbours.

        Each matching sentence is kept together with the sentence before and
        after it for context. Sentences appear once, in document order.
        """
        sentences = sent_tokenize(text)
        section_re = re.compile(pattern, re.IGNORECASE)

        keep = set()
        for i, sentence in enumerate(sentences):
            if section_re.search(sentence):
                keep.update((i - 1, i, i + 1))

        last = len(sentences) - 1
        return ' '.join(sentences[i] for i in sorted(keep) if 0 <= i <= last)

    def generate_suggestions(self, resume_text, job_text, overall_score, section_scores, keyword_match_score):
        """Build a list of markdown-formatted improvement suggestions.

        The list always ends with a general tip, so it is never empty.
        """
        suggestions = []

        if overall_score < 70:
            suggestions.append("📈 **Overall Score Enhancement**: Your resume needs significant improvement to match this job. Consider tailoring your resume more specifically to the job requirements.")

        if keyword_match_score < 40:
            job_keywords = self.extract_keywords(job_text, 15)
            resume_keywords = set(self.extract_keywords(resume_text, 15))
            # Keep the job description's frequency order so the most important
            # missing terms come first and the output is deterministic.
            missing_keywords = [kw for kw in job_keywords if kw not in resume_keywords]
            if missing_keywords:
                suggestions.append(f"🔑 **Missing Keywords**: Consider incorporating these relevant keywords: {', '.join(missing_keywords[:8])}")

        # Section-specific suggestions
        if section_scores.get('skills', 0) < 50:
            suggestions.append("🛠️ **Skills Section**: Enhance your skills section to better match the job requirements. Include both technical and soft skills mentioned in the job description.")

        if section_scores.get('experience', 0) < 50:
            suggestions.append("💼 **Experience Section**: Better highlight your relevant work experience. Use action verbs and quantify your achievements where possible.")

        if section_scores.get('education', 0) < 30 and 'education' in job_text.lower():
            suggestions.append("🎓 **Education Section**: If you have relevant educational background, make sure it's prominently featured and matches job requirements.")

        if overall_score > 80:
            suggestions.append("✅ **Great Match**: Your resume shows strong alignment with this job! Consider minor tweaks to optimize further.")
        elif overall_score > 60:
            suggestions.append("👍 **Good Foundation**: You have a solid foundation. Focus on highlighting the most relevant experiences and skills.")

        # Always add a general suggestion
        suggestions.append("💡 **Pro Tip**: Customize your resume for each application by emphasizing the most relevant experiences and using similar language to the job description.")

        return suggestions

    @staticmethod
    def _failure(message):
        """Return the 4-tuple the UI expects when only a message is shown."""
        return message, "", "", ""

    @staticmethod
    def _read_upload(resume_file):
        """Return ``(raw_bytes, lowercase_filename)`` for an uploaded resume.

        Accepts raw bytes (no filename available), a filesystem path, or an
        object exposing ``read()`` and/or ``name``.
        """
        if isinstance(resume_file, (bytes, bytearray)):
            return bytes(resume_file), ''
        if isinstance(resume_file, str):
            with open(resume_file, 'rb') as handle:
                return handle.read(), resume_file.lower()

        filename = str(getattr(resume_file, 'name', '') or '')
        if hasattr(resume_file, 'read'):
            return resume_file.read(), filename.lower()
        if filename:
            with open(filename, 'rb') as handle:
                return handle.read(), filename.lower()
        raise TypeError(f"Unsupported upload type: {type(resume_file).__name__}")

    @classmethod
    def _detect_file_kind(cls, data, filename):
        """Return ``'pdf'``, ``'docx'`` or ``None`` from extension or content."""
        if filename.endswith('.pdf'):
            return 'pdf'
        if filename.endswith('.docx'):
            return 'docx'
        # No usable extension (e.g. raw bytes): fall back to the file header.
        if cls._PDF_MAGIC in data[:1024]:
            return 'pdf'
        if data.startswith(cls._ZIP_MAGIC):
            return 'docx'
        return None

    def process_files(self, resume_file, job_description):
        """Analyse an uploaded resume against a job description.

        Args:
            resume_file: Raw file bytes, a file path, or a file-like object
                holding a PDF or DOCX resume.
            job_description: The job description text.

        Returns:
            A 4-tuple of markdown strings ``(score, details, suggestions,
            keywords)``. On failure the first element carries the message and
            the remaining three are empty.
        """
        try:
            if resume_file is None:
                return self._failure("Please upload a resume file.")

            if not job_description or not job_description.strip():
                return self._failure("Please provide a job description.")

            # Determine file type and extract text
            file_content, filename = self._read_upload(resume_file)
            file_kind = self._detect_file_kind(file_content, filename)

            if file_kind == 'pdf':
                resume_text = self.extract_text_from_pdf(file_content)
            elif file_kind == 'docx':
                resume_text = self.extract_text_from_docx(file_content)
            else:
                return self._failure("Unsupported file format. Please upload PDF or DOCX files.")

            # The extractors report failures as a message with this prefix; a
            # prefix check avoids tripping on resumes that contain the phrase.
            if resume_text.startswith("Error reading"):
                return self._failure(resume_text)

            # Preprocess texts
            resume_clean = self.preprocess_text(resume_text)
            job_clean = self.preprocess_text(job_description)

            if len(resume_clean) < 50:
                return self._failure("Resume text is too short or couldn't be extracted properly.")

            # Calculate different scores
            semantic_score = self.get_semantic_similarity(resume_clean, job_clean)

            resume_keywords = self.extract_keywords(resume_clean)
            job_keywords = self.extract_keywords(job_clean)
            keyword_score = self.calculate_keyword_match(resume_keywords, job_keywords)

            # Section analysis splits the resume into sentences, so it needs
            # the raw text: the cleaned text has no punctuation left to split on.
            section_scores = self.analyze_sections(resume_text, job_description)

            # Weighted average: semantic 40%, keywords 30%, sections 30%.
            section_mean = float(np.mean(list(section_scores.values())))
            weighted = semantic_score * 0.4 + keyword_score * 0.3 + section_mean * 0.3
            overall_score = max(0, min(int(round(float(weighted))), 100))

            suggestions = self.generate_suggestions(
                resume_clean, job_clean, overall_score, section_scores, keyword_score
            )

            # Format results
            score_text = f"# 🎯 Resume Match Score: {overall_score}/100\n\n"

            details = "\n".join([
                "## 📊 Detailed Analysis",
                "",
                f"**Semantic Similarity**: {semantic_score:.1f}/100",
                f"**Keyword Match**: {keyword_score:.1f}/100",
                "",
                "### Section Scores:",
                f"- **Experience**: {section_scores.get('experience', 0):.1f}/100",
                f"- **Skills**: {section_scores.get('skills', 0):.1f}/100",
                f"- **Education**: {section_scores.get('education', 0):.1f}/100",
                f"- **Projects**: {section_scores.get('projects', 0):.1f}/100",
                "",
            ])

            suggestions_text = "## 💡 Improvement Suggestions\n\n" + "\n\n".join(suggestions)

            # Keywords comparison, kept in resume order so output is stable.
            top_resume = resume_keywords[:10]
            top_job = job_keywords[:10]
            top_job_set = set(top_job)
            common_keywords = [kw for kw in top_resume if kw in top_job_set]

            keywords_text = "\n".join([
                "## 🔍 Keyword Analysis",
                "",
                f"**Top Resume Keywords**: {', '.join(top_resume)}",
                "",
                f"**Top Job Keywords**: {', '.join(top_job)}",
                "",
                f"**Matching Keywords**: {', '.join(common_keywords) if common_keywords else 'None found'}",
                "",
            ])

            return score_text, details, suggestions_text, keywords_text

        except Exception as e:
            # UI boundary: report the failure to the user instead of crashing.
            return self._failure(f"An error occurred: {str(e)}")
264
+
265
+ # Initialize the matcher once at import time: the embedding model loads a single time and is reused by every UI request
266
+ matcher = ResumeJobMatcher()
267
+
268
+ # Create Gradio interface
269
def create_interface():
    """Build the Gradio Blocks UI for the resume/job matcher.

    The left column collects the resume upload and the job description; the
    right column shows four markdown panels filled by
    ``matcher.process_files`` when the analyze button is clicked.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """
    with gr.Blocks(title="Resume Job Matcher", theme=gr.themes.Soft()) as interface:
        gr.HTML("""
        <div style='text-align: center; padding: 20px;'>
        <h1>🎯 AI Resume Job Matcher</h1>
        <p>Upload your resume and paste the job description to get a compatibility score and improvement suggestions!</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<h3>📄 Upload Resume</h3>")
                # type="binary" hands the handler the file content rather
                # than a path.
                resume_file = gr.File(
                    label="Upload Resume (PDF/DOCX)",
                    file_types=[".pdf", ".docx"],
                    type="binary"
                )

                gr.HTML("<h3>📋 Job Description</h3>")
                job_description = gr.Textbox(
                    label="Paste Job Description",
                    placeholder="Paste the complete job description here...",
                    lines=10,
                    max_lines=15
                )

                analyze_btn = gr.Button("🔍 Analyze Match", variant="primary", size="lg")

            with gr.Column(scale=1):
                score_output = gr.Markdown(label="Match Score")
                details_output = gr.Markdown(label="Detailed Analysis")
                suggestions_output = gr.Markdown(label="Suggestions")
                keywords_output = gr.Markdown(label="Keywords Analysis")

        # Set up the event handler; output order matches the 4-tuple returned
        # by process_files.
        analyze_btn.click(
            fn=matcher.process_files,
            inputs=[resume_file, job_description],
            outputs=[score_output, details_output, suggestions_output, keywords_output]
        )

        gr.HTML("""
        <div style='text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd;'>
        <p><strong>How it works:</strong> This tool uses advanced AI to analyze semantic similarity between your resume and job description,
        performs keyword matching, and provides personalized suggestions for improvement.</p>
        <p><em>Supported formats: PDF, DOCX</em></p>
        </div>
        """)

    return interface
319
+
320
# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    # Listen on all interfaces, port 7860, and request a share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    app = create_interface()
    app.launch(**launch_options)