neelnsoni13 committed on
Commit
2ee0a0e
·
verified ·
1 Parent(s): 6459b94

Upload ai_hiring.py

Browse files
Files changed (1) hide show
  1. ai_hiring.py +490 -0
ai_hiring.py ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import openai
4
+ import streamlit as st
5
+ import fitz # PyMuPDF
6
+ import docx
7
+ import pandas as pd
8
+ import numpy as np
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
+ import json
13
+ from typing import Dict, List, Tuple
14
+ import tempfile
15
+ from tenacity import retry, stop_after_attempt, wait_exponential
16
+
17
# Configure Streamlit page: tab title and icon, wide layout, sidebar expanded.
_PAGE_SETTINGS = {
    "page_title": "ResumeMatch Pro",
    "page_icon": "🎯",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
24
+
25
# Modern UI Theme with Dark Mode Support
def set_theme():
    """Return the colour palette selected by the sidebar "Dark Mode" checkbox.

    Renders a checkbox (unchecked by default) in the Streamlit sidebar and
    returns a dict of hex colour strings keyed by role: ``bg``, ``text``,
    ``primary``, ``secondary``, ``accent``, ``success``, ``warning`` and
    ``error``. The dark palette is returned when the box is ticked, the
    light palette otherwise.
    """
    light_palette = {
        "bg": "#ffffff",
        "text": "#1E293B",
        "primary": "#3B82F6",
        "secondary": "#64748B",
        "accent": "#2563EB",
        "success": "#10B981",
        "warning": "#F59E0B",
        "error": "#EF4444",
    }
    dark_palette = {
        "bg": "#0F172A",
        "text": "#E2E8F0",
        "primary": "#60A5FA",
        "secondary": "#94A3B8",
        "accent": "#3B82F6",
        "success": "#34D399",
        "warning": "#FBBF24",
        "error": "#F87171",
    }

    if st.sidebar.checkbox("Dark Mode", value=False):
        return dark_palette
    return light_palette
56
+
57
# Apply theme colors
colors = set_theme()

# Enhanced CSS with Modern Styling.
# The stylesheet is an f-string: doubled braces are literal CSS braces and the
# single-brace fields interpolate values from the selected palette.
# The header rule targets both `.header` and `.header-container`, because the
# page header markup rendered by main() uses the `header-container` class.
st.markdown(f"""
<style>
/* Base Styles */
.main {{
    background-color: {colors['bg']};
    color: {colors['text']};
    font-family: 'Inter', sans-serif;
}}

/* Typography */
h1, h2, h3 {{
    color: {colors['primary']};
    font-weight: 600;
}}

/* Components */
.stTextInput, .stTextArea, .stSelectbox {{
    background-color: {colors['bg']};
    border: 1px solid {colors['secondary']};
    border-radius: 8px;
    padding: 12px;
    color: {colors['text']};
}}

.stButton>button {{
    background: linear-gradient(45deg, {colors['primary']}, {colors['accent']});
    color: white;
    border: none;
    border-radius: 8px;
    padding: 12px 24px;
    font-weight: 500;
    transition: all 0.3s ease;
    width: 100%;
}}

.stButton>button:hover {{
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
}}

/* Cards */
.card {{
    background-color: {colors['bg']};
    border: 1px solid {colors['secondary']};
    border-radius: 12px;
    padding: 1.5rem;
    margin-bottom: 1rem;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}}

/* Header */
.header, .header-container {{
    background: linear-gradient(135deg, {colors['primary']}, {colors['accent']});
    padding: 2rem;
    border-radius: 16px;
    margin-bottom: 2rem;
    color: white;
    text-align: center;
}}

/* Metrics */
.metric-card {{
    background-color: {colors['bg']};
    border: 1px solid {colors['secondary']};
    border-radius: 12px;
    padding: 1rem;
    text-align: center;
}}

.metric-value {{
    font-size: 2rem;
    font-weight: 600;
    color: {colors['primary']};
}}

.metric-label {{
    color: {colors['secondary']};
    font-size: 0.875rem;
}}

/* Progress Bars */
.stProgress > div > div > div {{
    background-color: {colors['primary']};
}}

/* Tables */
.dataframe {{
    border: 1px solid {colors['secondary']};
    border-radius: 8px;
    overflow: hidden;
}}

.dataframe th {{
    background-color: {colors['primary']};
    color: white;
    padding: 12px;
}}

.dataframe td {{
    padding: 12px;
    border-bottom: 1px solid {colors['secondary']};
}}

/* Alerts */
.stAlert {{
    border-radius: 8px;
    border: none;
}}
</style>
""", unsafe_allow_html=True)
171
+
172
# Set OpenAI API key.
# Prefer the OPENAI_API_KEY environment variable so a real secret never has to
# be written into this file; the original placeholder is kept as the fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR-API-KEY")
174
+
175
# Retry logic for OpenAI API calls: up to 3 attempts, exponential back-off
# with waits bounded between 4 and 10 seconds. `reraise=True` makes the final
# failure surface as the underlying exception instead of tenacity's RetryError,
# so the error messages shown by callers stay meaningful.
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,
)
def get_embeddings(text: str) -> List[float]:
    """Get embeddings for a given text using OpenAI's API.

    Args:
        text: The text to embed.

    Returns:
        The embedding of ``text`` taken from the first item of the API
        response, produced by the ``text-embedding-ada-002`` model.

    Raises:
        Exception: Whatever the last failed API attempt raised, once all
            retry attempts are exhausted.
    """
    response = openai.Embedding.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return response['data'][0]['embedding']
184
+
185
def extract_text_from_pdf(file_content: bytes) -> str:
    """Extract text from PDF file content.

    The document is opened directly from the in-memory bytes, so no temporary
    file has to be created, reopened by name while still open, or cleaned up.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        The plain text of all pages joined with single spaces, or an empty
        string if extraction fails (the error is reported via ``st.error``).
    """
    try:
        # The context manager closes the document even if a page fails to parse.
        with fitz.open(stream=file_content, filetype="pdf") as doc:
            return " ".join(page.get_text("text") for page in doc)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {str(e)}")
        return ""
202
+
203
def extract_text_from_docx(file_content: bytes) -> str:
    """Extract text from DOCX file content.

    The document is parsed from an in-memory buffer, so no temporary file has
    to be created, reopened by name while still open, or cleaned up.

    Args:
        file_content: Raw bytes of the DOCX file.

    Returns:
        The text of all paragraphs joined with newlines, or an empty string
        if extraction fails (the error is reported via ``st.error``).
    """
    import io  # local import: only this function needs it

    try:
        doc = docx.Document(io.BytesIO(file_content))
        return "\n".join(para.text for para in doc.paragraphs)
    except Exception as e:
        st.error(f"Error extracting text from DOCX: {str(e)}")
        return ""
218
+
219
def extract_text(file) -> str:
    """Return the text of an uploaded file, dispatching on its MIME type.

    ``file`` must expose ``getvalue()`` (the raw bytes) and ``type`` (the MIME
    type). PDF content goes to ``extract_text_from_pdf``; Word MIME types go
    to ``extract_text_from_docx``. Any other type, or any exception raised
    while handling the file, is reported via ``st.error`` and yields ``""``.
    """
    word_mime_types = [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/msword",
    ]
    try:
        payload = file.getvalue()
        mime_type = file.type

        if mime_type == "application/pdf":
            return extract_text_from_pdf(payload)
        if mime_type in word_mime_types:
            return extract_text_from_docx(payload)

        # Guard-clause fall-through: nothing above recognised the type.
        st.error(f"Unsupported file type: {mime_type}")
        return ""
    except Exception as exc:
        st.error(f"Error processing file: {str(exc)}")
        return ""
235
+
236
def preprocess_text(text: str) -> str:
    """Preprocess text by removing noise and normalizing.

    Strips every character that is neither a word character nor whitespace,
    collapses each run of whitespace to a single space, lower-cases the
    result and trims leading/trailing whitespace.

    Args:
        text: The raw text to normalise.

    Returns:
        The normalised text; an empty string if nothing but punctuation or
        whitespace was supplied.
    """
    import re

    # Remove special characters first: deleting a free-standing symbol
    # (e.g. the dash in "a - b") leaves two adjacent spaces behind, so the
    # whitespace collapse has to run afterwards to clean them up.
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text)  # Collapse runs of whitespace
    return text.lower().strip()
242
+
243
def calculate_semantic_similarity(text1: str, text2: str) -> float:
    """Calculate semantic similarity between two texts using embeddings.

    Both texts are normalised with ``preprocess_text``, embedded with
    ``get_embeddings`` and compared by cosine similarity.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The cosine similarity of the two embeddings as a plain ``float``, or
        ``0.0`` when either text is empty after preprocessing.
    """
    cleaned1 = preprocess_text(text1)
    cleaned2 = preprocess_text(text2)

    # An empty side carries no information to compare. Return "no similarity"
    # without calling the embeddings API on an empty string, so one empty
    # category cannot fail (and retry) the whole comparison.
    if not cleaned1 or not cleaned2:
        return 0.0

    embedding1 = get_embeddings(cleaned1)
    embedding2 = get_embeddings(cleaned2)
    similarity = cosine_similarity([embedding1], [embedding2])[0][0]
    return float(similarity)
249
+
250
def analyze_resume_details(text: str, job_desc: str) -> Dict:
    """Analyze resume text and provide actionable feedback.

    Sends the resume text and job description to the ``gpt-4`` chat model and
    asks for a JSON object with the keys ``skills``, ``experience``,
    ``education``, ``domain``, ``certifications`` and ``feedback``.

    Args:
        text: The text to analyse (normally the resume text).
        job_desc: The job description the text is compared against.

    Returns:
        The parsed JSON object from the model's reply. If the request fails
        or the reply cannot be parsed, the error is reported via ``st.error``
        and a dict with the same six keys is returned, with empty strings for
        the categories and a fixed message under ``feedback``.
    """
    try:
        prompt = f"""Please analyze the following resume text and provide insights in the following categories:
- Skills
- Experience
- Education
- Domain expertise
- Certifications

Additionally, provide actionable feedback on how the candidate can improve their resume to better match the following job description:

Job Description: {job_desc}

Resume Text: {text}

Provide the analysis in valid JSON format with these exact keys: skills, experience, education, domain, certifications, feedback"""

        # NOTE(review): temperature=0.9 makes repeated analyses of the same
        # text differ noticeably; confirm this is intended for a scoring flow.
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a resume analysis expert. Respond only with valid JSON."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.9
        )

        content = response['choices'][0]['message']['content'].strip()

        # Always parse only the outermost {...} span: the model may wrap the
        # JSON in prose or code fences before *or after* the object.
        start = content.find('{')
        end = content.rfind('}')
        if start == -1 or end < start:
            raise ValueError("model response did not contain a JSON object")
        return json.loads(content[start:end + 1])
    except Exception as e:
        st.error(f"Error in resume analysis: {str(e)}")
        return {
            "skills": "",
            "experience": "",
            "education": "",
            "domain": "",
            "certifications": "",
            "feedback": "Unable to generate feedback due to an error."
        }
292
+
293
def calculate_match_score(resume_text: str, job_desc: str) -> Tuple[Dict[str, float], float, str]:
    """Calculate detailed match scores between resume and job description.

    Both the resume and the job description are run through
    ``analyze_resume_details``; for every weighted category the two analyses
    are compared with ``calculate_semantic_similarity``.

    Args:
        resume_text: Extracted text of the resume.
        job_desc: The job description text.

    Returns:
        A ``(scores, overall, feedback)`` tuple where

        * ``scores`` maps each category to its *unweighted* similarity, so
          callers can present each one as a percentage on its own;
        * ``overall`` is the weighted sum of those similarities (the weights
          add up to 1.0);
        * ``feedback`` is the ``feedback`` entry of the resume analysis
          (normally a string; it is passed through as returned).

        On any error the failure is reported via ``st.error`` and all scores
        are ``0.0`` with a fixed error message as feedback.
    """
    weights = {
        'skills': 0.35,
        'experience': 0.25,
        'education': 0.15,
        'domain': 0.15,
        'certifications': 0.10
    }

    try:
        resume_analysis = analyze_resume_details(resume_text, job_desc)
        # NOTE(review): this re-analyses the same job description on every
        # call; consider computing it once per job description upstream.
        job_analysis = analyze_resume_details(job_desc, job_desc)

        # Keep per-category scores unweighted: callers label them
        # "<Category> Match" and plot them on a 0-100 scale, which a value
        # pre-multiplied by its weight (at most 35) could never fill.
        scores = {
            category: float(calculate_semantic_similarity(
                str(resume_analysis.get(category, "")),
                str(job_analysis.get(category, ""))
            ))
            for category in weights
        }
        overall = sum(scores[category] * weight for category, weight in weights.items())

        return scores, overall, resume_analysis.get("feedback", "")
    except Exception as e:
        st.error(f"Error in match calculation: {str(e)}")
        return {category: 0.0 for category in weights}, 0.0, "Unable to generate feedback due to an error."
319
+
320
def render_analysis_results(results_df: pd.DataFrame, detailed_results: List[Dict]):
    """Render analysis results with visualizations and detailed match table.

    Args:
        results_df: One row per resume with the columns ``Resume Name``,
            ``Overall Match`` and ``<Category> Match`` for Skills, Experience,
            Education, Domain and Certifications (percent values).
        detailed_results: One dict per resume with the keys ``Resume Name``,
            ``skills``, ``experience``, ``education``, ``certifications``,
            ``domain``, ``overall_match`` and ``feedback``.

    Returns:
        None. Everything is written to the Streamlit page; an empty
        ``results_df`` only produces a warning.
    """
    if results_df.empty:
        st.warning("No results to display")
        return

    # Summary metrics
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Resumes", len(results_df))
    with col2:
        st.metric("Average Match Score", f"{results_df['Overall Match'].mean():.2f}%")
    with col3:
        st.metric("Top Match Score", f"{results_df['Overall Match'].max():.2f}%")

    # Results table with custom formatting: every score column as "12.3%"
    percent_columns = [
        'Overall Match',
        'Skills Match',
        'Experience Match',
        'Education Match',
        'Domain Match',
        'Certifications Match',
    ]
    st.dataframe(
        results_df.style.format({column: '{:.1f}%' for column in percent_columns}),
        use_container_width=True
    )

    # Dynamic detailed match table: one row per category, one column per resume
    st.markdown("### 🎯 Detailed Match Breakdown")
    detail_keys = ['skills', 'experience', 'education', 'certifications', 'domain']
    detailed_table_data = {
        "Category": ["Skills", "Experience", "Education", "Certifications", "Domain", "Overall Match"]
    }

    for result in detailed_results:
        column = [
            f"✅ {result[key]}" if result[key] else "❌ No match"
            for key in detail_keys
        ]
        column.append(f"✅ {result['overall_match']}")
        detailed_table_data[result['Resume Name']] = column

    detailed_df = pd.DataFrame(detailed_table_data)
    st.dataframe(detailed_df, use_container_width=True)

    # Person-wise feedback
    st.markdown("### 📝 Person-Wise Feedback")
    for result in detailed_results:
        with st.expander(f"Feedback for {result['Resume Name']}"):
            st.write(result['feedback'])

    # Visualizations
    col1, col2 = st.columns(2)

    with col1:
        fig = px.bar(
            results_df,
            x='Resume Name',
            y='Overall Match',
            title='Match Scores Comparison',
            color='Overall Match',
            color_continuous_scale='Blues'
        )
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        # Pick the best-scoring row explicitly instead of relying on the
        # caller having sorted the frame in descending order.
        top_position = int(results_df['Overall Match'].to_numpy().argmax())
        top_candidate = results_df.iloc[top_position]
        categories = ['Skills', 'Experience', 'Education', 'Domain', 'Certifications']
        values = [top_candidate[f'{cat} Match'] for cat in categories]

        fig = go.Figure()
        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=categories,
            fill='toself',
            name='Top Candidate'
        ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 100]
                )),
            showlegend=False,
            title='Top Candidate Analysis'
        )
        st.plotly_chart(fig, use_container_width=True)
412
+
413
def main():
    """Run the Streamlit page: collect inputs, score resumes, show results.

    Renders the header, a job-description text area and a multi-file uploader
    (PDF/DOCX). Once both a job description and at least one file are given,
    each file's text is extracted and scored against the job description. The
    results are then rendered, sorted by overall match, and offered as a CSV
    download. A failure on one file is reported and does not stop the others.
    """
    # Header
    st.markdown("""
<div class="header-container">
<h1>🚀 Smart Resume Analyzer & Matcher</h1>
<p>Empower Your Hiring with AI Insights</p>
</div>
""", unsafe_allow_html=True)

    # Job Description Input with new styling
    st.markdown("### 📋 Job Description")
    job_description = st.text_area(
        "Enter the job description",
        height=200,
        help="Paste the complete job description here for accurate matching.",
        key="job_desc_input"
    )

    # Resume Upload
    st.markdown("### 📤 Resume Upload")
    uploaded_files = st.file_uploader(
        "Upload resumes (PDF/DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True,
        help="You can upload multiple resumes at once"
    )

    # Nothing to analyse until both inputs are present.
    if not (uploaded_files and job_description):
        return

    results = []
    detailed_results = []

    with st.spinner('Analyzing resumes... Please wait.'):
        for file in uploaded_files:
            try:
                text = extract_text(file)
                if not text:
                    # extract_text has already reported the problem.
                    continue

                scores, overall_score, feedback = calculate_match_score(text, job_description)
                results.append({
                    'Resume Name': file.name,
                    'Overall Match': overall_score * 100,
                    **{f'{k.title()} Match': v * 100 for k, v in scores.items()},
                    'feedback': feedback
                })

                # Detailed analysis for dynamic table.
                # NOTE(review): calculate_match_score already analysed this
                # resume; this second model call doubles cost and may not
                # agree with the analysis the scores were based on.
                resume_analysis = analyze_resume_details(text, job_description)
                # `or` also covers a present-but-empty domain value.
                domain_label = resume_analysis.get('domain') or 'General'
                detailed_results.append({
                    'Resume Name': file.name,
                    'skills': resume_analysis.get('skills', ''),
                    'experience': resume_analysis.get('experience', ''),
                    'education': resume_analysis.get('education', ''),
                    'certifications': resume_analysis.get('certifications', ''),
                    'domain': resume_analysis.get('domain', ''),
                    # Report the measured score rather than labelling every
                    # resume a "Strong Match" regardless of how it scored.
                    'overall_match': f"{overall_score * 100:.1f}% match for {domain_label} roles",
                    'feedback': resume_analysis.get('feedback', '')
                })
            except Exception as e:
                st.error(f"Error processing {file.name}: {str(e)}")

    if not results:
        st.warning("No valid results were generated from the analysis.")
        return

    results_df = pd.DataFrame(results).sort_values('Overall Match', ascending=False)
    render_analysis_results(results_df, detailed_results)

    # Download results
    csv = results_df.to_csv(index=False)
    st.download_button(
        "Download Analysis Report",
        csv,
        "resume_analysis_report.csv",
        "text/csv",
        key='download-csv'
    )


if __name__ == "__main__":
    main()