mkshari committed on
Commit
89e2ebc
·
verified ·
1 Parent(s): 2b73f22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -75
app.py CHANGED
@@ -7,32 +7,35 @@ import pandas as pd
7
  import re
8
 
9
  # Load models
 
10
  try:
11
  nlp = spacy.load("en_core_web_sm")
12
- except (ValueError, OSError):
13
- import os
14
- os.system("python -m spacy download en_core_web_sm")
15
- nlp = spacy.load("en_core_web_sm")
16
  except Exception as e:
17
- # Fallback for some HF environments where direct model loading is needed
18
  try:
19
  import en_core_web_sm
20
  nlp = en_core_web_sm.load()
21
  except:
22
- print(f"Error loading spaCy model: {e}")
23
- nlp = None
24
 
25
  model = SentenceTransformer('all-MiniLM-L6-v2')
 
26
 
27
- # Common Skill Dictionary (Simplified for the demo)
28
  SKILLS_DB = [
29
  "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
30
  "git", "machine learning", "nlp", "tensorflow", "pytorch", "java", "c++", "golang",
31
  "postgresql", "mongodb", "redis", "cloud computing", "devops", "rest api", "graphql",
32
- "scikit-learn", "pandas", "numpy", "django", "flask", "typescript", "angular", "vue"
 
 
 
33
  ]
34
 
35
  ROADMAP_DB = {
 
36
  "python": "Master Python: [Real Python](https://realpython.com/) | [Programming with Mosh](https://www.youtube.com/user/programmingwithmosh)",
37
  "react": "Build UI with React: [Official Docs](https://react.dev/) | [FreeCodeCamp React Course](https://www.freecodecamp.org/news/free-react-course-2024/)",
38
  "aws": "Cloud Mastery: [AWS Skill Builder](https://explore.skillbuilder.aws/) | [Cloud Guru](https://www.pluralsight.com/cloud-computing/aws)",
@@ -46,74 +49,101 @@ ROADMAP_DB = {
46
  "javascript": "JS Deep Dive: [MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript) | [JavaScript.info](https://javascript.info/)",
47
  "typescript": "Strict Typing: [TypeScript Handbook](https://www.typescriptlang.org/docs/handbook/intro.html)",
48
  "postgresql": "Advanced Data: [Postgres Tutorial](https://www.postgresqltutorial.com/)",
49
- "rest api": "API Design: [RESTful API Guide](https://restfulapi.net/)"
 
 
 
50
  }
51
 
52
  def extract_text_from_pdf(pdf_file):
53
- with pdfplumber.open(pdf_file) as pdf:
54
- text = ""
55
- for page in pdf.pages:
56
- text += page.extract_text() or ""
57
- return text
 
 
 
 
58
 
59
  def extract_text_from_docx(docx_file):
60
- doc = Document(docx_file)
61
- text = ""
62
- for para in doc.paragraphs:
63
- text += para.text + "\n"
64
- return text
 
 
 
 
65
 
66
  def clean_text(text):
67
- text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
68
  return text.lower().strip()
69
 
70
  def get_skills(text):
71
- text = clean_text(text)
72
  found_skills = set()
73
  for skill in SKILLS_DB:
74
- if re.search(r'\b' + re.escape(skill) + r'\b', text):
 
 
75
  found_skills.add(skill)
76
  return found_skills
77
 
78
  def analyze_resume(resume_file, jd_text):
 
79
  if resume_file is None or not jd_text.strip():
80
- return "Please upload a resume and provide a job description.", "", "", 0, None
81
 
82
  # Step 1: Extract text
83
- if resume_file.name.endswith('.pdf'):
84
- resume_text = extract_text_from_pdf(resume_file)
85
- elif resume_file.name.endswith('.docx'):
86
- resume_text = extract_text_from_docx(resume_file)
 
 
 
87
  else:
88
- return "Unsupported file format. Please upload PDF or DOCX.", "", "", 0, None
 
 
 
89
 
90
- # Step 2: NLP Analysis (Skills)
91
  resume_skills = get_skills(resume_text)
92
  jd_skills = get_skills(jd_text)
 
 
 
93
 
94
  present_skills = list(resume_skills.intersection(jd_skills))
95
  missing_skills = list(jd_skills - resume_skills)
96
 
97
- # Step 3: Similarity Score (Sentence Transformers)
98
  embeddings1 = model.encode(resume_text, convert_to_tensor=True)
99
  embeddings2 = model.encode(jd_text, convert_to_tensor=True)
100
  cosine_score = util.pytorch_cos_sim(embeddings1, embeddings2)
101
  match_percentage = round(cosine_score.item() * 100, 2)
 
 
 
 
102
 
103
- # Format output
104
- present_str = ", ".join([s.capitalize() for s in present_skills]) if present_skills else "None found."
105
- missing_str = ", ".join([s.capitalize() for s in missing_skills]) if missing_skills else "None! You are a great match."
106
 
 
107
  return f"{match_percentage}%", present_str, missing_str, match_percentage, missing_skills
108
 
109
  def get_roadmap(missing_skills):
110
  if not missing_skills:
111
- return "🎉 Great job! You have all the key skills mentioned. Keep up explicitly highlighting them in your experience section."
112
 
113
  roadmap_items = []
114
  for skill in missing_skills:
115
- resource = ROADMAP_DB.get(skill.lower(), f"Search for {skill} tutorials on YouTube or Coursera.")
116
- roadmap_items.append(f"### {skill.capitalize()}\n{resource}")
117
 
118
  return "\n\n".join(roadmap_items)
119
 
@@ -122,47 +152,56 @@ custom_css = """
122
  #logo-img {
123
  margin: auto;
124
  display: block;
 
125
  }
126
  .gradio-container {
127
- background-color: #f8f9fa;
 
128
  }
129
  .main-header {
130
  text-align: center;
131
- color: #003366; /* Navy Blue from Logo */
132
- margin-bottom: 20px;
133
  }
134
  .sub-header {
135
  text-align: center;
136
- color: #b8860b; /* Gold from Logo */
137
- font-weight: bold;
 
138
  }
139
  .sastra-text {
140
  text-align: center;
141
- font-size: 0.9em;
142
- color: #555;
143
- letter-spacing: 1px;
144
  }
145
  #analyze-btn {
146
- background: linear-gradient(90deg, #003366 0%, #004080 100%) !important;
 
147
  color: white !important;
148
- border: none;
149
- border-radius: 8px;
150
- padding: 10px 20px;
151
- font-weight: bold;
152
  }
153
  #roadmap-btn {
154
- background: linear-gradient(90deg, #b8860b 0%, #daa520 100%) !important;
 
155
  color: white !important;
156
- border: none;
 
 
 
 
157
  }
158
  """
159
 
160
- # Gradio Interface
161
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as demo:
162
- with gr.Row(variant="compact"):
163
  with gr.Column(scale=1):
164
- gr.Image("logo.png", show_label=False, height=120, container=False, elem_id="logo-img")
165
- with gr.Column(scale=4):
 
 
 
166
  gr.Markdown("# SETHU AI", elem_classes=["main-header"])
167
  gr.Markdown("### From Resume to Career Readiness", elem_classes=["sub-header"])
168
  gr.Markdown("SASTRA DEEMED UNIVERSITY", elem_classes=["sastra-text"])
@@ -170,27 +209,26 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as de
170
  gr.Markdown("---")
171
 
172
  with gr.Row():
173
- with gr.Column():
174
- gr.Markdown("### 📄 Input Details")
175
- resume_input = gr.File(label="Upload Resume (PDF or DOCX)", file_types=[".pdf", ".docx"])
176
- jd_input = gr.Textbox(label="Job Description", placeholder="Paste the job requirements here...", lines=8)
177
- analyze_btn = gr.Button("Analyze Resume", variant="primary", elem_id="analyze-btn")
178
 
179
- with gr.Column():
180
- gr.Markdown("### 📊 Analysis Dashboard")
181
- match_score_output = gr.Label(label="Match Percentage")
182
 
183
  with gr.Tabs():
184
- with gr.TabItem("Skills Found"):
185
- present_skills_output = gr.Textbox(label="Available in Resume", interactive=False)
186
- with gr.TabItem("Gap Analysis"):
187
- missing_skills_output = gr.Textbox(label="Skills to Acquire", interactive=False)
188
 
189
  gr.Markdown("---")
190
- roadmap_btn = gr.Button("Get Guidance & Roadmap", interactive=True, elem_id="roadmap-btn")
191
- roadmap_output = gr.Markdown(visible=False)
192
 
193
- # State for hidden analysis results
194
  missing_skills_state = gr.State([])
195
 
196
  def on_analyze(resume, jd):
@@ -199,19 +237,18 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as de
199
  match_score_output: score_str,
200
  present_skills_output: present,
201
  missing_skills_output: missing,
202
- roadmap_btn: gr.update(interactive=True),
203
  missing_skills_state: missing_list,
204
  roadmap_output: gr.update(visible=False)
205
  }
206
 
207
  def on_roadmap(missing_list):
208
- roadmap_content = get_roadmap(missing_list)
209
- return gr.update(value=roadmap_content, visible=True)
210
 
211
  analyze_btn.click(
212
  on_analyze,
213
  inputs=[resume_input, jd_input],
214
- outputs=[match_score_output, present_skills_output, missing_skills_output, roadmap_btn, missing_skills_state, roadmap_output]
215
  )
216
 
217
  roadmap_btn.click(
@@ -222,3 +259,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as de
222
 
223
  if __name__ == "__main__":
224
  demo.launch()
 
 
 
 
7
  import re
8
 
9
  # Load models
10
+ print("Loading models...")
11
  try:
12
  nlp = spacy.load("en_core_web_sm")
13
+ print("spaCy model loaded successfully.")
 
 
 
14
  except Exception as e:
15
+ print(f"spaCy load error: {e}. Trying direct import...")
16
  try:
17
  import en_core_web_sm
18
  nlp = en_core_web_sm.load()
19
  except:
20
+ print("Model not found. Using fallback keyword matching only.")
21
+ nlp = None
22
 
23
  model = SentenceTransformer('all-MiniLM-L6-v2')
24
+ print("Sentence Transformer loaded.")
25
 
26
+ # Common Skill Dictionary (Expanded)
27
  SKILLS_DB = [
28
  "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
29
  "git", "machine learning", "nlp", "tensorflow", "pytorch", "java", "c++", "golang",
30
  "postgresql", "mongodb", "redis", "cloud computing", "devops", "rest api", "graphql",
31
+ "scikit-learn", "pandas", "numpy", "django", "flask", "typescript", "angular", "vue",
32
+ "html", "css", "node.js", "express", "azure", "gcp", "linux", "bash", "jenkins",
33
+ "terraform", "ansible", "prompt engineering", "openai", "llm", "bert", "transformer",
34
+ "tableau", "powerbi", "excel", "dynamic programming", "data structures", "algorithms"
35
  ]
36
 
37
  ROADMAP_DB = {
38
+ # (previous content kept, adding more)
39
  "python": "Master Python: [Real Python](https://realpython.com/) | [Programming with Mosh](https://www.youtube.com/user/programmingwithmosh)",
40
  "react": "Build UI with React: [Official Docs](https://react.dev/) | [FreeCodeCamp React Course](https://www.freecodecamp.org/news/free-react-course-2024/)",
41
  "aws": "Cloud Mastery: [AWS Skill Builder](https://explore.skillbuilder.aws/) | [Cloud Guru](https://www.pluralsight.com/cloud-computing/aws)",
 
49
  "javascript": "JS Deep Dive: [MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript) | [JavaScript.info](https://javascript.info/)",
50
  "typescript": "Strict Typing: [TypeScript Handbook](https://www.typescriptlang.org/docs/handbook/intro.html)",
51
  "postgresql": "Advanced Data: [Postgres Tutorial](https://www.postgresqltutorial.com/)",
52
+ "rest api": "API Design: [RESTful API Guide](https://restfulapi.net/)",
53
+ "html": "Web Basics: [W3Schools HTML](https://www.w3schools.com/html/)",
54
+ "css": "Styling: [CSS-Tricks](https://css-tricks.com/)",
55
+ "node.js": "Backend JS: [Node.js Guide](https://nodejs.dev/en/learn/)"
56
  }
57
 
58
  def extract_text_from_pdf(pdf_file):
59
+ try:
60
+ with pdfplumber.open(pdf_file) as pdf:
61
+ text = ""
62
+ for page in pdf.pages:
63
+ text += page.extract_text() or ""
64
+ return text
65
+ except Exception as e:
66
+ print(f"PDF Extraction Error: {e}")
67
+ return ""
68
 
69
  def extract_text_from_docx(docx_file):
70
+ try:
71
+ doc = Document(docx_file)
72
+ text = ""
73
+ for para in doc.paragraphs:
74
+ text += para.text + "\n"
75
+ return text
76
+ except Exception as e:
77
+ print(f"DOCX Extraction Error: {e}")
78
+ return ""
79
 
80
  def clean_text(text):
81
+ text = re.sub(r'[^a-zA-Z0-9\s#\.\+]', ' ', text) # Preserve # for C#, . for Node.js, + for C++
82
  return text.lower().strip()
83
 
84
  def get_skills(text):
85
+ clean_t = clean_text(text)
86
  found_skills = set()
87
  for skill in SKILLS_DB:
88
+ # Improved regex to handle skills with dots or pluses
89
+ pattern = r'\b' + re.escape(skill) + r'\b'
90
+ if re.search(pattern, clean_t):
91
  found_skills.add(skill)
92
  return found_skills
93
 
94
  def analyze_resume(resume_file, jd_text):
95
+ print("Analysis started...")
96
  if resume_file is None or not jd_text.strip():
97
+ return "Please upload a resume and provide a job description.", "", "", 0, []
98
 
99
  # Step 1: Extract text
100
+ resume_path = resume_file.name
101
+ print(f"Extracting text from: {resume_path}")
102
+
103
+ if resume_path.lower().endswith('.pdf'):
104
+ resume_text = extract_text_from_pdf(resume_path)
105
+ elif resume_path.lower().endswith('.docx'):
106
+ resume_text = extract_text_from_docx(resume_path)
107
  else:
108
+ return "Unsupported file format. Please upload PDF or DOCX.", "", "", 0, []
109
+
110
+ if not resume_text.strip():
111
+ return "Could not extract text from the file. Please check the file content.", "", "", 0, []
112
 
113
+ # Step 2: Skill Extraction
114
  resume_skills = get_skills(resume_text)
115
  jd_skills = get_skills(jd_text)
116
+
117
+ print(f"Resume Skills: {resume_skills}")
118
+ print(f"JD Skills: {jd_skills}")
119
 
120
  present_skills = list(resume_skills.intersection(jd_skills))
121
  missing_skills = list(jd_skills - resume_skills)
122
 
123
+ # Step 3: Similarity Score
124
  embeddings1 = model.encode(resume_text, convert_to_tensor=True)
125
  embeddings2 = model.encode(jd_text, convert_to_tensor=True)
126
  cosine_score = util.pytorch_cos_sim(embeddings1, embeddings2)
127
  match_percentage = round(cosine_score.item() * 100, 2)
128
+
129
+ # Adjust score if no skills overlap but similarity is high
130
+ if not present_skills and match_percentage > 50:
131
+ match_percentage -= 20 # Penalize for lack of keyword match
132
 
133
+ present_str = ", ".join([s.capitalize() for s in present_skills]) if present_skills else "No matching skills found."
134
+ missing_str = ", ".join([s.capitalize() for s in missing_skills]) if missing_skills else "All JD skills found in resume!"
 
135
 
136
+ print(f"Match: {match_percentage}%")
137
  return f"{match_percentage}%", present_str, missing_str, match_percentage, missing_skills
138
 
139
  def get_roadmap(missing_skills):
140
  if not missing_skills:
141
+ return "### 🎉 Perfect Match!\nYou already possess all the key skills mentioned in the job description. Tip: ensure you've highlighted these clearly in your experience sections."
142
 
143
  roadmap_items = []
144
  for skill in missing_skills:
145
+ resource = ROADMAP_DB.get(skill.lower(), f"Suggested resource for {skill}: Check out specialized courses on Coursera, Udemy, or YouTube.")
146
+ roadmap_items.append(f"#### 📖 {skill.capitalize()}\n{resource}")
147
 
148
  return "\n\n".join(roadmap_items)
149
 
 
152
  #logo-img {
153
  margin: auto;
154
  display: block;
155
+ max-width: 150px;
156
  }
157
  .gradio-container {
158
+ background-color: #f0f2f5;
159
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
160
  }
161
  .main-header {
162
  text-align: center;
163
+ color: #003366;
164
+ margin-bottom: 5px;
165
  }
166
  .sub-header {
167
  text-align: center;
168
+ color: #b8860b;
169
+ margin-top: 0;
170
+ font-style: italic;
171
  }
172
  .sastra-text {
173
  text-align: center;
174
+ font-size: 1em;
175
+ color: #444;
176
+ font-weight: bold;
177
  }
178
  #analyze-btn {
179
+ background: linear-gradient(135deg, #003366 0%, #00509d 100%) !important;
180
+ border: none !important;
181
  color: white !important;
182
+ height: 50px;
183
+ font-size: 1.1em;
 
 
184
  }
185
  #roadmap-btn {
186
+ background: linear-gradient(135deg, #b8860b 0%, #daa520 100%) !important;
187
+ border: none !important;
188
  color: white !important;
189
+ height: 45px;
190
+ }
191
+ .output-label {
192
+ text-align: center;
193
+ font-size: 2em;
194
  }
195
  """
196
 
 
197
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as demo:
198
+ with gr.Row():
199
  with gr.Column(scale=1):
200
+ try:
201
+ gr.Image("logo.png", show_label=False, height=130, container=False, elem_id="logo-img")
202
+ except:
203
+ gr.Markdown("### [LOGO MISSING]")
204
+ with gr.Column(scale=3):
205
  gr.Markdown("# SETHU AI", elem_classes=["main-header"])
206
  gr.Markdown("### From Resume to Career Readiness", elem_classes=["sub-header"])
207
  gr.Markdown("SASTRA DEEMED UNIVERSITY", elem_classes=["sastra-text"])
 
209
  gr.Markdown("---")
210
 
211
  with gr.Row():
212
+ with gr.Column(scale=1):
213
+ gr.Markdown("### 📥 Step 1: Upload & Paste")
214
+ resume_input = gr.File(label="Upload Resume (PDF/DOCX)", file_types=[".pdf", ".docx"], type="filepath")
215
+ jd_input = gr.Textbox(label="Job Description", placeholder="Paste the job requirements here...", lines=10)
216
+ analyze_btn = gr.Button("Analyze Match", variant="primary", elem_id="analyze-btn")
217
 
218
+ with gr.Column(scale=1):
219
+ gr.Markdown("### 📊 Step 2: Evaluation")
220
+ match_score_output = gr.Label(label="Match Quality", elem_classes=["output-label"])
221
 
222
  with gr.Tabs():
223
+ with gr.TabItem("✅ Skills Matched"):
224
+ present_skills_output = gr.Textbox(label="", interactive=False, lines=4)
225
+ with gr.TabItem("❌ Missing Skills"):
226
+ missing_skills_output = gr.Textbox(label="", interactive=False, lines=4)
227
 
228
  gr.Markdown("---")
229
+ roadmap_btn = gr.Button("🚀 Generate Learning Roadmap", interactive=True, elem_id="roadmap-btn")
 
230
 
231
+ roadmap_output = gr.Markdown(visible=False)
232
  missing_skills_state = gr.State([])
233
 
234
  def on_analyze(resume, jd):
 
237
  match_score_output: score_str,
238
  present_skills_output: present,
239
  missing_skills_output: missing,
 
240
  missing_skills_state: missing_list,
241
  roadmap_output: gr.update(visible=False)
242
  }
243
 
244
  def on_roadmap(missing_list):
245
+ content = get_roadmap(missing_list)
246
+ return gr.update(value=content, visible=True)
247
 
248
  analyze_btn.click(
249
  on_analyze,
250
  inputs=[resume_input, jd_input],
251
+ outputs=[match_score_output, present_skills_output, missing_skills_output, missing_skills_state, roadmap_output]
252
  )
253
 
254
  roadmap_btn.click(
 
259
 
260
  if __name__ == "__main__":
261
  demo.launch()
262
+
263
+ if __name__ == "__main__":
264
+ demo.launch()