riteshkokam committed on
Commit
92bb6b1
·
verified ·
1 Parent(s): 86190d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -20
app.py CHANGED
@@ -11,6 +11,7 @@ from google import genai
11
  from google.genai.types import GenerateContentConfig, ThinkingConfig
12
  from datetime import datetime
13
  import math
 
14
 
15
  # Initialize components
16
  kw_extractor = yake.KeywordExtractor(n=2, top=30)
@@ -23,6 +24,22 @@ You are a job-matching assistant. Given a resume and job listings,
23
  rank and explain why each job is a good fit.
24
  """
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # 1️⃣ Extract text from resume
27
  def extract_text(file):
28
  ext = file.name.lower().split('.')[-1]
@@ -34,7 +51,7 @@ def extract_text(file):
34
 
35
  # 2️⃣ Extract keywords using YAKE
36
  def extract_keywords(text):
37
- # Remove the first line (often the candidates name/header)
38
  parts = text.split("\n", 1)
39
  body = parts[1] if len(parts) > 1 else text
40
 
@@ -78,7 +95,56 @@ def fetch_remoteok(keywords):
78
  return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
79
  return []
80
 
81
- # 4️⃣ Rank jobs by semantic similarity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def rank_jobs(resume_text, jobs):
83
  if not jobs:
84
  return []
@@ -87,7 +153,7 @@ def rank_jobs(resume_text, jobs):
87
  sims = cosine_similarity(emb_r, emb_j)[0]
88
  return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
89
 
90
- # 5️⃣ Gemini refinement (optional)
91
  def refine_with_ai(ranked, resume_text):
92
  lines = []
93
  for job, _ in ranked:
@@ -116,7 +182,7 @@ def format_posted(job):
116
  return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
117
  return str(raw)[:10]
118
 
119
- # 6️⃣ Main pipeline
120
  def find_jobs(file, added_kw, use_ai):
121
  resume = extract_text(file) or ""
122
  base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
@@ -126,27 +192,33 @@ def find_jobs(file, added_kw, use_ai):
126
  print("Rank_jobs", ranked)
127
 
128
  table = []
129
- for job, score in ranked:
130
  role = job.get("title") or job.get("position", "")
131
  company = job.get("company") or job.get("company_name", "")
132
  location = job.get("location", "N/A")
133
- # Normalize date (as we did before)
134
  posted = format_posted(job)
135
  apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
 
 
 
 
 
136
  # Make sure none of these are dicts/lists
137
  table.append({
138
- "Role": str(role),
139
- "Company": str(company),
140
- "Location": str(location),
141
- "Posted": str(posted),
142
- "Score": f"{score*100:.1f}%",
143
- "Apply": str(apply_url)
 
 
144
  })
145
 
146
  explanation = refine_with_ai(ranked, resume) if use_ai else ""
147
  return table, explanation
148
 
149
- # 7️⃣ Jobs in Markdown
150
  def jobs_to_markdown(table, page, per_page=PER_PAGE):
151
  total = len(table)
152
  pages = max(1, math.ceil(total / per_page))
@@ -155,13 +227,15 @@ def jobs_to_markdown(table, page, per_page=PER_PAGE):
155
  slice_ = table[start:end]
156
 
157
  md = f"**Showing jobs {start+1}–{min(end,total)} of {total} (Page {page}/{pages})**\n\n"
158
- md += "| Role | Company | Location | Posted | Score | Apply |\n"
159
- md += "| ---- | ------- | -------- | ------ | ----- | ----- |\n"
160
  for row in slice_:
161
  link = f"[Apply]({row['Apply']})" if row['Apply'] else ""
 
 
162
  md += (
163
- f"| {row['Role']} | {row['Company']} | {row['Location']} "
164
- f"| {row['Posted']} | {row['Score']} | {link} |\n"
165
  )
166
  return md
167
 
@@ -175,20 +249,25 @@ def load_jobs_and_pages(resume, added_kw, use_ai):
175
 
176
  return full_table, explanation, expl_header, slider_update, first_page_md
177
 
178
- # 7️⃣ Gradio UI
179
  with gr.Blocks(theme=gr.themes.Base()) as demo:
180
  gr.Markdown("## 🌍 Global Job Finder")
 
 
181
  with gr.Row():
182
  resume = gr.File(label="Upload Resume (PDF/DOCX)")
183
  added = gr.Textbox(label="Add keywords (comma-separated)", placeholder="e.g. Python, ML")
184
 
185
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
186
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
 
 
 
187
  find_btn = gr.Button("Find Jobs")
188
 
189
  jobs_state = gr.State([]) # holds full table
190
  page_sel = gr.Slider(1, 1, step=1, value=1, label="Page")
191
- jobs_md = gr.Markdown() # shows the current pages markdown
192
  expl_md_h = gr.Markdown()
193
  expl_md = gr.Markdown() # shows AI explanation
194
 
@@ -208,4 +287,4 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
208
  )
209
 
210
  if __name__ == "__main__":
211
- demo.launch()
 
11
  from google.genai.types import GenerateContentConfig, ThinkingConfig
12
  from datetime import datetime
13
  import math
14
+ import json
15
 
16
  # Initialize components
17
  kw_extractor = yake.KeywordExtractor(n=2, top=30)
 
24
  rank and explain why each job is a good fit.
25
  """
26
 
27
# Prompt template for per-job analysis; filled in with str.format() using the
# keyword fields title, company and description.
# The literal braces of the JSON example are doubled ({{ and }}) so that
# str.format() emits them as plain "{" / "}" instead of treating the JSON
# skeleton as a replacement field (which raises KeyError).
ANALYSIS_PROMPT = """
Analyze the following job posting and provide:
1. Company Type: One of [Startup, Mid-size, Enterprise, Non-profit, Government, Consulting, Agency, Other]
2. Job Summary: A concise 1-2 sentence summary of the role

Job Title: {title}
Company: {company}
Job Description: {description}

Please respond in JSON format:
{{
"company_type": "one of the categories above",
"job_summary": "1-2 sentence summary"
}}
"""
42
+
43
  # 1️⃣ Extract text from resume
44
  def extract_text(file):
45
  ext = file.name.lower().split('.')[-1]
 
51
 
52
  # 2️⃣ Extract keywords using YAKE
53
  def extract_keywords(text):
54
+ # Remove the first line (often the candidate's name/header)
55
  parts = text.split("\n", 1)
56
  body = parts[1] if len(parts) > 1 else text
57
 
 
95
  return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
96
  return []
97
 
98
+ # 4️⃣ Analyze job with AI to get company type and summary
99
def analyze_job_with_ai(job):
    """Use AI to extract the company type and a short summary for one job.

    Args:
        job: Job record (dict-like). The title is read from ``title`` or
            ``position``, the company from ``company`` or ``company_name``,
            and the posting text from ``description``.

    Returns:
        A ``(company_type, job_summary)`` tuple. This is a best-effort
        enrichment step: on any failure (request error, unusable reply, ...)
        the error is printed and ``("Other", "No summary available")`` is
        returned instead of raising.
    """
    try:
        title = job.get("title") or job.get("position", "")
        company = job.get("company") or job.get("company_name", "")
        # A present-but-null description must not break len()/slicing below.
        description = str(job.get("description") or "")

        # Truncate the description if too long (to stay within token limits).
        if len(description) > 1000:
            description = description[:1000] + "..."

        prompt = ANALYSIS_PROMPT.format(
            title=title,
            company=company,
            description=description,
        )

        resp = genai_client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
        )
        return _parse_job_analysis(resp.text)

    except Exception as e:
        print(f"Error analyzing job with AI: {e}")
        return "Other", "No summary available"


def _parse_job_analysis(text):
    """Pull ``(company_type, job_summary)`` out of the model's raw reply text."""
    company_type, job_summary = "Other", "No summary available"
    text = (text or "").strip()
    if not text:
        return company_type, job_summary

    # The reply may wrap the JSON object in a Markdown code fence or in extra
    # prose, so parse only the outermost {...} span rather than the whole text.
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        try:
            parsed = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            # "or" also replaces JSON null / empty strings with the defaults.
            return (
                parsed.get("company_type") or company_type,
                parsed.get("job_summary") or job_summary,
            )

    # Fallback: scan for `key: value` lines. Split on the FIRST colon only so
    # a summary that itself contains ":" is kept whole, and drop the trailing
    # comma and quotes left over from JSON-like lines.
    for line in text.splitlines():
        key, sep, value = line.partition(":")
        if not sep:
            continue
        value = value.strip().rstrip(",").strip().strip('"')
        if not value:
            continue
        key = key.lower()
        if "company_type" in key:
            company_type = value
        elif "job_summary" in key:
            job_summary = value

    return company_type, job_summary
146
+
147
+ # 5️⃣ Rank jobs by semantic similarity
148
  def rank_jobs(resume_text, jobs):
149
  if not jobs:
150
  return []
 
153
  sims = cosine_similarity(emb_r, emb_j)[0]
154
  return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
155
 
156
+ # 6️⃣ Gemini refinement (optional)
157
  def refine_with_ai(ranked, resume_text):
158
  lines = []
159
  for job, _ in ranked:
 
182
  return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
183
  return str(raw)[:10]
184
 
185
+ # 7️⃣ Main pipeline
186
  def find_jobs(file, added_kw, use_ai):
187
  resume = extract_text(file) or ""
188
  base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
 
192
  print("Rank_jobs", ranked)
193
 
194
  table = []
195
+ for i, (job, score) in enumerate(ranked):
196
  role = job.get("title") or job.get("position", "")
197
  company = job.get("company") or job.get("company_name", "")
198
  location = job.get("location", "N/A")
 
199
  posted = format_posted(job)
200
  apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
201
+
202
+ # Get company type and summary using AI
203
+ print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
204
+ company_type, job_summary = analyze_job_with_ai(job)
205
+
206
  # Make sure none of these are dicts/lists
207
  table.append({
208
+ "Role": str(role),
209
+ "Company": str(company),
210
+ "Company Type": str(company_type),
211
+ "Location": str(location),
212
+ "Posted": str(posted),
213
+ "Score": f"{score*100:.1f}%",
214
+ "Summary": str(job_summary),
215
+ "Apply": str(apply_url)
216
  })
217
 
218
  explanation = refine_with_ai(ranked, resume) if use_ai else ""
219
  return table, explanation
220
 
221
+ # 8️⃣ Jobs in Markdown
222
  def jobs_to_markdown(table, page, per_page=PER_PAGE):
223
  total = len(table)
224
  pages = max(1, math.ceil(total / per_page))
 
227
  slice_ = table[start:end]
228
 
229
  md = f"**Showing jobs {start+1}–{min(end,total)} of {total} (Page {page}/{pages})**\n\n"
230
+ md += "| Role | Company | Type | Location | Posted | Score | Summary | Apply |\n"
231
+ md += "| ---- | ------- | ---- | -------- | ------ | ----- | ------- | ----- |\n"
232
  for row in slice_:
233
  link = f"[Apply]({row['Apply']})" if row['Apply'] else ""
234
+ # Truncate summary if too long for table display
235
+ summary = row['Summary'][:100] + "..." if len(row['Summary']) > 100 else row['Summary']
236
  md += (
237
+ f"| {row['Role']} | {row['Company']} | {row['Company Type']} | {row['Location']} "
238
+ f"| {row['Posted']} | {row['Score']} | {summary} | {link} |\n"
239
  )
240
  return md
241
 
 
249
 
250
  return full_table, explanation, expl_header, slider_update, first_page_md
251
 
252
+ # 9️⃣ Gradio UI
253
  with gr.Blocks(theme=gr.themes.Base()) as demo:
254
  gr.Markdown("## 🌍 Global Job Finder")
255
+ gr.Markdown("*Now with AI-powered company type classification and job summaries*")
256
+
257
  with gr.Row():
258
  resume = gr.File(label="Upload Resume (PDF/DOCX)")
259
  added = gr.Textbox(label="Add keywords (comma-separated)", placeholder="e.g. Python, ML")
260
 
261
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
262
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
263
+
264
+ gr.Markdown("**Note:** AI analysis for company type and job summaries is automatically enabled and may take a few moments per job.")
265
+
266
  find_btn = gr.Button("Find Jobs")
267
 
268
  jobs_state = gr.State([]) # holds full table
269
  page_sel = gr.Slider(1, 1, step=1, value=1, label="Page")
270
+ jobs_md = gr.Markdown() # shows the current page's markdown
271
  expl_md_h = gr.Markdown()
272
  expl_md = gr.Markdown() # shows AI explanation
273
 
 
287
  )
288
 
289
  if __name__ == "__main__":
290
+ demo.launch()