riteshkokam committed on
Commit
23ea7fd
·
verified ·
1 Parent(s): f2cc98a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -24
app.py CHANGED
@@ -10,9 +10,10 @@ from sklearn.metrics.pairwise import cosine_similarity
10
  from google import genai
11
  from google.genai.types import GenerateContentConfig, ThinkingConfig
12
  from datetime import datetime
 
13
 
14
  # Initialize components
15
- kw_extractor = yake.KeywordExtractor(n=2, top=20)
16
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
17
  genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
18
 
@@ -32,12 +33,20 @@ def extract_text(file):
32
 
33
  # 2️⃣ Extract keywords using YAKE
34
  def extract_keywords(text):
35
- """
36
- Returns the top 20 1–2‑gram keywords from the text.
37
- """
38
- kws = kw_extractor.extract_keywords(text)
39
- # kws is list of (keyword, score). We only want the keyword string.
40
- return [kw for kw,score in kws]
 
 
 
 
 
 
 
 
41
 
42
  def on_resume_upload(file):
43
  text = extract_text(file)
@@ -75,12 +84,24 @@ def rank_jobs(resume_text, jobs):
75
  emb_r = embedder.encode([resume_text])
76
  emb_j = embedder.encode([j.get("description","") for j in jobs])
77
  sims = cosine_similarity(emb_r, emb_j)[0]
78
- return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)[:5]
79
 
80
  # 5️⃣ Gemini refinement (optional)
81
  def refine_with_ai(ranked, resume_text):
82
- lines = "\n".join(f"- {j['title']} at {j.get('company', j.get('category', ''))}" for j, _ in ranked)
83
- prompt = f"Resume:\n{resume_text[:500]}\n\nJobs:\n{lines}\n\nRank & justify each."
 
 
 
 
 
 
 
 
 
 
 
 
84
  resp = genai_client.models.generate_content(
85
  model="gemini-2.5-flash",
86
  contents=SYSTEM_PROMPT + prompt,
@@ -111,10 +132,10 @@ def find_jobs(file, added_kw, use_ai):
111
  for job, score in ranked:
112
  role = job.get("title") or job.get("position", "")
113
  company = job.get("company") or job.get("company_name", "")
114
- location = job.get("location", "")
115
  # Normalize date (as we did before)
116
  posted = format_posted(job)
117
- apply_url= job.get("url") or job.get("apply_url","") or ""
118
  # Make sure none of these are dicts/lists
119
  table.append({
120
  "Role": str(role),
@@ -128,19 +149,33 @@ def find_jobs(file, added_kw, use_ai):
128
  explanation = refine_with_ai(ranked, resume) if use_ai else ""
129
  return table, explanation
130
 
 
 
 
 
 
131
  # 7️⃣ Jobs in Markdown
132
- def jobs_to_markdown(table, explanation):
133
- # Build the markdown table
134
- md = "| Role | Company | Location | Posted | Score | Apply |\n"
135
- md += "| --- | ------- | -------- | ------ | ----- | ----- |\n"
136
- for row in table:
 
 
 
 
 
 
 
 
 
137
  link = f"[Apply]({row['Apply']})" if row['Apply'] else ""
138
  md += (
139
  f"| {row['Role']} | {row['Company']} | {row['Location']} "
140
  f"| {row['Posted']} | {row['Score']} | {link} |\n"
141
  )
142
- # Append the AI explanation, if any
143
- if explanation:
144
  md += "\n---\n**AI Explanation:**\n\n" + explanation
145
  return md
146
 
@@ -153,12 +188,35 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
153
 
154
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
155
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
156
- btn = gr.Button("Find Jobs")
157
- jobs_md = gr.Markdown()
158
- btn.click(
159
- fn=lambda f,k,ai: jobs_to_markdown(*find_jobs(f,k,ai)),
 
 
 
 
 
 
 
 
 
 
 
160
  inputs=[resume, added, use_ai],
161
- outputs=[jobs_md]
 
 
 
 
 
 
 
 
 
 
 
 
162
  )
163
 
164
  if __name__ == "__main__":
 
10
  from google import genai
11
  from google.genai.types import GenerateContentConfig, ThinkingConfig
12
  from datetime import datetime
13
+ import math
14
 
15
  # Initialize components
16
+ kw_extractor = yake.KeywordExtractor(n=2, top=30)
17
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
18
  genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
19
 
 
33
 
34
  # 2️⃣ Extract keywords using YAKE
35
  def extract_keywords(text):
36
+ # Remove the first line (often the candidate’s name/header)
37
+ parts = text.split("\n", 1)
38
+ body = parts[1] if len(parts) > 1 else text
39
+
40
+ # Extract 1–2‑gram keywords, top 20
41
+ kws = kw_extractor.extract_keywords(body)
42
+ # Filter out any that look like names or generic headers
43
+ filtered = []
44
+ for kw, score in kws:
45
+ # drop if any word is all-caps (e.g. "SUMMARY", "RITESH")
46
+ if any(w.isupper() and len(w) > 2 for w in kw.split()):
47
+ continue
48
+ filtered.append(kw)
49
+ return filtered
50
 
51
  def on_resume_upload(file):
52
  text = extract_text(file)
 
84
  emb_r = embedder.encode([resume_text])
85
  emb_j = embedder.encode([j.get("description","") for j in jobs])
86
  sims = cosine_similarity(emb_r, emb_j)[0]
87
+ return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
88
 
89
  # 5️⃣ Gemini refinement (optional)
90
  def refine_with_ai(ranked, resume_text):
91
+ lines = []
92
+ for job, _ in ranked:
93
+ title = job.get("title") or job.get("position") or "N/A"
94
+ company = job.get("company") or job.get("company_name") or ""
95
+ loc = job.get("location") or ""
96
+ lines.append(f"- {title} at {company} ({loc})")
97
+
98
+ prompt = (
99
+ f"Resume:\n{resume_text[:500]}\n\n"
100
+ "Here are the top matched jobs:\n" +
101
+ "\n".join(lines) +
102
+ "\n\nPlease rank these top to bottom and explain why each is a good match."
103
+ )
104
+
105
  resp = genai_client.models.generate_content(
106
  model="gemini-2.5-flash",
107
  contents=SYSTEM_PROMPT + prompt,
 
132
  for job, score in ranked:
133
  role = job.get("title") or job.get("position", "")
134
  company = job.get("company") or job.get("company_name", "")
135
+ location = job.get("location", "N/A")
136
  # Normalize date (as we did before)
137
  posted = format_posted(job)
138
+ apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
139
  # Make sure none of these are dicts/lists
140
  table.append({
141
  "Role": str(role),
 
149
  explanation = refine_with_ai(ranked, resume) if use_ai else ""
150
  return table, explanation
151
 
152
def paginate(table, page, per_page=10):
    """Return the rows of *table* that belong to the 1-based *page*.

    Args:
        table: Sequence of rows to slice.
        page: 1-based page number. Values below 1 are treated as page 1;
            non-integer numbers (such as a slider's ``2.0``) are truncated.
        per_page: Rows per page; defaults to 10 and is never less than 1.

    Returns:
        The slice of *table* for that page. Empty when *page* is past the
        last page.
    """
    per_page = max(1, int(per_page))
    # A page below 1 would give a negative start index, which slices from the
    # END of the sequence and returns the wrong rows (or none at all).
    page = max(1, int(page))
    start = (page - 1) * per_page
    return table[start:start + per_page]
156
+
157
  # 7️⃣ Jobs in Markdown
158
def _md_cell(value):
    """Return *value* as text that is safe inside one Markdown table cell."""
    text = str(value)
    # A raw pipe starts a new column and a line break ends the row, so
    # escape the former and flatten the latter.
    text = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
    return text.replace("|", "\\|")


def jobs_to_markdown(table, explanation, page, per_page=10):
    """Render one page of the job table as Markdown.

    Args:
        table: List of row dicts with the keys ``Role``, ``Company``,
            ``Location``, ``Posted``, ``Score`` and ``Apply``.
        explanation: Optional AI explanation text; appended on page 1 only.
        page: 1-based page number; clamped into the valid range.
        per_page: Rows per page; defaults to 10 and is never less than 1.

    Returns:
        A Markdown string: a "Showing jobs ..." summary line, the table
        header, the rows of the requested page and, on page 1, the
        explanation when one is given.
    """
    per_page = max(1, int(per_page))  # guards against division by zero
    total = len(table)
    # Integer ceiling division; an empty table still has one (empty) page.
    pages = max(1, (total + per_page - 1) // per_page)
    page = max(1, min(int(page), pages))
    start = (page - 1) * per_page
    end = min(start + per_page, total)
    # With no rows the range reads "0–0" rather than the misleading "1–0".
    first_shown = start + 1 if total else 0

    parts = [
        f"**Showing jobs {first_shown}–{end} of {total} (Page {page}/{pages})**\n\n",
        "| Role | Company | Location | Posted | Score | Apply |\n",
        "| ---- | ------- | -------- | ------ | ----- | ----- |\n",
    ]
    for row in table[start:end]:
        link = f"[Apply]({_md_cell(row['Apply'])})" if row['Apply'] else ""
        parts.append(
            f"| {_md_cell(row['Role'])} | {_md_cell(row['Company'])} | {_md_cell(row['Location'])} "
            f"| {_md_cell(row['Posted'])} | {_md_cell(row['Score'])} | {link} |\n"
        )
    # The explanation covers the whole ranking, so show it once, on page 1.
    if page == 1 and explanation:
        parts.append("\n---\n**AI Explanation:**\n\n" + explanation)
    return "".join(parts)
181
 
 
188
 
189
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
190
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
191
find_btn = gr.Button("Find Jobs")

# Hidden states holding the full result set between page changes.
jobs_state = gr.State([])
expl_state = gr.State("")

# Outputs. The slider starts with a single page; its maximum is raised once
# a search has produced results.
md_out = gr.Markdown()
page_sel = gr.Slider(1, 1, step=1, value=1, label="Page")


def _search_and_reset_page(file, keywords, refine):
    """Run the search and return (table, explanation, slider update)."""
    table, explanation = find_jobs(file, keywords, refine)
    # Must match the default page size of jobs_to_markdown (10 rows).
    pages = max(1, -(-len(table) // 10))
    # Resetting only the value would leave the slider's maximum at 1, so
    # no page after the first could ever be selected.
    return table, explanation, gr.update(maximum=pages, value=1)


# 1) When "Find Jobs" is clicked:
#    - run find_jobs -> (full_table, explanation)
#    - store both in state, resize the slider and reset it to page 1
#    - then render page 1 immediately
find_btn.click(
    fn=_search_and_reset_page,
    inputs=[resume, added, use_ai],
    outputs=[jobs_state, expl_state, page_sel],
).then(
    fn=lambda tbl, expl, pg: jobs_to_markdown(tbl, expl, pg),
    inputs=[jobs_state, expl_state, page_sel],
    outputs=md_out,
)

# 2) When the page changes, re-render just the Markdown.
page_sel.change(
    fn=lambda tbl, expl, pg: jobs_to_markdown(tbl, expl, pg),
    inputs=[jobs_state, expl_state, page_sel],
    outputs=md_out,
)
221
 
222
  if __name__ == "__main__":