riteshkokam committed on
Commit
e2af976
·
verified ·
1 Parent(s): 389e4f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -73
app.py CHANGED
@@ -1,10 +1,9 @@
1
- import os, re
2
  import gradio as gr
3
  from PyPDF2 import PdfReader
4
  from docx import Document
5
  import yake
6
  import requests
7
- from bs4 import BeautifulSoup
8
  from sentence_transformers import SentenceTransformer
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  from google import genai
@@ -20,7 +19,6 @@ You are an expert job-matching assistant. Given a resume and job listings,
20
  rank and refine the top 5 jobs by relevance, explaining why each is a strong fit.
21
  """
22
 
23
- # 1. Resume text extraction
24
  def extract_text(file):
25
  ext = file.name.split('.')[-1].lower()
26
  if ext == "pdf":
@@ -29,100 +27,63 @@ def extract_text(file):
29
  return "\n".join(para.text for para in Document(file.name).paragraphs)
30
  return ""
31
 
32
- # 2. Keyword extraction
33
  def extract_keywords(text):
34
  return [k for k, _ in kw_extractor.extract_keywords(text)]
35
 
36
- # 3. Scrape LinkedIn jobs via public API
37
- def scrape_linkedin(keywords, location, start=0):
38
- url = "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search"
39
- params = {
40
- "keywords": keywords,
41
- "location": location,
42
- "start": start
43
- }
44
- headers = {
45
- "Accept": "*/*",
46
- "Accept-Language": "en-US,en;q=0.9",
47
- "Referer": "https://www.linkedin.com/jobs",
48
- # optional: fake user-agent to reduce blocking
49
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
50
- }
51
-
52
- resp = requests.get(url, headers=headers, params=params)
53
- html = resp.text # raw HTML
54
- soup = BeautifulSoup(html, "html.parser")
55
- items = soup.find_all("li", class_="result-card")
56
- jobs = []
57
- for li in items:
58
- title_tag = li.select_one("h3.base-search-card__title")
59
- comp_tag = li.select_one("h4.base-search-card__subtitle")
60
- loc_tag = li.select_one("span.job-result-card__location")
61
- link_tag = li.find("a", class_="base-card__full-link")
62
- date_tag = li.find("time")
63
-
64
- jobs.append({
65
- "title": title_tag.get_text(strip=True) if title_tag else None,
66
- "company": comp_tag.get_text(strip=True) if comp_tag else None,
67
- "location": loc_tag.get_text(strip=True) if loc_tag else None,
68
- "url": link_tag["href"] if link_tag else None,
69
- "date": date_tag["datetime"] if date_tag else None,
70
- "description": "" # left blank; full detail requires another request
71
- })
72
- return jobs
73
 
74
- # 4. Semantic ranking
75
  def rank_jobs(resume_text, jobs):
76
  if not jobs:
77
  return []
78
  r_emb = embedder.encode([resume_text])
79
- j_embs = embedder.encode([j["description"] for j in jobs])
80
  sims = cosine_similarity(r_emb, j_embs)[0]
81
  ranked = sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
82
  return ranked[:5]
83
 
84
- # 5. (Optional) AI refinement
85
  def refine_with_ai(ranked, resume_text):
86
  lines = "\n".join(f"- {j['title']} at {j['company']} ({j['location']})" for j, _ in ranked)
87
- user_prompt = f"Resume:\n{resume_text[:500]}\n\nJobs:\n{lines}\n\nRank and justify:"
88
  resp = genai_client.models.generate_content(
89
  model="gemini-2.5-flash",
90
- contents=SYSTEM_PROMPT + user_prompt
91
  )
92
  return resp.text or "<No refined explanation>"
93
 
94
- # 6. Full pipeline
95
- def find_jobs(file, add_keywords, location, use_ai):
96
  txt = extract_text(file)
97
- keywords = " ".join(add_keywords) if add_keywords else " ".join(extract_keywords(txt)[:3])
98
- jobs = scrape_linkedin(keywords, location)
99
- if not jobs:
100
- return [], "No jobs found—check your keywords or location."
101
-
102
- ranked = rank_jobs(txt, jobs)
103
- if not ranked:
104
- return [], "Job listing found, but no matchable descriptions available."
105
-
106
  table = [{
107
- "Role": j["title"], "Company": j["company"],
108
- "Location": j["location"], "Posted": j["date"],
109
- "Score": f"{s*100:.1f}%", "Apply": j["url"]
110
- } for j, s in ranked]
111
-
112
- ai_note = refine_with_ai(ranked, txt) if use_ai else ""
113
- return table, ai_note
 
114
 
115
- # 7. Gradio UI
116
- with gr.Blocks(theme=gr.themes.Base()) as demo:
117
- gr.Markdown("## 🔎 Resume → LinkedIn Job Matcher")
118
  with gr.Row():
119
- file = gr.File(label="Upload Resume (PDF/DOCX)")
120
- add_keywords = gr.Textbox(label="Additional Keywords (optional)")
121
- location = gr.Textbox(label="Location (city or country)")
122
  use_ai = gr.Checkbox(label="Use Gemini to refine ranking", value=True)
123
- btn = gr.Button("Search Jobs")
124
- result = gr.Dataframe(headers=["Role","Company","Location","Posted","Score","Apply"], row_count=(1,5))
125
  ai_out = gr.Markdown()
126
- btn.click(find_jobs, [file, add_keywords, location, use_ai], [result, ai_out])
127
 
128
  demo.launch()
 
1
+ import os
2
  import gradio as gr
3
  from PyPDF2 import PdfReader
4
  from docx import Document
5
  import yake
6
  import requests
 
7
  from sentence_transformers import SentenceTransformer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
  from google import genai
 
19
  rank and refine the top 5 jobs by relevance, explaining why each is a strong fit.
20
  """
21
 
 
22
  def extract_text(file):
23
  ext = file.name.split('.')[-1].lower()
24
  if ext == "pdf":
 
27
  return "\n".join(para.text for para in Document(file.name).paragraphs)
28
  return ""
29
 
 
def extract_keywords(text):
    """Return the keyword phrases that ``kw_extractor`` finds in *text*.

    ``kw_extractor.extract_keywords`` yields ``(phrase, score)`` pairs; only
    the phrases are kept, in the order the extractor produced them.
    """
    scored_phrases = kw_extractor.extract_keywords(text)
    return [phrase for phrase, _score in scored_phrases]
32
 
def fetch_jobs_workaround(keywords):
    """Fetch listings from the Arbeitnow job board and keep keyword matches.

    Args:
        keywords: Iterable of search terms. Each term is stripped and
            lower-cased. Blank terms are discarded, because an empty string
            is a substring of every listing and would disable the filter.

    Returns:
        Up to 100 job dicts whose title or description contains at least one
        keyword (case-insensitive). An empty list is returned when no usable
        keyword is supplied, the request fails or times out, the status code
        is not 200, or the body is not a JSON object.
    """
    terms = [kw.strip().lower() for kw in (keywords or []) if kw and kw.strip()]
    if not terms:
        # Nothing to match on; skip the network round-trip entirely.
        return []

    try:
        resp = requests.get(
            "https://www.arbeitnow.com/api/job-board-api",
            timeout=15,  # never let a stalled connection hang the UI callback
        )
    except requests.RequestException:
        return []
    if resp.status_code != 200:
        return []

    try:
        data = resp.json().get("data", [])
    except (ValueError, AttributeError):
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        return []

    def searchable_text(job):
        # ``or ""`` guards against explicit nulls; the space keeps a keyword
        # from matching across the title/description boundary.
        return f"{job.get('title') or ''} {job.get('description') or ''}".lower()

    filtered = [
        job for job in data
        if any(term in searchable_text(job) for term in terms)
    ]
    return filtered[:100]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
def rank_jobs(resume_text, jobs, top_k=5):
    """Rank job listings by embedding similarity to the resume.

    Args:
        resume_text: Plain text of the resume.
        jobs: List of job dicts; each job's ``"description"`` value is the
            text that gets embedded (missing or null descriptions are
            treated as an empty string).
        top_k: Maximum number of results to return. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A list of ``(job, similarity)`` pairs sorted by descending cosine
        similarity, truncated to ``top_k`` entries. Empty when ``jobs`` is
        empty or ``None``.
    """
    if not jobs:
        return []
    # NOTE(review): ``embedder`` is a module-level object defined outside
    # this view, presumably a SentenceTransformer instance; confirm.
    descriptions = [job.get("description") or "" for job in jobs]
    r_emb = embedder.encode([resume_text])
    j_embs = embedder.encode(descriptions)
    # Row 0 holds the similarity of the single resume vector to every job.
    sims = cosine_similarity(r_emb, j_embs)[0]
    ranked = sorted(zip(jobs, sims), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_k]
52
 
 
def refine_with_ai(ranked, resume_text):
    """Ask Gemini to re-rank the shortlisted jobs and explain each match.

    Args:
        ranked: List of ``(job, score)`` pairs; only the job dicts are used.
        resume_text: Resume text. Only the first 500 characters are sent.

    Returns:
        The model's text response, or ``"<No refined explanation>"`` when
        there are no jobs to refine or the response carries no text.
    """
    if not ranked:
        # Nothing to justify; avoid a pointless model call.
        return "<No refined explanation>"

    def describe(job):
        # Arbeitnow appears to name the employer field ``company_name``;
        # ``company`` is kept as a fallback so either shape of listing works
        # instead of raising KeyError.
        title = job.get("title") or "Untitled role"
        company = job.get("company_name") or job.get("company") or "Unknown company"
        location = job.get("location") or "Unknown location"
        return f"- {title} at {company} ({location})"

    lines = "\n".join(describe(job) for job, _ in ranked)
    prompt = f"Resume:\n{resume_text[:500]}\n\nJobs:\n{lines}\n\nRank them top to bottom and explain why each matches."
    resp = genai_client.models.generate_content(
        model="gemini-2.5-flash",
        contents=SYSTEM_PROMPT + prompt
    )
    return resp.text or "<No refined explanation>"
61
 
def _format_posted(created_at):
    """Render a listing's creation time as ``YYYY-MM-DD`` ("" if unusable)."""
    from datetime import datetime, timezone

    if isinstance(created_at, (int, float)) and not isinstance(created_at, bool):
        # Handles a Unix-epoch timestamp, which Arbeitnow appears to return.
        try:
            stamp = datetime.fromtimestamp(created_at, tz=timezone.utc)
        except (OverflowError, OSError, ValueError):
            return ""
        return stamp.strftime("%Y-%m-%d")
    if isinstance(created_at, str):
        # ISO-style strings already start with the date.
        return created_at[:10]
    return ""


def find_jobs(file, add_keywords, use_ai):
    """Run the full pipeline: resume -> keywords -> listings -> ranking.

    Args:
        file: Uploaded resume file object (``None`` when nothing is uploaded).
        add_keywords: Comma-separated keywords typed by the user. When blank,
            the first three keywords extracted from the resume are used.
        use_ai: Whether to ask Gemini for a refined explanation.

    Returns:
        A ``(table, note)`` tuple. ``table`` is a list of rows, each a list
        in the order Role, Company, Location, Posted, Score, Apply, matching
        the Dataframe headers. ``note`` is the AI explanation, a status
        message when there is nothing to show, or "" when AI is disabled.
    """
    if file is None:
        return [], "Please upload a resume (PDF or DOCX) first."

    resume = extract_text(file) or ""
    # Strip each term and drop blanks so "python, java," yields two keywords.
    typed = [kw.strip() for kw in (add_keywords or "").split(",") if kw.strip()]
    kws = typed or extract_keywords(resume)[:3]
    jobs = fetch_jobs_workaround(kws)
    ranked = rank_jobs(resume, jobs)
    if not ranked:
        return [], "No matching jobs found. Try different keywords."

    # Rows are lists (not dicts) so they line up with the Dataframe headers.
    table = [
        [
            job.get("title"),
            job.get("company_name") or job.get("company"),
            job.get("location"),
            _format_posted(job.get("created_at")),
            f"{score*100:.1f}%",
            job.get("url"),
        ]
        for job, score in ranked
    ]
    ai_note = refine_with_ai(ranked, resume) if use_ai else ""
    return table, ai_note
77
 
78
+ with gr.Blocks() as demo:
79
+ gr.Markdown("## 🔍 Resume-Based Job Finder (using Arbeitnow API)")
 
80
  with gr.Row():
81
+ resume_file = gr.File(label="Upload Resume (PDF/DOCX)")
82
+ add_keywords = gr.Textbox(label="Additional Keywords, comma‑separated")
 
83
  use_ai = gr.Checkbox(label="Use Gemini to refine ranking", value=True)
84
+ btn = gr.Button("Find Jobs")
85
+ table = gr.Dataframe(headers=["Role","Company","Location","Posted","Score","Apply"], row_count=(1,5))
86
  ai_out = gr.Markdown()
87
+ btn.click(find_jobs, [resume_file, add_keywords, use_ai], [table, ai_out])
88
 
89
  demo.launch()