Spaces:
Sleeping
Sleeping
File size: 8,831 Bytes
db7bdf4 e2af976 f734200 f5213aa 23ea7fd 92bb6b1 f734200 23ea7fd f734200 9f93bdb f2d8246 f734200 85c7b12 f734200 db7bdf4 f734200 db7bdf4 f734200 db7bdf4 f2cc98a 92bb6b1 23ea7fd f2cc98a f734200 db7bdf4 e2af976 fd0c4ab db7bdf4 0166fac fd0c4ab db7bdf4 fd0c4ab db7bdf4 f734200 0215b5a f734200 389e4f5 db7bdf4 23ea7fd f734200 0215b5a f734200 23ea7fd f734200 db7bdf4 f734200 db7bdf4 f734200 f3a7cdf 0215b5a db7bdf4 f2d8246 f2cc98a f2d8246 e2af976 f3a7cdf db7bdf4 13aabb0 92bb6b1 0166fac 23ea7fd 0166fac 23ea7fd 92bb6b1 0166fac 13aabb0 92bb6b1 13aabb0 db7bdf4 f734200 0215b5a b5301f8 23ea7fd e7ab011 b5301f8 e7ab011 9f93bdb b5301f8 0166fac f2d8246 e7ab011 f2d8246 e7ab011 b5301f8 86190d1 b5301f8 f2d8246 0215b5a 85c7b12 f2d8246 92bb6b1 f734200 db7bdf4 f2cc98a db7bdf4 92bb6b1 23ea7fd e7ab011 b5301f8 0215b5a b5301f8 0215b5a b5301f8 85c7b12 b5301f8 dce94ea e7ab011 23ea7fd e7ab011 b5301f8 23ea7fd f2d8246 0166fac b5301f8 23ea7fd b5301f8 23ea7fd b5301f8 f2d8246 b5301f8 f2d8246 db7bdf4 92bb6b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
# app.py
import html
import json
import math
import os
from datetime import datetime

import gradio as gr
import requests
import yake
from docx import Document
from google import genai
from google.genai.types import GenerateContentConfig, ThinkingConfig
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Initialize components
kw_extractor = yake.KeywordExtractor(n=2, top=30)
embedder = SentenceTransformer("all-MiniLM-L6-v2")
genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
PER_PAGE = 10
SYSTEM_PROMPT = """
You are a job-matching assistant. Given a resume and job listings, rank and explain why each job is a good fit.
Return your output as a ranked markdown list of jobs. For each job, include the following:
- Job Title
- Company Name
- Location (if available)
- Why this job is a good match (1–2 sentences)
Keep the tone professional and concise, suitable for display in a career guidance app.
"""
# 1️⃣ Extract text from resume
def extract_text(file):
ext = file.name.lower().split('.')[-1]
if ext == "pdf":
return "\n".join(p.extract_text() or "" for p in PdfReader(file.name).pages)
elif ext == "docx":
return "\n".join(para.text for para in Document(file.name).paragraphs)
return ""
# 2️⃣ Extract keywords using YAKE
def extract_keywords(text):
# Remove the first line (often the candidate's name/header)
parts = text.split("\n", 1)
body = parts[1] if len(parts) > 1 else text
# Extract 1–2‑gram keywords, top 20
kws = kw_extractor.extract_keywords(body)
# Filter out any that look like names or generic headers
filtered = []
for kw, score in kws:
# drop if any word is all-caps (e.g. "SUMMARY", "RITESH")
if any(w.isupper() and len(w) > 2 for w in kw.split()):
continue
filtered.append(kw)
return filtered
def on_resume_upload(file):
text = extract_text(file)
kws = extract_keywords(text)
return ", ".join(kws)
# 3️⃣ Fetch jobs from free public APIs
def fetch_arbeitnow(keywords):
resp = requests.get("https://www.arbeitnow.com/api/job-board-api")
if resp.ok:
jobs = resp.json().get("data", [])
return [j for j in jobs if any(kw.lower() in (j.get("title","") + j.get("description","")).lower() for kw in keywords)]
return []
def fetch_remotive(keywords):
resp = requests.get("https://remotive.com/api/remote-jobs", params={"search": " ".join(keywords)})
if resp.ok:
return resp.json().get("jobs", [])
return []
def fetch_remoteok(keywords):
resp = requests.get("https://remoteok.com/api")
if resp.ok:
data = [j for j in resp.json() if isinstance(j, dict)]
return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
return []
# 4️⃣ Rank jobs by semantic similarity
def rank_jobs(resume_text, jobs):
if not jobs:
return []
emb_r = embedder.encode([resume_text])
emb_j = embedder.encode([j.get("description","") for j in jobs])
sims = cosine_similarity(emb_r, emb_j)[0]
return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
# 5️⃣ Gemini refinement (optional)
def refine_with_ai(ranked, resume_text):
lines = []
for job, _ in ranked:
title = job.get("title") or job.get("position") or "N/A"
company = job.get("company") or job.get("company_name") or ""
loc = job.get("location") or ""
lines.append(f"- {title} at {company} ({loc})")
prompt = (
f"Resume:\n{resume_text[:500]}\n\n"
"Here are the top matched jobs:\n" +
"\n".join(lines) +
"\n\nPlease rank these top to bottom and explain why each is a good match."
)
resp = genai_client.models.generate_content(
model="gemini-2.5-flash",
contents=SYSTEM_PROMPT + prompt,
)
return resp.text or "<No explanation>"
def format_posted(job):
raw = job.get("publication_date") or job.get("created_at") or job.get("date") or ""
if isinstance(raw, int):
# RemoteOK returns an int timestamp
return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
return str(raw)[:10]
# 6️⃣ Main pipeline
def find_jobs(file, added_kw, use_ai):
resume = extract_text(file) or ""
base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
keywords = [kw.strip() for kw in base_kws if kw.strip()]
jobs = fetch_arbeitnow(keywords) + fetch_remotive(keywords) + fetch_remoteok(keywords)
ranked = rank_jobs(resume, jobs)
print("Rank_jobs", ranked)
table = []
for i, (job, score) in enumerate(ranked):
role = job.get("title") or job.get("position", "")
company = job.get("company") or job.get("company_name", "")
location = job.get("location", "N/A")
posted = format_posted(job)
apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
# Make sure none of these are dicts/lists
table.append({
"Role": str(role),
"Company": str(company),
"Location": str(location),
"Posted": str(posted),
"Score": f"{score*100:.1f}%",
"Apply": str(apply_url)
})
explanation = refine_with_ai(ranked, resume) if use_ai else ""
return table, explanation
# 7️⃣ Jobs in DataFrame format
def jobs_to_dataframe(table, page, per_page=PER_PAGE):
total = len(table)
pages = max(1, math.ceil(total / per_page))
page = max(1, min(page, pages))
start, end = (page-1)*per_page, page*per_page
slice_ = table[start:end]
# Convert to list of lists for DataFrame
df_data = []
for row in slice_:
# Create properly formatted clickable link for apply URL
if row['Apply'] and row['Apply'] != 'N/A':
# Format as HTML link with proper styling
apply_link = f'<a href="{row["Apply"]}" target="_blank" style="color: #2563eb; text-decoration: underline;">Apply</a>'
else:
apply_link = "N/A"
df_data.append([
row['Role'],
row['Company'],
row['Location'],
row['Posted'],
row['Score'],
apply_link
])
return df_data, f"Showing jobs {start+1}–{min(end,total)} of {total} (Page {page}/{pages})"
def load_jobs_and_pages(resume, added_kw, use_ai):
full_table, explanation = find_jobs(resume, added_kw, use_ai)
total_pages = max(1, math.ceil(len(full_table) / PER_PAGE))
slider_update = gr.update(value=1, maximum=total_pages)
first_page_data, page_info = jobs_to_dataframe(full_table, 1)
expl_header = "### AI Explanation" if explanation else ""
print("Header", expl_header)
return full_table, explanation, expl_header, slider_update, first_page_data, page_info
# 8️⃣ Gradio UI
with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
gr.Markdown("## 🌍 Global Job Finder")
with gr.Row():
resume = gr.File(label="Upload Resume (PDF/DOCX)")
added = gr.Textbox(label="Add keywords (comma-separated)", placeholder="e.g. Python, ML")
resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
find_btn = gr.Button("Find Jobs")
jobs_state = gr.State([]) # holds full table
page_sel = gr.Slider(1, 1, step=1, value=1, label="Page")
# Page info display
page_info = gr.Markdown("")
# DataFrame for jobs display (removed Summary column)
jobs_df = gr.DataFrame(
headers=["Role", "Company", "Location", "Posted", "Score", "Apply"],
datatype=["str", "str", "str", "str", "str", "html"],
interactive=False,
wrap=True,
column_widths=["20%", "20%", "20%", "15%", "15%", "10%"],
value=[]
)
expl_md_h = gr.Markdown()
expl_md = gr.Markdown() # shows AI explanation
# 1) On Find Jobs:
# - load the jobs, explanation, slider max, and first-page data
find_btn.click(
fn=load_jobs_and_pages,
inputs=[resume, added, use_ai],
outputs=[jobs_state, expl_md, expl_md_h, page_sel, jobs_df, page_info]
)
# 2) On page change, re-render only the table data
def update_page_display(tbl, pg):
df_data, info = jobs_to_dataframe(tbl, pg)
return df_data, info
page_sel.change(
fn=update_page_display,
inputs=[jobs_state, page_sel],
outputs=[jobs_df, page_info]
)
if __name__ == "__main__":
demo.launch() |