Spaces:
Sleeping
Sleeping
| # app.py | |
| import os | |
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| from docx import Document | |
| import yake | |
| import requests | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from google import genai | |
| from google.genai.types import GenerateContentConfig, ThinkingConfig | |
| from datetime import datetime | |
| import math | |
| import json | |
# Initialize components
# YAKE extractor: phrases of up to 2 words, keep the top 30 candidates.
kw_extractor = yake.KeywordExtractor(n=2, top=30)
# Sentence-embedding model used for resume <-> job-description similarity.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Gemini client for the optional AI explanation step.
# NOTE(review): os.getenv returns None when GEMINI_API_KEY is unset — the
# client will then fail at request time, not here; confirm deployment sets it.
genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
# Number of job rows shown per page of the results table.
PER_PAGE = 10
# System instructions prepended to every Gemini refinement request.
SYSTEM_PROMPT = """
You are a job-matching assistant. Given a resume and job listings, rank and explain why each job is a good fit.
Return your output as a ranked markdown list of jobs. For each job, include the following:
- Job Title
- Company Name
- Location (if available)
- Why this job is a good match (1–2 sentences)
Keep the tone professional and concise, suitable for display in a career guidance app.
"""
| # 1️⃣ Extract text from resume | |
# 1️⃣ Extract text from resume
def extract_text(file):
    """Extract plain text from an uploaded resume.

    Supports PDF (via PyPDF2) and DOCX (via python-docx); any other
    extension — or no upload at all — yields "".

    Args:
        file: object exposing a ``.name`` path attribute (as provided by
            ``gr.File``), or None when nothing was uploaded.

    Returns:
        Extracted text with pages/paragraphs joined by newlines; "" when
        nothing could be extracted.
    """
    if file is None:
        # Guard: "Find Jobs" can be clicked before any resume is uploaded.
        return ""
    ext = file.name.lower().split('.')[-1]
    if ext == "pdf":
        # extract_text() may return None for image-only pages; coerce to "".
        return "\n".join(p.extract_text() or "" for p in PdfReader(file.name).pages)
    if ext == "docx":
        return "\n".join(para.text for para in Document(file.name).paragraphs)
    return ""
| # 2️⃣ Extract keywords using YAKE | |
# 2️⃣ Extract keywords using YAKE
def extract_keywords(text):
    """Pull candidate skill keywords out of resume text via YAKE.

    The first line (often the candidate's name or a header) is dropped
    before extraction, and any phrase containing an all-caps word longer
    than two characters (e.g. "SUMMARY", "RITESH") is filtered out.
    """
    head, sep, remainder = text.partition("\n")
    body = remainder if sep else text

    def _looks_like_header(phrase):
        # All-caps tokens longer than 2 chars tend to be names/section titles.
        return any(word.isupper() and len(word) > 2 for word in phrase.split())

    candidates = kw_extractor.extract_keywords(body)
    return [phrase for phrase, _score in candidates if not _looks_like_header(phrase)]
def on_resume_upload(file):
    """Gradio upload handler: seed the keyword textbox from the resume."""
    keywords = extract_keywords(extract_text(file))
    return ", ".join(keywords)
| # 3️⃣ Fetch jobs from free public APIs | |
# 3️⃣ Fetch jobs from free public APIs
def fetch_arbeitnow(keywords):
    """Fetch jobs from the Arbeitnow public job board, filtered by keywords.

    Args:
        keywords: iterable of keyword strings; a job is kept when any keyword
            appears (case-insensitively) in its title or description.

    Returns:
        List of job dicts (possibly empty); [] on any network/HTTP failure.
    """
    try:
        # Bound the request so a stalled endpoint can't hang the app forever.
        resp = requests.get("https://www.arbeitnow.com/api/job-board-api", timeout=10)
    except requests.RequestException:
        return []
    if not resp.ok:
        return []
    jobs = resp.json().get("data", [])
    needles = [kw.lower() for kw in keywords]
    matches = []
    for job in jobs:
        # Lowercase the searchable text once per job, not once per keyword.
        haystack = (job.get("title", "") + job.get("description", "")).lower()
        if any(kw in haystack for kw in needles):
            matches.append(job)
    return matches
def fetch_remotive(keywords):
    """Fetch jobs from the Remotive public API using its own search filter.

    Args:
        keywords: iterable of keyword strings, joined into one search query.

    Returns:
        List of job dicts (possibly empty); [] on any network/HTTP failure.
    """
    try:
        # Bound the request so a stalled endpoint can't hang the app forever.
        resp = requests.get(
            "https://remotive.com/api/remote-jobs",
            params={"search": " ".join(keywords)},
            timeout=10,
        )
    except requests.RequestException:
        return []
    if resp.ok:
        return resp.json().get("jobs", [])
    return []
def fetch_remoteok(keywords):
    """Fetch jobs from the RemoteOK public API, filtered by keywords.

    Args:
        keywords: iterable of keyword strings; a job is kept when any keyword
            appears (case-insensitively) in its position or description.

    Returns:
        List of job dicts (possibly empty); [] on any network/HTTP failure.
    """
    try:
        # Bound the request so a stalled endpoint can't hang the app forever.
        resp = requests.get("https://remoteok.com/api", timeout=10)
    except requests.RequestException:
        return []
    if not resp.ok:
        return []
    # Keep only dict entries — the payload evidently mixes non-job items in.
    jobs = [item for item in resp.json() if isinstance(item, dict)]
    needles = [kw.lower() for kw in keywords]
    matches = []
    for job in jobs:
        # Lowercase the searchable text once per job, not once per keyword.
        haystack = (job.get("position", "") + job.get("description", "")).lower()
        if any(kw in haystack for kw in needles):
            matches.append(job)
    return matches
| # 4️⃣ Rank jobs by semantic similarity | |
# 4️⃣ Rank jobs by semantic similarity
def rank_jobs(resume_text, jobs):
    """Order jobs by cosine similarity between resume and description embeddings.

    Returns a list of (job, similarity) pairs, best match first; [] when
    `jobs` is empty.
    """
    if not jobs:
        return []
    resume_vec = embedder.encode([resume_text])
    job_vecs = embedder.encode([job.get("description", "") for job in jobs])
    scores = cosine_similarity(resume_vec, job_vecs)[0]
    pairs = list(zip(jobs, scores))
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs
| # 5️⃣ Gemini refinement (optional) | |
# 5️⃣ Gemini refinement (optional)
def refine_with_ai(ranked, resume_text):
    """Ask Gemini to rank the matched jobs and explain each fit.

    Args:
        ranked: list of (job_dict, score) pairs from rank_jobs.
        resume_text: raw resume text; only the first 500 chars are sent.

    Returns:
        Markdown text from the model, or a placeholder when it returns none.
    """
    def _describe(job):
        # One bullet per job: title, company, location (best-effort fields).
        title = job.get("title") or job.get("position") or "N/A"
        company = job.get("company") or job.get("company_name") or ""
        location = job.get("location") or ""
        return f"- {title} at {company} ({location})"

    job_lines = "\n".join(_describe(job) for job, _score in ranked)
    prompt = (
        f"Resume:\n{resume_text[:500]}\n\n"
        "Here are the top matched jobs:\n"
        + job_lines
        + "\n\nPlease rank these top to bottom and explain why each is a good match."
    )
    resp = genai_client.models.generate_content(
        model="gemini-2.5-flash",
        contents=SYSTEM_PROMPT + prompt,
    )
    return resp.text or "<No explanation>"
def format_posted(job):
    """Return the job's posting date as a YYYY-MM-DD string (best effort).

    Checks the date fields used by the three job APIs in priority order.
    Integer values are treated as Unix timestamps; strings are truncated
    to their first 10 characters (the ISO date part).
    """
    for key in ("publication_date", "created_at", "date"):
        value = job.get(key)
        if value:
            break
    else:
        value = ""
    if isinstance(value, int):
        # RemoteOK returns an int timestamp
        return datetime.fromtimestamp(value).strftime("%Y-%m-%d")
    return str(value)[:10]
| # 6️⃣ Main pipeline | |
# 6️⃣ Main pipeline
def find_jobs(file, added_kw, use_ai):
    """End-to-end pipeline: resume -> keywords -> fetch -> rank -> table.

    Args:
        file: uploaded resume file (or None when nothing was uploaded).
        added_kw: comma-separated user keywords; when non-blank they
            override YAKE extraction from the resume.
        use_ai: when True, ask Gemini for a ranked explanation.

    Returns:
        (table, explanation): list of display-ready row dicts, and the AI
        explanation markdown ("" when use_ai is False).
    """
    resume = (extract_text(file) if file is not None else "") or ""
    # User-supplied keywords take precedence over automatic extraction.
    if added_kw and added_kw.strip():
        base_kws = added_kw.split(",")
    else:
        base_kws = extract_keywords(resume)
    keywords = [kw.strip() for kw in base_kws if kw.strip()]
    jobs = fetch_arbeitnow(keywords) + fetch_remotive(keywords) + fetch_remoteok(keywords)
    ranked = rank_jobs(resume, jobs)
    table = []
    for job, score in ranked:
        role = job.get("title") or job.get("position", "")
        company = job.get("company") or job.get("company_name", "")
        location = job.get("location", "N/A")
        posted = format_posted(job)
        apply_url = job.get("url") or job.get("apply_url", "") or job.get("joblink", "") or ""
        # Coerce every cell to str so the DataFrame never receives dicts/lists.
        table.append({
            "Role": str(role),
            "Company": str(company),
            "Location": str(location),
            "Posted": str(posted),
            "Score": f"{score*100:.1f}%",
            "Apply": str(apply_url),
        })
    explanation = refine_with_ai(ranked, resume) if use_ai else ""
    return table, explanation
| # 7️⃣ Jobs in DataFrame format | |
# 7️⃣ Jobs in DataFrame format
def jobs_to_dataframe(table, page, per_page=None):
    """Slice `table` into one display page and format rows for gr.DataFrame.

    Args:
        table: list of row dicts produced by find_jobs.
        page: 1-based page number; clamped into the valid range.
        per_page: rows per page; defaults to PER_PAGE when None (resolved
            at call time so later changes to the constant take effect).

    Returns:
        (rows, info): list-of-lists for the DataFrame (Apply column rendered
        as an HTML link) and a "Showing jobs X–Y of N" status string.
    """
    from html import escape  # stdlib; local import keeps module imports unchanged

    if per_page is None:
        per_page = PER_PAGE
    total = len(table)
    pages = max(1, math.ceil(total / per_page))
    page = max(1, min(page, pages))
    start, end = (page - 1) * per_page, page * per_page
    df_data = []
    for row in table[start:end]:
        url = row['Apply']
        if url and url != 'N/A':
            # Escape the URL before embedding it in the href attribute so
            # stray quotes/ampersands can't break out of the markup.
            safe_url = escape(url, quote=True)
            apply_link = f'<a href="{safe_url}" target="_blank" style="color: #2563eb; text-decoration: underline;">Apply</a>'
        else:
            apply_link = "N/A"
        df_data.append([
            row['Role'],
            row['Company'],
            row['Location'],
            row['Posted'],
            row['Score'],
            apply_link,
        ])
    # Avoid the misleading "Showing jobs 1–0 of 0" when the table is empty.
    first = start + 1 if total else 0
    return df_data, f"Showing jobs {first}–{min(end, total)} of {total} (Page {page}/{pages})"
def load_jobs_and_pages(resume, added_kw, use_ai):
    """Click handler for "Find Jobs": run the pipeline and prime the UI.

    Args:
        resume: the gr.File value (uploaded resume, or None).
        added_kw: comma-separated keywords from the textbox.
        use_ai: value of the "Use AI" checkbox.

    Returns:
        Tuple matching the click() outputs: full table (for gr.State), the
        AI explanation and its header markdown, a slider update carrying the
        new page count, page-1 table data, and the page-info string.
    """
    full_table, explanation = find_jobs(resume, added_kw, use_ai)
    total_pages = max(1, math.ceil(len(full_table) / PER_PAGE))
    slider_update = gr.update(value=1, maximum=total_pages)
    first_page_data, page_info = jobs_to_dataframe(full_table, 1)
    # Only render the section header when there is an explanation to show.
    expl_header = "### AI Explanation" if explanation else ""
    return full_table, explanation, expl_header, slider_update, first_page_data, page_info
| # 8️⃣ Gradio UI | |
# 8️⃣ Gradio UI
with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
    gr.Markdown("## 🌍 Global Job Finder")
    with gr.Row():
        resume = gr.File(label="Upload Resume (PDF/DOCX)")
        added = gr.Textbox(label="Add keywords (comma-separated)", placeholder="e.g. Python, ML")
    # Auto-fill the keyword textbox as soon as a resume is uploaded.
    resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
    use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
    find_btn = gr.Button("Find Jobs")
    jobs_state = gr.State([])  # holds full table
    # Page selector; its maximum is updated after each search via gr.update.
    page_sel = gr.Slider(1, 1, step=1, value=1, label="Page")
    # Page info display
    page_info = gr.Markdown("")
    # DataFrame for jobs display (removed Summary column)
    jobs_df = gr.DataFrame(
        headers=["Role", "Company", "Location", "Posted", "Score", "Apply"],
        # Last column is "html" so the Apply link renders as a real anchor.
        datatype=["str", "str", "str", "str", "str", "html"],
        interactive=False,
        wrap=True,
        column_widths=["20%", "20%", "20%", "15%", "15%", "10%"],
        value=[]
    )
    expl_md_h = gr.Markdown()  # "### AI Explanation" header (empty when AI off)
    expl_md = gr.Markdown()  # shows AI explanation
    # 1) On Find Jobs:
    #    - load the jobs, explanation, slider max, and first-page data
    find_btn.click(
        fn=load_jobs_and_pages,
        inputs=[resume, added, use_ai],
        outputs=[jobs_state, expl_md, expl_md_h, page_sel, jobs_df, page_info]
    )
    # 2) On page change, re-render only the table data
    def update_page_display(tbl, pg):
        # tbl: full table kept in jobs_state; pg: page chosen on the slider.
        df_data, info = jobs_to_dataframe(tbl, pg)
        return df_data, info
    page_sel.change(
        fn=update_page_display,
        inputs=[jobs_state, page_sel],
        outputs=[jobs_df, page_info]
    )

if __name__ == "__main__":
    demo.launch()