# app.py
"""Global Job Finder: match an uploaded resume against public job-board APIs,
rank results by semantic similarity, and optionally explain matches with Gemini."""

import os
import gradio as gr
from PyPDF2 import PdfReader
from docx import Document
import yake
import requests
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from google import genai
from google.genai.types import GenerateContentConfig, ThinkingConfig
from datetime import datetime
import math
import json

# Initialize components
kw_extractor = yake.KeywordExtractor(n=2, top=30)
embedder = SentenceTransformer("all-MiniLM-L6-v2")
genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

PER_PAGE = 10
# Seconds before a job-board request is abandoned; without it a slow API
# would hang the whole Gradio callback.
REQUEST_TIMEOUT = 10

SYSTEM_PROMPT = """
You are a job-matching assistant. Given a resume and job listings, rank and explain why each job is a good fit.

Return your output as a ranked markdown list of jobs. For each job, include the following:
- Job Title
- Company Name
- Location (if available)
- Why this job is a good match (1–2 sentences)

Keep the tone professional and concise, suitable for display in a career guidance app.
"""


# 1️⃣ Extract text from resume
def extract_text(file):
    """Return the plain text of an uploaded PDF or DOCX file; '' for anything else."""
    ext = file.name.lower().split('.')[-1]
    if ext == "pdf":
        # extract_text() may return None for image-only pages — coerce to "".
        return "\n".join(p.extract_text() or "" for p in PdfReader(file.name).pages)
    elif ext == "docx":
        return "\n".join(para.text for para in Document(file.name).paragraphs)
    return ""


# 2️⃣ Extract keywords using YAKE
def extract_keywords(text):
    """Extract 1-2 word keywords from resume text, skipping header-like tokens.

    Returns a list of keyword strings (YAKE scores are discarded).
    """
    # Remove the first line (often the candidate's name/header)
    parts = text.split("\n", 1)
    body = parts[1] if len(parts) > 1 else text
    kws = kw_extractor.extract_keywords(body)
    # Filter out any that look like names or generic headers:
    # drop if any word is all-caps (e.g. "SUMMARY", "RITESH")
    filtered = []
    for kw, score in kws:
        if any(w.isupper() and len(w) > 2 for w in kw.split()):
            continue
        filtered.append(kw)
    return filtered


def on_resume_upload(file):
    """Gradio upload handler: pre-fill the keyword textbox from the resume."""
    text = extract_text(file)
    kws = extract_keywords(text)
    return ", ".join(kws)


# 3️⃣ Fetch jobs from free public APIs
def fetch_arbeitnow(keywords):
    """Fetch Arbeitnow listings whose title/description mention any keyword."""
    try:
        resp = requests.get("https://www.arbeitnow.com/api/job-board-api",
                            timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return []  # best-effort: a dead board should not kill the search
    if resp.ok:
        jobs = resp.json().get("data", [])
        return [j for j in jobs
                if any(kw.lower() in (j.get("title", "") + j.get("description", "")).lower()
                       for kw in keywords)]
    return []


def fetch_remotive(keywords):
    """Fetch Remotive listings; filtering is done server-side via ?search=."""
    try:
        resp = requests.get("https://remotive.com/api/remote-jobs",
                            params={"search": " ".join(keywords)},
                            timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return []
    if resp.ok:
        return resp.json().get("jobs", [])
    return []


def fetch_remoteok(keywords):
    """Fetch RemoteOK listings whose position/description mention any keyword."""
    try:
        resp = requests.get("https://remoteok.com/api", timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return []
    if resp.ok:
        # First element of the RemoteOK payload is a legal notice, not a job dict.
        data = [j for j in resp.json() if isinstance(j, dict)]
        return [j for j in data
                if any(kw.lower() in (j.get("position", "") + j.get("description", "")).lower()
                       for kw in keywords)]
    return []


# 4️⃣ Rank jobs by semantic similarity
def rank_jobs(resume_text, jobs):
    """Return (job, similarity) pairs sorted best-first by cosine similarity
    between the resume embedding and each job description embedding."""
    if not jobs:
        return []
    emb_r = embedder.encode([resume_text])
    emb_j = embedder.encode([j.get("description", "") for j in jobs])
    sims = cosine_similarity(emb_r, emb_j)[0]
    return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)


# 5️⃣ Gemini refinement (optional)
def refine_with_ai(ranked, resume_text):
    """Ask Gemini to re-rank and explain the matched jobs; returns markdown text."""
    lines = []
    for job, _ in ranked:
        # Field names differ per source API, so fall through the aliases.
        title = job.get("title") or job.get("position") or "N/A"
        company = job.get("company") or job.get("company_name") or ""
        loc = job.get("location") or ""
        lines.append(f"- {title} at {company} ({loc})")
    prompt = (
        f"Resume:\n{resume_text[:500]}\n\n"
        "Here are the top matched jobs:\n" + "\n".join(lines) +
        "\n\nPlease rank these top to bottom and explain why each is a good match."
    )
    resp = genai_client.models.generate_content(
        model="gemini-2.5-flash",
        contents=SYSTEM_PROMPT + prompt,
    )
    return resp.text or ""


def format_posted(job):
    """Normalize the posting date from any source API to a YYYY-MM-DD string."""
    raw = job.get("publication_date") or job.get("created_at") or job.get("date") or ""
    if isinstance(raw, int):
        # RemoteOK returns an int timestamp
        return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
    return str(raw)[:10]


# 6️⃣ Main pipeline
def find_jobs(file, added_kw, use_ai):
    """Run the full pipeline: extract keywords, fetch, rank, format.

    Returns (table, explanation) where table is a list of row dicts and
    explanation is Gemini markdown ('' when use_ai is False).
    """
    # Guard against no file uploaded / cleared textbox (both arrive as None).
    resume = (extract_text(file) if file is not None else "") or ""
    added_kw = added_kw or ""
    # User-supplied keywords take precedence over auto-extraction.
    base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
    keywords = [kw.strip() for kw in base_kws if kw.strip()]
    jobs = fetch_arbeitnow(keywords) + fetch_remotive(keywords) + fetch_remoteok(keywords)
    ranked = rank_jobs(resume, jobs)
    table = []
    for i, (job, score) in enumerate(ranked):
        role = job.get("title") or job.get("position", "")
        company = job.get("company") or job.get("company_name", "")
        location = job.get("location", "N/A")
        posted = format_posted(job)
        apply_url = job.get("url") or job.get("apply_url", "") or job.get("joblink", "") or ""
        # Make sure none of these are dicts/lists
        table.append({
            "Role": str(role),
            "Company": str(company),
            "Location": str(location),
            "Posted": str(posted),
            "Score": f"{score * 100:.1f}%",
            "Apply": str(apply_url),
        })
    explanation = refine_with_ai(ranked, resume) if use_ai else ""
    return table, explanation


# 7️⃣ Jobs in DataFrame format
def jobs_to_dataframe(table, page, per_page=PER_PAGE):
    """Slice the full table to one page of DataFrame rows.

    Returns (rows, info_string) where rows is a list-of-lists matching the
    DataFrame headers and info_string describes the pagination state.
    """
    total = len(table)
    pages = max(1, math.ceil(total / per_page))
    page = max(1, min(page, pages))  # clamp out-of-range slider values
    start, end = (page - 1) * per_page, page * per_page
    slice_ = table[start:end]
    df_data = []
    for row in slice_:
        # The "Apply" column is declared datatype="html" in the UI, so render
        # the URL as a clickable anchor opening in a new tab.
        if row['Apply'] and row['Apply'] != 'N/A':
            apply_link = f'<a href="{row["Apply"]}" target="_blank">Apply</a>'
        else:
            apply_link = "N/A"
        df_data.append([
            row['Role'],
            row['Company'],
            row['Location'],
            row['Posted'],
            row['Score'],
            apply_link,
        ])
    return df_data, f"Showing jobs {start + 1}–{min(end, total)} of {total} (Page {page}/{pages})"


def load_jobs_and_pages(resume, added_kw, use_ai):
    """Find Jobs click handler: compute the full table plus first-page view.

    Returns (full_table, explanation, explanation_header, slider_update,
    first_page_rows, page_info) in the order the outputs are wired below.
    """
    full_table, explanation = find_jobs(resume, added_kw, use_ai)
    total_pages = max(1, math.ceil(len(full_table) / PER_PAGE))
    slider_update = gr.update(value=1, maximum=total_pages)
    first_page_data, page_info = jobs_to_dataframe(full_table, 1)
    expl_header = "### AI Explanation" if explanation else ""
    return full_table, explanation, expl_header, slider_update, first_page_data, page_info


# 8️⃣ Gradio UI
with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
    gr.Markdown("## 🌍 Global Job Finder")
    with gr.Row():
        resume = gr.File(label="Upload Resume (PDF/DOCX)")
        added = gr.Textbox(label="Add keywords (comma-separated)",
                           placeholder="e.g. Python, ML")
    resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
    use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
    find_btn = gr.Button("Find Jobs")

    jobs_state = gr.State([])  # holds full table
    page_sel = gr.Slider(1, 1, step=1, value=1, label="Page")

    # Page info display
    page_info = gr.Markdown("")

    # DataFrame for jobs display (removed Summary column)
    jobs_df = gr.DataFrame(
        headers=["Role", "Company", "Location", "Posted", "Score", "Apply"],
        datatype=["str", "str", "str", "str", "str", "html"],
        interactive=False,
        wrap=True,
        column_widths=["20%", "20%", "20%", "15%", "15%", "10%"],
        value=[]
    )

    expl_md_h = gr.Markdown()
    expl_md = gr.Markdown()  # shows AI explanation

    # 1) On Find Jobs:
    #    - load the jobs, explanation, slider max, and first-page data
    find_btn.click(
        fn=load_jobs_and_pages,
        inputs=[resume, added, use_ai],
        outputs=[jobs_state, expl_md, expl_md_h, page_sel, jobs_df, page_info]
    )

    # 2) On page change, re-render only the table data
    def update_page_display(tbl, pg):
        df_data, info = jobs_to_dataframe(tbl, pg)
        return df_data, info

    page_sel.change(
        fn=update_page_display,
        inputs=[jobs_state, page_sel],
        outputs=[jobs_df, page_info]
    )


if __name__ == "__main__":
    demo.launch()