Update app.py
app.py CHANGED

@@ -1,10 +1,9 @@
 import os
 import gradio as gr
 from PyPDF2 import PdfReader
 from docx import Document
 import yake
 import requests
-from bs4 import BeautifulSoup
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from google import genai
@@ -20,7 +19,6 @@ You are an expert job-matching assistant. Given a resume and job listings,
 rank and refine the top 5 jobs by relevance, explaining why each is a strong fit.
 """
 
-# 1. Resume text extraction
 def extract_text(file):
     ext = file.name.split('.')[-1].lower()
     if ext == "pdf":
@@ -29,100 +27,63 @@ def extract_text(file):
         return "\n".join(para.text for para in Document(file.name).paragraphs)
     return ""
 
-# 2. Keyword extraction
 def extract_keywords(text):
     return [k for k, _ in kw_extractor.extract_keywords(text)]
 
-
-
-
-
-
-
-
-
-
-
-        "Accept-Language": "en-US,en;q=0.9",
-        "Referer": "https://www.linkedin.com/jobs",
-        # optional: fake user-agent to reduce blocking
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
-    }
-
-    resp = requests.get(url, headers=headers, params=params)
-    html = resp.text  # raw HTML
-    soup = BeautifulSoup(html, "html.parser")
-    items = soup.find_all("li", class_="result-card")
-    jobs = []
-    for li in items:
-        title_tag = li.select_one("h3.base-search-card__title")
-        comp_tag = li.select_one("h4.base-search-card__subtitle")
-        loc_tag = li.select_one("span.job-result-card__location")
-        link_tag = li.find("a", class_="base-card__full-link")
-        date_tag = li.find("time")
-
-        jobs.append({
-            "title": title_tag.get_text(strip=True) if title_tag else None,
-            "company": comp_tag.get_text(strip=True) if comp_tag else None,
-            "location": loc_tag.get_text(strip=True) if loc_tag else None,
-            "url": link_tag["href"] if link_tag else None,
-            "date": date_tag["datetime"] if date_tag else None,
-            "description": ""  # left blank; full detail requires another request
-        })
-    return jobs
+def fetch_jobs_workaround(keywords):
+    resp = requests.get("https://www.arbeitnow.com/api/job-board-api")
+    if resp.status_code != 200:
+        return []
+    data = resp.json().get("data", [])
+    filtered = [
+        job for job in data
+        if any(kw.lower() in (job.get("title","") + job.get("description","")).lower() for kw in keywords)
+    ]
+    return filtered[:100]
 
-# 4. Semantic ranking
 def rank_jobs(resume_text, jobs):
     if not jobs:
         return []
     r_emb = embedder.encode([resume_text])
-    j_embs = embedder.encode([
+    j_embs = embedder.encode([job.get("description", "") for job in jobs])
     sims = cosine_similarity(r_emb, j_embs)[0]
     ranked = sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
     return ranked[:5]
 
-# 5. (Optional) AI refinement
 def refine_with_ai(ranked, resume_text):
     lines = "\n".join(f"- {j['title']} at {j['company']} ({j['location']})" for j, _ in ranked)
-
+    prompt = f"Resume:\n{resume_text[:500]}\n\nJobs:\n{lines}\n\nRank them top to bottom and explain why each matches."
     resp = genai_client.models.generate_content(
         model="gemini-2.5-flash",
-        contents=SYSTEM_PROMPT +
+        contents=SYSTEM_PROMPT + prompt
     )
     return resp.text or "<No refined explanation>"
 
-
-def find_jobs(file, add_keywords, location, use_ai):
+def find_jobs(file, add_keywords, use_ai):
     txt = extract_text(file)
-
-
-
-
-
-    ranked = rank_jobs(txt, jobs)
-    if not ranked:
-        return [], "Job listing found, but no matchable descriptions available."
-
+    resume = txt or ""
+    kws = add_keywords.split(",") if add_keywords.strip() else extract_keywords(txt)[:3]
+    jobs = fetch_jobs_workaround(kws)
+    ranked = rank_jobs(resume, jobs)
     table = [{
-        "Role":
-        "
-        "
-
-
-
-
+        "Role": job["title"],
+        "Company": job["company"],
+        "Location": job["location"],
+        "Posted": job["created_at"][:10],
+        "Score": f"{score*100:.1f}%",
+        "Apply": job["url"]
+    } for job, score in ranked]
+    return table, refine_with_ai(ranked, resume) if use_ai else ""
 
-
-
-    gr.Markdown("## 🔎 Resume → LinkedIn Job Matcher")
+with gr.Blocks() as demo:
+    gr.Markdown("## 🔍 Resume-Based Job Finder (using Arbeitnow API)")
     with gr.Row():
-
-        add_keywords = gr.Textbox(label="Additional Keywords
-        location = gr.Textbox(label="Location (city or country)")
+        resume_file = gr.File(label="Upload Resume (PDF/DOCX)")
+        add_keywords = gr.Textbox(label="Additional Keywords, comma‑separated")
     use_ai = gr.Checkbox(label="Use Gemini to refine ranking", value=True)
-    btn = gr.Button("
-
+    btn = gr.Button("Find Jobs")
+    table = gr.Dataframe(headers=["Role","Company","Location","Posted","Score","Apply"], row_count=(1,5))
     ai_out = gr.Markdown()
-    btn.click(find_jobs, [
+    btn.click(find_jobs, [resume_file, add_keywords, use_ai], [table, ai_out])
 
 demo.launch()
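
A note on the new fetch_jobs_workaround: the requests.get call has no timeout, a non-JSON response would raise inside resp.json(), and keywords coming from the comma-split textbox keep their surrounding whitespace. A minimal hardened sketch — the 10-second timeout, the silent-failure policy, and the whitespace stripping are assumptions, not part of this commit:

```python
# Hedged sketch of a hardened fetch_jobs_workaround; the timeout value,
# exception policy, and keyword stripping are assumptions, not commit content.
import requests

def fetch_jobs_workaround(keywords):
    try:
        resp = requests.get("https://www.arbeitnow.com/api/job-board-api", timeout=10)
        resp.raise_for_status()
        data = resp.json().get("data", [])
    except (requests.RequestException, ValueError):
        return []  # surface network/API problems as "no jobs" instead of a traceback
    kws = [kw.strip().lower() for kw in keywords if kw.strip()]
    filtered = [
        job for job in data
        if any(kw in (job.get("title", "") + job.get("description", "")).lower() for kw in kws)
    ]
    return filtered[:100]

print(len(fetch_jobs_workaround(["python", " remote "])))  # whitespace now harmless
```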
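The ranking step itself is plain bi-encoder retrieval: embed the resume once, embed each job description, sort by cosine similarity. A self-contained illustration below — the all-MiniLM-L6-v2 checkpoint is an assumption, since the diff never shows how embedder is initialized:

```python
# Standalone illustration of the rank_jobs logic from the diff.
# "all-MiniLM-L6-v2" is an assumed checkpoint; embedder's init is not shown.
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

embedder = SentenceTransformer("all-MiniLM-L6-v2")
resume_text = "Python developer, three years of Django and REST APIs"
jobs = [
    {"title": "Backend Engineer", "description": "Django, PostgreSQL, REST services"},
    {"title": "iOS Developer", "description": "Swift, UIKit, App Store releases"},
]

r_emb = embedder.encode([resume_text])                                  # shape (1, dim)
j_embs = embedder.encode([job.get("description", "") for job in jobs])  # shape (n, dim)
sims = cosine_similarity(r_emb, j_embs)[0]                              # one score per job
ranked = sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)[:5]
print([(job["title"], round(float(s), 3)) for job, s in ranked])
```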
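refine_with_ai concatenates SYSTEM_PROMPT directly into contents. The google-genai SDK also accepts the system prompt separately via GenerateContentConfig, which keeps the two roles distinct; a sketch under that assumption — the client setup, API-key environment variable, and inline strings here are illustrative, not taken from the commit:

```python
# Sketch: passing the system prompt via config instead of string concatenation.
# Assumes an API key is available in the environment; strings are placeholders.
from google import genai
from google.genai import types

client = genai.Client()
resp = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Resume:\n...\n\nJobs:\n- Backend Engineer at Acme (Berlin)",
    config=types.GenerateContentConfig(
        system_instruction="You are an expert job-matching assistant."
    ),
)
print(resp.text or "<No refined explanation>")
```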