Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,28 +12,12 @@ from google.genai.types import GenerateContentConfig, ThinkingConfig
|
|
| 12 |
from datetime import datetime
|
| 13 |
import math
|
| 14 |
import json
|
| 15 |
-
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
| 16 |
|
| 17 |
# Initialize components
|
| 18 |
kw_extractor = yake.KeywordExtractor(n=2, top=30)
|
| 19 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 20 |
genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
|
| 21 |
|
| 22 |
-
# Initialize local summarization model (2025 state-of-the-art)
|
| 23 |
-
print("Loading summarization model...")
|
| 24 |
-
try:
|
| 25 |
-
# Using Facebook's BART-large-CNN summarizer - fast and reliable for job descriptions
|
| 26 |
-
summarizer = pipeline(
|
| 27 |
-
"summarization",
|
| 28 |
-
model="facebook/bart-large-cnn", # Fast and reliable for job descriptions
|
| 29 |
-
tokenizer="facebook/bart-large-cnn",
|
| 30 |
-
device=0 if os.system("nvidia-smi") == 0 else -1 # Use GPU if available
|
| 31 |
-
)
|
| 32 |
-
print("Summarization model loaded successfully!")
|
| 33 |
-
except Exception as e:
|
| 34 |
-
print(f"Error loading model, falling back to basic summarization: {e}")
|
| 35 |
-
summarizer = None
|
| 36 |
-
|
| 37 |
PER_PAGE = 10
|
| 38 |
|
| 39 |
SYSTEM_PROMPT = """
|
|
@@ -41,8 +25,6 @@ You are a job-matching assistant. Given a resume and job listings,
|
|
| 41 |
rank and explain why each job is a good fit.
|
| 42 |
"""
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
# 1️⃣ Extract text from resume
|
| 47 |
def extract_text(file):
|
| 48 |
ext = file.name.lower().split('.')[-1]
|
|
@@ -98,64 +80,7 @@ def fetch_remoteok(keywords):
|
|
| 98 |
return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
|
| 99 |
return []
|
| 100 |
|
| 101 |
-
# 4️⃣
|
| 102 |
-
def get_job_summary_fast(job):
|
| 103 |
-
"""Use local HF model to get job summary quickly"""
|
| 104 |
-
try:
|
| 105 |
-
description = job.get("description", "")
|
| 106 |
-
title = job.get("title") or job.get("position", "")
|
| 107 |
-
|
| 108 |
-
# Skip if no description available
|
| 109 |
-
if not description or len(description.strip()) < 20:
|
| 110 |
-
return f"Position for {title}" if title else "Job details available upon application"
|
| 111 |
-
|
| 112 |
-
# Clean and prepare text for summarization
|
| 113 |
-
clean_desc = description.replace('\n', ' ').replace('\r', ' ').strip()
|
| 114 |
-
|
| 115 |
-
# Use local model if available
|
| 116 |
-
if summarizer:
|
| 117 |
-
try:
|
| 118 |
-
# Truncate to model's max length (1024 tokens for BART)
|
| 119 |
-
if len(clean_desc) > 800:
|
| 120 |
-
clean_desc = clean_desc[:800] + "..."
|
| 121 |
-
|
| 122 |
-
# Generate summary with specific parameters for job descriptions
|
| 123 |
-
summary_result = summarizer(
|
| 124 |
-
clean_desc,
|
| 125 |
-
max_length=60, # Keep summaries concise
|
| 126 |
-
min_length=20,
|
| 127 |
-
do_sample=False,
|
| 128 |
-
truncation=True
|
| 129 |
-
)
|
| 130 |
-
|
| 131 |
-
summary = summary_result[0]['summary_text']
|
| 132 |
-
|
| 133 |
-
# Clean up the summary
|
| 134 |
-
if summary:
|
| 135 |
-
# Remove redundant phrases and make it more natural
|
| 136 |
-
summary = summary.replace("The job involves", "").replace("This position", "Position").strip()
|
| 137 |
-
if not summary.endswith('.'):
|
| 138 |
-
summary += '.'
|
| 139 |
-
return summary
|
| 140 |
-
|
| 141 |
-
except Exception as e:
|
| 142 |
-
print(f"Local model error: {e}")
|
| 143 |
-
|
| 144 |
-
# Fallback: Extract first meaningful sentence from description
|
| 145 |
-
sentences = clean_desc.split('.')
|
| 146 |
-
for sentence in sentences[:3]: # Check first 3 sentences
|
| 147 |
-
if len(sentence.strip()) > 30 and any(word in sentence.lower() for word in ['responsible', 'role', 'position', 'work', 'develop', 'manage', 'lead']):
|
| 148 |
-
return sentence.strip() + '.'
|
| 149 |
-
|
| 150 |
-
# Last fallback
|
| 151 |
-
return f"{title} role with responsibilities in {clean_desc[:50]}..." if title else "Job details available upon application"
|
| 152 |
-
|
| 153 |
-
except Exception as e:
|
| 154 |
-
print(f"Error getting job summary: {e}")
|
| 155 |
-
title = job.get("title") or job.get("position", "")
|
| 156 |
-
return f"Role involves {title.lower()} responsibilities" if title else "Job details available upon application"
|
| 157 |
-
|
| 158 |
-
# 5️⃣ Rank jobs by semantic similarity
|
| 159 |
def rank_jobs(resume_text, jobs):
|
| 160 |
if not jobs:
|
| 161 |
return []
|
|
@@ -164,7 +89,7 @@ def rank_jobs(resume_text, jobs):
|
|
| 164 |
sims = cosine_similarity(emb_r, emb_j)[0]
|
| 165 |
return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
|
| 166 |
|
| 167 |
-
# 6️⃣ Gemini refinement (optional)
|
| 168 |
def refine_with_ai(ranked, resume_text):
|
| 169 |
lines = []
|
| 170 |
for job, _ in ranked:
|
|
@@ -193,7 +118,7 @@ def format_posted(job):
|
|
| 193 |
return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
|
| 194 |
return str(raw)[:10]
|
| 195 |
|
| 196 |
-
# 7️⃣ Main pipeline
|
| 197 |
def find_jobs(file, added_kw, use_ai):
|
| 198 |
resume = extract_text(file) or ""
|
| 199 |
base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
|
|
@@ -210,10 +135,6 @@ def find_jobs(file, added_kw, use_ai):
|
|
| 210 |
posted = format_posted(job)
|
| 211 |
apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
|
| 212 |
|
| 213 |
-
# Get fast job summary using local model
|
| 214 |
-
print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
|
| 215 |
-
job_summary = get_job_summary_fast(job)
|
| 216 |
-
|
| 217 |
# Make sure none of these are dicts/lists
|
| 218 |
table.append({
|
| 219 |
"Role": str(role),
|
|
@@ -221,14 +142,13 @@ def find_jobs(file, added_kw, use_ai):
|
|
| 221 |
"Location": str(location),
|
| 222 |
"Posted": str(posted),
|
| 223 |
"Score": f"{score*100:.1f}%",
|
| 224 |
-
"Summary": str(job_summary),
|
| 225 |
"Apply": str(apply_url)
|
| 226 |
})
|
| 227 |
|
| 228 |
explanation = refine_with_ai(ranked, resume) if use_ai else ""
|
| 229 |
return table, explanation
|
| 230 |
|
| 231 |
-
# 8️⃣ Jobs in DataFrame format
|
| 232 |
def jobs_to_dataframe(table, page, per_page=PER_PAGE):
|
| 233 |
total = len(table)
|
| 234 |
pages = max(1, math.ceil(total / per_page))
|
|
@@ -252,7 +172,6 @@ def jobs_to_dataframe(table, page, per_page=PER_PAGE):
|
|
| 252 |
row['Location'],
|
| 253 |
row['Posted'],
|
| 254 |
row['Score'],
|
| 255 |
-
row['Summary'],
|
| 256 |
apply_link
|
| 257 |
])
|
| 258 |
|
|
@@ -268,10 +187,9 @@ def load_jobs_and_pages(resume, added_kw, use_ai):
|
|
| 268 |
|
| 269 |
return full_table, explanation, expl_header, slider_update, first_page_data, page_info
|
| 270 |
|
| 271 |
-
# 9️⃣ Gradio UI
|
| 272 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
| 273 |
gr.Markdown("## 🌍 Global Job Finder")
|
| 274 |
-
gr.Markdown("*Now with fast AI-powered job summaries using local models*")
|
| 275 |
|
| 276 |
with gr.Row():
|
| 277 |
resume = gr.File(label="Upload Resume (PDF/DOCX)")
|
|
@@ -280,8 +198,6 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
|
|
| 280 |
resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
|
| 281 |
use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
|
| 282 |
|
| 283 |
-
gr.Markdown("**Note:** Job summaries are generated using fast local AI models for quick results.")
|
| 284 |
-
|
| 285 |
find_btn = gr.Button("Find Jobs")
|
| 286 |
|
| 287 |
jobs_state = gr.State([]) # holds full table
|
|
@@ -290,10 +206,10 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
|
|
| 290 |
# Page info display
|
| 291 |
page_info = gr.Markdown("")
|
| 292 |
|
| 293 |
-
# DataFrame for jobs display (removed
|
| 294 |
jobs_df = gr.DataFrame(
|
| 295 |
-
headers=["Role", "Company", "Location", "Posted", "Score", "Summary", "Apply"],
|
| 296 |
-
datatype=["str", "str", "str", "str", "str", "str", "html"],
|
| 297 |
interactive=False,
|
| 298 |
wrap=True,
|
| 299 |
value=[]
|
|
|
|
| 12 |
from datetime import datetime
|
| 13 |
import math
|
| 14 |
import json
|
|
|
|
| 15 |
|
| 16 |
# Initialize components
|
| 17 |
kw_extractor = yake.KeywordExtractor(n=2, top=30)
|
| 18 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 19 |
genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
PER_PAGE = 10
|
| 22 |
|
| 23 |
SYSTEM_PROMPT = """
|
|
|
|
| 25 |
rank and explain why each job is a good fit.
|
| 26 |
"""
|
| 27 |
|
|
|
|
|
|
|
| 28 |
# 1️⃣ Extract text from resume
|
| 29 |
def extract_text(file):
|
| 30 |
ext = file.name.lower().split('.')[-1]
|
|
|
|
| 80 |
return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
|
| 81 |
return []
|
| 82 |
|
| 83 |
+
# 4️⃣ Rank jobs by semantic similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
def rank_jobs(resume_text, jobs):
|
| 85 |
if not jobs:
|
| 86 |
return []
|
|
|
|
| 89 |
sims = cosine_similarity(emb_r, emb_j)[0]
|
| 90 |
return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
|
| 91 |
|
| 92 |
+
# 5️⃣ Gemini refinement (optional)
|
| 93 |
def refine_with_ai(ranked, resume_text):
|
| 94 |
lines = []
|
| 95 |
for job, _ in ranked:
|
|
|
|
| 118 |
return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
|
| 119 |
return str(raw)[:10]
|
| 120 |
|
| 121 |
+
# 6️⃣ Main pipeline
|
| 122 |
def find_jobs(file, added_kw, use_ai):
|
| 123 |
resume = extract_text(file) or ""
|
| 124 |
base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
|
|
|
|
| 135 |
posted = format_posted(job)
|
| 136 |
apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# Make sure none of these are dicts/lists
|
| 139 |
table.append({
|
| 140 |
"Role": str(role),
|
|
|
|
| 142 |
"Location": str(location),
|
| 143 |
"Posted": str(posted),
|
| 144 |
"Score": f"{score*100:.1f}%",
|
|
|
|
| 145 |
"Apply": str(apply_url)
|
| 146 |
})
|
| 147 |
|
| 148 |
explanation = refine_with_ai(ranked, resume) if use_ai else ""
|
| 149 |
return table, explanation
|
| 150 |
|
| 151 |
+
# 7️⃣ Jobs in DataFrame format
|
| 152 |
def jobs_to_dataframe(table, page, per_page=PER_PAGE):
|
| 153 |
total = len(table)
|
| 154 |
pages = max(1, math.ceil(total / per_page))
|
|
|
|
| 172 |
row['Location'],
|
| 173 |
row['Posted'],
|
| 174 |
row['Score'],
|
|
|
|
| 175 |
apply_link
|
| 176 |
])
|
| 177 |
|
|
|
|
| 187 |
|
| 188 |
return full_table, explanation, expl_header, slider_update, first_page_data, page_info
|
| 189 |
|
| 190 |
+
# 8️⃣ Gradio UI
|
| 191 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
| 192 |
gr.Markdown("## 🌍 Global Job Finder")
|
|
|
|
| 193 |
|
| 194 |
with gr.Row():
|
| 195 |
resume = gr.File(label="Upload Resume (PDF/DOCX)")
|
|
|
|
| 198 |
resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
|
| 199 |
use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
|
| 200 |
|
|
|
|
|
|
|
| 201 |
find_btn = gr.Button("Find Jobs")
|
| 202 |
|
| 203 |
jobs_state = gr.State([]) # holds full table
|
|
|
|
| 206 |
# Page info display
|
| 207 |
page_info = gr.Markdown("")
|
| 208 |
|
| 209 |
+
# DataFrame for jobs display (removed Summary column)
|
| 210 |
jobs_df = gr.DataFrame(
|
| 211 |
+
headers=["Role", "Company", "Location", "Posted", "Score", "Apply"],
|
| 212 |
+
datatype=["str", "str", "str", "str", "str", "html"],
|
| 213 |
interactive=False,
|
| 214 |
wrap=True,
|
| 215 |
value=[]
|