riteshkokam committed on
Commit
0215b5a
·
verified ·
1 Parent(s): 9f93bdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -92
app.py CHANGED
@@ -12,28 +12,12 @@ from google.genai.types import GenerateContentConfig, ThinkingConfig
12
  from datetime import datetime
13
  import math
14
  import json
15
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
16
 
17
  # Initialize components
18
  kw_extractor = yake.KeywordExtractor(n=2, top=30)
19
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
20
  genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
21
 
22
- # Initialize local summarization model (2025 state-of-the-art)
23
- print("Loading summarization model...")
24
- try:
25
- # Using Microsoft's DialoGPT-based summarizer - fast and efficient for 2025
26
- summarizer = pipeline(
27
- "summarization",
28
- model="facebook/bart-large-cnn", # Fast and reliable for job descriptions
29
- tokenizer="facebook/bart-large-cnn",
30
- device=0 if os.system("nvidia-smi") == 0 else -1 # Use GPU if available
31
- )
32
- print("Summarization model loaded successfully!")
33
- except Exception as e:
34
- print(f"Error loading model, falling back to basic summarization: {e}")
35
- summarizer = None
36
-
37
  PER_PAGE = 10
38
 
39
  SYSTEM_PROMPT = """
@@ -41,8 +25,6 @@ You are a job-matching assistant. Given a resume and job listings,
41
  rank and explain why each job is a good fit.
42
  """
43
 
44
-
45
-
46
  # 1️⃣ Extract text from resume
47
  def extract_text(file):
48
  ext = file.name.lower().split('.')[-1]
@@ -98,64 +80,7 @@ def fetch_remoteok(keywords):
98
  return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
99
  return []
100
 
101
- # 4️⃣ Analyze job with local Hugging Face model for fast summaries
102
- def get_job_summary_fast(job):
103
- """Use local HF model to get job summary quickly"""
104
- try:
105
- description = job.get("description", "")
106
- title = job.get("title") or job.get("position", "")
107
-
108
- # Skip if no description available
109
- if not description or len(description.strip()) < 20:
110
- return f"Position for {title}" if title else "Job details available upon application"
111
-
112
- # Clean and prepare text for summarization
113
- clean_desc = description.replace('\n', ' ').replace('\r', ' ').strip()
114
-
115
- # Use local model if available
116
- if summarizer:
117
- try:
118
- # Truncate to model's max length (1024 tokens for BART)
119
- if len(clean_desc) > 800:
120
- clean_desc = clean_desc[:800] + "..."
121
-
122
- # Generate summary with specific parameters for job descriptions
123
- summary_result = summarizer(
124
- clean_desc,
125
- max_length=60, # Keep summaries concise
126
- min_length=20,
127
- do_sample=False,
128
- truncation=True
129
- )
130
-
131
- summary = summary_result[0]['summary_text']
132
-
133
- # Clean up the summary
134
- if summary:
135
- # Remove redundant phrases and make it more natural
136
- summary = summary.replace("The job involves", "").replace("This position", "Position").strip()
137
- if not summary.endswith('.'):
138
- summary += '.'
139
- return summary
140
-
141
- except Exception as e:
142
- print(f"Local model error: {e}")
143
-
144
- # Fallback: Extract first meaningful sentence from description
145
- sentences = clean_desc.split('.')
146
- for sentence in sentences[:3]: # Check first 3 sentences
147
- if len(sentence.strip()) > 30 and any(word in sentence.lower() for word in ['responsible', 'role', 'position', 'work', 'develop', 'manage', 'lead']):
148
- return sentence.strip() + '.'
149
-
150
- # Last fallback
151
- return f"{title} role with responsibilities in {clean_desc[:50]}..." if title else "Job details available upon application"
152
-
153
- except Exception as e:
154
- print(f"Error getting job summary: {e}")
155
- title = job.get("title") or job.get("position", "")
156
- return f"Role involves {title.lower()} responsibilities" if title else "Job details available upon application"
157
-
158
- # 5️⃣ Rank jobs by semantic similarity
159
  def rank_jobs(resume_text, jobs):
160
  if not jobs:
161
  return []
@@ -164,7 +89,7 @@ def rank_jobs(resume_text, jobs):
164
  sims = cosine_similarity(emb_r, emb_j)[0]
165
  return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
166
 
167
- # 6️⃣ Gemini refinement (optional)
168
  def refine_with_ai(ranked, resume_text):
169
  lines = []
170
  for job, _ in ranked:
@@ -193,7 +118,7 @@ def format_posted(job):
193
  return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
194
  return str(raw)[:10]
195
 
196
- # 7️⃣ Main pipeline
197
  def find_jobs(file, added_kw, use_ai):
198
  resume = extract_text(file) or ""
199
  base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
@@ -210,10 +135,6 @@ def find_jobs(file, added_kw, use_ai):
210
  posted = format_posted(job)
211
  apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
212
 
213
- # Get fast job summary using local model
214
- print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
215
- job_summary = get_job_summary_fast(job)
216
-
217
  # Make sure none of these are dicts/lists
218
  table.append({
219
  "Role": str(role),
@@ -221,14 +142,13 @@ def find_jobs(file, added_kw, use_ai):
221
  "Location": str(location),
222
  "Posted": str(posted),
223
  "Score": f"{score*100:.1f}%",
224
- "Summary": str(job_summary),
225
  "Apply": str(apply_url)
226
  })
227
 
228
  explanation = refine_with_ai(ranked, resume) if use_ai else ""
229
  return table, explanation
230
 
231
- # 8️⃣ Jobs in DataFrame format
232
  def jobs_to_dataframe(table, page, per_page=PER_PAGE):
233
  total = len(table)
234
  pages = max(1, math.ceil(total / per_page))
@@ -252,7 +172,6 @@ def jobs_to_dataframe(table, page, per_page=PER_PAGE):
252
  row['Location'],
253
  row['Posted'],
254
  row['Score'],
255
- row['Summary'],
256
  apply_link
257
  ])
258
 
@@ -268,10 +187,9 @@ def load_jobs_and_pages(resume, added_kw, use_ai):
268
 
269
  return full_table, explanation, expl_header, slider_update, first_page_data, page_info
270
 
271
- # 9️⃣ Gradio UI
272
  with gr.Blocks(theme=gr.themes.Base()) as demo:
273
  gr.Markdown("## 🌍 Global Job Finder")
274
- gr.Markdown("*Now with fast AI-powered job summaries using local models*")
275
 
276
  with gr.Row():
277
  resume = gr.File(label="Upload Resume (PDF/DOCX)")
@@ -280,8 +198,6 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
280
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
281
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
282
 
283
- gr.Markdown("**Note:** Job summaries are generated using fast local AI models for quick results.")
284
-
285
  find_btn = gr.Button("Find Jobs")
286
 
287
  jobs_state = gr.State([]) # holds full table
@@ -290,10 +206,10 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
290
  # Page info display
291
  page_info = gr.Markdown("")
292
 
293
- # DataFrame for jobs display (removed Company Type column)
294
  jobs_df = gr.DataFrame(
295
- headers=["Role", "Company", "Location", "Posted", "Score", "Summary", "Apply"],
296
- datatype=["str", "str", "str", "str", "str", "str", "html"],
297
  interactive=False,
298
  wrap=True,
299
  value=[]
 
12
  from datetime import datetime
13
  import math
14
  import json
 
15
 
16
  # Initialize components
17
  kw_extractor = yake.KeywordExtractor(n=2, top=30)
18
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
19
  genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  PER_PAGE = 10
22
 
23
  SYSTEM_PROMPT = """
 
25
  rank and explain why each job is a good fit.
26
  """
27
 
 
 
28
  # 1️⃣ Extract text from resume
29
  def extract_text(file):
30
  ext = file.name.lower().split('.')[-1]
 
80
  return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
81
  return []
82
 
83
+ # 4️⃣ Rank jobs by semantic similarity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def rank_jobs(resume_text, jobs):
85
  if not jobs:
86
  return []
 
89
  sims = cosine_similarity(emb_r, emb_j)[0]
90
  return sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
91
 
92
+ # 5️⃣ Gemini refinement (optional)
93
  def refine_with_ai(ranked, resume_text):
94
  lines = []
95
  for job, _ in ranked:
 
118
  return datetime.fromtimestamp(raw).strftime("%Y-%m-%d")
119
  return str(raw)[:10]
120
 
121
+ # 6️⃣ Main pipeline
122
  def find_jobs(file, added_kw, use_ai):
123
  resume = extract_text(file) or ""
124
  base_kws = added_kw.split(",") if added_kw.strip() else extract_keywords(resume)
 
135
  posted = format_posted(job)
136
  apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
137
 
 
 
 
 
138
  # Make sure none of these are dicts/lists
139
  table.append({
140
  "Role": str(role),
 
142
  "Location": str(location),
143
  "Posted": str(posted),
144
  "Score": f"{score*100:.1f}%",
 
145
  "Apply": str(apply_url)
146
  })
147
 
148
  explanation = refine_with_ai(ranked, resume) if use_ai else ""
149
  return table, explanation
150
 
151
+ # 7️⃣ Jobs in DataFrame format
152
  def jobs_to_dataframe(table, page, per_page=PER_PAGE):
153
  total = len(table)
154
  pages = max(1, math.ceil(total / per_page))
 
172
  row['Location'],
173
  row['Posted'],
174
  row['Score'],
 
175
  apply_link
176
  ])
177
 
 
187
 
188
  return full_table, explanation, expl_header, slider_update, first_page_data, page_info
189
 
190
+ # 8️⃣ Gradio UI
191
  with gr.Blocks(theme=gr.themes.Base()) as demo:
192
  gr.Markdown("## 🌍 Global Job Finder")
 
193
 
194
  with gr.Row():
195
  resume = gr.File(label="Upload Resume (PDF/DOCX)")
 
198
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
199
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
200
 
 
 
201
  find_btn = gr.Button("Find Jobs")
202
 
203
  jobs_state = gr.State([]) # holds full table
 
206
  # Page info display
207
  page_info = gr.Markdown("")
208
 
209
+ # DataFrame for jobs display (removed Summary column)
210
  jobs_df = gr.DataFrame(
211
+ headers=["Role", "Company", "Location", "Posted", "Score", "Apply"],
212
+ datatype=["str", "str", "str", "str", "str", "html"],
213
  interactive=False,
214
  wrap=True,
215
  value=[]