riteshkokam committed on
Commit
9f93bdb
·
verified ·
1 Parent(s): 8b7bc54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -79
app.py CHANGED
@@ -12,11 +12,28 @@ from google.genai.types import GenerateContentConfig, ThinkingConfig
12
  from datetime import datetime
13
  import math
14
  import json
 
15
 
16
  # Initialize components
17
  kw_extractor = yake.KeywordExtractor(n=2, top=30)
18
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
19
  genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  PER_PAGE = 10
21
 
22
  SYSTEM_PROMPT = """
@@ -24,21 +41,7 @@ You are a job-matching assistant. Given a resume and job listings,
24
  rank and explain why each job is a good fit.
25
  """
26
 
27
- ANALYSIS_PROMPT = """
28
- Analyze the following job posting and provide:
29
- 1. Company Type: One of [Startup, Mid-size, Enterprise, Non-profit, Government, Consulting, Agency, Other]
30
- 2. Job Summary: A concise 1-2 sentence summary of the role
31
 
32
- Job Title: {title}
33
- Company: {company}
34
- Job Description: {description}
35
-
36
- Please respond in JSON format:
37
- {
38
- "company_type": "one of the categories above",
39
- "job_summary": "1-2 sentence summary"
40
- }
41
- """
42
 
43
  # 1️⃣ Extract text from resume
44
  def extract_text(file):
@@ -95,71 +98,62 @@ def fetch_remoteok(keywords):
95
  return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
96
  return []
97
 
98
- # 4️⃣ Analyze job with AI to get company type and summary
99
- def analyze_job_with_ai(job):
100
- """Use AI to extract company type and job summary"""
101
  try:
102
- title = job.get("title") or job.get("position", "")
103
- company = job.get("company") or job.get("company_name", "")
104
  description = job.get("description", "")
 
105
 
106
  # Skip if no description available
107
- if not description or len(description.strip()) < 10:
108
- return "Other", "Limited job description available"
109
-
110
- # Truncate description if too long (to stay within token limits)
111
- if len(description) > 800:
112
- description = description[:800] + "..."
113
-
114
- prompt = f"""
115
- Analyze this job posting and provide:
116
- 1. Company Type: Choose ONE from [Startup, Mid-size, Enterprise, Non-profit, Government, Consulting, Agency, Other]
117
- 2. Job Summary: Write a concise 1-2 sentence summary of the main responsibilities
118
-
119
- Job Title: {title}
120
- Company: {company}
121
- Job Description: {description}
122
-
123
- Respond in this exact format:
124
- Company Type: [choose one category]
125
- Job Summary: [1-2 sentences describing the role]
126
- """
127
-
128
- resp = genai_client.models.generate_content(
129
- model="gemini-2.5-flash",
130
- contents=prompt,
131
- )
132
-
133
- response_text = resp.text or ""
134
 
135
- # Extract company type and summary from response
136
- company_type = "Other"
137
- job_summary = "No summary available"
138
 
139
- lines = response_text.split('\n')
140
- for line in lines:
141
- line = line.strip()
142
- if line.lower().startswith('company type:'):
143
- company_type = line.split(':', 1)[1].strip()
144
- elif line.lower().startswith('job summary:'):
145
- job_summary = line.split(':', 1)[1].strip()
146
-
147
- # Validate company type
148
- valid_types = ["Startup", "Mid-size", "Enterprise", "Non-profit", "Government", "Consulting", "Agency", "Other"]
149
- if company_type not in valid_types:
150
- company_type = "Other"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- # Ensure job summary is not empty
153
- if not job_summary or job_summary.lower() in ["no summary available", "n/a", ""]:
154
- job_summary = f"Position involves {title.lower()} responsibilities at {company}"
 
 
155
 
156
- return company_type, job_summary
 
157
 
158
  except Exception as e:
159
- print(f"Error analyzing job with AI: {e}")
160
  title = job.get("title") or job.get("position", "")
161
- company = job.get("company") or job.get("company_name", "")
162
- return "Other", f"Role involves {title.lower()} responsibilities" if title else "Job details available upon application"
163
 
164
  # 5️⃣ Rank jobs by semantic similarity
165
  def rank_jobs(resume_text, jobs):
@@ -216,15 +210,14 @@ def find_jobs(file, added_kw, use_ai):
216
  posted = format_posted(job)
217
  apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
218
 
219
- # Get company type and summary using AI
220
  print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
221
- company_type, job_summary = analyze_job_with_ai(job)
222
 
223
  # Make sure none of these are dicts/lists
224
  table.append({
225
  "Role": str(role),
226
  "Company": str(company),
227
- "Company Type": str(company_type),
228
  "Location": str(location),
229
  "Posted": str(posted),
230
  "Score": f"{score*100:.1f}%",
@@ -246,13 +239,16 @@ def jobs_to_dataframe(table, page, per_page=PER_PAGE):
246
  # Convert to list of lists for DataFrame
247
  df_data = []
248
  for row in slice_:
249
- # Create clickable link for apply URL
250
- apply_link = f'<a href="{row["Apply"]}" target="_blank">Apply</a>' if row['Apply'] else "N/A"
 
 
 
 
251
 
252
  df_data.append([
253
  row['Role'],
254
  row['Company'],
255
- row['Company Type'],
256
  row['Location'],
257
  row['Posted'],
258
  row['Score'],
@@ -275,7 +271,7 @@ def load_jobs_and_pages(resume, added_kw, use_ai):
275
  # 9️⃣ Gradio UI
276
  with gr.Blocks(theme=gr.themes.Base()) as demo:
277
  gr.Markdown("## 🌍 Global Job Finder")
278
- gr.Markdown("*Now with AI-powered company type classification and job summaries*")
279
 
280
  with gr.Row():
281
  resume = gr.File(label="Upload Resume (PDF/DOCX)")
@@ -284,7 +280,7 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
284
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
285
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
286
 
287
- gr.Markdown("**Note:** AI analysis for company type and job summaries is automatically enabled and may take a few moments per job.")
288
 
289
  find_btn = gr.Button("Find Jobs")
290
 
@@ -294,10 +290,10 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
294
  # Page info display
295
  page_info = gr.Markdown("")
296
 
297
- # DataFrame for jobs display
298
  jobs_df = gr.DataFrame(
299
- headers=["Role", "Company", "Type", "Location", "Posted", "Score", "Summary", "Apply"],
300
- datatype=["str", "str", "str", "str", "str", "str", "str", "html"],
301
  interactive=False,
302
  wrap=True,
303
  value=[]
 
12
  from datetime import datetime
13
  import math
14
  import json
15
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
16
 
17
  # Initialize components
18
  kw_extractor = yake.KeywordExtractor(n=2, top=30)
19
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
20
  genai_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
21
+
22
# Load the local summarization pipeline once at import time so every request
# reuses the same model instance. On any failure `summarizer` is set to None
# and callers are expected to fall back to plain-text heuristics.
print("Loading summarization model...")
try:
    # Imported here rather than at file top so that a missing or broken torch
    # install is handled by the same fallback path as a failed model load.
    import torch

    summarizer = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",  # BART summarization checkpoint
        tokenizer="facebook/bart-large-cnn",
        # Ask the framework whether CUDA is actually usable. Shelling out to
        # `nvidia-smi` only proves the driver tool exists (and spams its
        # output into the log); with a CPU-only torch build it would select
        # device 0 and make the pipeline fail to load.
        device=0 if torch.cuda.is_available() else -1,
    )
    print("Summarization model loaded successfully!")
except Exception as e:
    # Deliberate best-effort: the app must still start without the model.
    print(f"Error loading model, falling back to basic summarization: {e}")
    summarizer = None
36
+
37
  PER_PAGE = 10
38
 
39
  SYSTEM_PROMPT = """
 
41
  rank and explain why each job is a good fit.
42
  """
43
 
 
 
 
 
44
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  # 1️⃣ Extract text from resume
47
  def extract_text(file):
 
98
  return [j for j in data if any(kw.lower() in (j.get("position","") + j.get("description","")).lower() for kw in keywords)]
99
  return []
100
 
101
+ # 4️⃣ Analyze job with local Hugging Face model for fast summaries
102
# Words that suggest a sentence describes what the role actually does.
_ROLE_KEYWORDS = ('responsible', 'role', 'position', 'work', 'develop', 'manage', 'lead')


def _as_text(value):
    """Return *value* as a string, mapping None to the empty string."""
    return "" if value is None else str(value)


def get_job_summary_fast(job):
    """Return a short one-line summary for a job posting.

    Strategy, in order:
      1. If the description is missing or shorter than 20 characters
         (ignoring surrounding whitespace), return a title-based placeholder.
      2. If the module-level ``summarizer`` pipeline is loaded, summarize the
         first 800 characters of the description with it.
      3. If the model is unavailable, fails, or yields nothing usable, return
         the first of the three leading sentences that is longer than 30
         characters and contains a role-related keyword.
      4. As a last resort, return the title plus a 50-character excerpt.

    Args:
        job: Mapping for one posting. Reads ``description`` and ``title``
            (falling back to ``position``). Values are coerced to ``str`` so
            that non-string payloads do not break the string handling below.

    Returns:
        A non-empty summary string. Errors raised while summarizing are
        printed and answered with a title-based fallback instead of
        propagating.
    """
    placeholder = "Job details available upon application"
    # Resolved and coerced before the try block so the except handler can
    # safely call .lower() on it.
    title = _as_text(job.get("title") or job.get("position", ""))

    try:
        description = _as_text(job.get("description", ""))

        # Skip if no meaningful description is available
        if len(description.strip()) < 20:
            return f"Position for {title}" if title else placeholder

        # Flatten line breaks so the text reads as a single paragraph
        clean_desc = description.replace('\n', ' ').replace('\r', ' ').strip()

        # Use local model if available
        if summarizer:
            try:
                # BART accepts at most 1024 tokens; an 800-character cap keeps
                # the input well under that and bounds latency, while
                # truncation=True guards any remaining overflow. A separate
                # variable is used so the fallback below still sees the full
                # text.
                model_input = clean_desc
                if len(model_input) > 800:
                    model_input = model_input[:800] + "..."

                summary_result = summarizer(
                    model_input,
                    max_length=60,  # Keep summaries concise
                    min_length=20,
                    do_sample=False,
                    truncation=True,
                )

                summary = summary_result[0]['summary_text']
                if summary:
                    # Drop boilerplate lead-ins the model tends to produce
                    summary = (
                        summary.replace("The job involves", "")
                        .replace("This position", "Position")
                        .strip()
                    )
                # Re-check after cleanup: stripping the lead-in can leave
                # nothing, in which case fall through to the text fallback
                # rather than returning a bare ".".
                if summary:
                    if not summary.endswith('.'):
                        summary += '.'
                    return summary

            except Exception as e:
                # Best-effort: a model failure must not lose the summary.
                print(f"Local model error: {e}")

        # Fallback: first meaningful sentence among the first three
        for sentence in clean_desc.split('.')[:3]:
            candidate = sentence.strip()
            lowered = sentence.lower()
            if len(candidate) > 30 and any(word in lowered for word in _ROLE_KEYWORDS):
                return candidate + '.'

        # Last fallback
        return f"{title} role with responsibilities in {clean_desc[:50]}..." if title else placeholder

    except Exception as e:
        print(f"Error getting job summary: {e}")
        return f"Role involves {title.lower()} responsibilities" if title else placeholder
 
157
 
158
  # 5️⃣ Rank jobs by semantic similarity
159
  def rank_jobs(resume_text, jobs):
 
210
  posted = format_posted(job)
211
  apply_url= job.get("url") or job.get("apply_url","") or job.get("joblink","") or ""
212
 
213
+ # Get fast job summary using local model
214
  print(f"Analyzing job {i+1}/{len(ranked)}: {role} at {company}")
215
+ job_summary = get_job_summary_fast(job)
216
 
217
  # Make sure none of these are dicts/lists
218
  table.append({
219
  "Role": str(role),
220
  "Company": str(company),
 
221
  "Location": str(location),
222
  "Posted": str(posted),
223
  "Score": f"{score*100:.1f}%",
 
239
  # Convert to list of lists for DataFrame
240
  df_data = []
241
  for row in slice_:
242
+ # Create properly formatted clickable link for apply URL
243
+ if row['Apply'] and row['Apply'] != 'N/A':
244
+ # Format as HTML link with proper styling
245
+ apply_link = f'<a href="{row["Apply"]}" target="_blank" style="color: #2563eb; text-decoration: underline;">Apply</a>'
246
+ else:
247
+ apply_link = "N/A"
248
 
249
  df_data.append([
250
  row['Role'],
251
  row['Company'],
 
252
  row['Location'],
253
  row['Posted'],
254
  row['Score'],
 
271
  # 9️⃣ Gradio UI
272
  with gr.Blocks(theme=gr.themes.Base()) as demo:
273
  gr.Markdown("## 🌍 Global Job Finder")
274
+ gr.Markdown("*Now with fast AI-powered job summaries using local models*")
275
 
276
  with gr.Row():
277
  resume = gr.File(label="Upload Resume (PDF/DOCX)")
 
280
  resume.upload(on_resume_upload, inputs=[resume], outputs=[added])
281
  use_ai = gr.Checkbox(label="Use AI to refine explanation", value=False)
282
 
283
+ gr.Markdown("**Note:** Job summaries are generated using fast local AI models for quick results.")
284
 
285
  find_btn = gr.Button("Find Jobs")
286
 
 
290
  # Page info display
291
  page_info = gr.Markdown("")
292
 
293
+ # DataFrame for jobs display (removed Company Type column)
294
  jobs_df = gr.DataFrame(
295
+ headers=["Role", "Company", "Location", "Posted", "Score", "Summary", "Apply"],
296
+ datatype=["str", "str", "str", "str", "str", "str", "html"],
297
  interactive=False,
298
  wrap=True,
299
  value=[]