earlsab committed on
Commit
b2f5d2f
·
1 Parent(s): e62e194

added concurrency

Browse files
Files changed (1) hide show
  1. app.py +80 -36
app.py CHANGED
@@ -5,6 +5,7 @@ import os
5
  import time
6
  from typing import List, Dict, Any
7
  from dotenv import load_dotenv
 
8
 
9
  # Load environment variables
10
  load_dotenv(".env.local")
@@ -115,6 +116,38 @@ def process_skill_quality(text: str) -> Dict:
115
 
116
  return result
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def process_resume(resume_text: str, job_skills: List[str], progress=None, progress_base=0.4, progress_cap=0.9) -> Dict:
119
  """Process resume using the resume endpoint"""
120
  payload = {"inputs": resume_text}
@@ -152,7 +185,7 @@ def process_resume(resume_text: str, job_skills: List[str], progress=None, progr
152
  all_skills = []
153
  processed_sentences = 0
154
 
155
- # Process skill quality for each role description
156
  for job in result:
157
  if "skills" in job:
158
  for skill in job["skills"]:
@@ -160,28 +193,22 @@ def process_resume(resume_text: str, job_skills: List[str], progress=None, progr
160
  skill["text"] = skill.get("name", "Unknown Skill")
161
  all_skills.append(skill)
162
 
163
- # Process skill quality for each bullet point in the job description
164
- if "description" in job:
165
- quality_scores = []
166
- for sentence in job.get("description", []):
167
- quality_score = process_skill_quality(sentence)
168
- # Prioritize leadership over collaboration if both are "Yes"
169
- is_leadership = quality_score["leadership_token"] == "Yes"
170
- is_collaboration = not is_leadership and quality_score["collaboration_token"] == "Yes"
171
- quality_scores.append({
172
- "sentence": sentence,
173
- "is_leadership": is_leadership,
174
- "is_collaboration": is_collaboration
175
- })
176
-
177
- # Update progress
178
- processed_sentences += 1
179
- try:
180
- if progress is not None and total_sentences > 0:
181
- progress_value = progress_base + (progress_step * processed_sentences)
182
- progress(progress_value, desc=f"Processing {processed_sentences}/{total_sentences} sentences...")
183
- except:
184
- pass
185
 
186
  job["quality_scores"] = quality_scores
187
 
@@ -257,36 +284,53 @@ def create_html_output(job_result: Dict, resume_results: List[Dict]) -> str:
257
  html += "</div>"
258
  return html
259
 
 
 
 
 
 
 
 
 
 
 
 
260
  def process_inputs(job_description: str, input_type: str, resume_text: str, resume_files: List[str], progress=gr.Progress()) -> str:
261
  """Main processing function"""
262
  # Process job description
263
  progress(0.1, desc="Processing job description...")
264
  job_result = process_job_description(job_description)
 
265
 
266
  # Process resumes based on input type
267
  resume_results = []
268
  if input_type == "Paste Text":
269
  # Process single resume from text input
270
  progress(0.4, desc="Processing resume structure...")
271
- resume_result = process_resume(resume_text, [skill['text'] for skill in job_result['skills']],
272
  progress=progress, progress_base=0.4, progress_cap=0.9)
273
  resume_results.append(resume_result)
274
  else:
275
- # Process multiple resumes from file uploads
276
  resume_count = len(resume_files)
277
- progress_segment = 0.5 / resume_count
278
 
279
- for i, file_path in enumerate(resume_files):
280
- progress_base = 0.4 + (progress_segment * i)
281
- progress_cap = 0.4 + (progress_segment * (i + 1))
282
-
283
- progress(progress_base, desc=f"Processing resume {i+1} of {resume_count}...")
284
- with open(file_path, 'r', encoding='utf-8') as f:
285
- resume_content = f.read()
 
 
 
 
 
286
 
287
- resume_result = process_resume(resume_content, [skill['text'] for skill in job_result['skills']],
288
- progress=progress, progress_base=progress_base, progress_cap=progress_cap)
289
- resume_results.append(resume_result)
290
 
291
  # Create HTML output
292
  progress(0.9, desc="Generating results...")
 
5
  import time
6
  from typing import List, Dict, Any
7
  from dotenv import load_dotenv
8
+ import concurrent.futures
9
 
10
  # Load environment variables
11
  load_dotenv(".env.local")
 
116
 
117
  return result
118
 
119
def process_skill_quality_batch(sentences):
    """Score several sentences through the skill quality endpoint concurrently.

    Each sentence is submitted to ``process_skill_quality`` on a thread pool.
    Results are returned in the same order as ``sentences`` (not in completion
    order), so callers that display or index the scores see a stable,
    input-aligned list.

    Args:
        sentences: Iterable of sentence strings to score.

    Returns:
        A list with one dict per input sentence, each containing:
        ``sentence``, ``is_leadership``, ``is_collaboration`` and ``raw_score``.
        Leadership takes priority: ``is_collaboration`` is only True when the
        sentence is not flagged as leadership. A sentence whose scoring fails
        gets both flags set to False and a neutral ``raw_score``.
    """
    sentences = list(sentences)
    if not sentences:
        # Nothing to score; avoid spinning up a thread pool for no work.
        return []

    results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit everything up front so the calls overlap, but keep the
        # futures in submission order so output order matches input order.
        futures = [
            executor.submit(process_skill_quality, sentence)
            for sentence in sentences
        ]

        for sentence, future in zip(sentences, futures):
            try:
                quality_score = future.result()
                # Prioritize leadership over collaboration if both are "Yes"
                is_leadership = quality_score["leadership_token"] == "Yes"
                is_collaboration = not is_leadership and quality_score["collaboration_token"] == "Yes"
                results.append({
                    "sentence": sentence,
                    "is_leadership": is_leadership,
                    "is_collaboration": is_collaboration,
                    "raw_score": quality_score
                })
            except Exception as e:
                # Best effort: one failed sentence must not abort the batch.
                print(f"Error processing sentence: {sentence[:30]}... - {str(e)}")
                results.append({
                    "sentence": sentence,
                    "is_leadership": False,
                    "is_collaboration": False,
                    "raw_score": {"leadership": 0, "leadership_token": "No", "collaboration": 0, "collaboration_token": "No"}
                })

    return results
150
+
151
  def process_resume(resume_text: str, job_skills: List[str], progress=None, progress_base=0.4, progress_cap=0.9) -> Dict:
152
  """Process resume using the resume endpoint"""
153
  payload = {"inputs": resume_text}
 
185
  all_skills = []
186
  processed_sentences = 0
187
 
188
+ # Process skill quality for each role description concurrently
189
  for job in result:
190
  if "skills" in job:
191
  for skill in job["skills"]:
 
193
  skill["text"] = skill.get("name", "Unknown Skill")
194
  all_skills.append(skill)
195
 
196
+ # Process skill quality for sentences in parallel
197
+ if "description" in job and job["description"]:
198
+ # Get all sentences for this job
199
+ sentences = job.get("description", [])
200
+
201
+ # Process all sentences for this job concurrently
202
+ quality_scores = process_skill_quality_batch(sentences)
203
+
204
+ # Update progress after batch processing
205
+ processed_sentences += len(sentences)
206
+ try:
207
+ if progress is not None and total_sentences > 0:
208
+ progress_value = progress_base + (progress_step * processed_sentences)
209
+ progress(progress_value, desc=f"Processing {processed_sentences}/{total_sentences} sentences...")
210
+ except:
211
+ pass
 
 
 
 
 
 
212
 
213
  job["quality_scores"] = quality_scores
214
 
 
284
  html += "</div>"
285
  return html
286
 
287
def process_single_resume(file_path, job_skills, progress=None, resume_index=0, total_resumes=1):
    """Read one resume file from disk and run it through process_resume.

    The progress window handed to process_resume is this resume's slice of
    the 0.4-0.9 range, determined by resume_index out of total_resumes.
    """
    # Progress bounds are computed before the file is touched.
    progress_window = {
        "progress_base": 0.4 + (0.5 * resume_index / total_resumes),
        "progress_cap": 0.4 + (0.5 * (resume_index + 1) / total_resumes),
    }

    with open(file_path, 'r', encoding='utf-8') as resume_file:
        resume_body = resume_file.read()

    return process_resume(resume_body, job_skills, progress=progress, **progress_window)
297
+
298
  def process_inputs(job_description: str, input_type: str, resume_text: str, resume_files: List[str], progress=gr.Progress()) -> str:
299
  """Main processing function"""
300
  # Process job description
301
  progress(0.1, desc="Processing job description...")
302
  job_result = process_job_description(job_description)
303
+ job_skills = [skill['text'] for skill in job_result['skills']]
304
 
305
  # Process resumes based on input type
306
  resume_results = []
307
  if input_type == "Paste Text":
308
  # Process single resume from text input
309
  progress(0.4, desc="Processing resume structure...")
310
+ resume_result = process_resume(resume_text, job_skills,
311
  progress=progress, progress_base=0.4, progress_cap=0.9)
312
  resume_results.append(resume_result)
313
  else:
314
+ # Process multiple resumes from file uploads in parallel
315
  resume_count = len(resume_files)
316
+ progress(0.4, desc=f"Processing {resume_count} resumes in parallel...")
317
 
318
+ with concurrent.futures.ThreadPoolExecutor() as executor:
319
+ # Submit all resume processing tasks
320
+ future_to_resume = {
321
+ executor.submit(
322
+ process_single_resume,
323
+ file_path,
324
+ job_skills,
325
+ progress,
326
+ i,
327
+ resume_count
328
+ ): i for i, file_path in enumerate(resume_files)
329
+ }
330
 
331
+ # Collect results as they complete
332
+ for future in concurrent.futures.as_completed(future_to_resume):
333
+ resume_results.append(future.result())
334
 
335
  # Create HTML output
336
  progress(0.9, desc="Generating results...")