earlsab
committed on
Commit
·
ea32026
1
Parent(s):
ceba096
add connection to another inference
Browse files- .gitignore +1 -0
- handler.py +73 -6
- requirements.txt +2 -1
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env.local
|
handler.py
CHANGED
|
@@ -10,18 +10,28 @@ from skillNer.general_params import SKILL_DB
|
|
| 10 |
from skillNer.skill_extractor_class import SkillExtractor
|
| 11 |
import torch
|
| 12 |
from transformers import LongformerTokenizer
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
import torch
|
| 15 |
import torch.nn.functional as F
|
| 16 |
from transformers import LongformerTokenizer
|
| 17 |
import re
|
| 18 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
Resume_num_labels = None
|
| 21 |
class EndpointHandler():
|
| 22 |
def __init__(self, path=""):
|
| 23 |
# Label mapping as provided
|
| 24 |
# Resume Label Mapping
|
|
|
|
|
|
|
|
|
|
| 25 |
self.Resume_label_map = {
|
| 26 |
"RT": 0, # Resume Title
|
| 27 |
"SST": 1, # Summary Section Title
|
|
@@ -276,24 +286,81 @@ class EndpointHandler():
|
|
| 276 |
print("Defaulted to: ", possible_dates[0][1])
|
| 277 |
return possible_dates[0][0] # Return chosen date
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
def label_resume(self, text):
|
| 280 |
results = self.extract_resume_roles(text)
|
| 281 |
for item in results:
|
| 282 |
# Extracting dates
|
| 283 |
context = (" ".join(item["title"]))
|
| 284 |
-
|
| 285 |
-
|
|
|
|
| 286 |
|
| 287 |
# Try parsing the dates; default to 0 for role_length if parsing fails.
|
| 288 |
try:
|
| 289 |
-
date_started_formatted = self.parse_date(date_started)
|
| 290 |
except ValueError:
|
| 291 |
date_started_formatted = None
|
| 292 |
|
| 293 |
-
# date_started_formatted = parse_date(date_started)
|
| 294 |
-
# date_ended_formatted = parse_date(date_ended)
|
| 295 |
try:
|
| 296 |
-
date_ended_formatted = self.parse_date(date_ended)
|
| 297 |
except ValueError:
|
| 298 |
date_ended_formatted = None
|
| 299 |
|
|
|
|
| 10 |
from skillNer.skill_extractor_class import SkillExtractor
|
| 11 |
import torch
|
| 12 |
from transformers import LongformerTokenizer
|
| 13 |
+
import requests
|
| 14 |
+
import os
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
|
| 17 |
import torch
|
| 18 |
import torch.nn.functional as F
|
| 19 |
from transformers import LongformerTokenizer
|
| 20 |
import re
|
| 21 |
from datetime import datetime
|
| 22 |
+
import time
|
| 23 |
+
|
| 24 |
+
# Load environment variables from .env.local
|
| 25 |
+
load_dotenv('.env.local')
|
| 26 |
|
| 27 |
Resume_num_labels = None
|
| 28 |
class EndpointHandler():
|
| 29 |
def __init__(self, path=""):
|
| 30 |
# Label mapping as provided
|
| 31 |
# Resume Label Mapping
|
| 32 |
+
self.hf_token = os.getenv('HUGGINGFACE_TOKEN')
|
| 33 |
+
if not self.hf_token:
|
| 34 |
+
print("Warning: HUGGINGFACE_TOKEN environment variable not set")
|
| 35 |
self.Resume_label_map = {
|
| 36 |
"RT": 0, # Resume Title
|
| 37 |
"SST": 1, # Summary Section Title
|
|
|
|
| 286 |
print("Defaulted to: ", possible_dates[0][1])
|
| 287 |
return possible_dates[0][0] # Return chosen date
|
| 288 |
|
| 289 |
+
def extract_dates_from_context(self, context):
    """Extract start/end dates from ``context`` via the date extraction endpoint.

    Posts ``{"inputs": context}`` to the remote inference endpoint and
    returns its JSON payload. This is a best-effort call: every failure
    path is logged with ``print`` and reported to the caller as
    ``{"start_date": None, "end_date": None}`` instead of raising.

    Retry policy (at most ``max_retries`` attempts in total):
      * HTTP 503: wait 20 seconds after the first attempt, then 2 seconds
        between later attempts.
      * Timeouts and any other exception: wait ``retry_delay`` seconds.
      * Any other non-200 status: give up immediately.

    Args:
        context: Text to search for dates. Empty or whitespace-only text
            short-circuits without contacting the endpoint.

    Returns:
        The decoded JSON object from the endpoint on success, otherwise a
        fresh ``{"start_date": None, "end_date": None}`` dict.
    """
    endpoint_url = "https://wsk6b4yr3gxrlm4v.us-east-1.aws.endpoints.huggingface.cloud"
    max_retries = 5        # Total number of attempts
    retry_delay = 5        # Seconds to wait after a timeout / unexpected error
    first_503_wait = 20    # Seconds to wait after the first 503 response
    later_503_wait = 2     # Seconds to wait after subsequent 503 responses
    no_dates = {"start_date": None, "end_date": None}

    # Nothing to extract from: skip the network round trip (and its retries).
    if not context or (isinstance(context, str) and not context.strip()):
        return dict(no_dates)

    # Only send an Authorization header when a token is actually configured;
    # otherwise the header would carry the literal string "Bearer None".
    headers = {}
    if self.hf_token:
        headers["Authorization"] = f"Bearer {self.hf_token}"

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        try:
            response = requests.post(
                endpoint_url,
                json={"inputs": context},
                headers=headers,
                timeout=30,
            )
            status = response.status_code

            if status == 200:
                payload = response.json()
                # The caller reads the result with .get(); anything that is
                # not a JSON object would crash there, so treat it as a miss.
                if not isinstance(payload, dict):
                    print(f"Unexpected response from date extraction endpoint: {payload!r}")
                    return dict(no_dates)
                return payload

            if status == 503:
                if is_last_attempt:
                    print("Service unavailable after maximum retries")
                    return dict(no_dates)
                wait_seconds = first_503_wait if attempt == 0 else later_503_wait
                print(
                    f"Service temporarily unavailable (503). Waiting {wait_seconds} seconds... "
                    f"(Attempt {attempt + 1}/{max_retries})"
                )
                time.sleep(wait_seconds)
                continue

            if status == 404:
                print("Endpoint not found. Please check if the endpoint URL is correct.")
            elif status == 401:
                print("Authentication failed. Please check your Hugging Face token.")
            else:
                print(f"Error calling date extraction endpoint: {status}")
                print(f"Response: {response.text}")
            return dict(no_dates)

        except requests.exceptions.Timeout:
            print(f"Request timed out. Attempt {attempt + 1}/{max_retries}")
        except Exception as e:
            # Deliberately broad: date extraction is optional enrichment and
            # must never take down resume labelling.
            print(f"Exception while calling date extraction endpoint: {str(e)}")

        # Reached only from an exception handler: back off and retry, or
        # give up once the attempts are exhausted.
        if is_last_attempt:
            return dict(no_dates)
        time.sleep(retry_delay)

    return dict(no_dates)
|
| 346 |
+
|
| 347 |
def label_resume(self, text):
|
| 348 |
results = self.extract_resume_roles(text)
|
| 349 |
for item in results:
|
| 350 |
# Extracting dates
|
| 351 |
context = (" ".join(item["title"]))
|
| 352 |
+
dates = self.extract_dates_from_context(context)
|
| 353 |
+
date_started = dates.get("start_date")
|
| 354 |
+
date_ended = dates.get("end_date")
|
| 355 |
|
| 356 |
# Try parsing the dates; default to 0 for role_length if parsing fails.
|
| 357 |
try:
|
| 358 |
+
date_started_formatted = self.parse_date(date_started) if date_started else None
|
| 359 |
except ValueError:
|
| 360 |
date_started_formatted = None
|
| 361 |
|
|
|
|
|
|
|
| 362 |
try:
|
| 363 |
+
date_ended_formatted = self.parse_date(date_ended) if date_ended else None
|
| 364 |
except ValueError:
|
| 365 |
date_ended_formatted = None
|
| 366 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
skillNer>=1.0.0
|
| 2 |
spacy>=3.7.2
|
| 3 |
en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.0/en_core_web_lg-3.7.0-py3-none-any.whl
|
| 4 |
-
ipython>=8.12.0
|
|
|
|
|
|
| 1 |
skillNer>=1.0.0
|
| 2 |
spacy>=3.7.2
|
| 3 |
en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.0/en_core_web_lg-3.7.0-py3-none-any.whl
|
| 4 |
+
ipython>=8.12.0
|
| 5 |
+
python-dotenv>=1.0.0
|