ejqs committed on
Commit
56c05e7
·
1 Parent(s): ad4ea7b

enhance role description handling in extract_resume_roles method

Browse files
Files changed (1) hide show
  1. handler.py +10 -2
handler.py CHANGED
@@ -185,20 +185,28 @@ class EndpointHandler():
185
 
186
  # If element is a description, directly append to the last role in the array
187
  if self.Resume_labels[item]['value'] == "EDC":
 
 
 
 
188
  if roles:
189
  roles[-1]["description"].append(lines[i])
190
  else:
191
  # Optionally, log the error or create a default role
192
  print("Warning: Description found but no role header exists. Skipping this description.")
193
 
 
 
 
 
 
194
  # Cleaning description
195
  for item in roles:
196
  sentences = self.capture_sentences(item['description'])
197
  item['description'] = sentences
198
 
199
  return roles
200
-
201
-
202
  def parse_date(self, date_str):
203
  """Tries multiple formats to parse a date string into a datetime object.
204
 
 
185
 
186
  # If element is a description, directly append to the last role in the array
187
  if self.Resume_labels[item]['value'] == "EDC":
188
+ # If a description is stuck between two EJT lines, it is most likely an EJT itself
189
+ if i - 1 > 0 and i + 1 < len(predictions) and self.Resume_labels[predictions[i - 1]]['value'] == "EJT" and self.Resume_labels[predictions[i + 1]]['value'] == "EJT":
190
+ roles[len(roles) - 1]["title"].append(lines[i])
191
+ continue
192
  if roles:
193
  roles[-1]["description"].append(lines[i])
194
  else:
195
  # Optionally, log the error or create a default role
196
  print("Warning: Description found but no role header exists. Skipping this description.")
197
 
198
+ # If an element is neither an EJT nor an EDC but is stuck between two EDC lines, it is most likely an EDC
199
+ if self.Resume_labels[item]['value'] != "EDC" and self.Resume_labels[item]['value'] != "EJT":
200
+ if i - 1 > 0 and i + 1 < len(predictions) and self.Resume_labels[predictions[i - 1]]['value'] == "EDC" and self.Resume_labels[predictions[i + 1]]['value'] == "EDC":
201
+ roles[-1]["description"].append(lines[i])
202
+
203
  # Cleaning description
204
  for item in roles:
205
  sentences = self.capture_sentences(item['description'])
206
  item['description'] = sentences
207
 
208
  return roles
209
+
 
210
  def parse_date(self, date_str):
211
  """Tries multiple formats to parse a date string into a datetime object.
212