Somasundaram Ayyappan committed on
Commit
03116a3
·
1 Parent(s): 613cc9b

Add DATE config and widen entity punctuation cleanup

Browse files
resume_config.json CHANGED
@@ -106,6 +106,10 @@
106
  "the comptia security+": "comptia security+"
107
  }
108
  },
 
 
 
 
109
  "EMAIL": {
110
  "min_length": 5,
111
  "exceptions": [],
 
106
  "the comptia security+": "comptia security+"
107
  }
108
  },
109
+ "DATE": {
110
+ "min_length": 3,
111
+ "exceptions": []
112
+ },
113
  "EMAIL": {
114
  "min_length": 5,
115
  "exceptions": [],
training/structured_postprocess.py CHANGED
@@ -106,7 +106,7 @@ class StructuredPostProcessor:
106
  if pattern.lower() in cleaned.lower():
107
  return None
108
  elif label == "SKILL":
109
- cleaned = re.sub(r"[,.]$", "", cleaned)
110
  elif label == "COMPANY":
111
  if rules.get("strip_trailing_state_code"):
112
  cleaned = re.sub(r",?\s+[A-Z]{2}$", "", cleaned).strip()
 
106
  if pattern.lower() in cleaned.lower():
107
  return None
108
  elif label == "SKILL":
109
+ cleaned = re.sub(r"[,.;:]+$", "", cleaned)
110
  elif label == "COMPANY":
111
  if rules.get("strip_trailing_state_code"):
112
  cleaned = re.sub(r",?\s+[A-Z]{2}$", "", cleaned).strip()