Token Classification
Transformers
ONNX
Safetensors
English
distilbert
resume-parsing
ner
resume
cv
information-extraction
Instructions to use oksomu/resume-ner with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use oksomu/resume-ner with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="oksomu/resume-ner")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("oksomu/resume-ner")
model = AutoModelForTokenClassification.from_pretrained("oksomu/resume-ner")
```
- Notebooks
- Google Colab
- Kaggle
Somasundaram Ayyappan committed on
Commit ·
03116a3
1
Parent(s): 613cc9b
Add DATE config and widen entity punctuation cleanup
Browse files
- resume_config.json +4 -0
- training/structured_postprocess.py +1 -1
resume_config.json
CHANGED
|
@@ -106,6 +106,10 @@
|
|
| 106 |
"the comptia security+": "comptia security+"
|
| 107 |
}
|
| 108 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
"EMAIL": {
|
| 110 |
"min_length": 5,
|
| 111 |
"exceptions": [],
|
|
|
|
| 106 |
"the comptia security+": "comptia security+"
|
| 107 |
}
|
| 108 |
},
|
| 109 |
+
"DATE": {
|
| 110 |
+
"min_length": 3,
|
| 111 |
+
"exceptions": []
|
| 112 |
+
},
|
| 113 |
"EMAIL": {
|
| 114 |
"min_length": 5,
|
| 115 |
"exceptions": [],
|
training/structured_postprocess.py
CHANGED
|
@@ -106,7 +106,7 @@ class StructuredPostProcessor:
|
|
| 106 |
if pattern.lower() in cleaned.lower():
|
| 107 |
return None
|
| 108 |
elif label == "SKILL":
|
| 109 |
-
cleaned = re.sub(r"[,.]$", "", cleaned)
|
| 110 |
elif label == "COMPANY":
|
| 111 |
if rules.get("strip_trailing_state_code"):
|
| 112 |
cleaned = re.sub(r",?\s+[A-Z]{2}$", "", cleaned).strip()
|
|
|
|
| 106 |
if pattern.lower() in cleaned.lower():
|
| 107 |
return None
|
| 108 |
elif label == "SKILL":
|
| 109 |
+
cleaned = re.sub(r"[,.;:]+$", "", cleaned)
|
| 110 |
elif label == "COMPANY":
|
| 111 |
if rules.get("strip_trailing_state_code"):
|
| 112 |
cleaned = re.sub(r",?\s+[A-Z]{2}$", "", cleaned).strip()
|