import unittest
from training.labels import LABEL2ID
from training.structured_postprocess import StructuredPostProcessor, build_text_and_spans
class StructuredPostprocessTest(unittest.TestCase):
    """Tests for ``build_text_and_spans`` and ``StructuredPostProcessor``."""

    @staticmethod
    def _id2label():
        """Return ``LABEL2ID`` inverted, so each value maps back to its label string."""
        return {tag_id: label for label, tag_id in LABEL2ID.items()}

    @staticmethod
    def _split(labelled_tokens):
        """Split ``(token, label)`` pairs into a token list and a ``LABEL2ID`` value list."""
        words = [word for word, _ in labelled_tokens]
        tag_ids = [LABEL2ID[label] for _, label in labelled_tokens]
        return words, tag_ids

    def test_build_text_and_spans_round_trip(self):
        """Expect space-joined text and one span per B-/I- run; ``O`` tokens yield no span."""
        words, tag_ids = self._split(
            [
                ("Jane", "B-NAME"),
                ("Doe", "I-NAME"),
                ("jane@example.com", "B-EMAIL"),
                ("|", "O"),
                ("+1", "B-PHONE"),
                ("5551234567", "I-PHONE"),
                ("|", "O"),
                ("Seattle,", "B-LOCATION"),
                ("WA", "I-LOCATION"),
            ]
        )

        text, spans = build_text_and_spans(words, tag_ids, self._id2label())

        self.assertEqual(text, "Jane Doe jane@example.com | +1 5551234567 | Seattle, WA")
        found = [(span.label, span.text) for span in spans]
        expected = [
            ("NAME", "Jane Doe"),
            ("EMAIL", "jane@example.com"),
            ("PHONE", "+1 5551234567"),
            ("LOCATION", "Seattle, WA"),
        ]
        self.assertEqual(found, expected)

    def test_structured_postprocess_builds_expected_fields(self):
        """Expect spans to be assembled into ``personal`` and ``experience`` fields."""
        # The processor is built before the spans, as in the original test.
        processor = StructuredPostProcessor(".")
        words, tag_ids = self._split(
            [
                ("Jane", "B-NAME"),
                ("Doe", "I-NAME"),
                ("jane@example.com", "B-EMAIL"),
                ("+1", "B-PHONE"),
                ("5551234567", "I-PHONE"),
                ("Seattle,", "B-LOCATION"),
                ("WA", "I-LOCATION"),
                ("Engineer", "B-TITLE"),
                ("Acme", "B-COMPANY"),
                ("January", "B-DATE"),
                ("2020", "I-DATE"),
                # A second B-DATE starts a separate span, asserted as end_date below.
                ("Present", "B-DATE"),
            ]
        )
        text, spans = build_text_and_spans(words, tag_ids, self._id2label())

        parsed = processor.build_structured_resume_from_spans(spans, text)

        personal = parsed["personal"]
        self.assertEqual(personal["name"], "Jane Doe")
        self.assertEqual(personal["email"], "jane@example.com")
        self.assertEqual(parsed["experience"][0]["title"], "Engineer")
        self.assertEqual(parsed["experience"][0]["company"], "Acme")
        self.assertEqual(parsed["experience"][0]["start_date"], "January 2020")
        self.assertEqual(parsed["experience"][0]["end_date"], "Present")

    def test_normalizes_skills_and_certifications(self):
        """Expect skill aliases and certification names to map to canonical forms."""
        processor = StructuredPostProcessor(".")

        node_skill = processor.normalize_skill("node js")
        self.assertEqual(node_skill, "node.js")

        cpp_skill = processor.normalize_skill("cpp")
        self.assertEqual(cpp_skill, "c++")

        certification = processor.normalize_certification("ServSafe Manager")
        self.assertEqual(certification, "servsafe manager certification")

    def test_compute_years_uses_floor(self):
        """Expect January 2020 through June 2022 to be reported as 2 years."""
        processor = StructuredPostProcessor(".")
        history = [{"start_date": "January 2020", "end_date": "June 2022"}]

        years = processor.compute_years(history)

        self.assertEqual(years, 2)
# Run the test suite only when this file is executed directly, not when imported.
if __name__ == "__main__":
    unittest.main()
|