File size: 3,355 Bytes
750e1a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4129d85
 
 
 
 
 
 
 
 
 
 
 
 
750e1a2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import unittest

from training.labels import LABEL2ID
from training.structured_postprocess import StructuredPostProcessor, build_text_and_spans


class StructuredPostprocessTest(unittest.TestCase):
    """Tests for ``build_text_and_spans`` and ``StructuredPostProcessor``."""

    @staticmethod
    def _id_to_label():
        """Return the inverse of ``LABEL2ID`` (tag id -> tag name)."""
        return {tag_id: tag_name for tag_name, tag_id in LABEL2ID.items()}

    def test_build_text_and_spans_round_trip(self):
        # Each token is paired with its BIO tag name; the "|" separators are
        # tagged "O" and are expected in the text but not in any span.
        labelled = [
            ("Jane", "B-NAME"),
            ("Doe", "I-NAME"),
            ("jane@example.com", "B-EMAIL"),
            ("|", "O"),
            ("+1", "B-PHONE"),
            ("5551234567", "I-PHONE"),
            ("|", "O"),
            ("Seattle,", "B-LOCATION"),
            ("WA", "I-LOCATION"),
        ]
        tokens = [word for word, _ in labelled]
        tags = [LABEL2ID[tag_name] for _, tag_name in labelled]

        text, spans = build_text_and_spans(tokens, tags, self._id_to_label())

        self.assertEqual(text, "Jane Doe jane@example.com | +1 5551234567 | Seattle, WA")
        expected_spans = [
            ("NAME", "Jane Doe"),
            ("EMAIL", "jane@example.com"),
            ("PHONE", "+1 5551234567"),
            ("LOCATION", "Seattle, WA"),
        ]
        actual_spans = [(span.label, span.text) for span in spans]
        self.assertEqual(actual_spans, expected_spans)

    def test_structured_postprocess_builds_expected_fields(self):
        # The processor is constructed before the spans are built, as the
        # span builder does not depend on it.
        processor = StructuredPostProcessor(".")
        labelled = [
            ("Jane", "B-NAME"),
            ("Doe", "I-NAME"),
            ("jane@example.com", "B-EMAIL"),
            ("+1", "B-PHONE"),
            ("5551234567", "I-PHONE"),
            ("Seattle,", "B-LOCATION"),
            ("WA", "I-LOCATION"),
            ("Engineer", "B-TITLE"),
            ("Acme", "B-COMPANY"),
            ("January", "B-DATE"),
            ("2020", "I-DATE"),
            ("Present", "B-DATE"),
        ]
        tokens = [word for word, _ in labelled]
        tags = [LABEL2ID[tag_name] for _, tag_name in labelled]
        text, spans = build_text_and_spans(tokens, tags, self._id_to_label())

        parsed = processor.build_structured_resume_from_spans(spans, text)

        # Personal details come from the NAME and EMAIL spans.
        self.assertEqual(parsed["personal"]["name"], "Jane Doe")
        self.assertEqual(parsed["personal"]["email"], "jane@example.com")

        # The first experience entry carries the title, company and both dates.
        expected_experience = [
            ("title", "Engineer"),
            ("company", "Acme"),
            ("start_date", "January 2020"),
            ("end_date", "Present"),
        ]
        for field, expected in expected_experience:
            self.assertEqual(parsed["experience"][0][field], expected)

    def test_normalizes_skills_and_certifications(self):
        processor = StructuredPostProcessor(".")
        # (normaliser, raw input, expected canonical form), checked in order.
        cases = [
            (processor.normalize_skill, "node js", "node.js"),
            (processor.normalize_skill, "cpp", "c++"),
            (
                processor.normalize_certification,
                "ServSafe Manager",
                "servsafe manager certification",
            ),
        ]
        for normalize, raw, expected in cases:
            self.assertEqual(normalize(raw), expected)

    def test_compute_years_uses_floor(self):
        processor = StructuredPostProcessor(".")
        # January 2020 to June 2022 is expected to count as 2 whole years.
        experience = [{"start_date": "January 2020", "end_date": "June 2022"}]
        years = processor.compute_years(experience)
        self.assertEqual(years, 2)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()