"""Unit tests for ``training.structured_postprocess``.

Covers ``build_text_and_spans`` and the ``StructuredPostProcessor`` methods
``build_structured_resume_from_spans``, ``normalize_skill``,
``normalize_certification`` and ``compute_years``.
"""

import unittest

from training.labels import LABEL2ID
from training.structured_postprocess import StructuredPostProcessor, build_text_and_spans


def _id2label():
    """Return a fresh inverse of ``LABEL2ID`` (label id -> label name)."""
    return {label_id: label for label, label_id in LABEL2ID.items()}


def _split_labelled(labelled):
    """Split ``(token, label_name)`` pairs into parallel token / tag-id lists.

    Keeping each token next to its label in the test data makes it much harder
    to misalign the two sequences than maintaining two separate lists.
    """
    tokens = [token for token, _ in labelled]
    tags = [LABEL2ID[label] for _, label in labelled]
    return tokens, tags


class StructuredPostprocessTest(unittest.TestCase):
    """Behavioural tests for span building and structured post-processing."""

    def test_build_text_and_spans_round_trip(self):
        """Tokens are joined with single spaces and B-/I- runs become spans."""
        tokens, tags = _split_labelled(
            [
                ("Jane", "B-NAME"),
                ("Doe", "I-NAME"),
                ("jane@example.com", "B-EMAIL"),
                ("|", "O"),
                ("+1", "B-PHONE"),
                ("5551234567", "I-PHONE"),
                ("|", "O"),
                ("Seattle,", "B-LOCATION"),
                ("WA", "I-LOCATION"),
            ]
        )

        text, spans = build_text_and_spans(tokens, tags, _id2label())

        self.assertEqual(text, "Jane Doe jane@example.com | +1 5551234567 | Seattle, WA")
        # "O"-tagged separators stay in the text but produce no span.
        self.assertEqual(
            [(span.label, span.text) for span in spans],
            [
                ("NAME", "Jane Doe"),
                ("EMAIL", "jane@example.com"),
                ("PHONE", "+1 5551234567"),
                ("LOCATION", "Seattle, WA"),
            ],
        )

    def test_structured_postprocess_builds_expected_fields(self):
        """Spans are mapped onto the ``personal`` and ``experience`` sections."""
        # NOTE(review): "." is the constructor's only argument; presumably a
        # base/resource directory -- confirm against StructuredPostProcessor.
        processor = StructuredPostProcessor(".")
        tokens, tags = _split_labelled(
            [
                ("Jane", "B-NAME"),
                ("Doe", "I-NAME"),
                ("jane@example.com", "B-EMAIL"),
                ("+1", "B-PHONE"),
                ("5551234567", "I-PHONE"),
                ("Seattle,", "B-LOCATION"),
                ("WA", "I-LOCATION"),
                ("Engineer", "B-TITLE"),
                ("Acme", "B-COMPANY"),
                ("January", "B-DATE"),
                ("2020", "I-DATE"),
                # A second B-DATE starts a new span, so "Present" is its own date.
                ("Present", "B-DATE"),
            ]
        )
        text, spans = build_text_and_spans(tokens, tags, _id2label())

        parsed = processor.build_structured_resume_from_spans(spans, text)

        personal = parsed["personal"]
        self.assertEqual(personal["name"], "Jane Doe")
        self.assertEqual(personal["email"], "jane@example.com")

        first_job = parsed["experience"][0]
        self.assertEqual(first_job["title"], "Engineer")
        self.assertEqual(first_job["company"], "Acme")
        self.assertEqual(first_job["start_date"], "January 2020")
        self.assertEqual(first_job["end_date"], "Present")

    def test_normalizes_skills_and_certifications(self):
        """Skill aliases and certification names map to their canonical forms."""
        processor = StructuredPostProcessor(".")

        self.assertEqual(processor.normalize_skill("node js"), "node.js")
        self.assertEqual(processor.normalize_skill("cpp"), "c++")
        self.assertEqual(
            processor.normalize_certification("ServSafe Manager"),
            "servsafe manager certification",
        )

    def test_compute_years_uses_floor(self):
        """Partial years are truncated, not rounded, by ``compute_years``."""
        processor = StructuredPostProcessor(".")

        cases = [
            # ~2.4 years: floor and round agree on 2, so this case alone
            # cannot detect a switch from flooring to rounding.
            ("June 2022", 2),
            # ~2.8 years: floor gives 2 while rounding would give 3, which is
            # what actually pins the "floor" behaviour the test name promises.
            ("November 2022", 2),
        ]
        for end_date, expected in cases:
            with self.subTest(end_date=end_date):
                years = processor.compute_years(
                    [{"start_date": "January 2020", "end_date": end_date}]
                )
                self.assertEqual(years, expected)


if __name__ == "__main__":
    unittest.main()