{ "source_file": "연대1_PII_dataset_V3.json", "doc_count": 4981, "sentence_count": 53778, "examples_with_pii": 19037, "positive_ratio": 0.353992, "label_count": 33, "entity_count_by_label": { "PS_NAME": 2045, "PS_NICKNAME": 1354, "OGG_CLUB": 1269, "LC_PLACE": 1201, "CV_POSITION": 1166, "LC_ADDRESS": 1154, "OG_WORKPLACE": 1082, "OGG_EDUCATION": 1045, "OG_DEPARTMENT": 866, "QT_CARD_NUMBER": 805, "QT_ACCOUNT_NUMBER": 804, "DT_BIRTH": 734, "QT_PHONE": 713, "FD_MAJOR": 711, "QT_LENGTH": 707, "QT_AGE": 700, "QT_WEIGHT": 699, "TMI_EMAIL": 618, "LCP_COUNTRY": 613, "QT_MOBILE": 608, "PS_ID": 605, "QT_GRADE": 564, "OGG_RELIGION": 558, "CV_SEX": 553, "TM_BLOOD_TYPE": 467, "TMI_SITE": 460, "QT_PLATE_NUMBER": 457, "CV_MILITARY_CAMP": 407, "QT_DRIVER_NUMBER": 200, "QT_RESIDENT_NUMBER": 200, "QT_IP": 200, "QT_ALIEN_NUMBER": 200, "QT_PASSPORT_NUMBER": 200 }, "split_sentence_counts": { "train": 42894, "validation": 5463, "test": 5421 }, "split_doc_counts": { "train": 3984, "validation": 498, "test": 499 }, "seed": 42 }