File size: 8,089 Bytes
fb21b1a 4924d32 fb21b1a d5d5f9e 4924d32 fb21b1a d5d5f9e fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a d5d5f9e fb21b1a d5d5f9e fb21b1a 4924d32 fb21b1a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | {
"metadata": {
"timestamp": "2025-09-25T12:48:06.242738",
"model": "Minibase-DeId-Small",
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
"sample_size": 100
},
"metrics": {
"pii_detection_rate": 1.0,
"completeness_score": 0.65,
"semantic_preservation": 0.8110479139122438,
"average_latency_ms": 477.0322895050049,
"successful_requests": 100,
"total_requests": 100
},
"domain_performance": {},
"examples": [
{
"input": "Patient Sarah Johnson, DOB 05/12/1980, visited Dr. Lee at St. Jude Hospital on 2023-10-26. Her contact is (555) 123-4567. She resides at 123 Maple Street, Anytown, CA 90210.",
"expected": "Patient [NAME_1], DOB [DOB_1], visited [NAME_2] at [HOSPITAL_1] on [DATE_1]. Her contact is [PHONE_1]. She resides at [ADDRESS_1].",
"predicted": "Patient [FIRSTNAME_1] [MIDDLENAME_1], DOB [DOB_1], visited Dr. [LASTNAME_1] at [CITY_1] Hospital on [DATE_1]. Her contact is [PHONENUMBER_1]. She resides at [BUILDINGNUMBER_1] [STREET_1], [CITY_2], [STATE_1], [STATE_2].",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.7382222222222222,
"latency_ms": 499.6819496154785
}
},
{
"input": "Deponent Mr. Robert Davis, CEO of GlobalCorp Inc., stated under oath on December 1, 2022, that his attorney, Ms. Emily White from Legal Eagles LLP, advised him. Case number: LD-2022-007.",
"expected": "Deponent [NAME_1], CEO of [ORGANIZATION_1], stated under oath on [DATE_1], that his attorney, [NAME_2] from [ORGANIZATION_2], advised him. Case number: [CASE_ID_1].",
"predicted": "Deponent [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], CEO of [COMPANYNAME_1], stated under oath on [DATE_1], that his attorney, [PREFIX_2] [MIDDLENAME_1] [LASTNAME_2], advised him. Case number: [CURRENCYCODE_1]-[BUILDINGNUMBER_1].",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.7531262939958592,
"latency_ms": 442.65270233154297
}
},
{
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: john.doe@example.com. Marital Status: Married. Nationality: Canadian.",
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
"predicted": "Employee ID: EMP-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
"metrics": {
"pii_detection": 1.0,
"completeness": false,
"semantic_preservation": 0.8557142857142858,
"latency_ms": 359.38310623168945
}
},
{
"input": "Sra. Elena Rodriguez llam\u00f3 preocupada por su hijo, Miguel Rodriguez, de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es +34 912 345 678. Viven en Calle Mayor 10, Madrid, Espa\u00f1a.",
"expected": "Sra. [NAME_1] llam\u00f3 preocupada por su hijo, [NAME_2], de [AGE_1] a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONE_1]. Viven en [ADDRESS_1].",
"predicted": "Sra. [FIRSTNAME_1] [MIDDLENAME_1] [LASTNAME_1] llam\u00f3 preocupada por su hijo, [FIRSTNAME_2] [LASTNAME_2], de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONENUMBER_1]. Viven en Calle [STREET_1] [BUILDINGNUMBER_1], [STATE_1], [STATE_2].",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.8737037037037036,
"latency_ms": 530.4110050201416
}
},
{
"input": "Claim filed by Mr. David Chen, Policy #INS-98765, on 15/03/2023, regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
"expected": "Claim filed by [NAME_1], Policy #[POLICY_NUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is [OCCUPATION_1] at [ORGANIZATION_1].",
"predicted": "Claim filed by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], Policy #INS-[BUILDINGNUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.9,
"latency_ms": 294.41308975219727
}
},
{
"input": "Received feedback from Ms. Olivia Brown, born on 1995-11-20. She visited on January 10, 2024, and mentioned a previous appointment on 2023-12-05. Her email is olivia.b@mail.com.",
"expected": "Received feedback from [NAME_1], born on [DOB_1]. She visited on [DATE_1], and mentioned a previous appointment on [DATE_2]. Her email is [EMAIL_1].",
"predicted": "Received feedback from [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], born on [DATE_1]. She visited on [DATE_2], and mentioned a previous appointment on [DATE_3]. Her email is [EMAIL_1].",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.8818181818181818,
"latency_ms": 301.6502857208252
}
},
{
"input": "Participant ID: RP-0042. Name: Dr. Anya Sharma. Ethnicity: South Asian. Occupation: Physician. Consent signed on 2023-09-01.",
"expected": "Participant ID: [PARTICIPANT_ID_1]. Name: [NAME_1]. Ethnicity: [ETHNICITY_1]. Occupation: [OCCUPATION_1]. Consent signed on [DATE_1].",
"predicted": "Participant ID: RP-[BUILDINGNUMBER_1]. Name: Dr. [MIDDLENAME_1]. Ethnicity: [EYECOLOR_1]. Occupation: Physician. Consent signed on [DATE_1].",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.9846153846153847,
"latency_ms": 269.90580558776855
}
},
{
"input": "Customer reported an issue with Order #CUST-ORD-5678. Name: Michael Green. Payment method: Visa. Last 4 digits of card: 1234. Contacted via phone (020 7946 0123) on 2024-01-15.",
"expected": "Customer reported an issue with Order #[ORDER_ID_1]. Name: [NAME_1]. Payment method: [PAYMENT_METHOD_1]. Last 4 digits of card: [CARD_LAST_4_1]. Contacted via phone [PHONE_1] on [DATE_1].",
"predicted": "Customer reported an issue with Order #CUST-ORD-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] [LASTNAME_1]. Payment method: Visa. Last 4 digits of card: [CREDITCARDCVV_2]. Contacted via phone ([PHONENUMBER_1]) on [DATE_1].",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.9916666666666667,
"latency_ms": 399.30129051208496
}
},
{
"input": "This work was conducted by Dr. Jun Li and Professor Maria Gomez, affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
"expected": "This work was conducted by [NAME_1] and [NAME_2], affiliated with [ORGANIZATION_1]. Their paper is titled 'AI in Healthcare Deidentification'.",
"predicted": "This work was conducted by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1] and Professor [FIRSTNAME_2] [LASTNAME_2], affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
"metrics": {
"pii_detection": 1.0,
"completeness": false,
"semantic_preservation": 0.8596491228070176,
"latency_ms": 292.7100658416748
}
},
{
"input": "Hello, my name is Alex Kim. I live at Apt 3B, 456 Oak Avenue, Metropolis, NY 10001. My birthday is April 22, 1990. I'm interested in your services.",
"expected": "Hello, my name is [NAME_1]. I live at [ADDRESS_1]. My birthday is [DOB_1]. I'm interested in your services.",
"predicted": "Hello, my name is [FIRSTNAME_1] [MIDDLENAME_1]. I live at [SECONDARYADDRESS_1], [BUILDINGNUMBER_1] [STREET_1], [CITY_1], [STATE_1] [ZIPCODE_1]. My birthday is [DOB_1]. I'm interested in your services.",
"metrics": {
"pii_detection": 1.0,
"completeness": true,
"semantic_preservation": 0.8733333333333333,
"latency_ms": 410.9461307525635
}
}
]
} |