Upload benchmark_results.json with huggingface_hub
Browse files- benchmark_results.json +176 -0
benchmark_results.json
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"timestamp": "2025-09-25T12:35:05.897062",
|
| 4 |
+
"model": "Minibase-DeId-Small",
|
| 5 |
+
"dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
|
| 6 |
+
"sample_size": 100
|
| 7 |
+
},
|
| 8 |
+
"metrics": {
|
| 9 |
+
"pii_detection_rate": 0.20322907647907631,
|
| 10 |
+
"completeness_score": 0.64,
|
| 11 |
+
"semantic_preservation": 0.10867183143653728,
|
| 12 |
+
"average_latency_ms": 492.3843741416931,
|
| 13 |
+
"successful_requests": 100,
|
| 14 |
+
"total_requests": 100
|
| 15 |
+
},
|
| 16 |
+
"domain_performance": {
|
| 17 |
+
"medical": {
|
| 18 |
+
"sample_count": 33,
|
| 19 |
+
"pii_detection_rate": 0.21426713017622112,
|
| 20 |
+
"completeness_score": 0.6060606060606061,
|
| 21 |
+
"semantic_preservation": 0.10982099451350788
|
| 22 |
+
},
|
| 23 |
+
"legal": {
|
| 24 |
+
"sample_count": 6,
|
| 25 |
+
"pii_detection_rate": 0.11342592592592593,
|
| 26 |
+
"completeness_score": 0.5,
|
| 27 |
+
"semantic_preservation": 0.05610021786492375
|
| 28 |
+
},
|
| 29 |
+
"hr": {
|
| 30 |
+
"sample_count": 11,
|
| 31 |
+
"pii_detection_rate": 0.20202020202020202,
|
| 32 |
+
"completeness_score": 0.2727272727272727,
|
| 33 |
+
"semantic_preservation": 0.10847864256955164
|
| 34 |
+
},
|
| 35 |
+
"general": {
|
| 36 |
+
"sample_count": 40,
|
| 37 |
+
"pii_detection_rate": 0.21849476911976912,
|
| 38 |
+
"completeness_score": 0.75,
|
| 39 |
+
"semantic_preservation": 0.11955831545905074
|
| 40 |
+
},
|
| 41 |
+
"research": {
|
| 42 |
+
"sample_count": 4,
|
| 43 |
+
"pii_detection_rate": 0.19166666666666668,
|
| 44 |
+
"completeness_score": 0.5,
|
| 45 |
+
"semantic_preservation": 0.10833333333333334
|
| 46 |
+
},
|
| 47 |
+
"customer_service": {
|
| 48 |
+
"sample_count": 6,
|
| 49 |
+
"pii_detection_rate": 0.14047619047619048,
|
| 50 |
+
"completeness_score": 1.0,
|
| 51 |
+
"semantic_preservation": 0.08292633292633293
|
| 52 |
+
}
|
| 53 |
+
},
|
| 54 |
+
"examples": [
|
| 55 |
+
{
|
| 56 |
+
"input": "Patient Sarah Johnson, DOB 05/12/1980, visited Dr. Lee at St. Jude Hospital on 2023-10-26. Her contact is (555) 123-4567. She resides at 123 Maple Street, Anytown, CA 90210.",
|
| 57 |
+
"expected": "Patient [NAME_1], DOB [DOB_1], visited [NAME_2] at [HOSPITAL_1] on [DATE_1]. Her contact is [PHONE_1]. She resides at [ADDRESS_1].",
|
| 58 |
+
"predicted": "Patient [FIRSTNAME_1] [MIDDLENAME_1], DOB [DOB_1], visited Dr. [LASTNAME_1] at [CITY_1] Hospital on [DATE_1]. Her contact is [PHONENUMBER_1]. She resides at [BUILDINGNUMBER_1] [STREET_1], [CITY_2], [STATE_1], [STATE_2].",
|
| 59 |
+
"domain": "medical",
|
| 60 |
+
"metrics": {
|
| 61 |
+
"pii_detection": 0.2857142857142857,
|
| 62 |
+
"completeness": true,
|
| 63 |
+
"semantic_preservation": 0.14285714285714285,
|
| 64 |
+
"latency_ms": 465.7011032104492
|
| 65 |
+
}
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"input": "Deponent Mr. Robert Davis, CEO of GlobalCorp Inc., stated under oath on December 1, 2022, that his attorney, Ms. Emily White from Legal Eagles LLP, advised him. Case number: LD-2022-007.",
|
| 69 |
+
"expected": "Deponent [NAME_1], CEO of [ORGANIZATION_1], stated under oath on [DATE_1], that his attorney, [NAME_2] from [ORGANIZATION_2], advised him. Case number: [CASE_ID_1].",
|
| 70 |
+
"predicted": "Deponent [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], CEO of [COMPANYNAME_1], stated under oath on [DATE_1], that his attorney, [PREFIX_2] [MIDDLENAME_1] [LASTNAME_2], advised him. Case number: LD-2022-007.",
|
| 71 |
+
"domain": "legal",
|
| 72 |
+
"metrics": {
|
| 73 |
+
"pii_detection": 0.16666666666666666,
|
| 74 |
+
"completeness": true,
|
| 75 |
+
"semantic_preservation": 0.1111111111111111,
|
| 76 |
+
"latency_ms": 379.5027732849121
|
| 77 |
+
}
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: john.doe@example.com. Marital Status: Married. Nationality: Canadian.",
|
| 81 |
+
"expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
|
| 82 |
+
"predicted": "Employee ID: EMP-[BUILDINGNUMBER_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
|
| 83 |
+
"domain": "hr",
|
| 84 |
+
"metrics": {
|
| 85 |
+
"pii_detection": 0.16666666666666666,
|
| 86 |
+
"completeness": false,
|
| 87 |
+
"semantic_preservation": 0.09090909090909091,
|
| 88 |
+
"latency_ms": 333.10723304748535
|
| 89 |
+
}
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"input": "Sra. Elena Rodriguez llam\u00f3 preocupada por su hijo, Miguel Rodriguez, de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es +34 912 345 678. Viven en Calle Mayor 10, Madrid, Espa\u00f1a.",
|
| 93 |
+
"expected": "Sra. [NAME_1] llam\u00f3 preocupada por su hijo, [NAME_2], de [AGE_1] a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONE_1]. Viven en [ADDRESS_1].",
|
| 94 |
+
"predicted": "Sra. [FIRSTNAME_1] [MIDDLENAME_1] [LASTNAME_1] llam\u00f3 preocupada por su hijo, [FIRSTNAME_2] [LASTNAME_2], de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONENUMBER_1]. Viven en Calle [STREET_1] [BUILDINGNUMBER_1], [STATE_1], [STATE_2].",
|
| 95 |
+
"domain": "general",
|
| 96 |
+
"metrics": {
|
| 97 |
+
"pii_detection": 0.0,
|
| 98 |
+
"completeness": true,
|
| 99 |
+
"semantic_preservation": 0.0,
|
| 100 |
+
"latency_ms": 507.0638656616211
|
| 101 |
+
}
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"input": "Claim filed by Mr. David Chen, Policy #INS-98765, on 15/03/2023, regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
|
| 105 |
+
"expected": "Claim filed by [NAME_1], Policy #[POLICY_NUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is [OCCUPATION_1] at [ORGANIZATION_1].",
|
| 106 |
+
"predicted": "Claim filed by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], Policy #INS-[BUILDINGNUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
|
| 107 |
+
"domain": "general",
|
| 108 |
+
"metrics": {
|
| 109 |
+
"pii_detection": 0.2,
|
| 110 |
+
"completeness": true,
|
| 111 |
+
"semantic_preservation": 0.1111111111111111,
|
| 112 |
+
"latency_ms": 322.0179080963135
|
| 113 |
+
}
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"input": "Received feedback from Ms. Olivia Brown, born on 1995-11-20. She visited on January 10, 2024, and mentioned a previous appointment on 2023-12-05. Her email is olivia.b@mail.com.",
|
| 117 |
+
"expected": "Received feedback from [NAME_1], born on [DOB_1]. She visited on [DATE_1], and mentioned a previous appointment on [DATE_2]. Her email is [EMAIL_1].",
|
| 118 |
+
"predicted": "Received feedback from [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], born on [DATE_1]. She visited on [DATE_2], and mentioned a previous appointment on [DATE_3]. Her email is [EMAIL_1].",
|
| 119 |
+
"domain": "general",
|
| 120 |
+
"metrics": {
|
| 121 |
+
"pii_detection": 0.6,
|
| 122 |
+
"completeness": true,
|
| 123 |
+
"semantic_preservation": 0.2857142857142857,
|
| 124 |
+
"latency_ms": 331.7408561706543
|
| 125 |
+
}
|
| 126 |
+
},
|
| 127 |
+
{
|
| 128 |
+
"input": "Participant ID: RP-0042. Name: Dr. Anya Sharma. Ethnicity: South Asian. Occupation: Physician. Consent signed on 2023-09-01.",
|
| 129 |
+
"expected": "Participant ID: [PARTICIPANT_ID_1]. Name: [NAME_1]. Ethnicity: [ETHNICITY_1]. Occupation: [OCCUPATION_1]. Consent signed on [DATE_1].",
|
| 130 |
+
"predicted": "Participant ID: RP-[BUILDINGNUMBER_1]. Name: Dr. [MIDDLENAME_1]. Ethnicity: [EYECOLOR_1]. Occupation: Physician. Consent signed on [DATE_1].",
|
| 131 |
+
"domain": "research",
|
| 132 |
+
"metrics": {
|
| 133 |
+
"pii_detection": 0.2,
|
| 134 |
+
"completeness": true,
|
| 135 |
+
"semantic_preservation": 0.125,
|
| 136 |
+
"latency_ms": 280.93981742858887
|
| 137 |
+
}
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"input": "Customer reported an issue with Order #CUST-ORD-5678. Name: Michael Green. Payment method: Visa. Last 4 digits of card: 1234. Contacted via phone (020 7946 0123) on 2024-01-15.",
|
| 141 |
+
"expected": "Customer reported an issue with Order #[ORDER_ID_1]. Name: [NAME_1]. Payment method: [PAYMENT_METHOD_1]. Last 4 digits of card: [CARD_LAST_4_1]. Contacted via phone [PHONE_1] on [DATE_1].",
|
| 142 |
+
"predicted": "Customer reported an issue with Order #CUST-ORD-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] [LASTNAME_1]. Payment method: Visa. Last 4 digits of card: [CREDITCARDCVV_2]. Contacted via phone ([PHONENUMBER_1]) on [DATE_1].",
|
| 143 |
+
"domain": "customer_service",
|
| 144 |
+
"metrics": {
|
| 145 |
+
"pii_detection": 0.2,
|
| 146 |
+
"completeness": true,
|
| 147 |
+
"semantic_preservation": 0.1111111111111111,
|
| 148 |
+
"latency_ms": 405.1778316497803
|
| 149 |
+
}
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"input": "This work was conducted by Dr. Jun Li and Professor Maria Gomez, affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
|
| 153 |
+
"expected": "This work was conducted by [NAME_1] and [NAME_2], affiliated with [ORGANIZATION_1]. Their paper is titled 'AI in Healthcare Deidentification'.",
|
| 154 |
+
"predicted": "This work was conducted by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1] and Professor [FIRSTNAME_2] [LASTNAME_2], affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
|
| 155 |
+
"domain": "general",
|
| 156 |
+
"metrics": {
|
| 157 |
+
"pii_detection": 0.0,
|
| 158 |
+
"completeness": false,
|
| 159 |
+
"semantic_preservation": 0.0,
|
| 160 |
+
"latency_ms": 409.94930267333984
|
| 161 |
+
}
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"input": "Hello, my name is Alex Kim. I live at Apt 3B, 456 Oak Avenue, Metropolis, NY 10001. My birthday is April 22, 1990. I'm interested in your services.",
|
| 165 |
+
"expected": "Hello, my name is [NAME_1]. I live at [ADDRESS_1]. My birthday is [DOB_1]. I'm interested in your services.",
|
| 166 |
+
"predicted": "Hello, my name is [FIRSTNAME_1] [MIDDLENAME_1]. I live at [SECONDARYADDRESS_1], [BUILDINGNUMBER_1] [STREET_1], [CITY_1], [STATE_1] [ZIPCODE_1]. My birthday is [DATE_1]. I'm interested in your services.",
|
| 167 |
+
"domain": "customer_service",
|
| 168 |
+
"metrics": {
|
| 169 |
+
"pii_detection": 0.0,
|
| 170 |
+
"completeness": true,
|
| 171 |
+
"semantic_preservation": 0.0,
|
| 172 |
+
"latency_ms": 415.47536849975586
|
| 173 |
+
}
|
| 174 |
+
}
|
| 175 |
+
]
|
| 176 |
+
}
|