File size: 8,089 Bytes
fb21b1a
 
4924d32
fb21b1a
 
 
 
 
d5d5f9e
4924d32
 
 
fb21b1a
 
 
d5d5f9e
fb21b1a
 
 
 
 
 
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
d5d5f9e
fb21b1a
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
4924d32
fb21b1a
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
 
 
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
 
 
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
 
 
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
 
 
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
 
 
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
 
 
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
 
d5d5f9e
fb21b1a
d5d5f9e
fb21b1a
4924d32
 
fb21b1a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
{
  "metadata": {
    "timestamp": "2025-09-25T12:48:06.242738",
    "model": "Minibase-DeId-Small",
    "dataset": "Personal_De-identifier_Benchmark_SFT.jsonl",
    "sample_size": 100
  },
  "metrics": {
    "pii_detection_rate": 1.0,
    "completeness_score": 0.65,
    "semantic_preservation": 0.8110479139122438,
    "average_latency_ms": 477.0322895050049,
    "successful_requests": 100,
    "total_requests": 100
  },
  "domain_performance": {},
  "examples": [
    {
      "input": "Patient Sarah Johnson, DOB 05/12/1980, visited Dr. Lee at St. Jude Hospital on 2023-10-26. Her contact is (555) 123-4567. She resides at 123 Maple Street, Anytown, CA 90210.",
      "expected": "Patient [NAME_1], DOB [DOB_1], visited [NAME_2] at [HOSPITAL_1] on [DATE_1]. Her contact is [PHONE_1]. She resides at [ADDRESS_1].",
      "predicted": "Patient [FIRSTNAME_1] [MIDDLENAME_1], DOB [DOB_1], visited Dr. [LASTNAME_1] at [CITY_1] Hospital on [DATE_1]. Her contact is [PHONENUMBER_1]. She resides at [BUILDINGNUMBER_1] [STREET_1], [CITY_2], [STATE_1], [STATE_2].",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.7382222222222222,
        "latency_ms": 499.6819496154785
      }
    },
    {
      "input": "Deponent Mr. Robert Davis, CEO of GlobalCorp Inc., stated under oath on December 1, 2022, that his attorney, Ms. Emily White from Legal Eagles LLP, advised him. Case number: LD-2022-007.",
      "expected": "Deponent [NAME_1], CEO of [ORGANIZATION_1], stated under oath on [DATE_1], that his attorney, [NAME_2] from [ORGANIZATION_2], advised him. Case number: [CASE_ID_1].",
      "predicted": "Deponent [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], CEO of [COMPANYNAME_1], stated under oath on [DATE_1], that his attorney, [PREFIX_2] [MIDDLENAME_1] [LASTNAME_2], advised him. Case number: [CURRENCYCODE_1]-[BUILDINGNUMBER_1].",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.7531262939958592,
        "latency_ms": 442.65270233154297
      }
    },
    {
      "input": "Employee ID: EMP-001-XYZ. Name: John Doe. Salary: $85,000. Email: john.doe@example.com. Marital Status: Married. Nationality: Canadian.",
      "expected": "Employee ID: [EMPLOYEE_ID_1]. Name: [NAME_1]. Salary: [SALARY_1]. Email: [EMAIL_1]. Marital Status: [MARITAL_STATUS_1]. Nationality: [NATIONALITY_1].",
      "predicted": "Employee ID: EMP-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] Doe. Salary: [CURRENCYSYMBOL_1][AMOUNT_1]. Email: [EMAIL_1]. Marital Status: Married. Nationality: [STATE_1].",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": false,
        "semantic_preservation": 0.8557142857142858,
        "latency_ms": 359.38310623168945
      }
    },
    {
      "input": "Sra. Elena Rodriguez llam\u00f3 preocupada por su hijo, Miguel Rodriguez, de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es +34 912 345 678. Viven en Calle Mayor 10, Madrid, Espa\u00f1a.",
      "expected": "Sra. [NAME_1] llam\u00f3 preocupada por su hijo, [NAME_2], de [AGE_1] a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONE_1]. Viven en [ADDRESS_1].",
      "predicted": "Sra. [FIRSTNAME_1] [MIDDLENAME_1] [LASTNAME_1] llam\u00f3 preocupada por su hijo, [FIRSTNAME_2] [LASTNAME_2], de 7 a\u00f1os. Su n\u00famero de tel\u00e9fono es [PHONENUMBER_1]. Viven en Calle [STREET_1] [BUILDINGNUMBER_1], [STATE_1], [STATE_2].",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.8737037037037036,
        "latency_ms": 530.4110050201416
      }
    },
    {
      "input": "Claim filed by Mr. David Chen, Policy #INS-98765, on 15/03/2023, regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
      "expected": "Claim filed by [NAME_1], Policy #[POLICY_NUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is [OCCUPATION_1] at [ORGANIZATION_1].",
      "predicted": "Claim filed by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], Policy #INS-[BUILDINGNUMBER_1], on [DATE_1], regarding a fractured tibia. His occupation is software engineer at TechSolutions.",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.9,
        "latency_ms": 294.41308975219727
      }
    },
    {
      "input": "Received feedback from Ms. Olivia Brown, born on 1995-11-20. She visited on January 10, 2024, and mentioned a previous appointment on 2023-12-05. Her email is olivia.b@mail.com.",
      "expected": "Received feedback from [NAME_1], born on [DOB_1]. She visited on [DATE_1], and mentioned a previous appointment on [DATE_2]. Her email is [EMAIL_1].",
      "predicted": "Received feedback from [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1], born on [DATE_1]. She visited on [DATE_2], and mentioned a previous appointment on [DATE_3]. Her email is [EMAIL_1].",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.8818181818181818,
        "latency_ms": 301.6502857208252
      }
    },
    {
      "input": "Participant ID: RP-0042. Name: Dr. Anya Sharma. Ethnicity: South Asian. Occupation: Physician. Consent signed on 2023-09-01.",
      "expected": "Participant ID: [PARTICIPANT_ID_1]. Name: [NAME_1]. Ethnicity: [ETHNICITY_1]. Occupation: [OCCUPATION_1]. Consent signed on [DATE_1].",
      "predicted": "Participant ID: RP-[BUILDINGNUMBER_1]. Name: Dr. [MIDDLENAME_1]. Ethnicity: [EYECOLOR_1]. Occupation: Physician. Consent signed on [DATE_1].",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.9846153846153847,
        "latency_ms": 269.90580558776855
      }
    },
    {
      "input": "Customer reported an issue with Order #CUST-ORD-5678. Name: Michael Green. Payment method: Visa. Last 4 digits of card: 1234. Contacted via phone (020 7946 0123) on 2024-01-15.",
      "expected": "Customer reported an issue with Order #[ORDER_ID_1]. Name: [NAME_1]. Payment method: [PAYMENT_METHOD_1]. Last 4 digits of card: [CARD_LAST_4_1]. Contacted via phone [PHONE_1] on [DATE_1].",
      "predicted": "Customer reported an issue with Order #CUST-ORD-[CREDITCARDCVV_1]. Name: [FIRSTNAME_1] [LASTNAME_1]. Payment method: Visa. Last 4 digits of card: [CREDITCARDCVV_2]. Contacted via phone ([PHONENUMBER_1]) on [DATE_1].",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.9916666666666667,
        "latency_ms": 399.30129051208496
      }
    },
    {
      "input": "This work was conducted by Dr. Jun Li and Professor Maria Gomez, affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
      "expected": "This work was conducted by [NAME_1] and [NAME_2], affiliated with [ORGANIZATION_1]. Their paper is titled 'AI in Healthcare Deidentification'.",
      "predicted": "This work was conducted by [PREFIX_1] [FIRSTNAME_1] [LASTNAME_1] and Professor [FIRSTNAME_2] [LASTNAME_2], affiliated with University of Global Studies. Their paper is titled 'AI in Healthcare Deidentification'.",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": false,
        "semantic_preservation": 0.8596491228070176,
        "latency_ms": 292.7100658416748
      }
    },
    {
      "input": "Hello, my name is Alex Kim. I live at Apt 3B, 456 Oak Avenue, Metropolis, NY 10001. My birthday is April 22, 1990. I'm interested in your services.",
      "expected": "Hello, my name is [NAME_1]. I live at [ADDRESS_1]. My birthday is [DOB_1]. I'm interested in your services.",
      "predicted": "Hello, my name is [FIRSTNAME_1] [MIDDLENAME_1]. I live at [SECONDARYADDRESS_1], [BUILDINGNUMBER_1] [STREET_1], [CITY_1], [STATE_1] [ZIPCODE_1]. My birthday is [DOB_1]. I'm interested in your services.",
      "metrics": {
        "pii_detection": 1.0,
        "completeness": true,
        "semantic_preservation": 0.8733333333333333,
        "latency_ms": 410.9461307525635
      }
    }
  ]
}