Luis Kalckstein committed on
Commit
cfdd1af
·
unverified ·
1 Parent(s): c8a3ed4
Nutrient_Symbol_RGB_OffWhite.svg ADDED
data_loader.py CHANGED
@@ -44,14 +44,15 @@ def load_data():
44
 
45
  return df
46
 
47
- # Color palette matching DocumentProcessing style
48
  COLORS = {
49
  # Light mode colors
50
  "white": "#FFFFFF",
 
51
  "disc_pink": "#DE9DCC",
52
  "code_coral": "#F25E45",
53
  "data_green": "#6EB579",
54
- "digital_pollen": "#F0C968",
55
  "warm_black": "#1A1414",
56
  "off_white": "#EFEBE7",
57
  "pixel_mist": "#E2DBD9",
@@ -69,7 +70,7 @@ COLORS = {
69
  HEADER_CONTENT = f"""
70
  <style>
71
  /* Import fonts */
72
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
73
 
74
  /* Root variables with custom color palette */
75
  :root {{
@@ -82,18 +83,18 @@ HEADER_CONTENT = f"""
82
  --text-primary: #EFEBE7;
83
  --text-secondary: #C2B8AE;
84
  --text-muted: #67594B;
85
- --accent-primary: #DE9DCC;
86
  --accent-secondary: #F25E45;
87
  --accent-tertiary: #6EB579;
88
- --accent-quaternary: #F0C968;
89
- --glow-primary: rgba(222, 157, 204, 0.4);
90
  --glow-secondary: rgba(242, 94, 69, 0.4);
91
  --glow-tertiary: rgba(110, 181, 121, 0.4);
92
  }}
93
 
94
  /* Global font and background */
95
  .gradio-container {{
96
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
97
  background: var(--bg-primary) !important;
98
  color: var(--text-primary) !important;
99
  }}
@@ -102,12 +103,12 @@ HEADER_CONTENT = f"""
102
  h1, h2, h3, h4 {{
103
  color: var(--text-primary) !important;
104
  font-weight: 700 !important;
105
- font-family: 'Inter', sans-serif !important;
106
  }}
107
 
108
  p, span, div {{
109
  color: var(--text-primary) !important;
110
- font-family: 'Inter', sans-serif !important;
111
  }}
112
 
113
  /* Dark containers */
@@ -148,24 +149,25 @@ p, span, div {{
148
  .v2-styled-table {{
149
  width: 100%;
150
  border-collapse: collapse;
151
- font-family: 'Inter', sans-serif;
152
  font-size: 14px;
153
  }}
154
 
155
  .v2-styled-table thead {{
156
- background: linear-gradient(135deg, var(--accent-primary), var(--accent-secondary));
157
  }}
158
 
159
  .v2-styled-table th {{
160
  padding: 16px 12px;
161
  text-align: left;
162
- color: white;
163
  font-weight: 600;
164
  font-size: 13px;
165
  text-transform: uppercase;
166
  letter-spacing: 0.05em;
167
  border: none;
168
  position: relative;
 
169
  }}
170
 
171
  .v2-styled-table td {{
@@ -176,26 +178,27 @@ p, span, div {{
176
  vertical-align: middle;
177
  }}
178
 
179
- .v2-styled-table tbody tr {{
180
- transition: all 0.3s ease;
181
- background: var(--bg-secondary);
182
- }}
 
183
 
184
- .v2-styled-table tbody tr:nth-child(even) {{
185
- background: var(--bg-card);
186
- }}
187
 
188
- .v2-styled-table tbody tr:hover {{
189
- background: rgba(222, 157, 204, 0.1);
190
- box-shadow: 0 0 20px var(--glow-primary);
191
- transform: scale(1.01);
192
- }}
193
 
194
- .model-name {{
195
- font-weight: 600;
196
- color: var(--accent-primary);
197
- transition: all 0.2s ease;
198
- }}
199
 
200
  .numeric-cell {{
201
  text-align: center;
@@ -205,6 +208,7 @@ p, span, div {{
205
 
206
  .score-cell {{
207
  padding: 8px 12px;
 
208
  }}
209
 
210
  /* Scrollbar styling */
@@ -238,14 +242,20 @@ p, span, div {{
238
  ">
239
  <div style="max-width: 72rem; margin: 0 auto;">
240
  <div style="text-align: center; margin-bottom: 4rem;">
 
 
 
 
 
 
 
241
  <h1 style="
242
  font-size: 4rem;
243
  font-weight: 800;
244
  line-height: 1.1;
245
- background: linear-gradient(45deg, var(--accent-primary), var(--accent-secondary));
246
- -webkit-background-clip: text;
247
- -webkit-text-fill-color: transparent;
248
  margin-bottom: 0.5rem;
 
249
  ">
250
  🔒 LLM PII Detection Leaderboard
251
  </h1>
@@ -261,13 +271,12 @@ p, span, div {{
261
  Comprehensive benchmark for language models' performance in detecting and redacting
262
  personally identifiable information (PII) across various document types and scenarios.
263
  <span style="
264
- background: linear-gradient(to right, var(--accent-tertiary), var(--accent-quaternary));
265
- -webkit-background-clip: text;
266
- -webkit-text-fill-color: transparent;
267
  display: block;
268
  margin-top: 1rem;
269
  font-size: 1.5rem;
270
  font-weight: 500;
 
271
  ">
272
  "How well do LLMs protect sensitive information?"
273
  </span>
@@ -292,18 +301,17 @@ p, span, div {{
292
  font-size: 4rem;
293
  font-weight: 800;
294
  margin-bottom: 1rem;
295
- background: linear-gradient(45deg, var(--accent-primary), var(--accent-secondary));
296
- -webkit-background-clip: text;
297
- -webkit-text-fill-color: transparent;
298
- ">8</div>
299
  <div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
300
  Language Models
301
  </div>
302
  <div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
303
- Leading proprietary & open source
304
  </div>
305
  <div style="color: var(--text-secondary); margin-top: 0.5rem;">
306
- GPT-4o, Claude, Gemini, LLaMA, Mistral
307
  </div>
308
  </div>
309
 
@@ -319,9 +327,8 @@ p, span, div {{
319
  font-size: 4rem;
320
  font-weight: 800;
321
  margin-bottom: 1rem;
322
- background: linear-gradient(45deg, var(--accent-tertiary), var(--accent-quaternary));
323
- -webkit-background-clip: text;
324
- -webkit-text-fill-color: transparent;
325
  ">5</div>
326
  <div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
327
  Document Types
@@ -346,18 +353,17 @@ p, span, div {{
346
  font-size: 4rem;
347
  font-weight: 800;
348
  margin-bottom: 1rem;
349
- background: linear-gradient(45deg, var(--accent-secondary), var(--accent-primary));
350
- -webkit-background-clip: text;
351
- -webkit-text-fill-color: transparent;
352
- ">94.1%</div>
353
  <div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
354
- Best Accuracy
355
  </div>
356
  <div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
357
  State-of-the-art performance
358
  </div>
359
  <div style="color: var(--text-secondary); margin-top: 0.5rem;">
360
- GPT-4o leading precision & recall
361
  </div>
362
  </div>
363
  </div>
@@ -369,8 +375,7 @@ p, span, div {{
369
  METHODOLOGY = """
370
  <div style="max-width: 1200px; margin: 0 auto; padding: 2rem; color: var(--text-secondary); line-height: 1.7; font-size: 1rem;">
371
  <h1 style="font-size: 2.5rem; font-weight: 700; margin: 3rem 0 1.5rem; color: var(--text-primary);
372
- background: linear-gradient(to right, var(--accent-primary), var(--accent-secondary));
373
- -webkit-background-clip: text; -webkit-text-fill-color: transparent;">
374
  Methodology
375
  </h1>
376
 
@@ -384,28 +389,28 @@ METHODOLOGY = """
384
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
385
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
386
  background: var(--accent-primary); border-radius: 50%;
387
- box-shadow: 0 0 0 2px rgba(222, 157, 204, 0.2);"></span>
388
  <span style="color: var(--accent-primary); font-weight: 600;">Model Selection:</span>
389
  We evaluate leading language models across proprietary and open-source categories
390
  </li>
391
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
392
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
393
  background: var(--accent-primary); border-radius: 50%;
394
- box-shadow: 0 0 0 2px rgba(222, 157, 204, 0.2);"></span>
395
  <span style="color: var(--accent-primary); font-weight: 600;">PII Detection:</span>
396
  Each model processes documents with instructions to identify and classify PII entities
397
  </li>
398
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
399
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
400
  background: var(--accent-primary); border-radius: 50%;
401
- box-shadow: 0 0 0 2px rgba(222, 157, 204, 0.2);"></span>
402
  <span style="color: var(--accent-primary); font-weight: 600;">Performance Metrics:</span>
403
  Precision, Recall, F1 Score, Over-detection Rate, Processing Time, and Cost
404
  </li>
405
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
406
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
407
  background: var(--accent-primary); border-radius: 50%;
408
- box-shadow: 0 0 0 2px rgba(222, 157, 204, 0.2);"></span>
409
  <span style="color: var(--accent-primary); font-weight: 600;">Domain Analysis:</span>
410
  Specialized evaluation across Healthcare, Financial, Government, Legal, and Personal documents
411
  </li>
@@ -424,5 +429,22 @@ METHODOLOGY = """
424
  <li style="margin: 1rem 0;"><span style="color: var(--accent-secondary); font-weight: 600;">Over-detection Rate:</span> Percentage of non-PII incorrectly flagged (lower is better)</li>
425
  </ul>
426
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  </div>
428
  """
 
44
 
45
  return df
46
 
47
+ # Nutrient brand color palette
48
  COLORS = {
49
  # Light mode colors
50
  "white": "#FFFFFF",
51
+ "black": "#000000",
52
  "disc_pink": "#DE9DCC",
53
  "code_coral": "#F25E45",
54
  "data_green": "#6EB579",
55
+ "digital_pollen": "#F0C968", # Primary yellow accent
56
  "warm_black": "#1A1414",
57
  "off_white": "#EFEBE7",
58
  "pixel_mist": "#E2DBD9",
 
70
  HEADER_CONTENT = f"""
71
  <style>
72
  /* Import fonts */
73
+ @import url('https://fonts.googleapis.com/css2?family=Archivo:wght@400;500;600;700;800&display=swap');
74
 
75
  /* Root variables with custom color palette */
76
  :root {{
 
83
  --text-primary: #EFEBE7;
84
  --text-secondary: #C2B8AE;
85
  --text-muted: #67594B;
86
+ --accent-primary: #F0C968;
87
  --accent-secondary: #F25E45;
88
  --accent-tertiary: #6EB579;
89
+ --accent-quaternary: #DE9DCC;
90
+ --glow-primary: rgba(240, 201, 104, 0.4);
91
  --glow-secondary: rgba(242, 94, 69, 0.4);
92
  --glow-tertiary: rgba(110, 181, 121, 0.4);
93
  }}
94
 
95
  /* Global font and background */
96
  .gradio-container {{
97
+ font-family: 'Archivo', -apple-system, BlinkMacSystemFont, sans-serif !important;
98
  background: var(--bg-primary) !important;
99
  color: var(--text-primary) !important;
100
  }}
 
103
  h1, h2, h3, h4 {{
104
  color: var(--text-primary) !important;
105
  font-weight: 700 !important;
106
+ font-family: 'Archivo', sans-serif !important;
107
  }}
108
 
109
  p, span, div {{
110
  color: var(--text-primary) !important;
111
+ font-family: 'Archivo', sans-serif !important;
112
  }}
113
 
114
  /* Dark containers */
 
149
  .v2-styled-table {{
150
  width: 100%;
151
  border-collapse: collapse;
152
+ font-family: 'Archivo', sans-serif;
153
  font-size: 14px;
154
  }}
155
 
156
  .v2-styled-table thead {{
157
+ background: var(--accent-primary);
158
  }}
159
 
160
  .v2-styled-table th {{
161
  padding: 16px 12px;
162
  text-align: left;
163
+ color: #000000 !important;
164
  font-weight: 600;
165
  font-size: 13px;
166
  text-transform: uppercase;
167
  letter-spacing: 0.05em;
168
  border: none;
169
  position: relative;
170
+ font-family: 'Archivo', sans-serif;
171
  }}
172
 
173
  .v2-styled-table td {{
 
178
  vertical-align: middle;
179
  }}
180
 
181
+ .v2-styled-table tbody tr {{
182
+ transition: none;
183
+ background: rgba(239, 235, 231, 0.06);
184
+ box-shadow: none;
185
+ }}
186
 
187
+ .v2-styled-table tbody tr:nth-child(even) {{
188
+ background: rgba(239, 235, 231, 0.08);
189
+ }}
190
 
191
+ .v2-styled-table tbody tr:hover {{
192
+ background: rgba(240, 201, 104, 0.10);
193
+ box-shadow: none;
194
+ transform: none;
195
+ }}
196
 
197
+ .model-name {{
198
+ font-weight: 700;
199
+ color: var(--off_white);
200
+ transition: all 0.2s ease;
201
+ }}
202
 
203
  .numeric-cell {{
204
  text-align: center;
 
208
 
209
  .score-cell {{
210
  padding: 8px 12px;
211
+ color: #000000 !important;
212
  }}
213
 
214
  /* Scrollbar styling */
 
242
  ">
243
  <div style="max-width: 72rem; margin: 0 auto;">
244
  <div style="text-align: center; margin-bottom: 4rem;">
245
+ <!-- Nutrient Logo -->
246
+ <div style="margin-bottom: 2rem; display: flex; justify-content: center;">
247
+ <svg width="120" height="84" viewBox="0 0 240 169" xmlns="http://www.w3.org/2000/svg">
248
+ <path fill="#FFFFFF" d="M20,104.3c-11.1,0-20-8.9-20-20s8.9-20,20-20,20,8.9,20,20-9,20-20,20ZM220,64.3c-11.1,0-20,8.9-20,20s8.9,20,20,20,20-8.9,20-20-8.9-20-20-20ZM30.5,133.2c-8.5,7.1-9.6,19.7-2.5,28.2,7.1,8.5,19.7,9.6,28.2,2.5,8.5-7.1,9.6-19.7,2.5-28.2-7.1-8.5-19.7-9.6-28.2-2.5ZM209.5,35.3c8.5-7.1,9.6-19.7,2.5-28.2-7.1-8.5-19.7-9.6-28.2-2.5-8.5,7.1-9.6,19.7-2.5,28.2,7.1,8.5,19.7,9.6,28.2,2.5ZM56.2,4.7c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5,7.1-8.5,6-21.1-2.5-28.2ZM209.5,133.2c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5,7.1-8.5,6-21.1-2.5-28.2ZM158.4,90.4c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5s6-21.1-2.5-28.2ZM107.3,47.5c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5s6-21.1-2.5-28.2Z"/>
249
+ </svg>
250
+ </div>
251
+
252
  <h1 style="
253
  font-size: 4rem;
254
  font-weight: 800;
255
  line-height: 1.1;
256
+ color: var(--accent-primary);
 
 
257
  margin-bottom: 0.5rem;
258
+ font-family: 'Archivo', sans-serif;
259
  ">
260
  🔒 LLM PII Detection Leaderboard
261
  </h1>
 
271
  Comprehensive benchmark for language models' performance in detecting and redacting
272
  personally identifiable information (PII) across various document types and scenarios.
273
  <span style="
274
+ color: var(--accent-primary);
 
 
275
  display: block;
276
  margin-top: 1rem;
277
  font-size: 1.5rem;
278
  font-weight: 500;
279
+ font-family: 'Archivo', sans-serif;
280
  ">
281
  "How well do LLMs protect sensitive information?"
282
  </span>
 
301
  font-size: 4rem;
302
  font-weight: 800;
303
  margin-bottom: 1rem;
304
+ color: var(--accent-primary);
305
+ font-family: 'Archivo', sans-serif;
306
+ ">4</div>
 
307
  <div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
308
  Language Models
309
  </div>
310
  <div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
311
+ Cutting-edge Nutrient models
312
  </div>
313
  <div style="color: var(--text-secondary); margin-top: 0.5rem;">
314
+ GPT-5-mini, GPT-5-nano, GPT-4.1-mini, GPT-4.1-nano
315
  </div>
316
  </div>
317
 
 
327
  font-size: 4rem;
328
  font-weight: 800;
329
  margin-bottom: 1rem;
330
+ color: var(--accent-tertiary);
331
+ font-family: 'Archivo', sans-serif;
 
332
  ">5</div>
333
  <div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
334
  Document Types
 
353
  font-size: 4rem;
354
  font-weight: 800;
355
  margin-bottom: 1rem;
356
+ color: var(--accent-primary);
357
+ font-family: 'Archivo', sans-serif;
358
+ ">98.0%</div>
 
359
  <div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
360
+ Best F1 Score
361
  </div>
362
  <div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
363
  State-of-the-art performance
364
  </div>
365
  <div style="color: var(--text-secondary); margin-top: 0.5rem;">
366
+ Nutrient & GPT-5-mini leading F1 performance
367
  </div>
368
  </div>
369
  </div>
 
375
  METHODOLOGY = """
376
  <div style="max-width: 1200px; margin: 0 auto; padding: 2rem; color: var(--text-secondary); line-height: 1.7; font-size: 1rem;">
377
  <h1 style="font-size: 2.5rem; font-weight: 700; margin: 3rem 0 1.5rem; color: var(--text-primary);
378
+ font-family: 'Archivo', sans-serif;">
 
379
  Methodology
380
  </h1>
381
 
 
389
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
390
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
391
  background: var(--accent-primary); border-radius: 50%;
392
+ box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
393
  <span style="color: var(--accent-primary); font-weight: 600;">Model Selection:</span>
394
  We evaluate leading language models across proprietary and open-source categories
395
  </li>
396
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
397
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
398
  background: var(--accent-primary); border-radius: 50%;
399
+ box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
400
  <span style="color: var(--accent-primary); font-weight: 600;">PII Detection:</span>
401
  Each model processes documents with instructions to identify and classify PII entities
402
  </li>
403
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
404
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
405
  background: var(--accent-primary); border-radius: 50%;
406
+ box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
407
  <span style="color: var(--accent-primary); font-weight: 600;">Performance Metrics:</span>
408
  Precision, Recall, F1 Score, Over-detection Rate, Processing Time, and Cost
409
  </li>
410
  <li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
411
  <span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
412
  background: var(--accent-primary); border-radius: 50%;
413
+ box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
414
  <span style="color: var(--accent-primary); font-weight: 600;">Domain Analysis:</span>
415
  Specialized evaluation across Healthcare, Financial, Government, Legal, and Personal documents
416
  </li>
 
429
  <li style="margin: 1rem 0;"><span style="color: var(--accent-secondary); font-weight: 600;">Over-detection Rate:</span> Percentage of non-PII incorrectly flagged (lower is better)</li>
430
  </ul>
431
  </div>
432
+
433
+ <!-- Footer -->
434
+ <div style="
435
+ text-align: center;
436
+ margin-top: 3rem;
437
+ padding-top: 2rem;
438
+ border-top: 1px solid var(--border-subtle);
439
+ ">
440
+ <p style="
441
+ color: var(--text-secondary);
442
+ font-size: 1rem;
443
+ font-family: 'Archivo', sans-serif;
444
+ font-weight: 500;
445
+ ">
446
+ Powered by <a href="https://nutrient.io" target="_blank" rel="noopener noreferrer" style="color: var(--accent-primary); font-weight: 700; text-decoration: none;">Nutrient</a>
447
+ </p>
448
+ </div>
449
  </div>
450
  """
pii_leaderboard.py CHANGED
@@ -14,13 +14,13 @@ from data_loader import (
14
  def get_rank_badge(rank):
15
  """Generate HTML for rank badge with appropriate styling"""
16
  badge_styles = {
17
- 1: ("1st", f"linear-gradient(145deg, {COLORS['digital_pollen']}, {COLORS['digital_pollen']})", COLORS['warm_black']),
18
- 2: ("2nd", f"linear-gradient(145deg, {COLORS['soft_grey']}, {COLORS['warm_grey']})", COLORS['white']),
19
- 3: ("3rd", f"linear-gradient(145deg, {COLORS['code_coral']}, {COLORS['code_coral_dm']})", COLORS['white']),
20
  }
21
 
22
  if rank in badge_styles:
23
- label, gradient, text_color = badge_styles[rank]
24
  return f"""
25
  <div style="
26
  display: inline-flex;
@@ -28,12 +28,13 @@ def get_rank_badge(rank):
28
  justify-content: center;
29
  min-width: 48px;
30
  padding: 4px 12px;
31
- background: {gradient};
32
- color: {text_color};
33
  border-radius: 6px;
34
  font-weight: 600;
35
  font-size: 0.9em;
36
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
 
37
  ">
38
  {label}
39
  </div>
@@ -53,17 +54,20 @@ def get_rank_badge(rank):
53
 
54
  def get_type_badge(model_type):
55
  """Generate HTML for model type badge"""
56
- bg_color = COLORS['disc_pink'] if model_type == 'Proprietary' else COLORS['data_green']
 
 
57
  return f"""
58
  <div style="
59
  display: inline-flex;
60
  align-items: center;
61
  padding: 4px 8px;
62
  background: {bg_color};
63
- color: white;
64
  border-radius: 4px;
65
  font-size: 0.85em;
66
- font-weight: 500;
 
67
  ">
68
  {model_type}
69
  </div>
@@ -80,9 +84,9 @@ def get_score_bar(score, is_inverse=False):
80
 
81
  # For over-detection rate, use inverse coloring (lower is better)
82
  if is_inverse:
83
- gradient = f"linear-gradient(90deg, {COLORS['data_green']}, {COLORS['code_coral']})"
84
  else:
85
- gradient = f"linear-gradient(90deg, {COLORS['code_coral']}, {COLORS['data_green']})"
86
 
87
  return f"""
88
  <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
@@ -97,15 +101,15 @@ def get_score_bar(score, is_inverse=False):
97
  <div style="
98
  width: {width}%;
99
  height: 100%;
100
- background: {gradient};
101
  border-radius: 4px;
102
  transition: width 0.3s ease;
103
  "></div>
104
  </div>
105
  <span style="
106
  font-family: 'SF Mono', monospace;
107
- font-weight: 600;
108
- color: var(--text-primary);
109
  min-width: 60px;
110
  ">{score:.3f}</span>
111
  </div>
@@ -126,16 +130,16 @@ def create_pii_leaderboard():
126
  <thead>
127
  <tr>
128
  <th style="width: 80px;">Rank</th>
129
- <th>Model</th>
130
- <th style="width: 120px;">Type</th>
131
- <th>Vendor</th>
132
- <th style="width: 200px;">Overall Accuracy</th>
133
- <th style="width: 150px;">Precision</th>
134
- <th style="width: 150px;">Recall</th>
135
  <th style="width: 150px;">F1 Score</th>
 
 
 
136
  <th style="width: 160px;">Over-detection Rate</th>
137
  <th>Cost/Doc ($)</th>
138
  <th>Time (s)</th>
 
 
139
  </tr>
140
  </thead>
141
  <tbody>
@@ -147,9 +151,7 @@ def create_pii_leaderboard():
147
  table_html += f"""
148
  <tr>
149
  <td>{get_rank_badge(rank)}</td>
150
- <td class="model-name">{row['Model']}</td>
151
- <td>{get_type_badge(row['Model Type'])}</td>
152
- <td>{row['Vendor']}</td>
153
  """
154
 
155
  # Get appropriate values based on document type filter
@@ -168,24 +170,24 @@ def create_pii_leaderboard():
168
  cost = row.get('Cost per Document ($)', '')
169
  time = row.get('Processing Time (s)', '')
170
 
171
- # Add score bars
172
- if accuracy != '':
173
- table_html += f'<td class="score-cell">{get_score_bar(accuracy)}</td>'
174
  else:
175
  table_html += '<td class="numeric-cell">-</td>'
176
 
177
- if precision != '':
178
- table_html += f'<td class="score-cell">{get_score_bar(precision)}</td>'
179
  else:
180
  table_html += '<td class="numeric-cell">-</td>'
181
 
182
- if recall != '':
183
- table_html += f'<td class="score-cell">{get_score_bar(recall)}</td>'
184
  else:
185
  table_html += '<td class="numeric-cell">-</td>'
186
 
187
- if f1 != '':
188
- table_html += f'<td class="score-cell">{get_score_bar(f1)}</td>'
189
  else:
190
  table_html += '<td class="numeric-cell">-</td>'
191
 
@@ -208,6 +210,8 @@ def create_pii_leaderboard():
208
  table_html += f"""
209
  <td class="numeric-cell">{cost_display}</td>
210
  <td class="numeric-cell">{time_display}</td>
 
 
211
  </tr>
212
  """
213
 
@@ -400,7 +404,7 @@ def create_pii_leaderboard():
400
 
401
  # Load initial data
402
  initial_df = load_leaderboard_data()
403
- initial_table = filter_and_sort_data("All", "All", "Overall Accuracy", "Descending")
404
 
405
  # Display header
406
  gr.HTML(HEADER_CONTENT)
@@ -410,11 +414,11 @@ def create_pii_leaderboard():
410
  <div class="dark-container" style="margin-bottom: 32px;">
411
  <div class="section-header">
412
  <span class="section-icon" style="color: var(--accent-primary);">📈</span>
413
- <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Inter', sans-serif; font-weight: 700;">
414
  PII Detection Performance Leaderboard
415
  </h3>
416
  </div>
417
- <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Inter', sans-serif;">
418
  Filter by document type, model access, and sort by any metric to explore performance
419
  </p>
420
 
@@ -451,8 +455,8 @@ def create_pii_leaderboard():
451
 
452
  with gr.Column(scale=1):
453
  sort_by = gr.Dropdown(
454
- choices=["Overall Accuracy", "Precision", "Recall", "F1 Score", "Over-redaction Rate", "Cost per Document ($)", "Processing Time (s)"],
455
- value="Overall Accuracy",
456
  label="📊 Sort By",
457
  elem_classes=["dropdown"]
458
  )
@@ -490,11 +494,11 @@ def create_pii_leaderboard():
490
  <div class="dark-container" style="margin-top: 32px;">
491
  <div class="section-header">
492
  <span class="section-icon" style="color: var(--accent-primary);">🎯</span>
493
- <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Inter', sans-serif; font-weight: 700;">
494
  Model Performance Cards
495
  </h3>
496
  </div>
497
- <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Inter', sans-serif; text-align: center;">
498
  Dive deep into individual model performance across all metrics and document types
499
  </p>
500
 
@@ -522,6 +526,10 @@ def create_pii_leaderboard():
522
 
523
  gr.HTML("""
524
  </div>
 
 
 
 
525
  </div>
526
  </div>""")
527
 
@@ -529,7 +537,7 @@ def create_pii_leaderboard():
529
  gr.HTML(f"""
530
  <style>
531
  .performance-card {{
532
- background: linear-gradient(145deg, rgba(26, 20, 20, 0.98) 0%, rgba(222, 157, 204, 0.05) 100%);
533
  border: 2px solid var(--accent-primary);
534
  border-radius: 24px;
535
  padding: 32px;
@@ -539,8 +547,8 @@ def create_pii_leaderboard():
539
  overflow: hidden;
540
  box-shadow:
541
  0 20px 40px rgba(0, 0, 0, 0.5),
542
- 0 0 80px rgba(222, 157, 204, 0.2),
543
- inset 0 0 120px rgba(222, 157, 204, 0.05);
544
  }}
545
 
546
  .card-header {{
@@ -553,12 +561,10 @@ def create_pii_leaderboard():
553
  .card-model-name {{
554
  font-size: 2rem;
555
  font-weight: 800;
556
- background: linear-gradient(135deg, var(--accent-primary) 0%, var(--accent-secondary) 100%);
557
- -webkit-background-clip: text;
558
- -webkit-text-fill-color: transparent;
559
  margin-bottom: 8px;
560
- text-shadow: 0 0 40px var(--glow-primary);
561
  line-height: 1.2;
 
562
  }}
563
 
564
  .card-stars {{
@@ -587,7 +593,7 @@ def create_pii_leaderboard():
587
  .metric-item:hover {{
588
  transform: translateY(-2px);
589
  border-color: var(--accent-primary);
590
- box-shadow: 0 8px 16px rgba(222, 157, 204, 0.3);
591
  }}
592
 
593
  .metric-icon {{
 
14
  def get_rank_badge(rank):
15
  """Generate HTML for rank badge with appropriate styling"""
16
  badge_styles = {
17
+ 1: ("1st", COLORS['digital_pollen'], COLORS['warm_black']),
18
+ 2: ("2nd", COLORS['soft_grey'], COLORS['black']),
19
+ 3: ("3rd", COLORS['code_coral'], COLORS['black']),
20
  }
21
 
22
  if rank in badge_styles:
23
+ label, bg_color, text_color = badge_styles[rank]
24
  return f"""
25
  <div style="
26
  display: inline-flex;
 
28
  justify-content: center;
29
  min-width: 48px;
30
  padding: 4px 12px;
31
+ background: {bg_color};
32
+ color: {text_color} !important;
33
  border-radius: 6px;
34
  font-weight: 600;
35
  font-size: 0.9em;
36
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
37
+ font-family: 'Archivo', sans-serif;
38
  ">
39
  {label}
40
  </div>
 
54
 
55
  def get_type_badge(model_type):
56
  """Generate HTML for model type badge"""
57
+ bg_color = COLORS['digital_pollen'] if model_type == 'Proprietary' else COLORS['data_green']
58
+ # Use black text for better readability on brand accent backgrounds
59
+ text_color = '#000000'
60
  return f"""
61
  <div style="
62
  display: inline-flex;
63
  align-items: center;
64
  padding: 4px 8px;
65
  background: {bg_color};
66
+ color: {text_color} !important;
67
  border-radius: 4px;
68
  font-size: 0.85em;
69
+ font-weight: 600;
70
+ font-family: 'Archivo', sans-serif;
71
  ">
72
  {model_type}
73
  </div>
 
84
 
85
  # For over-detection rate, use inverse coloring (lower is better)
86
  if is_inverse:
87
+ bar_color = COLORS['code_coral'] if score > 0.5 else COLORS['data_green']
88
  else:
89
+ bar_color = COLORS['data_green'] if score > 0.5 else COLORS['code_coral']
90
 
91
  return f"""
92
  <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
 
101
  <div style="
102
  width: {width}%;
103
  height: 100%;
104
+ background: {bar_color};
105
  border-radius: 4px;
106
  transition: width 0.3s ease;
107
  "></div>
108
  </div>
109
  <span style="
110
  font-family: 'SF Mono', monospace;
111
+ font-weight: 700;
112
+ color: #000000;
113
  min-width: 60px;
114
  ">{score:.3f}</span>
115
  </div>
 
130
  <thead>
131
  <tr>
132
  <th style="width: 80px;">Rank</th>
133
+ <th style="width: 250px;">Model</th>
 
 
 
 
 
134
  <th style="width: 150px;">F1 Score</th>
135
+ <th style="width: 150px;">Recall</th>
136
+ <th style="width: 150px;">Precision</th>
137
+ <th style="width: 200px;">Overall Accuracy</th>
138
  <th style="width: 160px;">Over-detection Rate</th>
139
  <th>Cost/Doc ($)</th>
140
  <th>Time (s)</th>
141
+ <th style="width: 120px;">Type</th>
142
+ <th>Vendor</th>
143
  </tr>
144
  </thead>
145
  <tbody>
 
151
  table_html += f"""
152
  <tr>
153
  <td>{get_rank_badge(rank)}</td>
154
+ <td class="model-name" style="color:#EFEBE7; font-weight:700;">{row['Model']}</td>
 
 
155
  """
156
 
157
  # Get appropriate values based on document type filter
 
170
  cost = row.get('Cost per Document ($)', '')
171
  time = row.get('Processing Time (s)', '')
172
 
173
+ # Add score bars in new order: F1, Recall, Precision, Accuracy
174
+ if f1 != '':
175
+ table_html += f'<td class="score-cell">{get_score_bar(f1)}</td>'
176
  else:
177
  table_html += '<td class="numeric-cell">-</td>'
178
 
179
+ if recall != '':
180
+ table_html += f'<td class="score-cell">{get_score_bar(recall)}</td>'
181
  else:
182
  table_html += '<td class="numeric-cell">-</td>'
183
 
184
+ if precision != '':
185
+ table_html += f'<td class="score-cell">{get_score_bar(precision)}</td>'
186
  else:
187
  table_html += '<td class="numeric-cell">-</td>'
188
 
189
+ if accuracy != '':
190
+ table_html += f'<td class="score-cell">{get_score_bar(accuracy)}</td>'
191
  else:
192
  table_html += '<td class="numeric-cell">-</td>'
193
 
 
210
  table_html += f"""
211
  <td class="numeric-cell">{cost_display}</td>
212
  <td class="numeric-cell">{time_display}</td>
213
+ <td>{get_type_badge(row['Model Type'])}</td>
214
+ <td>{row['Vendor']}</td>
215
  </tr>
216
  """
217
 
 
404
 
405
  # Load initial data
406
  initial_df = load_leaderboard_data()
407
+ initial_table = filter_and_sort_data("All", "All", "F1 Score", "Descending")
408
 
409
  # Display header
410
  gr.HTML(HEADER_CONTENT)
 
414
  <div class="dark-container" style="margin-bottom: 32px;">
415
  <div class="section-header">
416
  <span class="section-icon" style="color: var(--accent-primary);">📈</span>
417
+ <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
418
  PII Detection Performance Leaderboard
419
  </h3>
420
  </div>
421
+ <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif;">
422
  Filter by document type, model access, and sort by any metric to explore performance
423
  </p>
424
 
 
455
 
456
  with gr.Column(scale=1):
457
  sort_by = gr.Dropdown(
458
+ choices=["F1 Score", "Recall", "Precision", "Overall Accuracy", "Over-redaction Rate", "Cost per Document ($)", "Processing Time (s)"],
459
+ value="F1 Score",
460
  label="📊 Sort By",
461
  elem_classes=["dropdown"]
462
  )
 
494
  <div class="dark-container" style="margin-top: 32px;">
495
  <div class="section-header">
496
  <span class="section-icon" style="color: var(--accent-primary);">🎯</span>
497
+ <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
498
  Model Performance Cards
499
  </h3>
500
  </div>
501
+ <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif; text-align: center;">
502
  Dive deep into individual model performance across all metrics and document types
503
  </p>
504
 
 
526
 
527
  gr.HTML("""
528
  </div>
529
+
530
+ <div style="text-align: center; margin-top: 24px; padding-top: 12px; border-top: 1px solid var(--border-subtle);">
531
+ <span style="color: var(--text-secondary); font-family: 'Archivo', sans-serif;">Powered by <a href=\"https://nutrient.io\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"color: var(--accent-primary); font-weight: 700; text-decoration: none;\">Nutrient</a></span>
532
+ </div>
533
  </div>
534
  </div>""")
535
 
 
537
  gr.HTML(f"""
538
  <style>
539
  .performance-card {{
540
+ background: var(--bg-card);
541
  border: 2px solid var(--accent-primary);
542
  border-radius: 24px;
543
  padding: 32px;
 
547
  overflow: hidden;
548
  box-shadow:
549
  0 20px 40px rgba(0, 0, 0, 0.5),
550
+ 0 0 80px rgba(240, 201, 104, 0.2),
551
+ inset 0 0 120px rgba(240, 201, 104, 0.08);
552
  }}
553
 
554
  .card-header {{
 
561
  .card-model-name {{
562
  font-size: 2rem;
563
  font-weight: 800;
564
+ color: var(--text-primary);
 
 
565
  margin-bottom: 8px;
 
566
  line-height: 1.2;
567
+ font-family: 'Archivo', sans-serif;
568
  }}
569
 
570
  .card-stars {{
 
593
  .metric-item:hover {{
594
  transform: translateY(-2px);
595
  border-color: var(--accent-primary);
596
+ box-shadow: 0 8px 16px rgba(240, 201, 104, 0.25);
597
  }}
598
 
599
  .metric-icon {{
results/pii_detection_results.csv CHANGED
@@ -1,9 +1,5 @@
1
  Model,Model Type,Vendor,Overall Accuracy,Precision,Recall,F1 Score,Over-redaction Rate,Processing Time (s),Cost per Document ($),Healthcare Accuracy,Financial Accuracy,Government Accuracy,Legal Accuracy,Personal Accuracy
2
- GPT-4o,Proprietary,OpenAI,0.941,0.945,0.938,0.941,0.023,2.3,0.012,0.952,0.938,0.933,0.941,0.940
3
- Claude-3.5-Sonnet,Proprietary,Anthropic,0.928,0.932,0.924,0.928,0.031,3.1,0.015,0.939,0.925,0.920,0.928,0.927
4
- Gemini-1.5-Pro,Proprietary,Google,0.915,0.919,0.911,0.915,0.038,2.8,0.008,0.926,0.912,0.907,0.915,0.914
5
- LLaMA-3.1-70B,Open Source,Meta,0.882,0.887,0.877,0.882,0.052,4.2,0.003,0.893,0.879,0.874,0.882,0.881
6
- Mistral-Large,Proprietary,Mistral AI,0.871,0.875,0.867,0.871,0.048,3.7,0.011,0.882,0.868,0.863,0.871,0.870
7
- GPT-4o-mini,Proprietary,OpenAI,0.856,0.860,0.852,0.856,0.061,1.8,0.002,0.867,0.853,0.848,0.856,0.855
8
- Claude-3-Haiku,Proprietary,Anthropic,0.834,0.838,0.830,0.834,0.078,2.1,0.006,0.845,0.831,0.826,0.834,0.833
9
- Gemini-1.5-Flash,Proprietary,Google,0.821,0.825,0.817,0.821,0.085,2.4,0.004,0.832,0.818,0.813,0.821,0.820
 
1
  Model,Model Type,Vendor,Overall Accuracy,Precision,Recall,F1 Score,Over-redaction Rate,Processing Time (s),Cost per Document ($),Healthcare Accuracy,Financial Accuracy,Government Accuracy,Legal Accuracy,Personal Accuracy
2
+ Nutrient & GPT-5-mini,Proprietary,OpenAI,0.757,0.993,0.972,0.98,0.054,2.7,0.018,0.982,0.974,0.958,0.977,0.989
3
+ Nutrient & GPT-5-nano,Proprietary,OpenAI,0.658,0.988,0.954,0.966,0.066,2.1,0.015,0.963,0.961,0.943,0.946,0.978
4
+ Nutrient & GPT-4.1-mini,Proprietary,OpenAI,0.599,0.993,0.945,0.964,0.065,2.3,0.012,0.96,0.961,0.966,0.895,0.994
5
+ Nutrient & GPT-4.1-nano,Proprietary,OpenAI,0.419,0.989,0.906,0.936,0.118,1.8,0.008,0.939,0.939,0.933,0.925,0.974