Luis Kalckstein
committed on
V1
Browse files
- Nutrient_Symbol_RGB_OffWhite.svg +12 -0
- data_loader.py +78 -56
- pii_leaderboard.py +53 -47
- results/pii_detection_results.csv +4 -8
Nutrient_Symbol_RGB_OffWhite.svg
ADDED
|
|
data_loader.py
CHANGED
|
@@ -44,14 +44,15 @@ def load_data():
|
|
| 44 |
|
| 45 |
return df
|
| 46 |
|
| 47 |
-
#
|
| 48 |
COLORS = {
|
| 49 |
# Light mode colors
|
| 50 |
"white": "#FFFFFF",
|
|
|
|
| 51 |
"disc_pink": "#DE9DCC",
|
| 52 |
"code_coral": "#F25E45",
|
| 53 |
"data_green": "#6EB579",
|
| 54 |
-
"digital_pollen": "#F0C968",
|
| 55 |
"warm_black": "#1A1414",
|
| 56 |
"off_white": "#EFEBE7",
|
| 57 |
"pixel_mist": "#E2DBD9",
|
|
@@ -69,7 +70,7 @@ COLORS = {
|
|
| 69 |
HEADER_CONTENT = f"""
|
| 70 |
<style>
|
| 71 |
/* Import fonts */
|
| 72 |
-
@import url('https://fonts.googleapis.com/css2?family=
|
| 73 |
|
| 74 |
/* Root variables with custom color palette */
|
| 75 |
:root {{
|
|
@@ -82,18 +83,18 @@ HEADER_CONTENT = f"""
|
|
| 82 |
--text-primary: #EFEBE7;
|
| 83 |
--text-secondary: #C2B8AE;
|
| 84 |
--text-muted: #67594B;
|
| 85 |
-
--accent-primary: #
|
| 86 |
--accent-secondary: #F25E45;
|
| 87 |
--accent-tertiary: #6EB579;
|
| 88 |
-
--accent-quaternary: #
|
| 89 |
-
--glow-primary: rgba(
|
| 90 |
--glow-secondary: rgba(242, 94, 69, 0.4);
|
| 91 |
--glow-tertiary: rgba(110, 181, 121, 0.4);
|
| 92 |
}}
|
| 93 |
|
| 94 |
/* Global font and background */
|
| 95 |
.gradio-container {{
|
| 96 |
-
font-family: '
|
| 97 |
background: var(--bg-primary) !important;
|
| 98 |
color: var(--text-primary) !important;
|
| 99 |
}}
|
|
@@ -102,12 +103,12 @@ HEADER_CONTENT = f"""
|
|
| 102 |
h1, h2, h3, h4 {{
|
| 103 |
color: var(--text-primary) !important;
|
| 104 |
font-weight: 700 !important;
|
| 105 |
-
font-family: '
|
| 106 |
}}
|
| 107 |
|
| 108 |
p, span, div {{
|
| 109 |
color: var(--text-primary) !important;
|
| 110 |
-
font-family: '
|
| 111 |
}}
|
| 112 |
|
| 113 |
/* Dark containers */
|
|
@@ -148,24 +149,25 @@ p, span, div {{
|
|
| 148 |
.v2-styled-table {{
|
| 149 |
width: 100%;
|
| 150 |
border-collapse: collapse;
|
| 151 |
-
font-family: '
|
| 152 |
font-size: 14px;
|
| 153 |
}}
|
| 154 |
|
| 155 |
.v2-styled-table thead {{
|
| 156 |
-
background:
|
| 157 |
}}
|
| 158 |
|
| 159 |
.v2-styled-table th {{
|
| 160 |
padding: 16px 12px;
|
| 161 |
text-align: left;
|
| 162 |
-
color:
|
| 163 |
font-weight: 600;
|
| 164 |
font-size: 13px;
|
| 165 |
text-transform: uppercase;
|
| 166 |
letter-spacing: 0.05em;
|
| 167 |
border: none;
|
| 168 |
position: relative;
|
|
|
|
| 169 |
}}
|
| 170 |
|
| 171 |
.v2-styled-table td {{
|
|
@@ -176,26 +178,27 @@ p, span, div {{
|
|
| 176 |
vertical-align: middle;
|
| 177 |
}}
|
| 178 |
|
| 179 |
-
.v2-styled-table tbody tr {{
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
| 183 |
|
| 184 |
-
.v2-styled-table tbody tr:nth-child(even) {{
|
| 185 |
-
|
| 186 |
-
}}
|
| 187 |
|
| 188 |
-
.v2-styled-table tbody tr:hover {{
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
}}
|
| 193 |
|
| 194 |
-
.model-name {{
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
}}
|
| 199 |
|
| 200 |
.numeric-cell {{
|
| 201 |
text-align: center;
|
|
@@ -205,6 +208,7 @@ p, span, div {{
|
|
| 205 |
|
| 206 |
.score-cell {{
|
| 207 |
padding: 8px 12px;
|
|
|
|
| 208 |
}}
|
| 209 |
|
| 210 |
/* Scrollbar styling */
|
|
@@ -238,14 +242,20 @@ p, span, div {{
|
|
| 238 |
">
|
| 239 |
<div style="max-width: 72rem; margin: 0 auto;">
|
| 240 |
<div style="text-align: center; margin-bottom: 4rem;">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
<h1 style="
|
| 242 |
font-size: 4rem;
|
| 243 |
font-weight: 800;
|
| 244 |
line-height: 1.1;
|
| 245 |
-
|
| 246 |
-
-webkit-background-clip: text;
|
| 247 |
-
-webkit-text-fill-color: transparent;
|
| 248 |
margin-bottom: 0.5rem;
|
|
|
|
| 249 |
">
|
| 250 |
🔒 LLM PII Detection Leaderboard
|
| 251 |
</h1>
|
|
@@ -261,13 +271,12 @@ p, span, div {{
|
|
| 261 |
Comprehensive benchmark for language models' performance in detecting and redacting
|
| 262 |
personally identifiable information (PII) across various document types and scenarios.
|
| 263 |
<span style="
|
| 264 |
-
|
| 265 |
-
-webkit-background-clip: text;
|
| 266 |
-
-webkit-text-fill-color: transparent;
|
| 267 |
display: block;
|
| 268 |
margin-top: 1rem;
|
| 269 |
font-size: 1.5rem;
|
| 270 |
font-weight: 500;
|
|
|
|
| 271 |
">
|
| 272 |
"How well do LLMs protect sensitive information?"
|
| 273 |
</span>
|
|
@@ -292,18 +301,17 @@ p, span, div {{
|
|
| 292 |
font-size: 4rem;
|
| 293 |
font-weight: 800;
|
| 294 |
margin-bottom: 1rem;
|
| 295 |
-
|
| 296 |
-
-
|
| 297 |
-
|
| 298 |
-
">8</div>
|
| 299 |
<div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
|
| 300 |
Language Models
|
| 301 |
</div>
|
| 302 |
<div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
|
| 303 |
-
|
| 304 |
</div>
|
| 305 |
<div style="color: var(--text-secondary); margin-top: 0.5rem;">
|
| 306 |
-
GPT-
|
| 307 |
</div>
|
| 308 |
</div>
|
| 309 |
|
|
@@ -319,9 +327,8 @@ p, span, div {{
|
|
| 319 |
font-size: 4rem;
|
| 320 |
font-weight: 800;
|
| 321 |
margin-bottom: 1rem;
|
| 322 |
-
|
| 323 |
-
-
|
| 324 |
-
-webkit-text-fill-color: transparent;
|
| 325 |
">5</div>
|
| 326 |
<div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
|
| 327 |
Document Types
|
|
@@ -346,18 +353,17 @@ p, span, div {{
|
|
| 346 |
font-size: 4rem;
|
| 347 |
font-weight: 800;
|
| 348 |
margin-bottom: 1rem;
|
| 349 |
-
|
| 350 |
-
-
|
| 351 |
-
|
| 352 |
-
">94.1%</div>
|
| 353 |
<div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
|
| 354 |
-
Best
|
| 355 |
</div>
|
| 356 |
<div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
|
| 357 |
State-of-the-art performance
|
| 358 |
</div>
|
| 359 |
<div style="color: var(--text-secondary); margin-top: 0.5rem;">
|
| 360 |
-
GPT-
|
| 361 |
</div>
|
| 362 |
</div>
|
| 363 |
</div>
|
|
@@ -369,8 +375,7 @@ p, span, div {{
|
|
| 369 |
METHODOLOGY = """
|
| 370 |
<div style="max-width: 1200px; margin: 0 auto; padding: 2rem; color: var(--text-secondary); line-height: 1.7; font-size: 1rem;">
|
| 371 |
<h1 style="font-size: 2.5rem; font-weight: 700; margin: 3rem 0 1.5rem; color: var(--text-primary);
|
| 372 |
-
|
| 373 |
-
-webkit-background-clip: text; -webkit-text-fill-color: transparent;">
|
| 374 |
Methodology
|
| 375 |
</h1>
|
| 376 |
|
|
@@ -384,28 +389,28 @@ METHODOLOGY = """
|
|
| 384 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 385 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 386 |
background: var(--accent-primary); border-radius: 50%;
|
| 387 |
-
box-shadow: 0 0 0 2px rgba(
|
| 388 |
<span style="color: var(--accent-primary); font-weight: 600;">Model Selection:</span>
|
| 389 |
We evaluate leading language models across proprietary and open-source categories
|
| 390 |
</li>
|
| 391 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 392 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 393 |
background: var(--accent-primary); border-radius: 50%;
|
| 394 |
-
box-shadow: 0 0 0 2px rgba(
|
| 395 |
<span style="color: var(--accent-primary); font-weight: 600;">PII Detection:</span>
|
| 396 |
Each model processes documents with instructions to identify and classify PII entities
|
| 397 |
</li>
|
| 398 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 399 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 400 |
background: var(--accent-primary); border-radius: 50%;
|
| 401 |
-
box-shadow: 0 0 0 2px rgba(
|
| 402 |
<span style="color: var(--accent-primary); font-weight: 600;">Performance Metrics:</span>
|
| 403 |
Precision, Recall, F1 Score, Over-detection Rate, Processing Time, and Cost
|
| 404 |
</li>
|
| 405 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 406 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 407 |
background: var(--accent-primary); border-radius: 50%;
|
| 408 |
-
box-shadow: 0 0 0 2px rgba(
|
| 409 |
<span style="color: var(--accent-primary); font-weight: 600;">Domain Analysis:</span>
|
| 410 |
Specialized evaluation across Healthcare, Financial, Government, Legal, and Personal documents
|
| 411 |
</li>
|
|
@@ -424,5 +429,22 @@ METHODOLOGY = """
|
|
| 424 |
<li style="margin: 1rem 0;"><span style="color: var(--accent-secondary); font-weight: 600;">Over-detection Rate:</span> Percentage of non-PII incorrectly flagged (lower is better)</li>
|
| 425 |
</ul>
|
| 426 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
</div>
|
| 428 |
"""
|
|
|
|
| 44 |
|
| 45 |
return df
|
| 46 |
|
| 47 |
+
# Nutrient brand color palette
|
| 48 |
COLORS = {
|
| 49 |
# Light mode colors
|
| 50 |
"white": "#FFFFFF",
|
| 51 |
+
"black": "#000000",
|
| 52 |
"disc_pink": "#DE9DCC",
|
| 53 |
"code_coral": "#F25E45",
|
| 54 |
"data_green": "#6EB579",
|
| 55 |
+
"digital_pollen": "#F0C968", # Primary yellow accent
|
| 56 |
"warm_black": "#1A1414",
|
| 57 |
"off_white": "#EFEBE7",
|
| 58 |
"pixel_mist": "#E2DBD9",
|
|
|
|
| 70 |
HEADER_CONTENT = f"""
|
| 71 |
<style>
|
| 72 |
/* Import fonts */
|
| 73 |
+
@import url('https://fonts.googleapis.com/css2?family=Archivo:wght@400;500;600;700;800&display=swap');
|
| 74 |
|
| 75 |
/* Root variables with custom color palette */
|
| 76 |
:root {{
|
|
|
|
| 83 |
--text-primary: #EFEBE7;
|
| 84 |
--text-secondary: #C2B8AE;
|
| 85 |
--text-muted: #67594B;
|
| 86 |
+
--accent-primary: #F0C968;
|
| 87 |
--accent-secondary: #F25E45;
|
| 88 |
--accent-tertiary: #6EB579;
|
| 89 |
+
--accent-quaternary: #DE9DCC;
|
| 90 |
+
--glow-primary: rgba(240, 201, 104, 0.4);
|
| 91 |
--glow-secondary: rgba(242, 94, 69, 0.4);
|
| 92 |
--glow-tertiary: rgba(110, 181, 121, 0.4);
|
| 93 |
}}
|
| 94 |
|
| 95 |
/* Global font and background */
|
| 96 |
.gradio-container {{
|
| 97 |
+
font-family: 'Archivo', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 98 |
background: var(--bg-primary) !important;
|
| 99 |
color: var(--text-primary) !important;
|
| 100 |
}}
|
|
|
|
| 103 |
h1, h2, h3, h4 {{
|
| 104 |
color: var(--text-primary) !important;
|
| 105 |
font-weight: 700 !important;
|
| 106 |
+
font-family: 'Archivo', sans-serif !important;
|
| 107 |
}}
|
| 108 |
|
| 109 |
p, span, div {{
|
| 110 |
color: var(--text-primary) !important;
|
| 111 |
+
font-family: 'Archivo', sans-serif !important;
|
| 112 |
}}
|
| 113 |
|
| 114 |
/* Dark containers */
|
|
|
|
| 149 |
.v2-styled-table {{
|
| 150 |
width: 100%;
|
| 151 |
border-collapse: collapse;
|
| 152 |
+
font-family: 'Archivo', sans-serif;
|
| 153 |
font-size: 14px;
|
| 154 |
}}
|
| 155 |
|
| 156 |
.v2-styled-table thead {{
|
| 157 |
+
background: var(--accent-primary);
|
| 158 |
}}
|
| 159 |
|
| 160 |
.v2-styled-table th {{
|
| 161 |
padding: 16px 12px;
|
| 162 |
text-align: left;
|
| 163 |
+
color: #000000 !important;
|
| 164 |
font-weight: 600;
|
| 165 |
font-size: 13px;
|
| 166 |
text-transform: uppercase;
|
| 167 |
letter-spacing: 0.05em;
|
| 168 |
border: none;
|
| 169 |
position: relative;
|
| 170 |
+
font-family: 'Archivo', sans-serif;
|
| 171 |
}}
|
| 172 |
|
| 173 |
.v2-styled-table td {{
|
|
|
|
| 178 |
vertical-align: middle;
|
| 179 |
}}
|
| 180 |
|
| 181 |
+
.v2-styled-table tbody tr {{
|
| 182 |
+
transition: none;
|
| 183 |
+
background: rgba(239, 235, 231, 0.06);
|
| 184 |
+
box-shadow: none;
|
| 185 |
+
}}
|
| 186 |
|
| 187 |
+
.v2-styled-table tbody tr:nth-child(even) {{
|
| 188 |
+
background: rgba(239, 235, 231, 0.08);
|
| 189 |
+
}}
|
| 190 |
|
| 191 |
+
.v2-styled-table tbody tr:hover {{
|
| 192 |
+
background: rgba(240, 201, 104, 0.10);
|
| 193 |
+
box-shadow: none;
|
| 194 |
+
transform: none;
|
| 195 |
+
}}
|
| 196 |
|
| 197 |
+
.model-name {{
|
| 198 |
+
font-weight: 700;
|
| 199 |
+
color: var(--off_white);
|
| 200 |
+
transition: all 0.2s ease;
|
| 201 |
+
}}
|
| 202 |
|
| 203 |
.numeric-cell {{
|
| 204 |
text-align: center;
|
|
|
|
| 208 |
|
| 209 |
.score-cell {{
|
| 210 |
padding: 8px 12px;
|
| 211 |
+
color: #000000 !important;
|
| 212 |
}}
|
| 213 |
|
| 214 |
/* Scrollbar styling */
|
|
|
|
| 242 |
">
|
| 243 |
<div style="max-width: 72rem; margin: 0 auto;">
|
| 244 |
<div style="text-align: center; margin-bottom: 4rem;">
|
| 245 |
+
<!-- Nutrient Logo -->
|
| 246 |
+
<div style="margin-bottom: 2rem; display: flex; justify-content: center;">
|
| 247 |
+
<svg width="120" height="84" viewBox="0 0 240 169" xmlns="http://www.w3.org/2000/svg">
|
| 248 |
+
<path fill="#FFFFFF" d="M20,104.3c-11.1,0-20-8.9-20-20s8.9-20,20-20,20,8.9,20,20-9,20-20,20ZM220,64.3c-11.1,0-20,8.9-20,20s8.9,20,20,20,20-8.9,20-20-8.9-20-20-20ZM30.5,133.2c-8.5,7.1-9.6,19.7-2.5,28.2,7.1,8.5,19.7,9.6,28.2,2.5,8.5-7.1,9.6-19.7,2.5-28.2-7.1-8.5-19.7-9.6-28.2-2.5ZM209.5,35.3c8.5-7.1,9.6-19.7,2.5-28.2-7.1-8.5-19.7-9.6-28.2-2.5-8.5,7.1-9.6,19.7-2.5,28.2,7.1,8.5,19.7,9.6,28.2,2.5ZM56.2,4.7c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5,7.1-8.5,6-21.1-2.5-28.2ZM209.5,133.2c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5,7.1-8.5,6-21.1-2.5-28.2ZM158.4,90.4c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5s6-21.1-2.5-28.2ZM107.3,47.5c-8.5-7.1-21.1-6-28.2,2.5-7.1,8.5-6,21.1,2.5,28.2,8.5,7.1,21.1,6,28.2-2.5s6-21.1-2.5-28.2Z"/>
|
| 249 |
+
</svg>
|
| 250 |
+
</div>
|
| 251 |
+
|
| 252 |
<h1 style="
|
| 253 |
font-size: 4rem;
|
| 254 |
font-weight: 800;
|
| 255 |
line-height: 1.1;
|
| 256 |
+
color: var(--accent-primary);
|
|
|
|
|
|
|
| 257 |
margin-bottom: 0.5rem;
|
| 258 |
+
font-family: 'Archivo', sans-serif;
|
| 259 |
">
|
| 260 |
🔒 LLM PII Detection Leaderboard
|
| 261 |
</h1>
|
|
|
|
| 271 |
Comprehensive benchmark for language models' performance in detecting and redacting
|
| 272 |
personally identifiable information (PII) across various document types and scenarios.
|
| 273 |
<span style="
|
| 274 |
+
color: var(--accent-primary);
|
|
|
|
|
|
|
| 275 |
display: block;
|
| 276 |
margin-top: 1rem;
|
| 277 |
font-size: 1.5rem;
|
| 278 |
font-weight: 500;
|
| 279 |
+
font-family: 'Archivo', sans-serif;
|
| 280 |
">
|
| 281 |
"How well do LLMs protect sensitive information?"
|
| 282 |
</span>
|
|
|
|
| 301 |
font-size: 4rem;
|
| 302 |
font-weight: 800;
|
| 303 |
margin-bottom: 1rem;
|
| 304 |
+
color: var(--accent-primary);
|
| 305 |
+
font-family: 'Archivo', sans-serif;
|
| 306 |
+
">4</div>
|
|
|
|
| 307 |
<div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
|
| 308 |
Language Models
|
| 309 |
</div>
|
| 310 |
<div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
|
| 311 |
+
Cutting-edge Nutrient models
|
| 312 |
</div>
|
| 313 |
<div style="color: var(--text-secondary); margin-top: 0.5rem;">
|
| 314 |
+
GPT-5-mini, GPT-5-nano, GPT-4.1-mini, GPT-4.1-nano
|
| 315 |
</div>
|
| 316 |
</div>
|
| 317 |
|
|
|
|
| 327 |
font-size: 4rem;
|
| 328 |
font-weight: 800;
|
| 329 |
margin-bottom: 1rem;
|
| 330 |
+
color: var(--accent-tertiary);
|
| 331 |
+
font-family: 'Archivo', sans-serif;
|
|
|
|
| 332 |
">5</div>
|
| 333 |
<div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
|
| 334 |
Document Types
|
|
|
|
| 353 |
font-size: 4rem;
|
| 354 |
font-weight: 800;
|
| 355 |
margin-bottom: 1rem;
|
| 356 |
+
color: var(--accent-primary);
|
| 357 |
+
font-family: 'Archivo', sans-serif;
|
| 358 |
+
">98.0%</div>
|
|
|
|
| 359 |
<div style="color: var(--text-secondary); font-size: 1.5rem; margin-bottom: 1.5rem;">
|
| 360 |
+
Best F1 Score
|
| 361 |
</div>
|
| 362 |
<div style="font-size: 1.125rem; line-height: 1.75; color: var(--text-primary);">
|
| 363 |
State-of-the-art performance
|
| 364 |
</div>
|
| 365 |
<div style="color: var(--text-secondary); margin-top: 0.5rem;">
|
| 366 |
+
Nutrient & GPT-5-mini leading F1 performance
|
| 367 |
</div>
|
| 368 |
</div>
|
| 369 |
</div>
|
|
|
|
| 375 |
METHODOLOGY = """
|
| 376 |
<div style="max-width: 1200px; margin: 0 auto; padding: 2rem; color: var(--text-secondary); line-height: 1.7; font-size: 1rem;">
|
| 377 |
<h1 style="font-size: 2.5rem; font-weight: 700; margin: 3rem 0 1.5rem; color: var(--text-primary);
|
| 378 |
+
font-family: 'Archivo', sans-serif;">
|
|
|
|
| 379 |
Methodology
|
| 380 |
</h1>
|
| 381 |
|
|
|
|
| 389 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 390 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 391 |
background: var(--accent-primary); border-radius: 50%;
|
| 392 |
+
box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
|
| 393 |
<span style="color: var(--accent-primary); font-weight: 600;">Model Selection:</span>
|
| 394 |
We evaluate leading language models across proprietary and open-source categories
|
| 395 |
</li>
|
| 396 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 397 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 398 |
background: var(--accent-primary); border-radius: 50%;
|
| 399 |
+
box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
|
| 400 |
<span style="color: var(--accent-primary); font-weight: 600;">PII Detection:</span>
|
| 401 |
Each model processes documents with instructions to identify and classify PII entities
|
| 402 |
</li>
|
| 403 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 404 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 405 |
background: var(--accent-primary); border-radius: 50%;
|
| 406 |
+
box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
|
| 407 |
<span style="color: var(--accent-primary); font-weight: 600;">Performance Metrics:</span>
|
| 408 |
Precision, Recall, F1 Score, Over-detection Rate, Processing Time, and Cost
|
| 409 |
</li>
|
| 410 |
<li style="padding-left: 2rem; position: relative; margin: 1rem 0; display: flex; align-items: flex-start;">
|
| 411 |
<span style="content: ''; position: absolute; left: 0; top: 0.75rem; width: 8px; height: 8px;
|
| 412 |
background: var(--accent-primary); border-radius: 50%;
|
| 413 |
+
box-shadow: 0 0 0 2px rgba(240, 201, 104, 0.25);"></span>
|
| 414 |
<span style="color: var(--accent-primary); font-weight: 600;">Domain Analysis:</span>
|
| 415 |
Specialized evaluation across Healthcare, Financial, Government, Legal, and Personal documents
|
| 416 |
</li>
|
|
|
|
| 429 |
<li style="margin: 1rem 0;"><span style="color: var(--accent-secondary); font-weight: 600;">Over-detection Rate:</span> Percentage of non-PII incorrectly flagged (lower is better)</li>
|
| 430 |
</ul>
|
| 431 |
</div>
|
| 432 |
+
|
| 433 |
+
<!-- Footer -->
|
| 434 |
+
<div style="
|
| 435 |
+
text-align: center;
|
| 436 |
+
margin-top: 3rem;
|
| 437 |
+
padding-top: 2rem;
|
| 438 |
+
border-top: 1px solid var(--border-subtle);
|
| 439 |
+
">
|
| 440 |
+
<p style="
|
| 441 |
+
color: var(--text-secondary);
|
| 442 |
+
font-size: 1rem;
|
| 443 |
+
font-family: 'Archivo', sans-serif;
|
| 444 |
+
font-weight: 500;
|
| 445 |
+
">
|
| 446 |
+
Powered by <a href="https://nutrient.io" target="_blank" rel="noopener noreferrer" style="color: var(--accent-primary); font-weight: 700; text-decoration: none;">Nutrient</a>
|
| 447 |
+
</p>
|
| 448 |
+
</div>
|
| 449 |
</div>
|
| 450 |
"""
|
pii_leaderboard.py
CHANGED
|
@@ -14,13 +14,13 @@ from data_loader import (
|
|
| 14 |
def get_rank_badge(rank):
|
| 15 |
"""Generate HTML for rank badge with appropriate styling"""
|
| 16 |
badge_styles = {
|
| 17 |
-
1: ("1st",
|
| 18 |
-
2: ("2nd",
|
| 19 |
-
3: ("3rd",
|
| 20 |
}
|
| 21 |
|
| 22 |
if rank in badge_styles:
|
| 23 |
-
label,
|
| 24 |
return f"""
|
| 25 |
<div style="
|
| 26 |
display: inline-flex;
|
|
@@ -28,12 +28,13 @@ def get_rank_badge(rank):
|
|
| 28 |
justify-content: center;
|
| 29 |
min-width: 48px;
|
| 30 |
padding: 4px 12px;
|
| 31 |
-
background: {
|
| 32 |
-
color: {text_color};
|
| 33 |
border-radius: 6px;
|
| 34 |
font-weight: 600;
|
| 35 |
font-size: 0.9em;
|
| 36 |
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
|
|
|
|
| 37 |
">
|
| 38 |
{label}
|
| 39 |
</div>
|
|
@@ -53,17 +54,20 @@ def get_rank_badge(rank):
|
|
| 53 |
|
| 54 |
def get_type_badge(model_type):
|
| 55 |
"""Generate HTML for model type badge"""
|
| 56 |
-
bg_color = COLORS['
|
|
|
|
|
|
|
| 57 |
return f"""
|
| 58 |
<div style="
|
| 59 |
display: inline-flex;
|
| 60 |
align-items: center;
|
| 61 |
padding: 4px 8px;
|
| 62 |
background: {bg_color};
|
| 63 |
-
color:
|
| 64 |
border-radius: 4px;
|
| 65 |
font-size: 0.85em;
|
| 66 |
-
font-weight:
|
|
|
|
| 67 |
">
|
| 68 |
{model_type}
|
| 69 |
</div>
|
|
@@ -80,9 +84,9 @@ def get_score_bar(score, is_inverse=False):
|
|
| 80 |
|
| 81 |
# For over-detection rate, use inverse coloring (lower is better)
|
| 82 |
if is_inverse:
|
| 83 |
-
|
| 84 |
else:
|
| 85 |
-
|
| 86 |
|
| 87 |
return f"""
|
| 88 |
<div style="display: flex; align-items: center; gap: 12px; width: 100%;">
|
|
@@ -97,15 +101,15 @@ def get_score_bar(score, is_inverse=False):
|
|
| 97 |
<div style="
|
| 98 |
width: {width}%;
|
| 99 |
height: 100%;
|
| 100 |
-
background: {
|
| 101 |
border-radius: 4px;
|
| 102 |
transition: width 0.3s ease;
|
| 103 |
"></div>
|
| 104 |
</div>
|
| 105 |
<span style="
|
| 106 |
font-family: 'SF Mono', monospace;
|
| 107 |
-
font-weight:
|
| 108 |
-
color:
|
| 109 |
min-width: 60px;
|
| 110 |
">{score:.3f}</span>
|
| 111 |
</div>
|
|
@@ -126,16 +130,16 @@ def create_pii_leaderboard():
|
|
| 126 |
<thead>
|
| 127 |
<tr>
|
| 128 |
<th style="width: 80px;">Rank</th>
|
| 129 |
-
<th>Model</th>
|
| 130 |
-
<th style="width: 120px;">Type</th>
|
| 131 |
-
<th>Vendor</th>
|
| 132 |
-
<th style="width: 200px;">Overall Accuracy</th>
|
| 133 |
-
<th style="width: 150px;">Precision</th>
|
| 134 |
-
<th style="width: 150px;">Recall</th>
|
| 135 |
<th style="width: 150px;">F1 Score</th>
|
|
|
|
|
|
|
|
|
|
| 136 |
<th style="width: 160px;">Over-detection Rate</th>
|
| 137 |
<th>Cost/Doc ($)</th>
|
| 138 |
<th>Time (s)</th>
|
|
|
|
|
|
|
| 139 |
</tr>
|
| 140 |
</thead>
|
| 141 |
<tbody>
|
|
@@ -147,9 +151,7 @@ def create_pii_leaderboard():
|
|
| 147 |
table_html += f"""
|
| 148 |
<tr>
|
| 149 |
<td>{get_rank_badge(rank)}</td>
|
| 150 |
-
<td class="model-name">{row['Model']}</td>
|
| 151 |
-
<td>{get_type_badge(row['Model Type'])}</td>
|
| 152 |
-
<td>{row['Vendor']}</td>
|
| 153 |
"""
|
| 154 |
|
| 155 |
# Get appropriate values based on document type filter
|
|
@@ -168,24 +170,24 @@ def create_pii_leaderboard():
|
|
| 168 |
cost = row.get('Cost per Document ($)', '')
|
| 169 |
time = row.get('Processing Time (s)', '')
|
| 170 |
|
| 171 |
-
# Add score bars
|
| 172 |
-
if
|
| 173 |
-
table_html += f'<td class="score-cell">{get_score_bar(
|
| 174 |
else:
|
| 175 |
table_html += '<td class="numeric-cell">-</td>'
|
| 176 |
|
| 177 |
-
if
|
| 178 |
-
table_html += f'<td class="score-cell">{get_score_bar(
|
| 179 |
else:
|
| 180 |
table_html += '<td class="numeric-cell">-</td>'
|
| 181 |
|
| 182 |
-
if
|
| 183 |
-
table_html += f'<td class="score-cell">{get_score_bar(
|
| 184 |
else:
|
| 185 |
table_html += '<td class="numeric-cell">-</td>'
|
| 186 |
|
| 187 |
-
if
|
| 188 |
-
table_html += f'<td class="score-cell">{get_score_bar(
|
| 189 |
else:
|
| 190 |
table_html += '<td class="numeric-cell">-</td>'
|
| 191 |
|
|
@@ -208,6 +210,8 @@ def create_pii_leaderboard():
|
|
| 208 |
table_html += f"""
|
| 209 |
<td class="numeric-cell">{cost_display}</td>
|
| 210 |
<td class="numeric-cell">{time_display}</td>
|
|
|
|
|
|
|
| 211 |
</tr>
|
| 212 |
"""
|
| 213 |
|
|
@@ -400,7 +404,7 @@ def create_pii_leaderboard():
|
|
| 400 |
|
| 401 |
# Load initial data
|
| 402 |
initial_df = load_leaderboard_data()
|
| 403 |
-
initial_table = filter_and_sort_data("All", "All", "
|
| 404 |
|
| 405 |
# Display header
|
| 406 |
gr.HTML(HEADER_CONTENT)
|
|
@@ -410,11 +414,11 @@ def create_pii_leaderboard():
|
|
| 410 |
<div class="dark-container" style="margin-bottom: 32px;">
|
| 411 |
<div class="section-header">
|
| 412 |
<span class="section-icon" style="color: var(--accent-primary);">📈</span>
|
| 413 |
-
<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: '
|
| 414 |
PII Detection Performance Leaderboard
|
| 415 |
</h3>
|
| 416 |
</div>
|
| 417 |
-
<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: '
|
| 418 |
Filter by document type, model access, and sort by any metric to explore performance
|
| 419 |
</p>
|
| 420 |
|
|
@@ -451,8 +455,8 @@ def create_pii_leaderboard():
|
|
| 451 |
|
| 452 |
with gr.Column(scale=1):
|
| 453 |
sort_by = gr.Dropdown(
|
| 454 |
-
choices=["
|
| 455 |
-
value="
|
| 456 |
label="📊 Sort By",
|
| 457 |
elem_classes=["dropdown"]
|
| 458 |
)
|
|
@@ -490,11 +494,11 @@ def create_pii_leaderboard():
|
|
| 490 |
<div class="dark-container" style="margin-top: 32px;">
|
| 491 |
<div class="section-header">
|
| 492 |
<span class="section-icon" style="color: var(--accent-primary);">🎯</span>
|
| 493 |
-
<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: '
|
| 494 |
Model Performance Cards
|
| 495 |
</h3>
|
| 496 |
</div>
|
| 497 |
-
<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: '
|
| 498 |
Dive deep into individual model performance across all metrics and document types
|
| 499 |
</p>
|
| 500 |
|
|
@@ -522,6 +526,10 @@ def create_pii_leaderboard():
|
|
| 522 |
|
| 523 |
gr.HTML("""
|
| 524 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
</div>
|
| 526 |
</div>""")
|
| 527 |
|
|
@@ -529,7 +537,7 @@ def create_pii_leaderboard():
|
|
| 529 |
gr.HTML(f"""
|
| 530 |
<style>
|
| 531 |
.performance-card {{
|
| 532 |
-
background:
|
| 533 |
border: 2px solid var(--accent-primary);
|
| 534 |
border-radius: 24px;
|
| 535 |
padding: 32px;
|
|
@@ -539,8 +547,8 @@ def create_pii_leaderboard():
|
|
| 539 |
overflow: hidden;
|
| 540 |
box-shadow:
|
| 541 |
0 20px 40px rgba(0, 0, 0, 0.5),
|
| 542 |
-
0 0 80px rgba(
|
| 543 |
-
inset 0 0 120px rgba(
|
| 544 |
}}
|
| 545 |
|
| 546 |
.card-header {{
|
|
@@ -553,12 +561,10 @@ def create_pii_leaderboard():
|
|
| 553 |
.card-model-name {{
|
| 554 |
font-size: 2rem;
|
| 555 |
font-weight: 800;
|
| 556 |
-
|
| 557 |
-
-webkit-background-clip: text;
|
| 558 |
-
-webkit-text-fill-color: transparent;
|
| 559 |
margin-bottom: 8px;
|
| 560 |
-
text-shadow: 0 0 40px var(--glow-primary);
|
| 561 |
line-height: 1.2;
|
|
|
|
| 562 |
}}
|
| 563 |
|
| 564 |
.card-stars {{
|
|
@@ -587,7 +593,7 @@ def create_pii_leaderboard():
|
|
| 587 |
.metric-item:hover {{
|
| 588 |
transform: translateY(-2px);
|
| 589 |
border-color: var(--accent-primary);
|
| 590 |
-
box-shadow: 0 8px 16px rgba(
|
| 591 |
}}
|
| 592 |
|
| 593 |
.metric-icon {{
|
|
|
|
| 14 |
def get_rank_badge(rank):
|
| 15 |
"""Generate HTML for rank badge with appropriate styling"""
|
| 16 |
badge_styles = {
|
| 17 |
+
1: ("1st", COLORS['digital_pollen'], COLORS['warm_black']),
|
| 18 |
+
2: ("2nd", COLORS['soft_grey'], COLORS['black']),
|
| 19 |
+
3: ("3rd", COLORS['code_coral'], COLORS['black']),
|
| 20 |
}
|
| 21 |
|
| 22 |
if rank in badge_styles:
|
| 23 |
+
label, bg_color, text_color = badge_styles[rank]
|
| 24 |
return f"""
|
| 25 |
<div style="
|
| 26 |
display: inline-flex;
|
|
|
|
| 28 |
justify-content: center;
|
| 29 |
min-width: 48px;
|
| 30 |
padding: 4px 12px;
|
| 31 |
+
background: {bg_color};
|
| 32 |
+
color: {text_color} !important;
|
| 33 |
border-radius: 6px;
|
| 34 |
font-weight: 600;
|
| 35 |
font-size: 0.9em;
|
| 36 |
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
|
| 37 |
+
font-family: 'Archivo', sans-serif;
|
| 38 |
">
|
| 39 |
{label}
|
| 40 |
</div>
|
|
|
|
| 54 |
|
| 55 |
def get_type_badge(model_type):
|
| 56 |
"""Generate HTML for model type badge"""
|
| 57 |
+
bg_color = COLORS['digital_pollen'] if model_type == 'Proprietary' else COLORS['data_green']
|
| 58 |
+
# Use black text for better readability on brand accent backgrounds
|
| 59 |
+
text_color = '#000000'
|
| 60 |
return f"""
|
| 61 |
<div style="
|
| 62 |
display: inline-flex;
|
| 63 |
align-items: center;
|
| 64 |
padding: 4px 8px;
|
| 65 |
background: {bg_color};
|
| 66 |
+
color: {text_color} !important;
|
| 67 |
border-radius: 4px;
|
| 68 |
font-size: 0.85em;
|
| 69 |
+
font-weight: 600;
|
| 70 |
+
font-family: 'Archivo', sans-serif;
|
| 71 |
">
|
| 72 |
{model_type}
|
| 73 |
</div>
|
|
|
|
| 84 |
|
| 85 |
# For over-detection rate, use inverse coloring (lower is better)
|
| 86 |
if is_inverse:
|
| 87 |
+
bar_color = COLORS['code_coral'] if score > 0.5 else COLORS['data_green']
|
| 88 |
else:
|
| 89 |
+
bar_color = COLORS['data_green'] if score > 0.5 else COLORS['code_coral']
|
| 90 |
|
| 91 |
return f"""
|
| 92 |
<div style="display: flex; align-items: center; gap: 12px; width: 100%;">
|
|
|
|
| 101 |
<div style="
|
| 102 |
width: {width}%;
|
| 103 |
height: 100%;
|
| 104 |
+
background: {bar_color};
|
| 105 |
border-radius: 4px;
|
| 106 |
transition: width 0.3s ease;
|
| 107 |
"></div>
|
| 108 |
</div>
|
| 109 |
<span style="
|
| 110 |
font-family: 'SF Mono', monospace;
|
| 111 |
+
font-weight: 700;
|
| 112 |
+
color: #000000;
|
| 113 |
min-width: 60px;
|
| 114 |
">{score:.3f}</span>
|
| 115 |
</div>
|
|
|
|
| 130 |
<thead>
|
| 131 |
<tr>
|
| 132 |
<th style="width: 80px;">Rank</th>
|
| 133 |
+
<th style="width: 250px;">Model</th>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
<th style="width: 150px;">F1 Score</th>
|
| 135 |
+
<th style="width: 150px;">Recall</th>
|
| 136 |
+
<th style="width: 150px;">Precision</th>
|
| 137 |
+
<th style="width: 200px;">Overall Accuracy</th>
|
| 138 |
<th style="width: 160px;">Over-detection Rate</th>
|
| 139 |
<th>Cost/Doc ($)</th>
|
| 140 |
<th>Time (s)</th>
|
| 141 |
+
<th style="width: 120px;">Type</th>
|
| 142 |
+
<th>Vendor</th>
|
| 143 |
</tr>
|
| 144 |
</thead>
|
| 145 |
<tbody>
|
|
|
|
| 151 |
table_html += f"""
|
| 152 |
<tr>
|
| 153 |
<td>{get_rank_badge(rank)}</td>
|
| 154 |
+
<td class="model-name" style="color:#EFEBE7; font-weight:700;">{row['Model']}</td>
|
|
|
|
|
|
|
| 155 |
"""
|
| 156 |
|
| 157 |
# Get appropriate values based on document type filter
|
|
|
|
| 170 |
cost = row.get('Cost per Document ($)', '')
|
| 171 |
time = row.get('Processing Time (s)', '')
|
| 172 |
|
| 173 |
+
# Add score bars in new order: F1, Recall, Precision, Accuracy
|
| 174 |
+
if f1 != '':
|
| 175 |
+
table_html += f'<td class="score-cell">{get_score_bar(f1)}</td>'
|
| 176 |
else:
|
| 177 |
table_html += '<td class="numeric-cell">-</td>'
|
| 178 |
|
| 179 |
+
if recall != '':
|
| 180 |
+
table_html += f'<td class="score-cell">{get_score_bar(recall)}</td>'
|
| 181 |
else:
|
| 182 |
table_html += '<td class="numeric-cell">-</td>'
|
| 183 |
|
| 184 |
+
if precision != '':
|
| 185 |
+
table_html += f'<td class="score-cell">{get_score_bar(precision)}</td>'
|
| 186 |
else:
|
| 187 |
table_html += '<td class="numeric-cell">-</td>'
|
| 188 |
|
| 189 |
+
if accuracy != '':
|
| 190 |
+
table_html += f'<td class="score-cell">{get_score_bar(accuracy)}</td>'
|
| 191 |
else:
|
| 192 |
table_html += '<td class="numeric-cell">-</td>'
|
| 193 |
|
|
|
|
| 210 |
table_html += f"""
|
| 211 |
<td class="numeric-cell">{cost_display}</td>
|
| 212 |
<td class="numeric-cell">{time_display}</td>
|
| 213 |
+
<td>{get_type_badge(row['Model Type'])}</td>
|
| 214 |
+
<td>{row['Vendor']}</td>
|
| 215 |
</tr>
|
| 216 |
"""
|
| 217 |
|
|
|
|
| 404 |
|
| 405 |
# Load initial data
|
| 406 |
initial_df = load_leaderboard_data()
|
| 407 |
+
initial_table = filter_and_sort_data("All", "All", "F1 Score", "Descending")
|
| 408 |
|
| 409 |
# Display header
|
| 410 |
gr.HTML(HEADER_CONTENT)
|
|
|
|
| 414 |
<div class="dark-container" style="margin-bottom: 32px;">
|
| 415 |
<div class="section-header">
|
| 416 |
<span class="section-icon" style="color: var(--accent-primary);">📈</span>
|
| 417 |
+
<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
|
| 418 |
PII Detection Performance Leaderboard
|
| 419 |
</h3>
|
| 420 |
</div>
|
| 421 |
+
<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif;">
|
| 422 |
Filter by document type, model access, and sort by any metric to explore performance
|
| 423 |
</p>
|
| 424 |
|
|
|
|
| 455 |
|
| 456 |
with gr.Column(scale=1):
|
| 457 |
sort_by = gr.Dropdown(
|
| 458 |
+
choices=["F1 Score", "Recall", "Precision", "Overall Accuracy", "Over-redaction Rate", "Cost per Document ($)", "Processing Time (s)"],
|
| 459 |
+
value="F1 Score",
|
| 460 |
label="📊 Sort By",
|
| 461 |
elem_classes=["dropdown"]
|
| 462 |
)
|
|
|
|
| 494 |
<div class="dark-container" style="margin-top: 32px;">
|
| 495 |
<div class="section-header">
|
| 496 |
<span class="section-icon" style="color: var(--accent-primary);">🎯</span>
|
| 497 |
+
<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
|
| 498 |
Model Performance Cards
|
| 499 |
</h3>
|
| 500 |
</div>
|
| 501 |
+
<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif; text-align: center;">
|
| 502 |
Dive deep into individual model performance across all metrics and document types
|
| 503 |
</p>
|
| 504 |
|
|
|
|
| 526 |
|
| 527 |
gr.HTML("""
|
| 528 |
</div>
|
| 529 |
+
|
| 530 |
+
<div style="text-align: center; margin-top: 24px; padding-top: 12px; border-top: 1px solid var(--border-subtle);">
|
| 531 |
+
<span style="color: var(--text-secondary); font-family: 'Archivo', sans-serif;">Powered by <a href=\"https://nutrient.io\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"color: var(--accent-primary); font-weight: 700; text-decoration: none;\">Nutrient</a></span>
|
| 532 |
+
</div>
|
| 533 |
</div>
|
| 534 |
</div>""")
|
| 535 |
|
|
|
|
| 537 |
gr.HTML(f"""
|
| 538 |
<style>
|
| 539 |
.performance-card {{
|
| 540 |
+
background: var(--bg-card);
|
| 541 |
border: 2px solid var(--accent-primary);
|
| 542 |
border-radius: 24px;
|
| 543 |
padding: 32px;
|
|
|
|
| 547 |
overflow: hidden;
|
| 548 |
box-shadow:
|
| 549 |
0 20px 40px rgba(0, 0, 0, 0.5),
|
| 550 |
+
0 0 80px rgba(240, 201, 104, 0.2),
|
| 551 |
+
inset 0 0 120px rgba(240, 201, 104, 0.08);
|
| 552 |
}}
|
| 553 |
|
| 554 |
.card-header {{
|
|
|
|
| 561 |
.card-model-name {{
|
| 562 |
font-size: 2rem;
|
| 563 |
font-weight: 800;
|
| 564 |
+
color: var(--text-primary);
|
|
|
|
|
|
|
| 565 |
margin-bottom: 8px;
|
|
|
|
| 566 |
line-height: 1.2;
|
| 567 |
+
font-family: 'Archivo', sans-serif;
|
| 568 |
}}
|
| 569 |
|
| 570 |
.card-stars {{
|
|
|
|
| 593 |
.metric-item:hover {{
|
| 594 |
transform: translateY(-2px);
|
| 595 |
border-color: var(--accent-primary);
|
| 596 |
+
box-shadow: 0 8px 16px rgba(240, 201, 104, 0.25);
|
| 597 |
}}
|
| 598 |
|
| 599 |
.metric-icon {{
|
results/pii_detection_results.csv
CHANGED
|
@@ -1,9 +1,5 @@
|
|
| 1 |
Model,Model Type,Vendor,Overall Accuracy,Precision,Recall,F1 Score,Over-redaction Rate,Processing Time (s),Cost per Document ($),Healthcare Accuracy,Financial Accuracy,Government Accuracy,Legal Accuracy,Personal Accuracy
|
| 2 |
-
GPT-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
Mistral-Large,Proprietary,Mistral AI,0.871,0.875,0.867,0.871,0.048,3.7,0.011,0.882,0.868,0.863,0.871,0.870
|
| 7 |
-
GPT-4o-mini,Proprietary,OpenAI,0.856,0.860,0.852,0.856,0.061,1.8,0.002,0.867,0.853,0.848,0.856,0.855
|
| 8 |
-
Claude-3-Haiku,Proprietary,Anthropic,0.834,0.838,0.830,0.834,0.078,2.1,0.006,0.845,0.831,0.826,0.834,0.833
|
| 9 |
-
Gemini-1.5-Flash,Proprietary,Google,0.821,0.825,0.817,0.821,0.085,2.4,0.004,0.832,0.818,0.813,0.821,0.820
|
|
|
|
| 1 |
Model,Model Type,Vendor,Overall Accuracy,Precision,Recall,F1 Score,Over-redaction Rate,Processing Time (s),Cost per Document ($),Healthcare Accuracy,Financial Accuracy,Government Accuracy,Legal Accuracy,Personal Accuracy
|
| 2 |
+
Nutrient & GPT-5-mini,Proprietary,OpenAI,0.757,0.993,0.972,0.98,0.054,2.7,0.018,0.982,0.974,0.958,0.977,0.989
|
| 3 |
+
Nutrient & GPT-5-nano,Proprietary,OpenAI,0.658,0.988,0.954,0.966,0.066,2.1,0.015,0.963,0.961,0.943,0.946,0.978
|
| 4 |
+
Nutrient & GPT-4.1-mini,Proprietary,OpenAI,0.599,0.993,0.945,0.964,0.065,2.3,0.012,0.96,0.961,0.966,0.895,0.994
|
| 5 |
+
Nutrient & GPT-4.1-nano,Proprietary,OpenAI,0.419,0.989,0.906,0.936,0.118,1.8,0.008,0.939,0.939,0.933,0.925,0.974
|
|
|
|
|
|
|
|
|
|
|
|