Spaces:
Running
Running
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -187,9 +187,15 @@ def generate_table_html(rows):
|
|
| 187 |
<span class="text-sm font-medium text-slate-700">{row['directTime']}</span>
|
| 188 |
</div>
|
| 189 |
<div class="flex justify-between items-center">
|
| 190 |
-
<span class="text-xs text-slate-500">Recall:</span>
|
| 191 |
<span class="text-xs font-mono bg-slate-100 px-1.5 rounded text-slate-600">
|
| 192 |
-
{row['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
</span>
|
| 194 |
</div>
|
| 195 |
</div>
|
|
@@ -205,11 +211,18 @@ def generate_table_html(rows):
|
|
| 205 |
</td>
|
| 206 |
<td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
|
| 207 |
<div class="flex flex-col justify-center h-full pt-1">
|
|
|
|
| 208 |
<div class="flex items-center">
|
| 209 |
-
<span class="text-
|
| 210 |
-
<span class="material-symbols-outlined text-green-600 text-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
</div>
|
| 212 |
-
<span class="text-[10px] text-
|
| 213 |
</div>
|
| 214 |
</td>
|
| 215 |
</tr>
|
|
@@ -339,17 +352,34 @@ def run_benchmark(query):
|
|
| 339 |
prod_contents = set(p.payload.get('text', str(p.payload)) for p in prod_results)
|
| 340 |
|
| 341 |
if base_contents:
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
else:
|
| 345 |
-
|
|
|
|
| 346 |
|
| 347 |
-
# Efficiency Gain: (
|
| 348 |
-
|
|
|
|
| 349 |
if direct_sharded_time_ms > 0:
|
| 350 |
-
|
| 351 |
else:
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
# Formatting
|
| 355 |
row = {
|
|
@@ -364,8 +394,11 @@ def run_benchmark(query):
|
|
| 364 |
"confDisplay": f"{confidence*100:.1f}%",
|
| 365 |
"directTime": f"{direct_sharded_time_ms:.1f} ms",
|
| 366 |
"baselineTime": f"{baseline_time_ms:.1f} ms",
|
| 367 |
-
"
|
| 368 |
-
"
|
|
|
|
|
|
|
|
|
|
| 369 |
}
|
| 370 |
rows.append(row)
|
| 371 |
|
|
|
|
| 187 |
<span class="text-sm font-medium text-slate-700">{row['directTime']}</span>
|
| 188 |
</div>
|
| 189 |
<div class="flex justify-between items-center">
|
| 190 |
+
<span class="text-xs text-slate-500">Recall@10:</span>
|
| 191 |
<span class="text-xs font-mono bg-slate-100 px-1.5 rounded text-slate-600">
|
| 192 |
+
{row['recall_10']}
|
| 193 |
+
</span>
|
| 194 |
+
</div>
|
| 195 |
+
<div class="flex justify-between items-center">
|
| 196 |
+
<span class="text-xs text-slate-500">Recall@5:</span>
|
| 197 |
+
<span class="text-xs font-mono bg-slate-100 px-1.5 rounded text-slate-600">
|
| 198 |
+
{row['recall_5']}
|
| 199 |
</span>
|
| 200 |
</div>
|
| 201 |
</div>
|
|
|
|
| 211 |
</td>
|
| 212 |
<td class="px-6 py-4 whitespace-nowrap align-top border-b border-slate-100">
|
| 213 |
<div class="flex flex-col justify-center h-full pt-1">
|
| 214 |
+
<div class="flex flex-col justify-center h-full pt-1 space-y-2">
|
| 215 |
<div class="flex items-center">
|
| 216 |
+
<span class="text-sm font-bold text-green-600">{row['efficiency_sharded']}</span>
|
| 217 |
+
<span class="material-symbols-outlined text-green-600 text-[10px] ml-1">bolt</span>
|
| 218 |
+
</div>
|
| 219 |
+
<span class="text-[10px] text-green-700/60 font-semibold tracking-wide">[vs Sharded]</span>
|
| 220 |
+
|
| 221 |
+
<div class="flex items-center mt-2 border-t border-slate-100 pt-2">
|
| 222 |
+
<span class="text-sm font-bold text-slate-600">{row['efficiency_base']}</span>
|
| 223 |
+
<span class="material-symbols-outlined text-slate-500 text-[10px] ml-1">bolt</span>
|
| 224 |
</div>
|
| 225 |
+
<span class="text-[10px] text-slate-500/60 font-semibold tracking-wide">[vs Base]</span>
|
| 226 |
</div>
|
| 227 |
</td>
|
| 228 |
</tr>
|
|
|
|
| 352 |
prod_contents = set(p.payload.get('text', str(p.payload)) for p in prod_results)
|
| 353 |
|
| 354 |
if base_contents:
|
| 355 |
+
intersection_10 = len(base_contents.intersection(prod_contents))
|
| 356 |
+
recall_10 = (intersection_10 / len(base_contents)) * 100
|
| 357 |
+
|
| 358 |
+
# Recall@5
|
| 359 |
+
base_contents_5 = set(p.payload.get('text', str(p.payload)) for p in base_results[:5])
|
| 360 |
+
prod_contents_5 = set(p.payload.get('text', str(p.payload)) for p in prod_results[:5])
|
| 361 |
+
if base_contents_5:
|
| 362 |
+
intersection_5 = len(base_contents_5.intersection(prod_contents_5))
|
| 363 |
+
recall_5 = (intersection_5 / len(base_contents_5)) * 100
|
| 364 |
+
else:
|
| 365 |
+
recall_5 = 0.0
|
| 366 |
else:
|
| 367 |
+
recall_10 = 0.0
|
| 368 |
+
recall_5 = 0.0
|
| 369 |
|
| 370 |
+
# Efficiency Gain: (Reference - Optimized) / Reference
|
| 371 |
+
|
| 372 |
+
# 1. vs Direct Sharded
|
| 373 |
if direct_sharded_time_ms > 0:
|
| 374 |
+
eff_gain_sharded = ((direct_sharded_time_ms - latency_ms) / direct_sharded_time_ms) * 100
|
| 375 |
else:
|
| 376 |
+
eff_gain_sharded = 0.0
|
| 377 |
+
|
| 378 |
+
# 2. vs Base (No Sharding)
|
| 379 |
+
if baseline_time_ms > 0:
|
| 380 |
+
eff_gain_base = ((baseline_time_ms - latency_ms) / baseline_time_ms) * 100
|
| 381 |
+
else:
|
| 382 |
+
eff_gain_base = 0.0
|
| 383 |
|
| 384 |
# Formatting
|
| 385 |
row = {
|
|
|
|
| 394 |
"confDisplay": f"{confidence*100:.1f}%",
|
| 395 |
"directTime": f"{direct_sharded_time_ms:.1f} ms",
|
| 396 |
"baselineTime": f"{baseline_time_ms:.1f} ms",
|
| 397 |
+
"baselineTime": f"{baseline_time_ms:.1f} ms",
|
| 398 |
+
"recall_10": f"{recall_10:.1f}%",
|
| 399 |
+
"recall_5": f"{recall_5:.1f}%",
|
| 400 |
+
"efficiency_sharded": f"{eff_gain_sharded:.1f}%",
|
| 401 |
+
"efficiency_base": f"{eff_gain_base:.1f}%"
|
| 402 |
}
|
| 403 |
rows.append(row)
|
| 404 |
|