Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,29 @@ logger = logging.getLogger(__name__)
|
|
| 10 |
|
| 11 |
def load_and_display_json(file):
|
| 12 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
# Read the uploaded JSON file
|
| 14 |
with open(file.name, 'r', encoding='utf-8') as f:
|
| 15 |
data = json.load(f)
|
|
@@ -51,7 +74,7 @@ def load_and_display_json(file):
|
|
| 51 |
'Eval Scores': eval_scores,
|
| 52 |
'Response': item.get('chat_response', 'N/A'),
|
| 53 |
'Source': item.get('source', 'N/A'),
|
| 54 |
-
'Tools':
|
| 55 |
'Retrieval Context': item.get('retrieval_context', 'N/A'),
|
| 56 |
'Ground Truth': item.get('ground_truth', 'N/A'),
|
| 57 |
'Evaluation Metrics': eval_metrics,
|
|
@@ -104,20 +127,20 @@ def load_and_display_json(file):
|
|
| 104 |
}
|
| 105 |
.expandable {
|
| 106 |
cursor: pointer;
|
| 107 |
-
color:
|
| 108 |
font-weight: bold;
|
| 109 |
text-decoration: none;
|
| 110 |
display: inline-block;
|
| 111 |
padding: 8px;
|
| 112 |
-
transition:
|
| 113 |
-
background-color: #e8f0fe;
|
| 114 |
border-radius: 4px;
|
| 115 |
-
border:
|
| 116 |
position: relative;
|
|
|
|
| 117 |
}
|
| 118 |
.expandable:hover {
|
| 119 |
-
|
| 120 |
-
|
| 121 |
}
|
| 122 |
.details {
|
| 123 |
display: none;
|
|
@@ -264,45 +287,51 @@ def load_and_display_json(file):
|
|
| 264 |
/* Column width adjustments */
|
| 265 |
table th:nth-child(1),
|
| 266 |
table td:nth-child(1) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
width: 15%;
|
| 268 |
white-space: nowrap;
|
| 269 |
overflow: hidden;
|
| 270 |
text-overflow: ellipsis;
|
| 271 |
}
|
| 272 |
-
table th:nth-child(
|
| 273 |
-
table td:nth-child(
|
| 274 |
width: 25%;
|
| 275 |
max-width: 350px;
|
| 276 |
white-space: nowrap;
|
| 277 |
overflow: hidden;
|
| 278 |
text-overflow: ellipsis;
|
| 279 |
}
|
| 280 |
-
table th:nth-child(3),
|
| 281 |
-
table td:nth-child(3),
|
| 282 |
table th:nth-child(4),
|
| 283 |
table td:nth-child(4),
|
| 284 |
table th:nth-child(5),
|
| 285 |
-
table td:nth-child(5)
|
|
|
|
|
|
|
| 286 |
width: 8%;
|
| 287 |
min-width: 70px;
|
| 288 |
text-align: center;
|
| 289 |
}
|
| 290 |
-
table th:nth-child(6),
|
| 291 |
-
table td:nth-child(6) {
|
| 292 |
-
width: 21%;
|
| 293 |
-
}
|
| 294 |
table th:nth-child(7),
|
| 295 |
table td:nth-child(7) {
|
| 296 |
-
width:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
text-align: center;
|
| 298 |
}
|
| 299 |
/* Add tooltips for truncated content */
|
| 300 |
-
table td:nth-child(
|
| 301 |
-
table td:nth-child(
|
| 302 |
position: relative;
|
| 303 |
}
|
| 304 |
-
table td:nth-child(
|
| 305 |
-
table td:nth-child(
|
| 306 |
content: attr(title);
|
| 307 |
position: absolute;
|
| 308 |
left: 0;
|
|
@@ -338,6 +367,9 @@ def load_and_display_json(file):
|
|
| 338 |
</div>
|
| 339 |
</div>
|
| 340 |
|
|
|
|
|
|
|
|
|
|
| 341 |
<script>
|
| 342 |
document.addEventListener('keydown', function(event) {
|
| 343 |
if (event.key === 'Escape') {
|
|
@@ -350,9 +382,73 @@ def load_and_display_json(file):
|
|
| 350 |
</script>
|
| 351 |
"""
|
| 352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
# Add table
|
| 354 |
html_output += "<table>"
|
| 355 |
-
html_output += "<tr>" + "".join(f"<th>{col}</th>" for col in ['User ID', 'Question', 'Confidence', 'Send to Human', 'Time Spent', 'Eval Scores', 'Details']) + "</tr>"
|
| 356 |
for _, row in df.iterrows():
|
| 357 |
# Check for scores
|
| 358 |
low_validity = False
|
|
@@ -424,6 +520,7 @@ def load_and_display_json(file):
|
|
| 424 |
logger.info(f"Row {row['Index']} marked as 'Medium Information Coverage'")
|
| 425 |
|
| 426 |
html_output += f"<tr{row_class}>"
|
|
|
|
| 427 |
html_output += f"<td title=\"{html.escape(str(row['User ID']))}\">{html.escape(str(row['User ID']))}</td>"
|
| 428 |
html_output += f"<td title=\"{html.escape(str(row['Question']))}\">{html.escape(str(row['Question']))}</td>"
|
| 429 |
html_output += f"<td>{row['Confidence']}</td>"
|
|
@@ -474,31 +571,60 @@ def load_and_display_json(file):
|
|
| 474 |
else:
|
| 475 |
eval_metrics_json = "N/A"
|
| 476 |
|
| 477 |
-
# Add
|
| 478 |
html_output += f"""
|
| 479 |
<td class="detail-container">
|
| 480 |
-
<
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
</div>
|
| 503 |
</td>
|
| 504 |
"""
|
|
|
|
| 10 |
|
| 11 |
def load_and_display_json(file):
|
| 12 |
try:
|
| 13 |
+
# Function to format tools list as HTML with each tool on a separate line
|
| 14 |
+
def format_tools_list(tools):
    """Render the tools recorded for one row as an HTML fragment.

    Args:
        tools: Usually a list whose entries are either dicts (shown as a
            bold name followed by the pretty-printed JSON of the whole
            dict) or any other value (shown as its escaped ``str()``).
            A single non-empty string or dict is treated as a one-item
            list.

    Returns:
        A ``<ul>`` with one ``<li>`` per tool, or a grey
        "No tools used" ``<span>`` when there is nothing to show.
        All tool-derived text is HTML-escaped.
    """
    # A lone tool (bare name or single dict) is still a tool that was used;
    # wrap it so it is listed rather than reported as "No tools used".
    if isinstance(tools, (str, dict)) and tools:
        tools = [tools]

    # Type check first so non-list values are never truth-tested.
    if not isinstance(tools, list) or not tools:
        return "<span style='color: #888;'>No tools used</span>"

    items = []
    for tool in tools:
        if isinstance(tool, dict):
            tool_name = tool.get('name', 'Unknown Tool')
            # ensure_ascii=False keeps non-ASCII text readable in the page
            # instead of showing \uXXXX escapes; html.escape below still
            # neutralises any markup characters.
            tool_details = json.dumps(tool, indent=2, ensure_ascii=False)
            items.append(
                "<li style='margin-bottom: 8px;'>"
                f"<strong>{html.escape(str(tool_name))}</strong>"
                "<pre style='margin: 5px 0 0 10px; padding: 5px; "
                "background-color: #f8f8f8; border-radius: 3px; "
                "font-size: 12px; max-height: 150px; overflow-y: auto;'>"
                f"{html.escape(tool_details)}</pre>"
                "</li>"
            )
        else:
            # Simple string or other scalar: show its escaped text form.
            items.append(
                f"<li style='margin-bottom: 5px;'>{html.escape(str(tool))}</li>"
            )

    # Join once instead of growing a string with += inside the loop.
    return (
        "<ul style='margin: 0; padding-left: 20px;'>"
        + "".join(items)
        + "</ul>"
    )
|
| 35 |
+
|
| 36 |
# Read the uploaded JSON file
|
| 37 |
with open(file.name, 'r', encoding='utf-8') as f:
|
| 38 |
data = json.load(f)
|
|
|
|
| 74 |
'Eval Scores': eval_scores,
|
| 75 |
'Response': item.get('chat_response', 'N/A'),
|
| 76 |
'Source': item.get('source', 'N/A'),
|
| 77 |
+
'Tools': item.get('tools', []), # Keep as a list
|
| 78 |
'Retrieval Context': item.get('retrieval_context', 'N/A'),
|
| 79 |
'Ground Truth': item.get('ground_truth', 'N/A'),
|
| 80 |
'Evaluation Metrics': eval_metrics,
|
|
|
|
| 127 |
}
|
| 128 |
.expandable {
|
| 129 |
cursor: pointer;
|
| 130 |
+
color: white;
|
| 131 |
font-weight: bold;
|
| 132 |
text-decoration: none;
|
| 133 |
display: inline-block;
|
| 134 |
padding: 8px;
|
| 135 |
+
transition: all 0.2s;
|
|
|
|
| 136 |
border-radius: 4px;
|
| 137 |
+
border: none;
|
| 138 |
position: relative;
|
| 139 |
+
text-align: center;
|
| 140 |
}
|
| 141 |
.expandable:hover {
|
| 142 |
+
filter: brightness(110%);
|
| 143 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.2);
|
| 144 |
}
|
| 145 |
.details {
|
| 146 |
display: none;
|
|
|
|
| 287 |
/* Column width adjustments */
|
| 288 |
table th:nth-child(1),
|
| 289 |
table td:nth-child(1) {
|
| 290 |
+
width: 5%;
|
| 291 |
+
text-align: center;
|
| 292 |
+
font-weight: bold;
|
| 293 |
+
}
|
| 294 |
+
table th:nth-child(2),
|
| 295 |
+
table td:nth-child(2) {
|
| 296 |
width: 15%;
|
| 297 |
white-space: nowrap;
|
| 298 |
overflow: hidden;
|
| 299 |
text-overflow: ellipsis;
|
| 300 |
}
|
| 301 |
+
table th:nth-child(3),
|
| 302 |
+
table td:nth-child(3) {
|
| 303 |
width: 25%;
|
| 304 |
max-width: 350px;
|
| 305 |
white-space: nowrap;
|
| 306 |
overflow: hidden;
|
| 307 |
text-overflow: ellipsis;
|
| 308 |
}
|
|
|
|
|
|
|
| 309 |
table th:nth-child(4),
|
| 310 |
table td:nth-child(4),
|
| 311 |
table th:nth-child(5),
|
| 312 |
+
table td:nth-child(5),
|
| 313 |
+
table th:nth-child(6),
|
| 314 |
+
table td:nth-child(6) {
|
| 315 |
width: 8%;
|
| 316 |
min-width: 70px;
|
| 317 |
text-align: center;
|
| 318 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
table th:nth-child(7),
|
| 320 |
table td:nth-child(7) {
|
| 321 |
+
width: 21%;
|
| 322 |
+
}
|
| 323 |
+
table th:nth-child(8),
|
| 324 |
+
table td:nth-child(8) {
|
| 325 |
+
width: 20%;
|
| 326 |
text-align: center;
|
| 327 |
}
|
| 328 |
/* Add tooltips for truncated content */
|
| 329 |
+
table td:nth-child(2),
|
| 330 |
+
table td:nth-child(3) {
|
| 331 |
position: relative;
|
| 332 |
}
|
| 333 |
+
table td:nth-child(2):hover::after,
|
| 334 |
+
table td:nth-child(3):hover::after {
|
| 335 |
content: attr(title);
|
| 336 |
position: absolute;
|
| 337 |
left: 0;
|
|
|
|
| 367 |
</div>
|
| 368 |
</div>
|
| 369 |
|
| 370 |
+
<style>
|
| 371 |
+
|
| 372 |
+
</style>
|
| 373 |
<script>
|
| 374 |
document.addEventListener('keydown', function(event) {
|
| 375 |
if (event.key === 'Escape') {
|
|
|
|
| 382 |
</script>
|
| 383 |
"""
|
| 384 |
|
| 385 |
+
# Calculate statistics for each row type
|
| 386 |
+
total_rows = len(df)
|
| 387 |
+
send_to_human_count = 0
|
| 388 |
+
low_coverage_count = 0
|
| 389 |
+
medium_coverage_count = 0
|
| 390 |
+
high_coverage_count = 0
|
| 391 |
+
|
| 392 |
+
for _, row in df.iterrows():
|
| 393 |
+
# Check for metrics to determine row type
|
| 394 |
+
is_send_to_human = row['Send to Human'] is True
|
| 395 |
+
|
| 396 |
+
# Extract coverage score
|
| 397 |
+
coverage_score = None
|
| 398 |
+
if row['Evaluation Metrics']:
|
| 399 |
+
for metric in row['Evaluation Metrics']:
|
| 400 |
+
if metric.get('name') == 'Information Coverage (GEval)':
|
| 401 |
+
try:
|
| 402 |
+
coverage_score = float(metric.get('score', 0))
|
| 403 |
+
except (ValueError, TypeError):
|
| 404 |
+
pass
|
| 405 |
+
|
| 406 |
+
# Count row types (in order of priority for display)
|
| 407 |
+
if is_send_to_human:
|
| 408 |
+
send_to_human_count += 1
|
| 409 |
+
elif coverage_score is not None:
|
| 410 |
+
if coverage_score < 0.5:
|
| 411 |
+
low_coverage_count += 1
|
| 412 |
+
elif coverage_score >= 0.8:
|
| 413 |
+
high_coverage_count += 1
|
| 414 |
+
else: # Between 0.5 and 0.8
|
| 415 |
+
medium_coverage_count += 1
|
| 416 |
+
|
| 417 |
+
# Calculate percentages
|
| 418 |
+
send_to_human_percent = (send_to_human_count / total_rows * 100) if total_rows > 0 else 0
|
| 419 |
+
low_coverage_percent = (low_coverage_count / total_rows * 100) if total_rows > 0 else 0
|
| 420 |
+
medium_coverage_percent = (medium_coverage_count / total_rows * 100) if total_rows > 0 else 0
|
| 421 |
+
high_coverage_percent = (high_coverage_count / total_rows * 100) if total_rows > 0 else 0
|
| 422 |
+
|
| 423 |
+
# Add statistics summary at the top
|
| 424 |
+
html_output += """
|
| 425 |
+
<div style="margin-bottom: 20px; padding: 15px; border: 1px solid #e0e0e0; border-radius: 5px; background-color: #f9f9f9;">
|
| 426 |
+
<h3>Row Type Statistics</h3>
|
| 427 |
+
<div style="display: flex; flex-wrap: wrap; gap: 15px;">
|
| 428 |
+
<div style="background-color: #ffcccc; padding: 10px; border-radius: 5px; min-width: 200px;">
|
| 429 |
+
<strong>Send to Human:</strong> {:.1f}% ({} of {} rows)
|
| 430 |
+
</div>
|
| 431 |
+
<div style="background-color: #e0e0e0; padding: 10px; border-radius: 5px; min-width: 200px;">
|
| 432 |
+
<strong>Low Coverage:</strong> {:.1f}% ({} of {} rows)
|
| 433 |
+
</div>
|
| 434 |
+
<div style="background-color: #a9dfbf; padding: 10px; border-radius: 5px; min-width: 200px;">
|
| 435 |
+
<strong>Medium Coverage:</strong> {:.1f}% ({} of {} rows)
|
| 436 |
+
</div>
|
| 437 |
+
<div style="background-color: #1e8449; color: white; padding: 10px; border-radius: 5px; min-width: 200px;">
|
| 438 |
+
<strong>High Coverage:</strong> {:.1f}% ({} of {} rows)
|
| 439 |
+
</div>
|
| 440 |
+
</div>
|
| 441 |
+
</div>
|
| 442 |
+
""".format(
|
| 443 |
+
send_to_human_percent, send_to_human_count, total_rows,
|
| 444 |
+
low_coverage_percent, low_coverage_count, total_rows,
|
| 445 |
+
medium_coverage_percent, medium_coverage_count, total_rows,
|
| 446 |
+
high_coverage_percent, high_coverage_count, total_rows
|
| 447 |
+
)
|
| 448 |
+
|
| 449 |
# Add table
|
| 450 |
html_output += "<table>"
|
| 451 |
+
html_output += "<tr>" + "".join(f"<th>{col}</th>" for col in ['#', 'User ID', 'Question', 'Confidence', 'Send to Human', 'Time Spent', 'Eval Scores', 'Details']) + "</tr>"
|
| 452 |
for _, row in df.iterrows():
|
| 453 |
# Check for scores
|
| 454 |
low_validity = False
|
|
|
|
| 520 |
logger.info(f"Row {row['Index']} marked as 'Medium Information Coverage'")
|
| 521 |
|
| 522 |
html_output += f"<tr{row_class}>"
|
| 523 |
+
html_output += f"<td style='text-align: center;'>{row['Index'] + 1}</td>"
|
| 524 |
html_output += f"<td title=\"{html.escape(str(row['User ID']))}\">{html.escape(str(row['User ID']))}</td>"
|
| 525 |
html_output += f"<td title=\"{html.escape(str(row['Question']))}\">{html.escape(str(row['Question']))}</td>"
|
| 526 |
html_output += f"<td>{row['Confidence']}</td>"
|
|
|
|
| 571 |
else:
|
| 572 |
eval_metrics_json = "N/A"
|
| 573 |
|
| 574 |
+
# Add buttons for different popups in the same row
|
| 575 |
html_output += f"""
|
| 576 |
<td class="detail-container">
|
| 577 |
+
<div style="display: flex; flex-direction: row; justify-content: space-around; gap: 3px;">
|
| 578 |
+
<!-- Details Button -->
|
| 579 |
+
<div style="flex: 1;">
|
| 580 |
+
<input type='checkbox' id='toggle_{row["Index"]}'>
|
| 581 |
+
<label for='toggle_{row["Index"]}' class='expandable' style="width: 100%; box-sizing: border-box; margin: 0; padding: 5px 2px; font-size: 12px; background-color: #4CAF50;">Details</label>
|
| 582 |
+
<div class='overlay' onclick="document.getElementById('toggle_{row["Index"]}').checked = false;"></div>
|
| 583 |
+
<div class='details' onclick="event.stopPropagation();">
|
| 584 |
+
<button class="close-details" onclick="document.getElementById('toggle_{row["Index"]}').checked = false;">×</button>
|
| 585 |
+
<strong>Question:</strong>
|
| 586 |
+
<p>{html.escape(str(row['Question']))}</p>
|
| 587 |
+
<strong>Ground Truth:</strong>
|
| 588 |
+
<p>{html.escape(str(row['Ground Truth']))}</p>
|
| 589 |
+
<strong>Response:</strong>
|
| 590 |
+
<p>{html.escape(str(row['Response']))}</p>
|
| 591 |
+
<strong>Source:</strong>
|
| 592 |
+
<p>{html.escape(str(row['Source']))}</p>
|
| 593 |
+
<strong>Tools:</strong>
|
| 594 |
+
<div style="margin-top: 5px; margin-bottom: 10px;">
|
| 595 |
+
{format_tools_list(row['Tools'])}
|
| 596 |
+
</div>
|
| 597 |
+
{f"<strong style='color: #d32f2f;'>Call Human Message:</strong><p style='color: #d32f2f; white-space: pre-wrap;'>{html.escape(formatted_message)}</p>" if row['Send to Human'] is True else ""}
|
| 598 |
+
<strong>Information to Check:</strong>
|
| 599 |
+
<p>{html.escape(str(row['Information to Check']))}</p>
|
| 600 |
+
</div>
|
| 601 |
+
</div>
|
| 602 |
+
|
| 603 |
+
<!-- Evaluation Metrics Button -->
|
| 604 |
+
<div style="flex: 1;">
|
| 605 |
+
<input type='checkbox' id='metrics_toggle_{row["Index"]}'>
|
| 606 |
+
<label for='metrics_toggle_{row["Index"]}' class='expandable' style="width: 100%; box-sizing: border-box; margin: 0; padding: 5px 2px; font-size: 12px; background-color: #2196F3;">Metrics</label>
|
| 607 |
+
<div class='overlay' onclick="document.getElementById('metrics_toggle_{row["Index"]}').checked = false;"></div>
|
| 608 |
+
<div class='details' onclick="event.stopPropagation();">
|
| 609 |
+
<button class="close-details" onclick="document.getElementById('metrics_toggle_{row["Index"]}').checked = false;">×</button>
|
| 610 |
+
<strong>Information to Check:</strong>
|
| 611 |
+
<p style="margin-bottom: 15px; padding: 8px; background-color: #f5f5f5; border-left: 4px solid #2196F3; border-radius: 3px;">{html.escape(str(row['Information to Check']))}</p>
|
| 612 |
+
<strong>Evaluation Metrics:</strong>
|
| 613 |
+
<div class='json-viewer'><pre>{html.escape(eval_metrics_json)}</pre></div>
|
| 614 |
+
</div>
|
| 615 |
+
</div>
|
| 616 |
+
|
| 617 |
+
<!-- Retrieval Context Button -->
|
| 618 |
+
<div style="flex: 1;">
|
| 619 |
+
<input type='checkbox' id='context_toggle_{row["Index"]}'>
|
| 620 |
+
<label for='context_toggle_{row["Index"]}' class='expandable' style="width: 100%; box-sizing: border-box; margin: 0; padding: 5px 2px; font-size: 12px; background-color: #FF9800;">Context</label>
|
| 621 |
+
<div class='overlay' onclick="document.getElementById('context_toggle_{row["Index"]}').checked = false;"></div>
|
| 622 |
+
<div class='details' onclick="event.stopPropagation();">
|
| 623 |
+
<button class="close-details" onclick="document.getElementById('context_toggle_{row["Index"]}').checked = false;">×</button>
|
| 624 |
+
<strong>Retrieval Context:</strong>
|
| 625 |
+
<div class='json-viewer'><pre>{retrieval_context_json}</pre></div>
|
| 626 |
+
</div>
|
| 627 |
+
</div>
|
| 628 |
</div>
|
| 629 |
</td>
|
| 630 |
"""
|