Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -92,11 +92,12 @@ def load_and_display_json(file):
|
|
| 92 |
.low-validity {
|
| 93 |
background-color: #fff2cc !important;
|
| 94 |
}
|
| 95 |
-
.low-correctness {
|
| 96 |
-
background-color: #dddddd !important;
|
| 97 |
-
}
|
| 98 |
.high-coverage {
|
| 99 |
-
background-color: #
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
}
|
| 101 |
.low-coverage {
|
| 102 |
background-color: #e0e0e0 !important;
|
|
@@ -224,11 +225,8 @@ def load_and_display_json(file):
|
|
| 224 |
.yellow-box {
|
| 225 |
background-color: #fff2cc;
|
| 226 |
}
|
| 227 |
-
.gray-box {
|
| 228 |
-
background-color: #dddddd;
|
| 229 |
-
}
|
| 230 |
.green-box {
|
| 231 |
-
background-color: #
|
| 232 |
}
|
| 233 |
.detail-container {
|
| 234 |
position: relative;
|
|
@@ -326,17 +324,17 @@ def load_and_display_json(file):
|
|
| 326 |
<div class="color-box red-box"></div>
|
| 327 |
<div>Red: "Send to Human" is true - The system flagged this query to be sent to a human operator</div>
|
| 328 |
</div>
|
| 329 |
-
<div class="legend-item">
|
| 330 |
-
<div class="color-box gray-box"></div>
|
| 331 |
-
<div>Gray: Low Correctness - Correctness score is below 0.6</div>
|
| 332 |
-
</div>
|
| 333 |
<div class="legend-item">
|
| 334 |
<div class="color-box" style="background-color: #e0e0e0;"></div>
|
| 335 |
<div>Light Gray: Low Information Coverage - Information Coverage score is below 0.5</div>
|
| 336 |
</div>
|
| 337 |
<div class="legend-item">
|
| 338 |
-
<div class="color-box
|
| 339 |
-
<div>Green:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
</div>
|
| 341 |
</div>
|
| 342 |
|
|
@@ -358,11 +356,10 @@ def load_and_display_json(file):
|
|
| 358 |
for _, row in df.iterrows():
|
| 359 |
# Check for scores
|
| 360 |
low_validity = False
|
| 361 |
-
low_correctness = False
|
| 362 |
high_coverage = False
|
|
|
|
| 363 |
low_coverage = False
|
| 364 |
input_validity_found = False
|
| 365 |
-
correctness_found = False
|
| 366 |
info_coverage_found = False
|
| 367 |
|
| 368 |
if row['Evaluation Metrics']:
|
|
@@ -388,30 +385,13 @@ def load_and_display_json(file):
|
|
| 388 |
except (ValueError, TypeError) as e:
|
| 389 |
logger.warning(f"Could not convert {input_validity_value} to float: {e}")
|
| 390 |
|
| 391 |
-
# Check for Correctness
|
| 392 |
-
elif metric_name == 'Correctness (GEval)':
|
| 393 |
-
correctness_found = True
|
| 394 |
-
correctness_value = metric.get('score')
|
| 395 |
-
logger.info(f"Found Correctness score: {correctness_value} (type: {type(correctness_value).__name__})")
|
| 396 |
-
|
| 397 |
-
# Try to convert to float and check if < 0.6
|
| 398 |
-
try:
|
| 399 |
-
if correctness_value is not None:
|
| 400 |
-
float_value = float(correctness_value)
|
| 401 |
-
logger.info(f"Converted to float: {float_value}")
|
| 402 |
-
if float_value < 0.6:
|
| 403 |
-
low_correctness = True
|
| 404 |
-
logger.info(f"Low Correctness detected: {float_value}")
|
| 405 |
-
except (ValueError, TypeError) as e:
|
| 406 |
-
logger.warning(f"Could not convert {correctness_value} to float: {e}")
|
| 407 |
-
|
| 408 |
# Check for Information Coverage
|
| 409 |
elif metric_name == 'Information Coverage (GEval)':
|
| 410 |
info_coverage_found = True
|
| 411 |
info_coverage_value = metric.get('score')
|
| 412 |
logger.info(f"Found Information Coverage score: {info_coverage_value} (type: {type(info_coverage_value).__name__})")
|
| 413 |
|
| 414 |
-
# Try to convert to float and check for high (>=0.8) or low (<0.5) coverage
|
| 415 |
try:
|
| 416 |
if info_coverage_value is not None:
|
| 417 |
float_value = float(info_coverage_value)
|
|
@@ -419,27 +399,29 @@ def load_and_display_json(file):
|
|
| 419 |
if float_value >= 0.8:
|
| 420 |
high_coverage = True
|
| 421 |
logger.info(f"High Information Coverage detected: {float_value}")
|
| 422 |
-
elif float_value
|
|
|
|
|
|
|
|
|
|
| 423 |
low_coverage = True
|
| 424 |
logger.info(f"Low Information Coverage detected: {float_value}")
|
| 425 |
except (ValueError, TypeError) as e:
|
| 426 |
logger.warning(f"Could not convert {info_coverage_value} to float: {e}")
|
| 427 |
|
| 428 |
-
# Determine row class (prioritize in order: send_to_human, low_correctness, low_coverage, high_coverage)
|
| 429 |
row_class = ""
|
| 430 |
if row['Send to Human'] is True:
|
| 431 |
row_class = " class='send-to-human'"
|
| 432 |
logger.info(f"Row {row['Index']} marked as 'Send to Human'")
|
| 433 |
-
# Removed low_validity check as per request
|
| 434 |
-
elif low_correctness:
|
| 435 |
-
row_class = " class='low-correctness'"
|
| 436 |
-
logger.info(f"Row {row['Index']} marked as 'Low Correctness'")
|
| 437 |
elif low_coverage:
|
| 438 |
row_class = " class='low-coverage'"
|
| 439 |
logger.info(f"Row {row['Index']} marked as 'Low Information Coverage'")
|
| 440 |
elif high_coverage:
|
| 441 |
row_class = " class='high-coverage'"
|
| 442 |
logger.info(f"Row {row['Index']} marked as 'High Information Coverage'")
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
html_output += f"<tr{row_class}>"
|
| 445 |
html_output += f"<td title=\"{html.escape(str(row['User ID']))}\">{html.escape(str(row['User ID']))}</td>"
|
|
@@ -535,9 +517,9 @@ with gr.Blocks() as demo:
|
|
| 535 |
with gr.Accordion("Row Color Legend", open=True):
|
| 536 |
gr.Markdown("""
|
| 537 |
* **Red rows**: "Send to Human" is true - The system flagged this query to be sent to a human operator
|
| 538 |
-
* **Gray rows**: Low Correctness - Correctness score is below 0.6
|
| 539 |
* **Light Gray rows**: Low Information Coverage - Information Coverage score is below 0.5
|
| 540 |
-
* **Green rows**:
|
|
|
|
| 541 |
""")
|
| 542 |
|
| 543 |
file_input = gr.File(label="Upload JSON File", file_types=[".json"])
|
|
|
|
| 92 |
.low-validity {
|
| 93 |
background-color: #fff2cc !important;
|
| 94 |
}
|
|
|
|
|
|
|
|
|
|
| 95 |
.high-coverage {
|
| 96 |
+
background-color: #1e8449 !important;
|
| 97 |
+
color: white;
|
| 98 |
+
}
|
| 99 |
+
.medium-coverage {
|
| 100 |
+
background-color: #a9dfbf !important;
|
| 101 |
}
|
| 102 |
.low-coverage {
|
| 103 |
background-color: #e0e0e0 !important;
|
|
|
|
| 225 |
.yellow-box {
|
| 226 |
background-color: #fff2cc;
|
| 227 |
}
|
|
|
|
|
|
|
|
|
|
| 228 |
.green-box {
|
| 229 |
+
background-color: #1e8449;
|
| 230 |
}
|
| 231 |
.detail-container {
|
| 232 |
position: relative;
|
|
|
|
| 324 |
<div class="color-box red-box"></div>
|
| 325 |
<div>Red: "Send to Human" is true - The system flagged this query to be sent to a human operator</div>
|
| 326 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
<div class="legend-item">
|
| 328 |
<div class="color-box" style="background-color: #e0e0e0;"></div>
|
| 329 |
<div>Light Gray: Low Information Coverage - Information Coverage score is below 0.5</div>
|
| 330 |
</div>
|
| 331 |
<div class="legend-item">
|
| 332 |
+
<div class="color-box" style="background-color: #a9dfbf;"></div>
|
| 333 |
+
<div>Shallow Green: Medium Information Coverage - Information Coverage score is between 0.5 and 0.8</div>
|
| 334 |
+
</div>
|
| 335 |
+
<div class="legend-item">
|
| 336 |
+
<div class="color-box" style="background-color: #1e8449; color: white;"></div>
|
| 337 |
+
<div>Dark Green: High Information Coverage - Information Coverage score is at least 0.8</div>
|
| 338 |
</div>
|
| 339 |
</div>
|
| 340 |
|
|
|
|
| 356 |
for _, row in df.iterrows():
|
| 357 |
# Check for scores
|
| 358 |
low_validity = False
|
|
|
|
| 359 |
high_coverage = False
|
| 360 |
+
medium_coverage = False
|
| 361 |
low_coverage = False
|
| 362 |
input_validity_found = False
|
|
|
|
| 363 |
info_coverage_found = False
|
| 364 |
|
| 365 |
if row['Evaluation Metrics']:
|
|
|
|
| 385 |
except (ValueError, TypeError) as e:
|
| 386 |
logger.warning(f"Could not convert {input_validity_value} to float: {e}")
|
| 387 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
# Check for Information Coverage
|
| 389 |
elif metric_name == 'Information Coverage (GEval)':
|
| 390 |
info_coverage_found = True
|
| 391 |
info_coverage_value = metric.get('score')
|
| 392 |
logger.info(f"Found Information Coverage score: {info_coverage_value} (type: {type(info_coverage_value).__name__})")
|
| 393 |
|
| 394 |
+
# Try to convert to float and check for high (>=0.8), medium (0.5-0.8), or low (<0.5) coverage
|
| 395 |
try:
|
| 396 |
if info_coverage_value is not None:
|
| 397 |
float_value = float(info_coverage_value)
|
|
|
|
| 399 |
if float_value >= 0.8:
|
| 400 |
high_coverage = True
|
| 401 |
logger.info(f"High Information Coverage detected: {float_value}")
|
| 402 |
+
elif float_value >= 0.5:
|
| 403 |
+
medium_coverage = True
|
| 404 |
+
logger.info(f"Medium Information Coverage detected: {float_value}")
|
| 405 |
+
else:
|
| 406 |
low_coverage = True
|
| 407 |
logger.info(f"Low Information Coverage detected: {float_value}")
|
| 408 |
except (ValueError, TypeError) as e:
|
| 409 |
logger.warning(f"Could not convert {info_coverage_value} to float: {e}")
|
| 410 |
|
| 411 |
+
# Determine row class (checked in order: send_to_human, low_coverage, high_coverage, medium_coverage)
|
| 412 |
row_class = ""
|
| 413 |
if row['Send to Human'] is True:
|
| 414 |
row_class = " class='send-to-human'"
|
| 415 |
logger.info(f"Row {row['Index']} marked as 'Send to Human'")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
elif low_coverage:
|
| 417 |
row_class = " class='low-coverage'"
|
| 418 |
logger.info(f"Row {row['Index']} marked as 'Low Information Coverage'")
|
| 419 |
elif high_coverage:
|
| 420 |
row_class = " class='high-coverage'"
|
| 421 |
logger.info(f"Row {row['Index']} marked as 'High Information Coverage'")
|
| 422 |
+
elif medium_coverage:
|
| 423 |
+
row_class = " class='medium-coverage'"
|
| 424 |
+
logger.info(f"Row {row['Index']} marked as 'Medium Information Coverage'")
|
| 425 |
|
| 426 |
html_output += f"<tr{row_class}>"
|
| 427 |
html_output += f"<td title=\"{html.escape(str(row['User ID']))}\">{html.escape(str(row['User ID']))}</td>"
|
|
|
|
| 517 |
with gr.Accordion("Row Color Legend", open=True):
|
| 518 |
gr.Markdown("""
|
| 519 |
* **Red rows**: "Send to Human" is true - The system flagged this query to be sent to a human operator
|
|
|
|
| 520 |
* **Light Gray rows**: Low Information Coverage - Information Coverage score is below 0.5
|
| 521 |
+
* **Shallow Green rows**: Medium Information Coverage - Information Coverage score is between 0.5 and 0.8
|
| 522 |
+
* **Dark Green rows**: High Information Coverage - Information Coverage score is at least 0.8
|
| 523 |
""")
|
| 524 |
|
| 525 |
file_input = gr.File(label="Upload JSON File", file_types=[".json"])
|