Spaces:

curtizz
/

show_eval_result

Sleeping

App Files Files Community

curtizz committed on Jun 11, 2025

Commit

2102e80

verified ·

1 Parent(s): bc6b9d5

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -42

app.py CHANGED Viewed

@@ -92,11 +92,12 @@ def load_and_display_json(file):
             .low-validity {
                 background-color: #fff2cc !important;
             }
-            .low-correctness {
-                background-color: #dddddd !important;
-            }
             .high-coverage {
-                background-color: #d4edda !important;
             }
             .low-coverage {
                 background-color: #e0e0e0 !important;
@@ -224,11 +225,8 @@ def load_and_display_json(file):
             .yellow-box {
                 background-color: #fff2cc;
             }
-            .gray-box {
-                background-color: #dddddd;
-            }
             .green-box {
-                background-color: #d4edda;
             }
             .detail-container {
                 position: relative;
@@ -326,17 +324,17 @@ def load_and_display_json(file):
                 <div class="color-box red-box"></div>
                 <div>Red: "Send to Human" is true - The system flagged this query to be sent to a human operator</div>
             </div>
-            <div class="legend-item">
-                <div class="color-box gray-box"></div>
-                <div>Gray: Low Correctness - Correctness score is below 0.6</div>
-            </div>
             <div class="legend-item">
                 <div class="color-box" style="background-color: #e0e0e0;"></div>
                 <div>Light Gray: Low Information Coverage - Information Coverage score is below 0.5</div>
             </div>
             <div class="legend-item">
-                <div class="color-box green-box"></div>
-                <div>Green: High Information Coverage - Information Coverage score is at least 0.8</div>
             </div>
         </div>
@@ -358,11 +356,10 @@ def load_and_display_json(file):
         for _, row in df.iterrows():
             # Check for scores
             low_validity = False
-            low_correctness = False
             high_coverage = False
             low_coverage = False
             input_validity_found = False
-            correctness_found = False
             info_coverage_found = False
             if row['Evaluation Metrics']:
@@ -388,30 +385,13 @@ def load_and_display_json(file):
                             except (ValueError, TypeError) as e:
                                 logger.warning(f"Could not convert {input_validity_value} to float: {e}")
-                        # Check for Correctness
-                        elif metric_name == 'Correctness (GEval)':
-                            correctness_found = True
-                            correctness_value = metric.get('score')
-                            logger.info(f"Found Correctness score: {correctness_value} (type: {type(correctness_value).__name__})")
-                            # Try to convert to float and check if < 0.6
-                            try:
-                                if correctness_value is not None:
-                                    float_value = float(correctness_value)
-                                    logger.info(f"Converted to float: {float_value}")
-                                    if float_value < 0.6:
-                                        low_correctness = True
-                                        logger.info(f"Low Correctness detected: {float_value}")
-                            except (ValueError, TypeError) as e:
-                                logger.warning(f"Could not convert {correctness_value} to float: {e}")
                         # Check for Information Coverage
                         elif metric_name == 'Information Coverage (GEval)':
                             info_coverage_found = True
                             info_coverage_value = metric.get('score')
                             logger.info(f"Found Information Coverage score: {info_coverage_value} (type: {type(info_coverage_value).__name__})")
-                            # Try to convert to float and check for high (>=0.8) or low (<0.5) coverage
                             try:
                                 if info_coverage_value is not None:
                                     float_value = float(info_coverage_value)
@@ -419,27 +399,29 @@ def load_and_display_json(file):
                                     if float_value >= 0.8:
                                         high_coverage = True
                                         logger.info(f"High Information Coverage detected: {float_value}")
-                                    elif float_value < 0.5:
                                         low_coverage = True
                                         logger.info(f"Low Information Coverage detected: {float_value}")
                             except (ValueError, TypeError) as e:
                                 logger.warning(f"Could not convert {info_coverage_value} to float: {e}")
-            # Determine row class (prioritize in order: send_to_human, low_correctness, low_coverage, high_coverage)
             row_class = ""
             if row['Send to Human'] is True:
                 row_class = " class='send-to-human'"
                 logger.info(f"Row {row['Index']} marked as 'Send to Human'")
-            # Removed low_validity check as per request
-            elif low_correctness:
-                row_class = " class='low-correctness'"
-                logger.info(f"Row {row['Index']} marked as 'Low Correctness'")
             elif low_coverage:
                 row_class = " class='low-coverage'"
                 logger.info(f"Row {row['Index']} marked as 'Low Information Coverage'")
             elif high_coverage:
                 row_class = " class='high-coverage'"
                 logger.info(f"Row {row['Index']} marked as 'High Information Coverage'")
             html_output += f"<tr{row_class}>"
             html_output += f"<td title=\"{html.escape(str(row['User ID']))}\">{html.escape(str(row['User ID']))}</td>"
@@ -535,9 +517,9 @@ with gr.Blocks() as demo:
     with gr.Accordion("Row Color Legend", open=True):
         gr.Markdown("""
         * **Red rows**: "Send to Human" is true - The system flagged this query to be sent to a human operator
-        * **Gray rows**: Low Correctness - Correctness score is below 0.6
         * **Light Gray rows**: Low Information Coverage - Information Coverage score is below 0.5
-        * **Green rows**: High Information Coverage - Information Coverage score is at least 0.8
         """)
     file_input = gr.File(label="Upload JSON File", file_types=[".json"])

             .low-validity {
                 background-color: #fff2cc !important;
             }
             .high-coverage {
+                background-color: #1e8449 !important;
+                color: white;
+            }
+            .medium-coverage {
+                background-color: #a9dfbf !important;
             }
             .low-coverage {
                 background-color: #e0e0e0 !important;
             .yellow-box {
                 background-color: #fff2cc;
             }
             .green-box {
+                background-color: #1e8449;
             }
             .detail-container {
                 position: relative;
                 <div class="color-box red-box"></div>
                 <div>Red: "Send to Human" is true - The system flagged this query to be sent to a human operator</div>
             </div>
             <div class="legend-item">
                 <div class="color-box" style="background-color: #e0e0e0;"></div>
                 <div>Light Gray: Low Information Coverage - Information Coverage score is below 0.5</div>
             </div>
             <div class="legend-item">
+                <div class="color-box" style="background-color: #a9dfbf;"></div>
+                <div>Shallow Green: Medium Information Coverage - Information Coverage score is between 0.5 and 0.8</div>
+            </div>
+            <div class="legend-item">
+                <div class="color-box" style="background-color: #1e8449; color: white;"></div>
+                <div>Dark Green: High Information Coverage - Information Coverage score is at least 0.8</div>
             </div>
         </div>
         for _, row in df.iterrows():
             # Check for scores
             low_validity = False
             high_coverage = False
+            medium_coverage = False
             low_coverage = False
             input_validity_found = False
             info_coverage_found = False
             if row['Evaluation Metrics']:
                             except (ValueError, TypeError) as e:
                                 logger.warning(f"Could not convert {input_validity_value} to float: {e}")
                         # Check for Information Coverage
                         elif metric_name == 'Information Coverage (GEval)':
                             info_coverage_found = True
                             info_coverage_value = metric.get('score')
                             logger.info(f"Found Information Coverage score: {info_coverage_value} (type: {type(info_coverage_value).__name__})")
+                            # Try to convert to float and classify coverage: high (>= 0.8), medium (>= 0.5 and < 0.8), or low (< 0.5)
                             try:
                                 if info_coverage_value is not None:
                                     float_value = float(info_coverage_value)
                                     if float_value >= 0.8:
                                         high_coverage = True
                                         logger.info(f"High Information Coverage detected: {float_value}")
+                                    elif float_value >= 0.5:
+                                        medium_coverage = True
+                                        logger.info(f"Medium Information Coverage detected: {float_value}")
+                                    else:
                                         low_coverage = True
                                         logger.info(f"Low Information Coverage detected: {float_value}")
                             except (ValueError, TypeError) as e:
                                 logger.warning(f"Could not convert {info_coverage_value} to float: {e}")
+            # Determine row class (checked in order: send_to_human, low_coverage, high_coverage, medium_coverage)
             row_class = ""
             if row['Send to Human'] is True:
                 row_class = " class='send-to-human'"
                 logger.info(f"Row {row['Index']} marked as 'Send to Human'")
             elif low_coverage:
                 row_class = " class='low-coverage'"
                 logger.info(f"Row {row['Index']} marked as 'Low Information Coverage'")
             elif high_coverage:
                 row_class = " class='high-coverage'"
                 logger.info(f"Row {row['Index']} marked as 'High Information Coverage'")
+            elif medium_coverage:
+                row_class = " class='medium-coverage'"
+                logger.info(f"Row {row['Index']} marked as 'Medium Information Coverage'")
             html_output += f"<tr{row_class}>"
             html_output += f"<td title=\"{html.escape(str(row['User ID']))}\">{html.escape(str(row['User ID']))}</td>"
     with gr.Accordion("Row Color Legend", open=True):
         gr.Markdown("""
         * **Red rows**: "Send to Human" is true - The system flagged this query to be sent to a human operator
         * **Light Gray rows**: Low Information Coverage - Information Coverage score is below 0.5
+        * **Shallow Green rows**: Medium Information Coverage - Information Coverage score is between 0.5 and 0.8
+        * **Dark Green rows**: High Information Coverage - Information Coverage score is at least 0.8
         """)
     file_input = gr.File(label="Upload JSON File", file_types=[".json"])