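# Page residue from the hosting site, kept as a comment (not part of the program):
# author avatar "curtizz", commit 94d72ef (verified), commit message "Update app.py".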
import gradio as gr
import json
import pandas as pd
import html
import logging
# Set up logging: configure the root logger at INFO level so the informational
# messages emitted while a report is rendered are not filtered out.
logging.basicConfig(level=logging.INFO)
# Logger named after this module.
logger = logging.getLogger(__name__)
# logging.getLogger returns the same object for the same name on every call,
# so this is the module's regular logger; looking it up here keeps the
# helpers below independent of any other module-level name.
_log = logging.getLogger(__name__)

# Name of the metric that drives the row colouring, and its thresholds.
_COVERAGE_METRIC = 'Information Coverage (GEval)'
_HIGH_COVERAGE_MIN = 0.8
_MEDIUM_COVERAGE_MIN = 0.5

# Row categories, in the order their tiles appear in the statistics panel.
_CATEGORY_TILES = (
    ('high-coverage', 'High Coverage',
     'background-color: #1e8449; color: white; padding: 10px; border-radius: 5px; min-width: 200px;'),
    ('send-to-human', 'Send to Human (&lt;0.8 coverage)',
     'background-color: #ffcccc; padding: 10px; border-radius: 5px; min-width: 200px;'),
    ('medium-coverage', 'Medium Coverage',
     'background-color: #a9dfbf; padding: 10px; border-radius: 5px; min-width: 200px;'),
    ('low-coverage', 'Low Coverage',
     'background-color: #e0e0e0; padding: 10px; border-radius: 5px; min-width: 200px;'),
)
_ALL_SEND_TO_HUMAN_STYLE = (
    'background-color: #f8d7da; padding: 10px; border-radius: 5px; '
    'min-width: 200px; border: 1px dashed #721c24;'
)

# Column headers of the summary table, in display order.
_TABLE_COLUMNS = ('#', 'User ID', 'Question', 'Confidence', 'Send to Human',
                  'Time Spent', 'Eval Scores', 'Details')

# Inline style shared by the three buttons in the "Details" column (the
# background colour is appended per button).
_DETAIL_BUTTON_STYLE = (
    'width: 100%; box-sizing: border-box; margin: 0; padding: 5px 2px; '
    'font-size: 12px; background-color: '
)

# Static part of the report: stylesheet, colour legend and the Escape-key
# handler.  It is emitted verbatim, so it is a plain (non-formatted) string.
_PAGE_HEADER_HTML = """
<style>
table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
font-family: Arial, sans-serif;
}
th, td {
border: 1px solid #e0e0e0;
padding: 12px;
text-align: left;
font-size: 14px;
vertical-align: top;
}
th {
background-color: #4CAF50;
color: white;
font-weight: bold;
}
tr:nth-child(even) {
background-color: #f9f9f9;
}
.send-to-human {
background-color: #ffcccc !important;
}
.low-validity {
background-color: #fff2cc !important;
}
.high-coverage {
background-color: #1e8449 !important;
color: white;
}
.medium-coverage {
background-color: #a9dfbf !important;
}
.low-coverage {
background-color: #e0e0e0 !important;
}
.expandable {
cursor: pointer;
color: white;
font-weight: bold;
text-decoration: none;
display: inline-block;
padding: 8px;
transition: all 0.2s;
border-radius: 4px;
border: none;
position: relative;
text-align: center;
}
.expandable:hover {
filter: brightness(110%);
box-shadow: 0 2px 4px rgba(0,0,0,0.2);
}
.details {
display: none;
padding: 20px;
background-color: #ffffff;
border: 1px solid #e0e0e0;
border-radius: 5px;
margin-top: 10px;
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
position: fixed;
z-index: 1000;
width: 80%;
min-width: 600px;
max-width: 1200px;
height: auto;
min-height: 400px;
max-height: 85vh;
overflow-y: auto;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
}
.human-message-popup {
display: none;
padding: 20px;
background-color: #ffffff;
border: 1px solid #e0e0e0;
border-radius: 5px;
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
position: fixed;
z-index: 1000;
width: 70%;
min-width: 500px;
max-width: 1000px;
height: auto;
min-height: 200px;
max-height: 80vh;
overflow-y: auto;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
background-color: #fff9f9;
border: 1px solid #d32f2f;
}
input[type="checkbox"] {
display: none !important;
appearance: none;
-webkit-appearance: none;
-moz-appearance: none;
}
input[type="checkbox"]:checked ~ .details {
display: block;
}
input[type="checkbox"]:checked ~ .human-message-popup {
display: block;
}
input[type="checkbox"]:checked + .expandable::after {
content: " (Close)";
}
.details strong {
color: #333;
font-size: 16px;
display: block;
margin-bottom: 5px;
}
.details p {
margin: 10px 0;
line-height: 1.5;
}
.json-viewer {
background-color: #f5f5f5;
padding: 10px;
border-radius: 5px;
font-family: monospace;
font-size: 13px;
overflow-x: auto;
white-space: pre-wrap;
}
pre {
white-space: pre-wrap;
word-wrap: break-word;
margin: 0;
}
.color-legend {
margin: 20px 0;
padding: 15px;
border: 1px solid #e0e0e0;
border-radius: 5px;
background-color: #f9f9f9;
}
.legend-item {
display: flex;
align-items: center;
margin-bottom: 10px;
}
.color-box {
width: 20px;
height: 20px;
margin-right: 10px;
border: 1px solid #ccc;
}
.red-box {
background-color: #ffcccc;
}
.yellow-box {
background-color: #fff2cc;
}
.green-box {
background-color: #1e8449;
}
.detail-container {
position: relative;
}
.close-details {
position: absolute;
top: 5px;
right: 5px;
cursor: pointer;
background-color: #f44336;
color: white;
border: none;
border-radius: 50%;
width: 24px;
height: 24px;
display: flex;
align-items: center;
justify-content: center;
font-weight: bold;
}
.overlay {
display: none;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0,0,0,0.5);
z-index: 900;
}
input[type="checkbox"]:checked ~ .overlay {
display: block;
}
/* Column width adjustments */
table th:nth-child(1),
table td:nth-child(1) {
width: 5%;
text-align: center;
font-weight: bold;
}
table th:nth-child(2),
table td:nth-child(2) {
width: 15%;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
table th:nth-child(3),
table td:nth-child(3) {
width: 25%;
max-width: 350px;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
table th:nth-child(4),
table td:nth-child(4),
table th:nth-child(5),
table td:nth-child(5),
table th:nth-child(6),
table td:nth-child(6) {
width: 8%;
min-width: 70px;
text-align: center;
}
table th:nth-child(7),
table td:nth-child(7) {
width: 21%;
}
table th:nth-child(8),
table td:nth-child(8) {
width: 20%;
text-align: center;
}
/* Add tooltips for truncated content */
table td:nth-child(2),
table td:nth-child(3) {
position: relative;
}
table td:nth-child(2):hover::after,
table td:nth-child(3):hover::after {
content: attr(title);
position: absolute;
left: 0;
top: 100%;
z-index: 500;
background-color: #333;
color: #fff;
padding: 5px 10px;
border-radius: 4px;
white-space: pre-wrap;
max-width: 400px;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
</style>
<div class="color-legend">
<h3>Row Color Legend</h3>
<div class="legend-item">
<div class="color-box" style="background-color: #1e8449; color: white;"></div>
<div>Dark Green: High Information Coverage - Information Coverage score is at least 0.8 (highest priority)</div>
</div>
<div class="legend-item">
<div class="color-box red-box"></div>
<div>Red: "Send to Human" is true but with Information Coverage below 0.8</div>
</div>
<div class="legend-item">
<div class="color-box" style="background-color: #a9dfbf;"></div>
<div>Shallow Green: Medium Information Coverage - Information Coverage score is between 0.5 and 0.8</div>
</div>
<div class="legend-item">
<div class="color-box" style="background-color: #e0e0e0;"></div>
<div>Light Gray: Low Information Coverage - Information Coverage score is below 0.5</div>
</div>
</div>
<style>
</style>
<script>
document.addEventListener('keydown', function(event) {
if (event.key === 'Escape') {
// Find all checked checkboxes and uncheck them
document.querySelectorAll('input[type="checkbox"]:checked').forEach(function(checkbox) {
checkbox.checked = false;
});
}
});
</script>
"""


def _esc(value):
    """Return *value* converted to text and HTML-escaped (quotes included)."""
    return html.escape(str(value))


def _pretty(value):
    """Return dicts and lists as indented JSON text, anything else via ``str``."""
    if isinstance(value, (dict, list)):
        return json.dumps(value, indent=2, ensure_ascii=False)
    return str(value)


def _metric_dicts(eval_metrics):
    """Return the dict entries of *eval_metrics*, or ``[]`` if it is not a list."""
    if not isinstance(eval_metrics, list):
        return []
    return [metric for metric in eval_metrics if isinstance(metric, dict)]


def _coverage_score(metrics):
    """Return the Information Coverage score as a float, or ``None``.

    A metric with a missing/``None`` score or a score that cannot be converted
    to ``float`` is ignored.  If the metric appears more than once, the last
    usable score wins.
    """
    score = None
    for metric in metrics:
        if metric.get('name') != _COVERAGE_METRIC:
            continue
        raw_score = metric.get('score')
        if raw_score is None:
            continue
        try:
            score = float(raw_score)
        except (ValueError, TypeError) as exc:
            _log.warning("Could not convert %r to float: %s", raw_score, exc)
    return score


def _row_category(send_to_human, coverage):
    """Return the CSS class for a row, or ``''`` when no category applies.

    Priority: high coverage beats "send to human", which beats medium/low
    coverage.  The same result feeds both the statistics and the row colour,
    so the two can never disagree.
    """
    if coverage is not None and coverage >= _HIGH_COVERAGE_MIN:
        return 'high-coverage'
    if send_to_human:
        return 'send-to-human'
    if coverage is None:
        return ''
    if coverage >= _MEDIUM_COVERAGE_MIN:
        return 'medium-coverage'
    return 'low-coverage'


def _format_tools_list(tools):
    """Return *tools* as an HTML ``<ul>``, one tool per list item.

    Dict tools show their ``name`` plus the full dict as JSON; any other
    value is shown as escaped text.  A missing, empty or non-list value
    yields a "No tools used" placeholder.
    """
    if not tools or not isinstance(tools, list):
        return "<span style='color: #888;'>No tools used</span>"
    items = []
    for tool in tools:
        if isinstance(tool, dict):
            tool_name = _esc(tool.get('name', 'Unknown Tool'))
            tool_details = _esc(json.dumps(tool, indent=2))
            items.append(
                "<li style='margin-bottom: 8px;'>\n"
                f"<strong>{tool_name}</strong>\n"
                "<pre style='margin: 5px 0 0 10px; padding: 5px; background-color: #f8f8f8; "
                "border-radius: 3px; font-size: 12px; max-height: 150px; overflow-y: auto;'>"
                f"{tool_details}</pre>\n"
                "</li>"
            )
        else:
            items.append(f"<li style='margin-bottom: 5px;'>{_esc(tool)}</li>")
    return "<ul style='margin: 0; padding-left: 20px;'>" + "".join(items) + "</ul>"


def _format_eval_scores(metrics):
    """Return one escaped ``name: score`` line per metric, joined by ``<br>``.

    Numeric scores are shown with three decimals; other scores are shown as
    given.  Returns ``"N/A"`` when there are no metrics.
    """
    if not metrics:
        return "N/A"
    lines = []
    for metric in metrics:
        score = metric.get('score', 'N/A')
        if isinstance(score, (int, float)):
            score = format(float(score), '.3f')
        lines.append(f"{_esc(metric.get('name', 'Unknown'))}: {_esc(score)}")
    return "<br>".join(lines)


def _format_metrics_json(metrics):
    """Return the metrics as indented JSON with numeric scores at 3 decimals."""
    if not metrics:
        return "N/A"
    formatted = []
    for metric in metrics:
        metric_copy = dict(metric)  # never mutate the loaded data
        if isinstance(metric_copy.get('score'), (int, float)):
            metric_copy['score'] = format(float(metric_copy['score']), '.3f')
        formatted.append(metric_copy)
    return json.dumps(formatted, indent=2, ensure_ascii=False)


def _field(label, value):
    """Return a ``<strong>`` heading followed by *value* as an escaped paragraph."""
    return f"<strong>{label}:</strong>\n<p>{_esc(value)}</p>\n"


def _popup(toggle_id, label, label_style, popup_class, body_html):
    """Return the markup of one CSS-only popup.

    The hidden checkbox must stay first, directly followed by its label, with
    the overlay and popup as later siblings: the stylesheet relies on the
    ``+`` and ``~`` sibling selectors to show and hide them.
    """
    uncheck = f"document.getElementById('{toggle_id}').checked = false;"
    return (
        f"<input type='checkbox' id='{toggle_id}'>\n"
        f"<label for='{toggle_id}' class='expandable' style=\"{label_style}\">{label}</label>\n"
        f"<div class='overlay' onclick=\"{uncheck}\"></div>\n"
        f"<div class='{popup_class}' onclick=\"event.stopPropagation();\">\n"
        f"<button class=\"close-details\" onclick=\"{uncheck}\">×</button>\n"
        f"{body_html}"
        "</div>\n"
    )


def _render_stats(categories, all_send_to_human_count):
    """Return the "Row Type Statistics" panel for the given row categories."""
    total_rows = len(categories)

    def tile(style, label, count):
        percent = (count / total_rows * 100) if total_rows > 0 else 0
        return (
            f'<div style="{style}">\n'
            f"<strong>{label}:</strong> {percent:.1f}% ({count} of {total_rows} rows)\n"
            "</div>\n"
        )

    tiles = [
        tile(style, label, categories.count(css_class))
        for css_class, label, style in _CATEGORY_TILES
    ]
    tiles.append(tile(_ALL_SEND_TO_HUMAN_STYLE, 'All Send to Human', all_send_to_human_count))
    return (
        '\n<div style="margin-bottom: 20px; padding: 15px; border: 1px solid #e0e0e0; '
        'border-radius: 5px; background-color: #f9f9f9;">\n'
        "<h3>Row Type Statistics</h3>\n"
        '<div style="display: flex; flex-wrap: wrap; gap: 15px;">\n'
        + "".join(tiles)
        + "</div>\n</div>\n"
    )


def _render_row(index, item, metrics, send_to_human, category):
    """Return the ``<tr>`` markup for one record.

    Every value taken from *item* is HTML-escaped before it is inserted.
    """
    user_id = _esc(item.get('user_id', 'N/A'))
    question = _esc(item.get('question', 'N/A'))
    info_to_check = item.get('information_to_check', 'N/A')

    time_spent = item.get('time_spent', 'N/A')
    if isinstance(time_spent, (int, float)):
        time_spent = f"{time_spent:.2f}s"

    # The message is placed directly against its tags: the containers use
    # "white-space: pre-wrap", so any padding whitespace would be rendered.
    message = _esc(_pretty(item.get('call_human_message', 'N/A')))
    if send_to_human:
        message_body = (
            "<strong style='color: #d32f2f; font-size: 16px; margin-bottom: 10px;'>"
            "Call Human Message:</strong>\n"
            "<div style='font-size: 14px; color: #000; white-space: pre-wrap; overflow-x: auto; "
            "background-color: #f5f5f5; padding: 10px; border-radius: 5px; border: 1px solid #ddd;'>"
            f"{message}</div>\n"
        )
        send_cell = (
            "<td>\n"
            "<span style='font-weight: bold; color: #d32f2f;'>True</span>\n"
            + _popup(
                f"message_toggle_{index}",
                "Show Message",
                "margin-top: 5px; background-color: #d32f2f; color: white; border: none; "
                "border-radius: 4px; padding: 5px 10px; cursor: pointer; font-size: 12px; "
                "display: block; width: calc(100% - 16px); text-align: center;",
                "human-message-popup",
                message_body,
            )
            + "</td>"
        )
        message_section = (
            "<strong style='color: #d32f2f;'>Call Human Message:</strong>"
            f"<p style='color: #d32f2f; white-space: pre-wrap;'>{message}</p>\n"
        )
    else:
        send_cell = "<td><span style='color: #555;'>False</span></td>"
        message_section = ""

    details_body = (
        _field('Question', item.get('question', 'N/A'))
        + _field('Ground Truth', item.get('ground_truth', 'N/A'))
        + _field('Response', item.get('chat_response', 'N/A'))
        + _field('Source', item.get('source', 'N/A'))
        + "<strong>Tools:</strong>\n"
        + '<div style="margin-top: 5px; margin-bottom: 10px;">\n'
        + _format_tools_list(item.get('tools', []))
        + "\n</div>\n"
        + message_section
        + _field('Information to Check', info_to_check)
    )
    metrics_body = (
        "<strong>Information to Check:</strong>\n"
        '<p style="margin-bottom: 15px; padding: 8px; background-color: #f5f5f5; '
        'border-left: 4px solid #2196F3; border-radius: 3px;">'
        f"{_esc(info_to_check)}</p>\n"
        "<strong>Evaluation Metrics:</strong>\n"
        f"<div class='json-viewer'><pre>{_esc(_format_metrics_json(metrics))}</pre></div>\n"
    )
    retrieval_context = item.get('retrieval_context', 'N/A')
    context_text = _esc(_pretty(retrieval_context)) if retrieval_context else "N/A"
    context_body = (
        "<strong>Retrieval Context:</strong>\n"
        f"<div class='json-viewer'><pre>{context_text}</pre></div>\n"
    )

    buttons = (
        (f"toggle_{index}", "Details", "#4CAF50", details_body),
        (f"metrics_toggle_{index}", "Metrics", "#2196F3", metrics_body),
        (f"context_toggle_{index}", "Context", "#FF9800", context_body),
    )
    detail_cell = (
        '\n<td class="detail-container">\n'
        '<div style="display: flex; flex-direction: row; justify-content: space-around; gap: 3px;">\n'
        + "".join(
            '<div style="flex: 1;">\n'
            + _popup(toggle_id, label, f"{_DETAIL_BUTTON_STYLE}{colour};", "details", body)
            + "</div>\n"
            for toggle_id, label, colour, body in buttons
        )
        + "</div>\n</td>\n"
    )

    row_class = f" class='{category}'" if category else ""
    cells = (
        f"<td style='text-align: center;'>{index + 1}</td>",
        f"<td title=\"{user_id}\">{user_id}</td>",
        f"<td title=\"{question}\">{question}</td>",
        f"<td>{_esc(item.get('confidence_score', 'N/A'))}</td>",
        send_cell,
        f"<td>{_esc(time_spent)}</td>",
        f"<td>{_format_eval_scores(metrics)}</td>",
        detail_cell,
    )
    return f"<tr{row_class}>" + "".join(cells) + "</tr>"


def load_and_display_json(file):
    """Render an uploaded JSON file of records as an HTML report.

    The report consists of a stylesheet and colour legend, a statistics panel
    and one table row per record with popups for details, metrics and
    retrieval context.

    Args:
        file: A path string, or an object whose ``name`` attribute is the
            path of the uploaded file.  ``None`` means no file is selected.

    Returns:
        The report as an HTML string.  On failure a plain message is returned
        instead of raising: "No file uploaded." for ``None``, a fixed message
        when the JSON is not a list of dictionaries, and
        "Error processing JSON file: ..." for any other error.
    """
    if file is None:
        return "No file uploaded."
    try:
        path = file if isinstance(file, str) else file.name
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
            return "Error: JSON file must contain a list of dictionaries."

        # Classify every record once; the statistics panel precedes the table
        # in the output, so the categories are needed before rows are rendered.
        prepared = []
        for item in data:
            metrics = _metric_dicts(item.get('evaluation_metrics'))
            send_to_human = item.get('send_to_human') is True
            category = _row_category(send_to_human, _coverage_score(metrics))
            prepared.append((item, metrics, send_to_human, category))

        categories = [category for _, _, _, category in prepared]
        all_send_to_human_count = sum(1 for _, _, flagged, _ in prepared if flagged)

        parts = [
            _PAGE_HEADER_HTML,
            _render_stats(categories, all_send_to_human_count),
            "<table>",
            "<tr>" + "".join(f"<th>{col}</th>" for col in _TABLE_COLUMNS) + "</tr>",
        ]
        for index, (item, metrics, send_to_human, category) in enumerate(prepared):
            _log.info("Row %s marked as %r", index, category)
            parts.append(_render_row(index, item, metrics, send_to_human, category))
        parts.append("</table>")
        return "".join(parts)
    except Exception as e:
        # UI boundary: report the problem in the page instead of crashing the
        # handler, but keep the traceback in the log.
        _log.exception("Failed to render JSON report")
        return f"Error processing JSON file: {str(e)}"
# Create Gradio interface: a JSON upload widget whose change event re-renders
# the HTML report produced by load_and_display_json.
with gr.Blocks() as demo:
    gr.Markdown("# JSON Data Visualization")
    gr.Markdown("Upload a JSON file containing a list of dictionaries to visualize the data.")
    with gr.Accordion("Row Color Legend", open=True):
        # Written as adjacent literals so the Markdown text does not pick up
        # the source indentation of this block.
        gr.Markdown(
            "\n"
            "* **Dark Green rows**: High Information Coverage - Information Coverage score is at least 0.8 (highest priority)\n"
            "* **Red rows**: \"Send to Human\" is true but with Information Coverage below 0.8\n"
            "* **Shallow Green rows**: Medium Information Coverage - Information Coverage score is between 0.5 and 0.8\n"
            "* **Light Gray rows**: Low Information Coverage - Information Coverage score is below 0.5\n"
            "The statistics section also includes \"All Send to Human\" count (including those with high coverage).\n"
        )
    file_input = gr.File(label="Upload JSON File", file_types=[".json"])
    output = gr.HTML(label="Data Visualization")
    file_input.change(load_and_display_json, inputs=file_input, outputs=output)

# Launch the interface only when this file is run as a script, so importing
# the module (for tests or tooling) does not start a web server.
if __name__ == "__main__":
    demo.launch()