curtizz committed on
Commit
4598d9f
·
verified ·
1 Parent(s): 834bce8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +502 -0
app.py CHANGED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import pandas as pd
4
+ import html
5
+ import logging
6
+
7
+ # Set up logging
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
# Same logger object the module-level ``logger`` refers to (``getLogger``
# returns one shared instance per name); bound here so the helpers below are
# self-contained.
_LOG = logging.getLogger(__name__)

# Header cells of the rendered table, in display order.
_TABLE_COLUMNS = (
    'User ID', 'Question', 'Confidence', 'Send to Human',
    'Time Spent', 'Eval Scores', 'Details',
)

# Score rules, listed in highlight-priority order:
# (metric name, label used in log messages, threshold, CSS class, row label).
# A row is flagged when the metric's score is strictly below the threshold.
_SCORE_RULES = (
    ('Input Validity (GEval)', 'Input Validity', 0.8, 'low-validity', 'Low Validity'),
    ('Correctness (GEval)', 'Correctness', 0.6, 'low-correctness', 'Low Correctness'),
)

# Static stylesheet, colour legend and Escape-key handler emitted before the
# table.
# NOTE(review): markup injected into a gr.HTML component may not execute inline
# <script> tags - confirm the Escape handler actually runs in the deployed UI.
_PAGE_PREAMBLE = """
<style>
table {
    width: 100%;
    border-collapse: collapse;
    margin-bottom: 20px;
    font-family: Arial, sans-serif;
}
th, td {
    border: 1px solid #e0e0e0;
    padding: 12px;
    text-align: left;
    font-size: 14px;
    vertical-align: top;
}
th {
    background-color: #4CAF50;
    color: white;
    font-weight: bold;
}
tr:nth-child(even) {
    background-color: #f9f9f9;
}
.send-to-human {
    background-color: #ffcccc !important;
}
.low-validity {
    background-color: #fff2cc !important;
}
.low-correctness {
    background-color: #dddddd !important;
}
.expandable {
    cursor: pointer;
    color: #1a73e8;
    font-weight: bold;
    text-decoration: none;
    display: inline-block;
    padding: 8px;
    transition: color 0.2s;
    background-color: #e8f0fe;
    border-radius: 4px;
    border: 1px solid #c6dafc;
    position: relative;
}
.expandable:hover {
    color: #1557b0;
    background-color: #d4e6fc;
}
.details {
    display: none;
    padding: 20px;
    background-color: #ffffff;
    border: 1px solid #e0e0e0;
    border-radius: 5px;
    margin-top: 10px;
    box-shadow: 0 4px 8px rgba(0,0,0,0.2);
    position: fixed;
    z-index: 1000;
    width: 80%;
    min-width: 600px;
    max-width: 1200px;
    height: auto;
    min-height: 400px;
    max-height: 85vh;
    overflow-y: auto;
    left: 50%;
    top: 50%;
    transform: translate(-50%, -50%);
}
.human-message-popup {
    display: none;
    padding: 20px;
    background-color: #ffffff;
    border: 1px solid #e0e0e0;
    border-radius: 5px;
    box-shadow: 0 4px 8px rgba(0,0,0,0.2);
    position: fixed;
    z-index: 1000;
    width: 70%;
    min-width: 500px;
    max-width: 1000px;
    height: auto;
    min-height: 200px;
    max-height: 80vh;
    overflow-y: auto;
    left: 50%;
    top: 50%;
    transform: translate(-50%, -50%);
    background-color: #fff9f9;
    border: 1px solid #d32f2f;
}
input[type="checkbox"] {
    display: none !important;
    appearance: none;
    -webkit-appearance: none;
    -moz-appearance: none;
}
input[type="checkbox"]:checked ~ .details {
    display: block;
}
input[type="checkbox"]:checked ~ .human-message-popup {
    display: block;
}
input[type="checkbox"]:checked + .expandable::after {
    content: " (Close)";
}
.details strong {
    color: #333;
    font-size: 16px;
    display: block;
    margin-bottom: 5px;
}
.details p {
    margin: 10px 0;
    line-height: 1.5;
}
.json-viewer {
    background-color: #f5f5f5;
    padding: 10px;
    border-radius: 5px;
    font-family: monospace;
    font-size: 13px;
    overflow-x: auto;
    white-space: pre-wrap;
}
pre {
    white-space: pre-wrap;
    word-wrap: break-word;
    margin: 0;
}
.color-legend {
    margin: 20px 0;
    padding: 15px;
    border: 1px solid #e0e0e0;
    border-radius: 5px;
    background-color: #f9f9f9;
}
.legend-item {
    display: flex;
    align-items: center;
    margin-bottom: 10px;
}
.color-box {
    width: 20px;
    height: 20px;
    margin-right: 10px;
    border: 1px solid #ccc;
}
.red-box {
    background-color: #ffcccc;
}
.yellow-box {
    background-color: #fff2cc;
}
.gray-box {
    background-color: #dddddd;
}
.detail-container {
    position: relative;
}
.close-details {
    position: absolute;
    top: 5px;
    right: 5px;
    cursor: pointer;
    background-color: #f44336;
    color: white;
    border: none;
    border-radius: 50%;
    width: 24px;
    height: 24px;
    display: flex;
    align-items: center;
    justify-content: center;
    font-weight: bold;
}
.overlay {
    display: none;
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0,0,0,0.5);
    z-index: 900;
}
input[type="checkbox"]:checked ~ .overlay {
    display: block;
}

/* Column width adjustments */
table th:nth-child(1),
table td:nth-child(1) {
    width: 15%;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
table th:nth-child(2),
table td:nth-child(2) {
    width: 25%;
    max-width: 350px;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
table th:nth-child(3),
table td:nth-child(3),
table th:nth-child(4),
table td:nth-child(4),
table th:nth-child(5),
table td:nth-child(5) {
    width: 8%;
    min-width: 70px;
    text-align: center;
}
table th:nth-child(6),
table td:nth-child(6) {
    width: 21%;
}
table th:nth-child(7),
table td:nth-child(7) {
    width: 15%;
    text-align: center;
}
/* Add tooltips for truncated content */
table td:nth-child(1),
table td:nth-child(2) {
    position: relative;
}
table td:nth-child(1):hover::after,
table td:nth-child(2):hover::after {
    content: attr(title);
    position: absolute;
    left: 0;
    top: 100%;
    z-index: 500;
    background-color: #333;
    color: #fff;
    padding: 5px 10px;
    border-radius: 4px;
    white-space: pre-wrap;
    max-width: 400px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}
</style>

<div class="color-legend">
    <h3>Row Color Legend</h3>
    <div class="legend-item">
        <div class="color-box red-box"></div>
        <div>Red: "Send to Human" is true - The system flagged this query to be sent to a human operator</div>
    </div>
    <div class="legend-item">
        <div class="color-box yellow-box"></div>
        <div>Yellow: Low Input Validity - Input Validity score is below 0.8</div>
    </div>
    <div class="legend-item">
        <div class="color-box gray-box"></div>
        <div>Gray: Low Correctness - Correctness score is below 0.6</div>
    </div>
</div>

<script>
document.addEventListener('keydown', function(event) {
    if (event.key === 'Escape') {
        // Find all checked checkboxes and uncheck them
        document.querySelectorAll('input[type="checkbox"]:checked').forEach(function(checkbox) {
            checkbox.checked = false;
        });
    }
});
</script>
"""


def _pretty(value):
    """Return *value* as text, pretty-printing dicts and lists as JSON."""
    if isinstance(value, (dict, list)):
        return json.dumps(value, indent=2, ensure_ascii=False)
    return str(value)


def _metric_dicts(item):
    """Return the record's evaluation metrics, keeping only dict entries."""
    raw = item.get('evaluation_metrics', [])
    if not isinstance(raw, list):
        return []
    return [metric for metric in raw if isinstance(metric, dict)]


def _format_eval_scores(metrics):
    """Return an HTML fragment with one escaped ``name: score`` line per metric."""
    if not metrics:
        return "N/A"
    lines = []
    for metric in metrics:
        score = metric.get('score', 'N/A')
        if isinstance(score, (int, float)):
            score = format(float(score), '.3f')
        # Escape each line on its own so the <br> separators stay real markup.
        lines.append(html.escape(f"{metric.get('name', 'Unknown')}: {score}"))
    return "<br>".join(lines)


def _format_tools(tools):
    """Return the tools value as a comma-separated string, whatever its shape."""
    if not tools:
        return ''
    if isinstance(tools, str):
        # Joining a bare string would split it into single characters.
        return tools
    if isinstance(tools, (list, tuple)):
        return ', '.join(str(tool) for tool in tools)
    return str(tools)


def _build_row(idx, item):
    """Map one raw JSON record onto the display fields used by the renderer."""
    metrics = _metric_dicts(item)
    time_spent = item.get('time_spent', 'N/A')
    if isinstance(time_spent, (int, float)):
        time_spent = f"{time_spent:.2f}s"
    return {
        'Index': idx,
        'User ID': item.get('user_id', 'N/A'),
        'Question': item.get('question', 'N/A'),
        'Confidence': item.get('confidence_score', 'N/A'),
        'Send to Human': item.get('send_to_human', 'N/A'),
        'Call Human Message': item.get('call_human_message', 'N/A'),
        'Time Spent': time_spent,
        'Eval Scores': _format_eval_scores(metrics),
        'Response': item.get('chat_response', 'N/A'),
        'Source': item.get('source', 'N/A'),
        'Tools': _format_tools(item.get('tools')),
        'Retrieval Context': item.get('retrieval_context', 'N/A'),
        'Ground Truth': item.get('ground_truth', 'N/A'),
        'Evaluation Metrics': metrics,
    }


def _row_class_attr(row):
    """Return the ``class`` attribute (with leading space) for a row, or ''."""
    idx = row['Index']
    metrics = row['Evaluation Metrics']
    flagged = set()

    if metrics:
        _LOG.info("Checking evaluation metrics for row %s", idx)
    for metric in metrics:
        metric_name = metric.get('name', '')
        for name, label, threshold, css_class, _row_label in _SCORE_RULES:
            if metric_name != name:
                continue
            score = metric.get('score')
            _LOG.info("Found %s score: %s (type: %s)", label, score, type(score).__name__)
            if score is None:
                continue
            try:
                value = float(score)
            except (ValueError, TypeError) as exc:
                _LOG.warning("Could not convert %s to float: %s", score, exc)
                continue
            _LOG.info("Converted to float: %s", value)
            if value < threshold:
                flagged.add(css_class)
                _LOG.info("Low %s detected: %s", label, value)

    # Priority: send_to_human first, then the score rules in declared order.
    if row['Send to Human'] is True:
        _LOG.info("Row %s marked as 'Send to Human'", idx)
        return " class='send-to-human'"
    for _name, _label, _threshold, css_class, row_label in _SCORE_RULES:
        if css_class in flagged:
            _LOG.info("Row %s marked as '%s'", idx, row_label)
            return f" class='{css_class}'"
    return ""


def _render_row(row):
    """Render one display row (summary cells plus hidden pop-ups) as HTML."""
    idx = row['Index']
    user_id = html.escape(str(row['User ID']))
    question = html.escape(str(row['Question']))
    flagged_for_human = row['Send to Human'] is True

    if flagged_for_human:
        message = html.escape(_pretty(row['Call Human Message']))
        # The message sits directly inside its pre-wrap container so that no
        # template indentation is rendered as part of the text.
        human_cell = f"""<td>
            <span style='font-weight: bold; color: #d32f2f;'>True</span>
            <input type='checkbox' id='message_toggle_{idx}'>
            <label for='message_toggle_{idx}' class='expandable' style='margin-top: 5px; background-color: #d32f2f; color: white; border: none; border-radius: 4px; padding: 5px 10px; cursor: pointer; font-size: 12px; display: block; width: calc(100% - 16px); text-align: center;'>Show Message</label>
            <div class='overlay' onclick="document.getElementById('message_toggle_{idx}').checked = false;"></div>
            <div class='human-message-popup' onclick="event.stopPropagation();">
                <button class="close-details" onclick="document.getElementById('message_toggle_{idx}').checked = false;">×</button>
                <strong style='color: #d32f2f; font-size: 16px; margin-bottom: 10px;'>Call Human Message:</strong>
                <div style='font-size: 14px; color: #000; white-space: pre-wrap; overflow-x: auto; background-color: #f5f5f5; padding: 10px; border-radius: 5px; border: 1px solid #ddd;'>{message}</div>
            </div>
        </td>"""
        human_details = (
            "<strong style='color: #d32f2f;'>Call Human Message:</strong>"
            f"<p style='color: #d32f2f; white-space: pre-wrap;'>{message}</p>"
        )
    else:
        human_cell = "<td><span style='color: #555;'>False</span></td>"
        human_details = ""

    context = row['Retrieval Context']
    context_html = html.escape(_pretty(context)) if context else "N/A"

    metrics = row['Evaluation Metrics']
    if metrics:
        shown_metrics = []
        for metric in metrics:
            shown = dict(metric)
            if isinstance(shown.get('score'), (int, float)):
                shown['score'] = format(float(shown['score']), '.3f')
            shown_metrics.append(shown)
        metrics_html = html.escape(json.dumps(shown_metrics, indent=2, ensure_ascii=False))
    else:
        metrics_html = "N/A"

    details_cell = f"""
        <td class="detail-container">
            <input type='checkbox' id='toggle_{idx}'>
            <label for='toggle_{idx}' class='expandable'>Show Details</label>
            <div class='overlay' onclick="document.getElementById('toggle_{idx}').checked = false;"></div>
            <div class='details' onclick="event.stopPropagation();">
                <button class="close-details" onclick="document.getElementById('toggle_{idx}').checked = false;">×</button>
                <strong>Ground Truth:</strong>
                <p>{html.escape(str(row['Ground Truth']))}</p>
                <strong>Response:</strong>
                <p>{html.escape(str(row['Response']))}</p>
                <strong>Source:</strong>
                <p>{html.escape(str(row['Source']))}</p>
                <strong>Tools:</strong>
                <p>{html.escape(str(row['Tools']))}</p>
                {human_details}
                <strong>Evaluation Metrics:</strong>
                <div class='json-viewer'><pre>{metrics_html}</pre></div>
                <strong>Retrieval Context:</strong>
                <div class='json-viewer'><pre>{context_html}</pre></div>
            </div>
        </td>
        """

    return "".join([
        f"<tr{_row_class_attr(row)}>",
        f"<td title=\"{user_id}\">{user_id}</td>",
        f"<td title=\"{question}\">{question}</td>",
        # Values come from the uploaded file, so they are escaped like the rest.
        f"<td>{html.escape(str(row['Confidence']))}</td>",
        human_cell,
        f"<td>{html.escape(str(row['Time Spent']))}</td>",
        # Already an escaped HTML fragment (see _format_eval_scores).
        f"<td>{row['Eval Scores']}</td>",
        details_cell,
        "</tr>",
    ])


def load_and_display_json(file):
    """Render an uploaded JSON file of records as a colour-coded HTML table.

    Args:
        file: The upload to read. Either an object exposing the file path as
            ``.name``, a plain path string, or ``None`` when no file is
            selected.

    Returns:
        An HTML string: the stylesheet, legend and one table row per record.
        Returns an empty string when ``file`` is ``None``, and a short
        ``"Error ..."`` message when the file cannot be read, is not valid
        JSON, or does not contain a list of dictionaries.
    """
    if file is None:
        # Nothing selected (e.g. the upload was cleared): show nothing rather
        # than an attribute-error message.
        return ""

    # Accept both an object carrying the path in ``.name`` and a bare path.
    path = getattr(file, 'name', file)

    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
            return "Error: JSON file must contain a list of dictionaries."

        header = "<tr>" + "".join(f"<th>{col}</th>" for col in _TABLE_COLUMNS) + "</tr>"
        parts = [_PAGE_PREAMBLE, "<table>", header]
        parts.extend(_render_row(_build_row(idx, item)) for idx, item in enumerate(data))
        parts.append("</table>")
        return "".join(parts)
    except Exception as e:
        # UI boundary: report the failure in the output pane instead of raising.
        _LOG.exception("Failed to render %s", path)
        return f"Error processing JSON file: {html.escape(str(e))}"
484
+
485
# Build the Gradio UI: a JSON file upload whose contents are rendered as an
# HTML table by load_and_display_json.
with gr.Blocks() as demo:
    gr.Markdown("# JSON Data Visualization")
    gr.Markdown("Upload a JSON file containing a list of dictionaries to visualize the data.")

    with gr.Accordion("Row Color Legend", open=True):
        gr.Markdown("""
        * **Red rows**: "Send to Human" is true - The system flagged this query to be sent to a human operator
        * **Yellow rows**: Low Input Validity - Input Validity score is below 0.8
        * **Gray rows**: Low Correctness - Correctness score is below 0.6
        """)

    file_input = gr.File(label="Upload JSON File", file_types=[".json"])
    output = gr.HTML(label="Data Visualization")
    # Re-render whenever the selected file changes.
    file_input.change(load_and_display_json, inputs=file_input, outputs=output)

# Launch only when run as a script, so importing this module (for tests or
# reuse of `demo`) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()