firobeid committed on
Commit
f184642
·
verified ·
1 Parent(s): 66304c3

Upload leaderboard files

Browse files
Files changed (2) hide show
  1. leaderboard.py +474 -0
  2. requirements.txt +3 -0
leaderboard.py ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import plotly.graph_objects as go
3
+ import plotly.express as px
4
+ import pandas as pd
5
+ from plotly.subplots import make_subplots
6
+ import numpy as np
7
+ import io
8
+
9
# Built-in sample dataset; it is swapped out as soon as a CSV is uploaded.
# Layout: 2 models x 3 partitions x 8 topics = 48 rows. Inside every metric
# column the first 24 values belong to the first model and the last 24 to the
# second one; each group of 24 is ordered inference -> test -> train, with the
# 8 topics repeating inside each partition.
_SAMPLE_MODELS = ['L1_Sentiment_Analysis', 'L2_Advanced_Classifier']
_SAMPLE_PARTITIONS = ['inference', 'test', 'train']
_SAMPLE_TOPICS = [
    'OVERALL', 'Earnings_Ratings', 'Entertainment', 'Financial_Funds',
    'Investment_Banking', 'Mechanical_Transportation', 'Pharmaceutical',
    'Technology',
]

default_data = pd.DataFrame({
    'model': [
        name
        for name in _SAMPLE_MODELS
        for _ in range(len(_SAMPLE_PARTITIONS) * len(_SAMPLE_TOPICS))
    ],
    'partition': [
        part
        for _ in _SAMPLE_MODELS
        for part in _SAMPLE_PARTITIONS
        for _ in _SAMPLE_TOPICS
    ],
    'topic': [
        topic
        for _ in range(len(_SAMPLE_MODELS) * len(_SAMPLE_PARTITIONS))
        for topic in _SAMPLE_TOPICS
    ],
    'FPR': [
        # L1_Sentiment_Analysis
        0.7603, 0.7831, 0.6161, 0.7772, 0.7010, 0.6869, 0.7979, 0.8701,
        0.7664, 0.8374, 0.6022, 0.8635, 0.6505, 0.6567, 0.7614, 0.8711,
        0.7333, 0.7534, 0.6299, 0.7594, 0.6468, 0.6164, 0.7575, 0.8825,
        # L2_Advanced_Classifier
        0.8103, 0.8331, 0.6661, 0.8272, 0.7510, 0.7369, 0.8479, 0.9201,
        0.8164, 0.8874, 0.6522, 0.9135, 0.7005, 0.7067, 0.8114, 0.9211,
        0.7833, 0.8034, 0.6799, 0.8094, 0.6968, 0.6664, 0.8075, 0.9325,
    ],
    'Confidence': [
        # L1_Sentiment_Analysis
        0.2397, 0.2169, 0.3839, 0.2228, 0.2990, 0.3131, 0.2021, 0.1299,
        0.2336, 0.1626, 0.3978, 0.1365, 0.3495, 0.3433, 0.2386, 0.1289,
        0.2667, 0.2466, 0.3701, 0.2406, 0.3532, 0.3836, 0.2425, 0.1175,
        # L2_Advanced_Classifier
        0.1897, 0.1669, 0.3339, 0.1728, 0.2490, 0.2631, 0.1521, 0.0799,
        0.1836, 0.1126, 0.3478, 0.0865, 0.2995, 0.2933, 0.1886, 0.0789,
        0.2167, 0.1966, 0.3201, 0.1906, 0.3032, 0.3336, 0.1925, 0.0675,
    ],
    'FDR': [
        # L1_Sentiment_Analysis
        0.3812, 0.3916, 0.4233, 0.3421, 0.3886, 0.3487, 0.4363, 0.3631,
        0.4867, 0.4326, 0.5000, 0.4899, 0.4845, 0.4903, 0.5217, 0.4767,
        0.4653, 0.4592, 0.4652, 0.4615, 0.4672, 0.4749, 0.4727, 0.4607,
        # L2_Advanced_Classifier
        0.3312, 0.3416, 0.3733, 0.2921, 0.3386, 0.2987, 0.3863, 0.3131,
        0.4367, 0.3826, 0.4500, 0.4399, 0.4345, 0.4403, 0.4717, 0.4267,
        0.4153, 0.4092, 0.4152, 0.4115, 0.4172, 0.4249, 0.4227, 0.4107,
    ],
    'Precision': [
        # L1_Sentiment_Analysis
        0.6188, 0.6084, 0.5767, 0.6579, 0.6114, 0.6513, 0.5637, 0.6369,
        0.5133, 0.5674, 0.5000, 0.5101, 0.5155, 0.5097, 0.4783, 0.5233,
        0.5347, 0.5408, 0.5348, 0.5385, 0.5328, 0.5251, 0.5273, 0.5393,
        # L2_Advanced_Classifier
        0.6688, 0.6584, 0.6267, 0.7079, 0.6614, 0.7013, 0.6137, 0.6869,
        0.5633, 0.6174, 0.5500, 0.5601, 0.5655, 0.5597, 0.5283, 0.5733,
        0.5847, 0.5908, 0.5848, 0.5885, 0.5828, 0.5751, 0.5773, 0.5893,
    ],
    'Recall_Power': [
        # L1_Sentiment_Analysis
        0.7715, 0.7014, 0.6225, 0.8112, 0.6948, 0.6865, 0.8189, 0.9073,
        0.7914, 0.8321, 0.6680, 0.8550, 0.6623, 0.7439, 0.7534, 0.9049,
        0.7427, 0.7582, 0.6250, 0.7760, 0.6491, 0.6336, 0.7650, 0.8897,
        # L2_Advanced_Classifier
        0.8215, 0.7514, 0.6725, 0.8612, 0.7448, 0.7365, 0.8689, 0.9573,
        0.8414, 0.8821, 0.7180, 0.9050, 0.7123, 0.7939, 0.8034, 0.9549,
        0.7927, 0.8082, 0.6750, 0.8260, 0.6991, 0.6836, 0.8150, 0.9397,
    ],
    'Accuracy': [
        # L1_Sentiment_Analysis
        0.5670, 0.5242, 0.5209, 0.6042, 0.5418, 0.5563, 0.5459, 0.6174,
        0.5155, 0.5435, 0.5259, 0.5048, 0.5093, 0.5350, 0.4862, 0.5276,
        0.5197, 0.5225, 0.5069, 0.5260, 0.5106, 0.5131, 0.5167, 0.5324,
        # L2_Advanced_Classifier
        0.6170, 0.5742, 0.5709, 0.6542, 0.5918, 0.6063, 0.5959, 0.6674,
        0.5655, 0.5935, 0.5759, 0.5548, 0.5593, 0.5850, 0.5362, 0.5776,
        0.5697, 0.5725, 0.5569, 0.5760, 0.5606, 0.5631, 0.5667, 0.5824,
    ],
    'G_mean': [
        # L1_Sentiment_Analysis
        0.430033, 0.390043, 0.488854, 0.425130, 0.455791, 0.463620, 0.406817, 0.343305,
        0.429966, 0.367831, 0.515490, 0.341625, 0.481117, 0.505352, 0.423983, 0.341528,
        0.445060, 0.432403, 0.480950, 0.432094, 0.478813, 0.493000, 0.430712, 0.323326,
        # L2_Advanced_Classifier
        0.480033, 0.440043, 0.538854, 0.475130, 0.505791, 0.513620, 0.456817, 0.393305,
        0.479966, 0.417831, 0.565490, 0.391625, 0.531117, 0.555352, 0.473983, 0.391528,
        0.495060, 0.482403, 0.530950, 0.482094, 0.528813, 0.543000, 0.480712, 0.373326,
    ],
})
58
+
59
def load_csv_data(file):
    """Load and validate an uploaded results CSV, falling back to sample data.

    Args:
        file: ``None``, a filesystem path, or an object whose ``name``
            attribute holds the path of the uploaded file.

    Returns:
        A ``(dataframe, status_message)`` tuple. When no file is given, the
        file cannot be read, required columns are missing, or no complete row
        remains after cleaning, ``default_data`` is returned together with a
        message explaining why.
    """
    if file is None:
        return default_data, "Using default sample data"

    import os  # local import so the block does not rely on a file-level one

    required_cols = ['model', 'partition', 'topic', 'FPR', 'Confidence', 'FDR',
                     'Precision', 'Recall_Power', 'Accuracy', 'G_mean']

    # Accept a plain path as well as an upload wrapper exposing ``.name``.
    if isinstance(file, (str, os.PathLike)):
        path = file
    else:
        path = getattr(file, "name", file)

    try:
        df = pd.read_csv(path)

        # Validate required columns
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            return default_data, f"❌ Missing columns: {missing_cols}. Using default data."

        # Only the required columns decide whether a row is usable; a blank
        # cell in an unrelated extra column must not discard the row.
        df = df.dropna(subset=required_cols)

        # An empty frame would break every chart and the summary table.
        if df.empty:
            return default_data, "❌ No complete rows found in CSV. Using default data."

        return df, f"✅ Successfully loaded {len(df)} records with {df['model'].nunique()} models"

    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing.
        return default_data, f"❌ Error loading CSV: {str(e)}. Using default data."
82
+
83
def create_model_leaderboard(df, partition_filter='all', topic_filter='OVERALL'):
    """Create a leaderboard figure comparing all models.

    Rows are optionally restricted to one partition and/or one topic, the
    metrics are averaged per model, and one bar subplot is drawn for each of
    Precision, Recall_Power, Accuracy, G_mean and an overall score. The
    overall score is the mean of Precision, Recall_Power and Accuracy, and
    models are ordered by it in descending order.

    Args:
        df: DataFrame with ``model``, ``partition``, ``topic`` and the four
            metric columns listed above.
        partition_filter: Partition value to keep, or ``'all'`` to keep all.
        topic_filter: Topic value to keep, or ``'all'`` to keep all.

    Returns:
        A Plotly figure with one row of five subplots.
    """
    filtered_df = df
    if partition_filter != 'all':
        filtered_df = filtered_df[filtered_df['partition'] == partition_filter]
    if topic_filter != 'all':
        filtered_df = filtered_df[filtered_df['topic'] == topic_filter]

    # Calculate average metrics per model
    metrics = ['Precision', 'Recall_Power', 'Accuracy', 'G_mean']
    leaderboard = filtered_df.groupby('model')[metrics].mean().reset_index()

    # Calculate overall score (average of key metrics)
    leaderboard['Overall_Score'] = leaderboard[['Precision', 'Recall_Power', 'Accuracy']].mean(axis=1)
    leaderboard = leaderboard.sort_values('Overall_Score', ascending=False)

    panels = metrics + ['Overall_Score']
    fig = make_subplots(
        rows=1, cols=len(panels),
        subplot_titles=metrics + ['Overall Score']
    )

    # The palette is finite; wrap around so that any number of models can be
    # drawn instead of raising IndexError once the colors run out.
    palette = px.colors.qualitative.Set3

    for col, metric in enumerate(panels, start=1):
        # Legend entries come from the first panel only, to avoid duplicates.
        first_panel = col == 1
        for j, (model, value) in enumerate(zip(leaderboard['model'], leaderboard[metric])):
            fig.add_trace(
                go.Bar(
                    x=[model],
                    y=[value],
                    name=model if first_panel else "",
                    marker_color=palette[j % len(palette)],
                    showlegend=first_panel,
                    text=f"{value:.3f}",
                    textposition="outside"
                ),
                row=1, col=col
            )

    fig.update_layout(
        title=f"Model Leaderboard - {partition_filter.title()} | {topic_filter}",
        height=500,
        showlegend=True
    )

    # Every metric shares the same fixed scale so the panels are comparable.
    for col in range(1, len(panels) + 1):
        fig.update_yaxes(range=[0, 1], row=1, col=col)

    return fig
135
+
136
def create_topic_comparison(df, models_selected=None, metric='Accuracy', partition_filter='all'):
    """Compare selected models across topics in a grouped bar chart.

    Args:
        df: DataFrame with ``model``, ``partition``, ``topic`` and ``metric``
            columns.
        models_selected: Model names to plot. When ``None`` or empty, the
            first three models found in ``df`` are used.
        metric: Name of the column to plot.
        partition_filter: Partition value to keep, or ``'all'`` to average
            over every partition.

    Returns:
        A Plotly figure with one bar trace per selected model.
    """
    if models_selected is None or len(models_selected) == 0:
        models_selected = df['model'].unique()[:3]  # Default to first 3 models

    # Filter data
    filtered_df = df[df['model'].isin(models_selected)]
    if partition_filter != 'all':
        filtered_df = filtered_df[filtered_df['partition'] == partition_filter]

    # Average across partitions for each model-topic combination
    topic_performance = filtered_df.groupby(['model', 'topic'])[metric].mean().reset_index()

    fig = go.Figure()

    # Wrap around the finite palette so more models than colors still works.
    palette = px.colors.qualitative.Set3
    topics = sorted(topic_performance['topic'].unique())

    for i, model in enumerate(models_selected):
        model_data = topic_performance[topic_performance['model'] == model]
        fig.add_trace(go.Bar(
            name=model,
            # Use this model's own topics: a model may lack some topics, and
            # pairing its values with the full topic list would misalign bars.
            x=model_data['topic'],
            y=model_data[metric],
            marker_color=palette[i % len(palette)],
            text=[f"{val:.3f}" for val in model_data[metric]],
            textposition='outside'
        ))

    fig.update_layout(
        title=f"Model Comparison Across Topics ({metric}) - {partition_filter.title()}",
        xaxis_title="Topics",
        yaxis_title=metric,
        barmode='group',
        height=500,
        xaxis_tickangle=-45,
        # Keep the axis in sorted topic order regardless of trace order.
        xaxis=dict(categoryorder='array', categoryarray=topics),
        yaxis=dict(range=[0, 1])
    )

    return fig
178
+
179
def create_partition_analysis(df, models_selected=None):
    """Plot every metric per partition for the selected models.

    Metrics are averaged over topics for each model/partition pair and drawn
    as grouped bars in a 2x4 subplot grid (seven metrics, last cell unused).

    Args:
        df: DataFrame with ``model``, ``partition`` and the seven metric
            columns (FPR, Confidence, FDR, Precision, Recall_Power, Accuracy,
            G_mean).
        models_selected: Model names to plot. When ``None`` or empty, the
            first three models found in ``df`` are used.

    Returns:
        A Plotly figure containing one subplot per metric.
    """
    if models_selected is None or len(models_selected) == 0:
        models_selected = df['model'].unique()[:3]

    filtered_df = df[df['model'].isin(models_selected)]

    # Average across topics for each model-partition combination
    metrics = ['FPR', 'Confidence', 'FDR', 'Precision', 'Recall_Power', 'Accuracy', 'G_mean']
    partition_performance = filtered_df.groupby(['model', 'partition'])[metrics].mean().reset_index()

    n_cols = 4
    fig = make_subplots(
        rows=2, cols=n_cols,
        subplot_titles=metrics + [''],  # Extra empty title for 8th subplot
        specs=[[{"colspan": 1}, {"colspan": 1}, {"colspan": 1}, {"colspan": 1}],
               [{"colspan": 1}, {"colspan": 1}, {"colspan": 1}, None]]  # 7 subplots total
    )

    # Wrap around the finite palette so more models than colors still works.
    palette = px.colors.qualitative.Set3

    for i, metric in enumerate(metrics):
        grid_row, grid_col = divmod(i, n_cols)
        # Legend entries come from the first subplot only, to avoid duplicates.
        first_panel = i == 0

        for j, model in enumerate(models_selected):
            model_data = partition_performance[partition_performance['model'] == model]
            model_data = model_data.sort_values('partition')  # Ensure consistent ordering

            fig.add_trace(
                go.Bar(
                    name=model if first_panel else "",
                    x=model_data['partition'],
                    y=model_data[metric],
                    marker_color=palette[j % len(palette)],
                    showlegend=first_panel,
                    text=[f"{val:.3f}" for val in model_data[metric]],
                    textposition='outside'
                ),
                row=grid_row + 1, col=grid_col + 1
            )

    fig.update_layout(
        title="Model Performance Across Partitions - All Metrics",
        height=800,
        barmode='group'
    )

    # Give every existing subplot the same fixed y scale.
    for i in range(len(metrics)):
        grid_row, grid_col = divmod(i, n_cols)
        fig.update_yaxes(range=[0, 1], row=grid_row + 1, col=grid_col + 1)

    return fig
237
+
238
def create_performance_summary_table(df):
    """Create a per-model summary table with key statistics.

    For every model the table holds the mean Accuracy, Precision,
    Recall_Power and G_mean, the topic of the row with the highest and lowest
    Accuracy together with those scores, and the variance of Accuracy.

    Args:
        df: DataFrame with ``model``, ``topic``, ``Accuracy``, ``Precision``,
            ``Recall_Power`` and ``G_mean`` columns.

    Returns:
        A DataFrame with one row per model, numeric values rounded to four
        decimals and rows sorted by ``Avg_Accuracy`` in descending order. An
        empty input yields an empty table that still has all columns.
    """
    columns = [
        'Model', 'Avg_Accuracy', 'Avg_Precision', 'Avg_Recall', 'Avg_G_mean',
        'Best_Topic_Accuracy', 'Best_Topic_Score',
        'Worst_Topic_Accuracy', 'Worst_Topic_Score', 'Performance_Variance',
    ]

    summary_stats = []
    for model, model_data in df.groupby('model', sort=False):
        # Renumber rows so the idxmax/idxmin lookups below resolve to exactly
        # one row even when the incoming index has repeated labels.
        model_data = model_data.reset_index(drop=True)
        accuracy = model_data['Accuracy']
        best_row = model_data.loc[accuracy.idxmax()]
        worst_row = model_data.loc[accuracy.idxmin()]

        summary_stats.append({
            'Model': model,
            'Avg_Accuracy': accuracy.mean(),
            'Avg_Precision': model_data['Precision'].mean(),
            'Avg_Recall': model_data['Recall_Power'].mean(),
            'Avg_G_mean': model_data['G_mean'].mean(),
            'Best_Topic_Accuracy': best_row['topic'],
            'Best_Topic_Score': best_row['Accuracy'],
            'Worst_Topic_Accuracy': worst_row['topic'],
            'Worst_Topic_Score': worst_row['Accuracy'],
            'Performance_Variance': accuracy.var(),
        })

    # Passing the column list keeps the schema stable when no model is found,
    # so the sort below cannot fail on a missing column.
    summary_df = pd.DataFrame(summary_stats, columns=columns)
    summary_df = summary_df.round(4)
    summary_df = summary_df.sort_values('Avg_Accuracy', ascending=False)

    return summary_df
265
+
266
# Create the Gradio interface.
# Layout: an upload row, then five tabs (leaderboard, topic comparison,
# partition analysis, summary table, raw data). The active DataFrame is kept
# in a gr.State so that every filter callback works on the uploaded data.
with gr.Blocks(title="Multi-Model Classifier Dashboard", theme=gr.themes.Soft()) as demo:
    gr.HTML("<h1 style='text-align: center; color: #2E86AB;'>🏆 Multi-Model Classifier Dashboard</h1>")

    # Data loading section
    with gr.Row():
        with gr.Column():
            csv_file = gr.File(
                label="📁 Upload CSV File",
                file_types=['.csv']
            )
            data_status = gr.Textbox(
                label="Data Status",
                value="Using default sample data with 2 models",
                interactive=False
            )

    # Store current data
    current_data = gr.State(value=default_data)

    with gr.Tabs():
        with gr.TabItem("🏆 Model Leaderboard"):
            with gr.Row():
                with gr.Column(scale=1):
                    partition_filter = gr.Dropdown(
                        choices=['all', 'inference', 'test', 'train'],
                        value='all',
                        label="Filter by Partition"
                    )
                    topic_filter = gr.Dropdown(
                        choices=['all', 'OVERALL'],
                        value='OVERALL',
                        label="Filter by Topic"
                    )

                with gr.Column(scale=3):
                    leaderboard_chart = gr.Plot()

        with gr.TabItem("📊 Topic Comparison"):
            with gr.Row():
                with gr.Column(scale=1):
                    models_selector = gr.CheckboxGroup(
                        choices=[],
                        label="Select Models to Compare",
                        value=[]
                    )
                    metric_selector = gr.Dropdown(
                        choices=['FPR', 'Confidence', 'FDR', 'Precision', 'Recall_Power', 'Accuracy', 'G_mean'],
                        value='Accuracy',
                        label="Select Metric"
                    )
                    partition_filter_topic = gr.Dropdown(
                        choices=['all', 'inference', 'test', 'train'],
                        value='all',
                        label="Filter by Partition"
                    )

                with gr.Column(scale=3):
                    topic_comparison_chart = gr.Plot()

        with gr.TabItem("🔄 Partition Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    models_selector_partition = gr.CheckboxGroup(
                        choices=[],
                        label="Select Models to Analyze",
                        value=[]
                    )

                with gr.Column(scale=3):
                    partition_analysis_chart = gr.Plot()

        with gr.TabItem("📈 Performance Summary"):
            summary_table = gr.DataFrame(
                label="Model Performance Summary",
                interactive=False
            )

        with gr.TabItem("📋 Raw Data"):
            raw_data_table = gr.DataFrame(
                label="Complete Dataset",
                interactive=True
            )

    def update_dashboard(file):
        """Rebuild every dashboard component from a newly loaded dataset.

        Args:
            file: The uploaded file passed by the upload component, or
                ``None`` to use the built-in sample data.

        Returns:
            A tuple matching ``dashboard_outputs`` below: the DataFrame for
            the state, the status text, updates for the topic dropdown and
            both model selectors, the three figures, the summary table and
            the raw data table.
        """
        df, status = load_csv_data(file)

        # Update model choices
        model_choices = sorted(df['model'].unique())
        topic_choices = ['all'] + sorted(df['topic'].unique())

        # Only preselect 'OVERALL' when the data really has that topic;
        # otherwise the dropdown would hold a value outside its choices and
        # the leaderboard would be filtered down to nothing.
        default_topic = 'OVERALL' if 'OVERALL' in topic_choices else 'all'
        default_models = model_choices[:3]

        # Create initial plots
        leaderboard = create_model_leaderboard(df, topic_filter=default_topic)
        topic_comp = create_topic_comparison(df, default_models)
        partition_analysis = create_partition_analysis(df, default_models)
        summary = create_performance_summary_table(df)

        return (
            df, status,
            gr.update(choices=topic_choices, value=default_topic),
            gr.update(choices=model_choices, value=default_models),
            gr.update(choices=model_choices, value=default_models),
            leaderboard, topic_comp, partition_analysis, summary, df
        )

    # Components refreshed by update_dashboard, in the order of its return tuple.
    dashboard_outputs = [
        current_data, data_status, topic_filter,
        models_selector, models_selector_partition,
        leaderboard_chart, topic_comparison_chart,
        partition_analysis_chart, summary_table, raw_data_table
    ]

    # Event handlers
    csv_file.change(
        fn=update_dashboard,
        inputs=[csv_file],
        outputs=dashboard_outputs
    )

    # Update leaderboard when filters change
    def update_leaderboard(data, partition, topic):
        """Redraw the leaderboard for the chosen partition and topic."""
        return create_model_leaderboard(data, partition, topic)

    for leaderboard_control in (partition_filter, topic_filter):
        leaderboard_control.change(
            fn=update_leaderboard,
            inputs=[current_data, partition_filter, topic_filter],
            outputs=leaderboard_chart
        )

    # Update topic comparison when models, metric, or partition change
    def update_topic_comparison(data, selected_models, metric, partition):
        """Redraw the topic comparison for the current selections."""
        return create_topic_comparison(data, selected_models, metric, partition)

    for topic_control in (models_selector, metric_selector, partition_filter_topic):
        topic_control.change(
            fn=update_topic_comparison,
            inputs=[current_data, models_selector, metric_selector, partition_filter_topic],
            outputs=topic_comparison_chart
        )

    # Update partition analysis when models change
    def update_partition_analysis(data, selected_models):
        """Redraw the partition analysis for the selected models."""
        return create_partition_analysis(data, selected_models)

    models_selector_partition.change(
        fn=update_partition_analysis,
        inputs=[current_data, models_selector_partition],
        outputs=partition_analysis_chart
    )

    # Initialize dashboard with default data
    demo.load(
        fn=lambda: update_dashboard(None),
        outputs=dashboard_outputs
    )

    gr.Markdown("""
    ### 💡 Dashboard Features

    **📁 Data Loading**: Upload your CSV file with classifier results. The app automatically detects all models and creates comparisons.

    **🏆 Model Leaderboard**:
    - Compare all models side-by-side across key metrics
    - Filter by partition and topic for specific comparisons
    - Overall score calculated from precision, recall, and accuracy

    **📊 Topic Comparison**:
    - Select specific models to compare across all topics
    - Choose any metric (FPR, Confidence, FDR, Precision, Recall_Power, Accuracy, G_mean)
    - Filter by partition to focus on specific evaluation splits
    - Visual comparison across business categories

    **🔄 Partition Analysis**:
    - Analyze all metrics across train/test/inference partitions
    - Compare multiple models across different evaluation splits
    - Monitor generalization capabilities and detect overfitting
    - Comprehensive view of all 7 performance metrics

    **📈 Performance Summary**:
    - Statistical overview of each model's performance
    - Best and worst performing topics for each model
    - Performance variance analysis

    **CSV Format**: Your file should have columns: `model`, `partition`, `topic`, `FPR`, `Confidence`, `FDR`, `Precision`, `Recall_Power`, `Accuracy`, `G_mean`
    """)

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ numpy==1.24.4
2
+ plotly==6.0.1
3
+ pandas==1.5.3