Ákos Hadnagy committed
Commit 3fa220f · Parent(s): 4046334

Rename to app.py for deployment

Files changed (1): app.py (+541, -0)
app.py ADDED
@@ -0,0 +1,541 @@
+#!/usr/bin/env python3
+"""
+LLM Inference Performance Dashboard
+
+A Gradio-based dashboard for visualizing and analyzing LLM inference benchmark results.
+Provides filtering, comparison, and historical analysis capabilities.
+"""
+
+import gradio as gr
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
+import pandas as pd
+import polars as pl
+from datetime import datetime
+from typing import List, Dict, Any, Optional, Tuple
+import logging
+
+from benchmark_data_reader import BenchmarkDataReader
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class BenchmarkDashboard:
+    """Main dashboard class for LLM inference performance visualization."""
+
+    def __init__(self):
+        """Initialize the dashboard and load data."""
+        self.reader = BenchmarkDataReader()
+        self.df = None
+        self.load_data()
+
+    def load_data(self) -> None:
+        """Load benchmark data from files."""
+        try:
+            self.df = self.reader.read_benchmark_files()
+            if not self.df.is_empty():
+                # Convert to pandas for easier plotting with plotly
+                self.df_pandas = self.df.to_pandas()
+                # Convert timestamp to datetime
+                self.df_pandas['timestamp'] = pd.to_datetime(self.df_pandas['timestamp'])
+                logger.info(f"Loaded {len(self.df_pandas)} benchmark scenarios")
+            else:
+                logger.warning("No benchmark data loaded")
+                self.df_pandas = pd.DataFrame()
+        except Exception as e:
+            logger.error(f"Error loading data: {e}")
+            self.df_pandas = pd.DataFrame()
+
+    def get_filter_options(self) -> Tuple[List[str], List[str], List[str], List[str], str, str]:
+        """Get unique values for filter dropdowns and date range."""
+        if self.df_pandas.empty:
+            return [], [], [], [], "", ""
+
+        models = sorted(self.df_pandas['model_name'].dropna().unique().tolist())
+        scenarios = sorted(self.df_pandas['scenario_name'].dropna().unique().tolist())
+        gpus = sorted(self.df_pandas['gpu_name'].dropna().unique().tolist())
+
+        # Get benchmark runs grouped by date (or commit_id if available)
+        benchmark_runs = []
+
+        # Group by commit_id if available, otherwise group by date
+        if self.df_pandas['commit_id'].notna().any():
+            # Group by commit_id
+            for commit_id in self.df_pandas['commit_id'].dropna().unique():
+                commit_data = self.df_pandas[self.df_pandas['commit_id'] == commit_id]
+                date_str = commit_data['timestamp'].min().strftime('%Y-%m-%d')
+                models_count = len(commit_data['model_name'].unique())
+                scenarios_count = len(commit_data['scenario_name'].unique())
+                run_id = f"Commit {commit_id[:8]} ({date_str}) - {models_count} models, {scenarios_count} scenarios"
+                benchmark_runs.append(run_id)
+        else:
+            # Group by date since commit_id is not available
+            self.df_pandas['date'] = self.df_pandas['timestamp'].dt.date
+            for date in sorted(self.df_pandas['date'].unique()):
+                date_data = self.df_pandas[self.df_pandas['date'] == date]
+                models_count = len(date_data['model_name'].unique())
+                scenarios_count = len(date_data['scenario_name'].unique())
+
+                # Check if any commit_id exists for this date (even if null)
+                unique_commits = date_data['commit_id'].dropna().unique()
+                if len(unique_commits) > 0:
+                    commit_display = f"Commit {unique_commits[0][:8]}"
+                else:
+                    commit_display = "No commit ID"
+
+                run_id = f"{date} - {commit_display} - {models_count} models, {scenarios_count} scenarios"
+                benchmark_runs.append(run_id)
+
+        benchmark_runs = sorted(benchmark_runs)
+
+        # Get date range
+        min_date = self.df_pandas['timestamp'].min().strftime('%Y-%m-%d')
+        max_date = self.df_pandas['timestamp'].max().strftime('%Y-%m-%d')
+
+        return models, scenarios, gpus, benchmark_runs, min_date, max_date
+
+    def filter_data(self, selected_models: List[str], selected_scenarios: List[str],
+                    selected_gpus: List[str], selected_run: Optional[str] = None,
+                    start_date: Optional[str] = None, end_date: Optional[str] = None) -> pd.DataFrame:
+        """Filter data based on user selections."""
+        if self.df_pandas.empty:
+            return pd.DataFrame()
+
+        filtered_df = self.df_pandas.copy()
+
+        if selected_models:
+            filtered_df = filtered_df[filtered_df['model_name'].isin(selected_models)]
+        if selected_scenarios:
+            filtered_df = filtered_df[filtered_df['scenario_name'].isin(selected_scenarios)]
+        if selected_gpus:
+            filtered_df = filtered_df[filtered_df['gpu_name'].isin(selected_gpus)]
+
+        # Filter by date range
+        if start_date and end_date:
+            start_datetime = pd.to_datetime(start_date)
+            end_datetime = pd.to_datetime(end_date) + pd.Timedelta(days=1)  # Include end date
+            filtered_df = filtered_df[
+                (filtered_df['timestamp'] >= start_datetime) &
+                (filtered_df['timestamp'] < end_datetime)
+            ]
+
+        # Filter by specific benchmark run (commit or date-based grouping)
+        if selected_run:
+            if selected_run.startswith("Commit "):
+                # Extract commit_id from the run_id format: "Commit 12345678 (2025-09-16) - ..."
+                try:
+                    commit_id_part = selected_run.split('Commit ')[1].split(' ')[0]  # Get commit hash
+                    # Find all data with this commit_id
+                    filtered_df = filtered_df[filtered_df['commit_id'] == commit_id_part]
+                except (IndexError, ValueError):
+                    # Fallback if parsing fails
+                    logger.warning(f"Failed to parse commit from: {selected_run}")
+            else:
+                # Date-based grouping format: "2025-09-16 - X models, Y scenarios"
+                try:
+                    date_str = selected_run.split(' - ')[0]
+                    selected_date = pd.to_datetime(date_str).date()
+
+                    # Add date column if it does not exist
+                    if 'date' not in filtered_df.columns:
+                        filtered_df = filtered_df.copy()
+                        filtered_df['date'] = filtered_df['timestamp'].dt.date
+
+                    # Filter by date
+                    filtered_df = filtered_df[filtered_df['date'] == selected_date]
+                except (IndexError, ValueError) as e:
+                    logger.warning(f"Failed to parse date from: {selected_run}, error: {e}")
+                    # Return an empty dataframe if parsing fails
+                    filtered_df = filtered_df.iloc[0:0]
+
+        return filtered_df
+
+    def create_performance_comparison_chart(self, filtered_df: pd.DataFrame,
+                                            metric: str = "tokens_per_second_mean") -> go.Figure:
+        """Create performance comparison chart."""
+        if filtered_df.empty:
+            fig = go.Figure()
+            fig.add_annotation(text="No data available for selected filters",
+                               xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
+            return fig
+
+        # Create bar chart comparing performance across models and scenarios
+        fig = px.bar(
+            filtered_df,
+            x='scenario_name',
+            y=metric,
+            color='model_name',
+            title=f'Performance Comparison: {metric.replace("_", " ").title()}',
+            labels={
+                metric: metric.replace("_", " ").title(),
+                'scenario_name': 'Benchmark Scenario',
+                'model_name': 'Model'
+            },
+            hover_data=['gpu_name', 'timestamp']
+        )
+
+        fig.update_layout(
+            xaxis_tickangle=-45,
+            height=500,
+            showlegend=True,
+            plot_bgcolor='rgba(235, 242, 250, 1.0)',
+            paper_bgcolor='rgba(245, 248, 252, 0.7)'
+        )
+
+        return fig
+
+    def create_historical_trend_chart(self, filtered_df: pd.DataFrame,
+                                      metric: str = "tokens_per_second_mean") -> go.Figure:
+        """Create historical trend chart showing performance across benchmark runs for the same scenarios."""
+        if filtered_df.empty:
+            fig = go.Figure()
+            fig.add_annotation(text="No data available for selected filters",
+                               xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
+            return fig
+
+        fig = go.Figure()
+
+        # Group by model and scenario combination to show trends across benchmark runs
+        for model in filtered_df['model_name'].unique():
+            model_data = filtered_df[filtered_df['model_name'] == model]
+
+            for scenario in model_data['scenario_name'].unique():
+                scenario_data = model_data[model_data['scenario_name'] == scenario]
+
+                # Sort by timestamp to show chronological progression
+                scenario_data = scenario_data.sort_values('timestamp')
+
+                # Only show trends if we have multiple data points for this model-scenario combination
+                if len(scenario_data) > 1:
+                    fig.add_trace(go.Scatter(
+                        x=scenario_data['timestamp'],
+                        y=scenario_data[metric],
+                        mode='lines+markers',
+                        name=f'{model} - {scenario}',
+                        line=dict(width=2),
+                        marker=dict(size=6),
+                        hovertemplate=f'<b>{model}</b><br>' +
+                                      f'Scenario: {scenario}<br>' +
+                                      'Time: %{x}<br>' +
+                                      f'{metric.replace("_", " ").title()}: %{{y}}<br>' +
+                                      '<extra></extra>'
+                    ))
+
+        # If no trends were found (all scenarios have only single runs), show a message
+        if len(fig.data) == 0:
+            fig.add_annotation(
+                text="No historical trends available.<br>Each scenario only has one benchmark run.<br>Historical trends require multiple runs of the same scenario over time.",
+                xref="paper", yref="paper", x=0.5, y=0.5,
+                showarrow=False,
+                font=dict(size=14)
+            )
+
+        fig.update_layout(
+            title=f'Historical Trends Across Benchmark Runs: {metric.replace("_", " ").title()}',
+            xaxis_title='Timestamp',
+            yaxis_title=metric.replace("_", " ").title(),
+            height=500,
+            hovermode='closest',
+            showlegend=True,
+            plot_bgcolor='rgba(235, 242, 250, 1.0)',
+            paper_bgcolor='rgba(245, 248, 252, 0.7)'
+        )
+
+        return fig
+
+    def create_gpu_comparison_chart(self, filtered_df: pd.DataFrame) -> go.Figure:
+        """Create GPU utilization and memory usage comparison."""
+        if filtered_df.empty:
+            fig = go.Figure()
+            fig.add_annotation(text="No data available for selected filters",
+                               xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
+            return fig
+
+        # Create subplots for GPU metrics
+        fig = make_subplots(
+            rows=2, cols=2,
+            subplot_titles=('GPU Utilization Mean (%)', 'GPU Memory Used (MB)',
+                            'GPU Utilization vs Performance', 'Memory Usage vs Performance'),
+            specs=[[{"secondary_y": False}, {"secondary_y": False}],
+                   [{"secondary_y": False}, {"secondary_y": False}]]
+        )
+
+        # GPU Utilization bar chart
+        gpu_util_data = filtered_df.groupby(['model_name', 'gpu_name'])['gpu_gpu_utilization_mean'].mean().reset_index()
+        for model in gpu_util_data['model_name'].unique():
+            model_data = gpu_util_data[gpu_util_data['model_name'] == model]
+            fig.add_trace(
+                go.Bar(x=model_data['gpu_name'], y=model_data['gpu_gpu_utilization_mean'],
+                       name=f'{model} - Utilization', showlegend=True),
+                row=1, col=1
+            )
+
+        # GPU Memory Usage bar chart
+        gpu_mem_data = filtered_df.groupby(['model_name', 'gpu_name'])['gpu_gpu_memory_used_mean'].mean().reset_index()
+        for model in gpu_mem_data['model_name'].unique():
+            model_data = gpu_mem_data[gpu_mem_data['model_name'] == model]
+            fig.add_trace(
+                go.Bar(x=model_data['gpu_name'], y=model_data['gpu_gpu_memory_used_mean'],
+                       name=f'{model} - Memory', showlegend=True),
+                row=1, col=2
+            )
+
+        # GPU Utilization vs Performance scatter
+        fig.add_trace(
+            go.Scatter(x=filtered_df['gpu_gpu_utilization_mean'],
+                       y=filtered_df['tokens_per_second_mean'],
+                       mode='markers',
+                       text=filtered_df['model_name'],
+                       name='Util vs Performance',
+                       showlegend=True),
+            row=2, col=1
+        )
+
+        # Memory Usage vs Performance scatter
+        fig.add_trace(
+            go.Scatter(x=filtered_df['gpu_gpu_memory_used_mean'],
+                       y=filtered_df['tokens_per_second_mean'],
+                       mode='markers',
+                       text=filtered_df['model_name'],
+                       name='Memory vs Performance',
+                       showlegend=True),
+            row=2, col=2
+        )
+
+        fig.update_layout(
+            height=800,
+            title_text="GPU Performance Analysis",
+            plot_bgcolor='rgba(235, 242, 250, 1.0)',
+            paper_bgcolor='rgba(245, 248, 252, 0.7)'
+        )
+        return fig
+
+    def create_metrics_summary_table(self, filtered_df: pd.DataFrame) -> pd.DataFrame:
+        """Create summary statistics table."""
+        if filtered_df.empty:
+            return pd.DataFrame({'Message': ['No data available for selected filters']})
+
+        # Key performance metrics
+        metrics_cols = [
+            'tokens_per_second_mean', 'latency_seconds_mean',
+            'time_to_first_token_seconds_mean', 'time_per_output_token_seconds_mean'
+        ]
+
+        summary_data = []
+        for model in filtered_df['model_name'].unique():
+            model_data = filtered_df[filtered_df['model_name'] == model]
+
+            row = {'Model': model, 'Scenarios': len(model_data)}
+            for metric in metrics_cols:
+                if metric in model_data.columns:
+                    row[f'{metric.replace("_", " ").title()} (Avg)'] = f"{model_data[metric].mean():.2f}"
+                    # "Best" is the minimum for latency/time metrics, the maximum for throughput
+                    row[f'{metric.replace("_", " ").title()} (Best)'] = f"{model_data[metric].min() if 'latency' in metric or 'time' in metric else model_data[metric].max():.2f}"
+
+            summary_data.append(row)
+
+        return pd.DataFrame(summary_data)
+
+    def update_dashboard(self, selected_models: List[str], selected_scenarios: List[str],
+                         selected_gpus: List[str], selected_run: str, metric: str):
+        """Update all dashboard components based on current filters."""
+        filtered_df = self.filter_data(
+            selected_models, selected_scenarios, selected_gpus, selected_run
+        )
+
+        # Create charts
+        perf_chart = self.create_performance_comparison_chart(filtered_df, metric)
+        gpu_chart = self.create_gpu_comparison_chart(filtered_df)
+        summary_table = self.create_metrics_summary_table(filtered_df)
+
+        # Summary stats
+        if not filtered_df.empty:
+            summary_text = f"""
+**Data Summary:**
+- Total Scenarios: {len(filtered_df)}
+- Models: {', '.join(filtered_df['model_name'].unique())}
+- Date Range: {filtered_df['timestamp'].min().strftime('%Y-%m-%d')} to {filtered_df['timestamp'].max().strftime('%Y-%m-%d')}
+- Benchmark Runs: {len(filtered_df.groupby(['timestamp', 'file_path']))}
+"""
+        else:
+            summary_text = "No data available for current selection."
+
+        return perf_chart, gpu_chart, summary_table, summary_text
+
+    def update_historical_trends(self, selected_models: List[str], selected_scenarios: List[str],
+                                 selected_gpus: List[str], start_date: str, end_date: str, metric: str):
+        """Update historical trends chart with date filtering."""
+        filtered_df = self.filter_data(
+            selected_models, selected_scenarios, selected_gpus,
+            start_date=start_date, end_date=end_date
+        )
+        trend_chart = self.create_historical_trend_chart(filtered_df, metric)
+        return trend_chart
+
+
+def create_gradio_interface() -> gr.Blocks:
+    """Create the Gradio interface."""
+    dashboard = BenchmarkDashboard()
+    models, scenarios, gpus, benchmark_runs, min_date, max_date = dashboard.get_filter_options()
+
+    # Performance metrics options
+    metric_options = [
+        "tokens_per_second_mean",
+        "latency_seconds_mean",
+        "time_to_first_token_seconds_mean",
+        "time_per_output_token_seconds_mean"
+    ]
+
+    with gr.Blocks(title="LLM Inference Performance Dashboard", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🚀 LLM Inference Performance Dashboard")
+        gr.Markdown("Analyze and compare LLM inference performance across models, scenarios, and hardware configurations.")
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("## Filters")
+
+                model_filter = gr.CheckboxGroup(
+                    choices=models,
+                    value=models,
+                    label="Select Models",
+                    interactive=True
+                )
+                scenario_filter = gr.CheckboxGroup(
+                    choices=scenarios,
+                    value=scenarios[:5] if len(scenarios) > 5 else scenarios,  # Limit initial selection
+                    label="Select Scenarios",
+                    interactive=True
+                )
+                gpu_filter = gr.CheckboxGroup(
+                    choices=gpus,
+                    value=gpus,
+                    label="Select GPUs",
+                    interactive=True
+                )
+                metric_selector = gr.Dropdown(
+                    choices=metric_options,
+                    value="tokens_per_second_mean",
+                    label="Primary Metric",
+                    interactive=True
+                )
+
+                gr.Markdown("### Benchmark Run Selection")
+
+                # Search field for filtering benchmark runs
+                run_search = gr.Textbox(
+                    value="",
+                    label="Search Benchmark Runs",
+                    placeholder="Search by date, commit ID, etc.",
+                    interactive=True
+                )
+
+                # Filtered benchmark run selector
+                benchmark_run_selector = gr.Dropdown(
+                    choices=benchmark_runs,
+                    value=benchmark_runs[0] if benchmark_runs else None,
+                    label="Select Benchmark Run",
+                    info="Choose a specific daily run (all models from the same commit/date)",
+                    interactive=True,
+                    allow_custom_value=False
+                )
+
+            with gr.Column(scale=3):
+                with gr.Tabs():
+                    with gr.TabItem("Performance Comparison"):
+                        perf_plot = gr.Plot(label="Performance Comparison")
+
+                    with gr.TabItem("Historical Trends"):
+                        with gr.Row():
+                            with gr.Column(scale=1):
+                                gr.Markdown("### Date Range for Historical Analysis")
+                                start_date = gr.Textbox(
+                                    value=min_date,
+                                    label="Start Date (YYYY-MM-DD)",
+                                    placeholder="2025-01-01",
+                                    interactive=True
+                                )
+                                end_date = gr.Textbox(
+                                    value=max_date,
+                                    label="End Date (YYYY-MM-DD)",
+                                    placeholder="2025-12-31",
+                                    interactive=True
+                                )
+                            with gr.Column(scale=3):
+                                trend_plot = gr.Plot(label="Historical Trends")
+
+                    with gr.TabItem("GPU Analysis"):
+                        gpu_plot = gr.Plot(label="GPU Performance Analysis")
+
+                    with gr.TabItem("Summary Statistics"):
+                        summary_table = gr.Dataframe(label="Performance Summary")
+
+        with gr.Row():
+            summary_text = gr.Markdown("", label="Summary")
+
+        # Function to filter benchmark runs based on search
+        def filter_benchmark_runs(search_text):
+            if not search_text:
+                return gr.Dropdown(choices=benchmark_runs, value=benchmark_runs[0] if benchmark_runs else None)
+
+            # Filter runs that contain the search text (case insensitive)
+            filtered_runs = [run for run in benchmark_runs if search_text.lower() in run.lower()]
+            return gr.Dropdown(choices=filtered_runs, value=filtered_runs[0] if filtered_runs else None)
+
+        # Update function for main dashboard (excluding historical trends)
+        def update_main(models_selected, scenarios_selected, gpus_selected, run_selected, metric):
+            return dashboard.update_dashboard(
+                models_selected, scenarios_selected, gpus_selected, run_selected, metric
+            )
+
+        # Update function for historical trends
+        def update_trends(models_selected, scenarios_selected, gpus_selected, start_dt, end_dt, metric):
+            return dashboard.update_historical_trends(
+                models_selected, scenarios_selected, gpus_selected, start_dt, end_dt, metric
+            )
+
+        # Set up interactivity for main dashboard
+        main_inputs = [model_filter, scenario_filter, gpu_filter, benchmark_run_selector, metric_selector]
+        main_outputs = [perf_plot, gpu_plot, summary_table, summary_text]
+
+        # Set up interactivity for historical trends
+        trends_inputs = [model_filter, scenario_filter, gpu_filter, start_date, end_date, metric_selector]
+        trends_outputs = [trend_plot]
+
+        # Update main dashboard on filter changes
+        for input_component in main_inputs:
+            input_component.change(fn=update_main, inputs=main_inputs, outputs=main_outputs)
+
+        # Update historical trends on filter changes
+        for input_component in trends_inputs:
+            input_component.change(fn=update_trends, inputs=trends_inputs, outputs=trends_outputs)
+
+        # Connect search field to filter benchmark runs
+        run_search.change(fn=filter_benchmark_runs, inputs=[run_search], outputs=[benchmark_run_selector])
+
+        # Initial load
+        demo.load(fn=update_main, inputs=main_inputs, outputs=main_outputs)
+        demo.load(fn=update_trends, inputs=trends_inputs, outputs=trends_outputs)
+
+    return demo
+
+
+def main():
+    """Launch the dashboard."""
+    logger.info("Starting LLM Inference Performance Dashboard")
+
+    try:
+        demo = create_gradio_interface()
+        demo.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False,
+            show_error=True
+        )
+    except Exception as e:
+        logger.error(f"Error launching dashboard: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    main()
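
Note: app.py imports BenchmarkDataReader from benchmark_data_reader, which is not part of this commit. For trying the dashboard locally before the real reader is in place, a minimal stand-in along the following lines should satisfy app.py. This is a sketch, not the project's actual reader: the read_benchmark_files() name and the column set are taken from how app.py uses the reader, while the synthetic values and everything else about the stub are assumptions.

# stub benchmark_data_reader.py — hypothetical stand-in for the real module.
# Only read_benchmark_files() is exercised by app.py; the data is synthetic.
from datetime import datetime, timedelta

import polars as pl


class BenchmarkDataReader:
    """Minimal stub that returns synthetic data in the shape app.py expects."""

    def read_benchmark_files(self) -> pl.DataFrame:
        base = datetime(2025, 9, 15)
        rows = []
        for day in range(3):  # three "runs" so the historical-trends tab has points
            for model in ("llama-3-8b", "mistral-7b"):  # hypothetical model names
                rows.append({
                    "model_name": model,
                    "scenario_name": "chat-512-tokens",
                    "gpu_name": "A100-80GB",
                    "commit_id": f"abcdef{day:02d}deadbeef",
                    "timestamp": (base + timedelta(days=day)).isoformat(),
                    "file_path": f"results/run_{day}.json",
                    "tokens_per_second_mean": 95.0 + 5 * day,
                    "latency_seconds_mean": 1.2 - 0.05 * day,
                    "time_to_first_token_seconds_mean": 0.21,
                    "time_per_output_token_seconds_mean": 0.011,
                    "gpu_gpu_utilization_mean": 78.0,
                    "gpu_gpu_memory_used_mean": 40960.0,
                })
        return pl.DataFrame(rows)

With a module like this importable as benchmark_data_reader, `python app.py` launches the dashboard on 0.0.0.0:7860 as configured in main(). The runtime dependencies visible in the imports are gradio, plotly, pandas, and polars (plus pyarrow, which polars' to_pandas() typically requires).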