# Gradio web UI for the model benchmark agent (Hugging Face Space entry point).
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from typing import List, Tuple | |
| from agent.benchmarker import ModelBenchmarker | |
| from core.benchmark import BenchmarkConfig | |
| from core.utils import get_system_info | |
class GradioApp:
    """Gradio web interface for model benchmarking.

    Wraps a `ModelBenchmarker` with a tabbed Blocks UI: single-run
    benchmarks, side-by-side quantization comparisons, run history,
    and system information.
    """

    def __init__(self):
        # Reused for single-run benchmarks; comparisons create fresh
        # instances so each optimization starts from a clean model.
        self.benchmarker = ModelBenchmarker()
        # Full result dicts of every completed run, in chronological order.
        self.history = []

    def benchmark_single(
        self,
        model_name: str,
        dataset_name: str,
        num_samples: int,
        max_tokens: int,
        quantization: str,
        torch_compile: bool,
        perplexity: bool,
        device: str
    ) -> Tuple[str, str, str]:
        """Run a single model benchmark and format the results for the UI.

        Returns:
            (markdown summary, HTML sample table, status message); on
            failure the summary and status carry the error text instead
            of raising, so the UI always gets something to display.
        """
        try:
            config = BenchmarkConfig(
                model_name=model_name,
                dataset_name=dataset_name,
                # Gradio sliders may deliver floats; the config expects ints.
                num_samples=int(num_samples),
                max_new_tokens=int(max_tokens),
                quantization_type=quantization,
                use_torch_compile=torch_compile,
                calculate_perplexity=perplexity,
                # "auto" means let the benchmarker pick the device.
                device=device if device != "auto" else None
            )
            results = self.benchmarker.run_benchmark(config)
            self.history.append(results)

            # Format the summary as markdown.
            summary = results["summary"]
            summary_text = f"""## Benchmark Results
**Model**: {summary['model_name']}
**Device**: {summary['device']}
**Optimization**: {summary['optimization_type']}
### Performance Metrics
- **Throughput**: {summary['avg_tokens_per_second']:.2f} tokens/second
- **First Token Latency**: {summary['avg_first_token_latency_seconds']:.4f} seconds
- **Peak Memory**: {summary['max_memory_mb']:.2f} MB
- **Samples**: {summary['num_samples']}
{f"- **Perplexity**: {summary['avg_perplexity']:.4f}" if summary.get('avg_perplexity') else ""}
"""
            # Per-sample results table (first 10 rows only, to keep the page light).
            samples_df = pd.DataFrame(results['samples'])
            if not samples_df.empty:
                display_cols = ['prompt_id', 'input_tokens', 'output_tokens', 'tokens_per_second', 'first_token_latency_seconds']
                samples_table = samples_df[display_cols].head(10).to_html(index=False)
            else:
                samples_table = "No sample data available"
            return summary_text, samples_table, "✅ Benchmark completed!"
        except Exception as e:
            # Surface the error in both the results pane and the status box.
            return f"❌ Error: {str(e)}", "", f"❌ Failed: {str(e)}"

    def compare_optimizations(
        self,
        model_name: str,
        dataset_name: str,
        num_samples: int,
        optimizations: List[str]
    ) -> Tuple[str, "go.Figure", str]:
        """Benchmark the same model under several quantization settings.

        Returns:
            (markdown summary, plotly figure, status message); errors are
            reported in the text outputs with an empty figure.
        """
        # Guard the empty selection explicitly — otherwise it surfaces as an
        # opaque pandas KeyError from the generic handler below.
        if not optimizations:
            return ("❌ Error: select at least one optimization to compare.",
                    go.Figure(),
                    "❌ Failed: no optimizations selected")
        try:
            results = []
            for opt in optimizations:
                config = BenchmarkConfig(
                    model_name=model_name,
                    dataset_name=dataset_name,
                    num_samples=int(num_samples),
                    quantization_type=opt,
                    calculate_perplexity=True
                )
                benchmarker = ModelBenchmarker()  # Fresh instance per optimization
                result = benchmarker.run_benchmark(config)
                # Record in history so comparison runs show up like single runs.
                self.history.append(result)
                results.append(result["summary"])

            # Tabulate summaries for plotting.
            df = pd.DataFrame(results)

            # Dual-axis plot: throughput bars (left), memory line (right).
            fig = go.Figure()
            fig.add_trace(go.Bar(
                name='Throughput',
                x=df['optimization_type'],
                y=df['avg_tokens_per_second'],
                yaxis='y'
            ))
            fig.add_trace(go.Scatter(
                name='Memory (MB)',
                x=df['optimization_type'],
                y=df['max_memory_mb'],
                yaxis='y2',
                mode='lines+markers',
                line=dict(color='red')
            ))
            fig.update_layout(
                title=f'Optimization Comparison: {model_name}',
                xaxis_title='Optimization',
                yaxis=dict(title='Throughput (tok/s)', side='left'),
                yaxis2=dict(title='Memory (MB)', side='right', overlaying='y')
            )

            # Headline winners plus a full markdown table.
            best_throughput = max(results, key=lambda x: x['avg_tokens_per_second'])
            best_memory = min(results, key=lambda x: x['max_memory_mb'])
            summary = f"""## Comparison Results
### Best Configurations
- **Highest Throughput**: {best_throughput['optimization_type']} ({best_throughput['avg_tokens_per_second']:.2f} tok/s)
- **Lowest Memory**: {best_memory['optimization_type']} ({best_memory['max_memory_mb']:.2f} MB)
### Results Table
| Optimization | Throughput | Memory | Perplexity |
|--------------|-----------|---------|-----------|
{chr(10).join([f"| {r['optimization_type']} | {r['avg_tokens_per_second']:.2f} | {r['max_memory_mb']:.2f} | {r.get('avg_perplexity', 'N/A')} |" for r in results])}
"""
            return summary, fig, "✅ Comparison completed!"
        except Exception as e:
            return f"❌ Error: {str(e)}", go.Figure(), f"❌ Failed: {str(e)}"

    def get_history(self) -> str:
        """Render all recorded benchmark runs as a markdown report."""
        if not self.history:
            return "No benchmarks run yet."
        history_text = "# Benchmark History\n\n"
        for i, result in enumerate(self.history):
            summary = result["summary"]
            history_text += f"""## Run {i+1}
- **Model**: {summary['model_name']}
- **Time**: {summary['timestamp']}
- **Throughput**: {summary['avg_tokens_per_second']:.2f} tok/s
- **Memory**: {summary['max_memory_mb']:.2f} MB
---
"""
        return history_text

    def create_interface(self):
        """Create and return the tabbed Gradio Blocks interface."""
        with gr.Blocks(title="Model Benchmark Agent", theme=gr.themes.Soft()) as app:
            gr.Markdown("# 🚀 Model Benchmark Agent")
            gr.Markdown("Benchmark Hugging Face models with optimum-quanto quantization")
            with gr.Tabs():
                # Single Benchmark Tab
                with gr.TabItem("Single Benchmark"):
                    with gr.Row():
                        with gr.Column():
                            model_input = gr.Textbox("facebook/opt-iml-max-1.3b", label="Model Name")
                            dataset_input = gr.Textbox("tatsu-lab/alpaca", label="Dataset")
                            num_samples = gr.Slider(1, 100, 20, step=1, label="Samples")
                            # step=1 keeps the token budget an integer.
                            max_tokens = gr.Slider(10, 512, 100, step=1, label="Max Tokens")
                            quantization = gr.Dropdown(
                                ["none", "int8", "int4", "int2", "float8"],
                                value="none",
                                label="Quantization"
                            )
                            torch_compile = gr.Checkbox(label="Use torch.compile")
                            perplexity = gr.Checkbox(label="Calculate Perplexity")
                            device = gr.Dropdown(["auto", "cuda", "cpu", "mps"], value="auto", label="Device")
                            benchmark_btn = gr.Button("🚀 Run Benchmark", variant="primary")
                        with gr.Column():
                            results_md = gr.Markdown()
                            samples_html = gr.HTML()
                            status_text = gr.Textbox(label="Status", interactive=False)
                    benchmark_btn.click(
                        self.benchmark_single,
                        inputs=[model_input, dataset_input, num_samples, max_tokens, quantization, torch_compile, perplexity, device],
                        outputs=[results_md, samples_html, status_text]
                    )
                # Comparison Tab
                with gr.TabItem("Compare Optimizations"):
                    with gr.Row():
                        with gr.Column():
                            comp_model = gr.Textbox("facebook/opt-iml-max-1.3b", label="Model")
                            comp_dataset = gr.Textbox("tatsu-lab/alpaca", label="Dataset")
                            comp_samples = gr.Slider(1, 50, 10, step=1, label="Samples")
                            comp_opts = gr.CheckboxGroup(
                                ["none", "int8", "int4", "int2"],
                                value=["none", "int8"],
                                label="Optimizations to Compare"
                            )
                            compare_btn = gr.Button("📊 Compare", variant="primary")
                        with gr.Column():
                            comp_results = gr.Markdown()
                            comp_plot = gr.Plot()
                            comp_status = gr.Textbox(label="Status", interactive=False)
                    compare_btn.click(
                        self.compare_optimizations,
                        inputs=[comp_model, comp_dataset, comp_samples, comp_opts],
                        outputs=[comp_results, comp_plot, comp_status]
                    )
                # History Tab
                with gr.TabItem("History"):
                    history_md = gr.Markdown()
                    refresh_btn = gr.Button("🔄 Refresh")
                    refresh_btn.click(self.get_history, outputs=[history_md])
                # System Info Tab
                with gr.TabItem("System Info"):
                    sys_info_md = gr.Markdown()
                    sys_info_btn = gr.Button("📋 Get System Info")
                    sys_info_btn.click(get_system_info, outputs=[sys_info_md])
        return app
def launch_app():
    """Build the benchmark UI and serve it on all interfaces at port 7860."""
    interface = GradioApp().create_interface()
    launch_options = dict(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        mcp_server=True,
    )
    interface.launch(**launch_options)