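"""Gradio web interface for the model benchmark agent.

Tabs: single-model benchmarking, optimization comparison, run history,
and system information.
"""
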
import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import List, Tuple

from agent.benchmarker import ModelBenchmarker
from core.benchmark import BenchmarkConfig
from core.utils import get_system_info

class GradioApp:
    """Gradio web interface for model benchmarking."""
    
    def __init__(self):
        self.benchmarker = ModelBenchmarker()
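        # Full result dicts from completed runs, newest last; the History
        # tab re-renders these via get_history().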
        self.history = []
        
    def benchmark_single(
        self,
        model_name: str,
        dataset_name: str,
        num_samples: int,
        max_tokens: int,
        quantization: str,
        torch_compile: bool,
        perplexity: bool,
        device: str
    ) -> Tuple[str, str, str]:
        """Run single model benchmark."""
        try:
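            # Map the UI values onto a benchmark config; "auto" becomes None
            # so the backend can pick the device itself.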
            config = BenchmarkConfig(
                model_name=model_name,
                dataset_name=dataset_name,
                num_samples=int(num_samples),  # sliders can hand back floats
                max_new_tokens=int(max_tokens),
                quantization_type=quantization,
                use_torch_compile=torch_compile,
                calculate_perplexity=perplexity,
                device=device if device != "auto" else None
            )
            
            results = self.benchmarker.run_benchmark(config)  # {"summary": {...}, "samples": [...]}
            self.history.append(results)
            
            # Format summary
            summary = results["summary"]
            summary_text = f"""## Benchmark Results

**Model**: {summary['model_name']}  
**Device**: {summary['device']}  
**Optimization**: {summary['optimization_type']}

### Performance Metrics
- **Throughput**: {summary['avg_tokens_per_second']:.2f} tokens/second
- **First Token Latency**: {summary['avg_first_token_latency_seconds']:.4f} seconds  
- **Peak Memory**: {summary['max_memory_mb']:.2f} MB
- **Samples**: {summary['num_samples']}
{f"- **Perplexity**: {summary['avg_perplexity']:.4f}" if summary.get('avg_perplexity') else ""}
            """
            
            # Sample results table
            samples_df = pd.DataFrame(results['samples'])
            if not samples_df.empty:
                display_cols = ['prompt_id', 'input_tokens', 'output_tokens', 'tokens_per_second', 'first_token_latency_seconds']
                # Keep only the columns that are actually present so one
                # missing metric doesn't raise a KeyError.
                display_cols = [c for c in display_cols if c in samples_df.columns]
                samples_table = samples_df[display_cols].head(10).to_html(index=False)
            else:
                samples_table = "No sample data available"
            
            return summary_text, samples_table, "✅ Benchmark completed!"
            
        except Exception as e:
            return f"❌ Error: {str(e)}", "", f"❌ Failed: {str(e)}"
    
    def compare_optimizations(
        self,
        model_name: str,
        dataset_name: str,
        num_samples: int,
        optimizations: List[str]
    ) -> Tuple[str, go.Figure, str]:
        """Compare different quantization."""
        try:
            if not optimizations:
                return "Select at least one optimization to compare.", go.Figure(), "⚠️ Nothing selected"
            
            results = []
            
            for opt in optimizations:
                config = BenchmarkConfig(
                    model_name=model_name,
                    dataset_name=dataset_name,
                    num_samples=int(num_samples),
                    quantization_type=opt,
                    calculate_perplexity=True
                )
                
                benchmarker = ModelBenchmarker()  # Fresh instance so state from the previous run isn't reused
                result = benchmarker.run_benchmark(config)
                results.append(result["summary"])
            
            # Collect the run summaries into a DataFrame for plotting
            df = pd.DataFrame(results)
            
            # Dual-axis figure: throughput as bars on the left axis,
            # peak memory as a line on the right axis.
            fig = go.Figure()
            
            fig.add_trace(go.Bar(
                name='Throughput',
                x=df['optimization_type'],
                y=df['avg_tokens_per_second'],
                yaxis='y'
            ))
            
            fig.add_trace(go.Scatter(
                name='Memory (MB)',
                x=df['optimization_type'], 
                y=df['max_memory_mb'],
                yaxis='y2',
                mode='lines+markers',
                line=dict(color='red')
            ))
            
            fig.update_layout(
                title=f'Optimization Comparison: {model_name}',
                xaxis_title='Optimization',
                yaxis=dict(title='Throughput (tok/s)', side='left'),
                yaxis2=dict(title='Memory (MB)', side='right', overlaying='y')
            )
            
            # Summary text
            best_throughput = max(results, key=lambda x: x['avg_tokens_per_second'])
            best_memory = min(results, key=lambda x: x['max_memory_mb'])
            
            # Build the table rows outside the f-string; perplexity may be
            # missing or None for a run, in which case show "N/A".
            table_rows = "\n".join(
                f"| {r['optimization_type']} | {r['avg_tokens_per_second']:.2f} "
                f"| {r['max_memory_mb']:.2f} | "
                + (f"{r['avg_perplexity']:.4f}" if r.get('avg_perplexity') else "N/A")
                + " |"
                for r in results
            )
            
            summary = f"""## Comparison Results

### Best Configurations
- **Highest Throughput**: {best_throughput['optimization_type']} ({best_throughput['avg_tokens_per_second']:.2f} tok/s)
- **Lowest Memory**: {best_memory['optimization_type']} ({best_memory['max_memory_mb']:.2f} MB)

### Results Table
| Optimization | Throughput (tok/s) | Memory (MB) | Perplexity |
|--------------|--------------------|-------------|------------|
{table_rows}
            """
            
            return summary, fig, "✅ Comparison completed!"
            
        except Exception as e:
            return f"❌ Error: {str(e)}", go.Figure(), f"❌ Failed: {str(e)}"
    
    def get_history(self) -> str:
        """Get benchmark history."""
        if not self.history:
            return "No benchmarks run yet."
        
        history_text = "# Benchmark History\n\n"
        for i, result in enumerate(self.history):
            summary = result["summary"]
            history_text += f"""## Run {i+1}
- **Model**: {summary['model_name']}
- **Time**: {summary['timestamp']}  
- **Throughput**: {summary['avg_tokens_per_second']:.2f} tok/s
- **Memory**: {summary['max_memory_mb']:.2f} MB

---
            """
        
        return history_text
    
    def create_interface(self):
        """Create Gradio interface."""
        with gr.Blocks(title="Model Benchmark Agent", theme=gr.themes.Soft()) as app:
            gr.Markdown("# 🚀 Model Benchmark Agent")
            gr.Markdown("Benchmark Hugging Face models with optimum-quanto quantization")
            
            with gr.Tabs():
                # Single Benchmark Tab
                with gr.TabItem("Single Benchmark"):
                    with gr.Row():
                        with gr.Column():
                            model_input = gr.Textbox("facebook/opt-iml-max-1.3b", label="Model Name")
                            dataset_input = gr.Textbox("tatsu-lab/alpaca", label="Dataset")
                            num_samples = gr.Slider(1, 100, 20, step=1, label="Samples")
                            max_tokens = gr.Slider(10, 512, 100, step=1, label="Max Tokens")
                            quantization = gr.Dropdown(
                                ["none", "int8", "int4", "int2", "float8"],
                                value="none",
                                label="Quantization"
                            )
                            torch_compile = gr.Checkbox(label="Use torch.compile")
                            perplexity = gr.Checkbox(label="Calculate Perplexity")
                            device = gr.Dropdown(["auto", "cuda", "cpu", "mps"], value="auto", label="Device")
                            
                            benchmark_btn = gr.Button("🚀 Run Benchmark", variant="primary")
                        
                        with gr.Column():
                            results_md = gr.Markdown()
                            samples_html = gr.HTML()
                            status_text = gr.Textbox(label="Status", interactive=False)
                    
                    benchmark_btn.click(
                        self.benchmark_single,
                        inputs=[model_input, dataset_input, num_samples, max_tokens, quantization, torch_compile, perplexity, device],
                        outputs=[results_md, samples_html, status_text]
                    )
                
                # Comparison Tab
                with gr.TabItem("Compare Optimizations"):
                    with gr.Row():
                        with gr.Column():
                            comp_model = gr.Textbox("facebook/opt-iml-max-1.3b", label="Model")
                            comp_dataset = gr.Textbox("tatsu-lab/alpaca", label="Dataset")
                            comp_samples = gr.Slider(1, 50, 10, step=1, label="Samples")
                            comp_opts = gr.CheckboxGroup(
                                ["none", "int8", "int4", "int2"],
                                value=["none", "int8"],
                                label="Optimizations to Compare"
                            )
                            
                            compare_btn = gr.Button("📊 Compare", variant="primary")
                        
                        with gr.Column():
                            comp_results = gr.Markdown()
                            comp_plot = gr.Plot()
                            comp_status = gr.Textbox(label="Status", interactive=False)
                    
                    compare_btn.click(
                        self.compare_optimizations,
                        inputs=[comp_model, comp_dataset, comp_samples, comp_opts],
                        outputs=[comp_results, comp_plot, comp_status]
                    )
                
                # History Tab
                with gr.TabItem("History"):
                    history_md = gr.Markdown()
                    refresh_btn = gr.Button("🔄 Refresh")
                    refresh_btn.click(self.get_history, outputs=[history_md])
                
                # System Info Tab
                with gr.TabItem("System Info"):
                    sys_info_md = gr.Markdown()
                    sys_info_btn = gr.Button("📋 Get System Info")
                    sys_info_btn.click(get_system_info, outputs=[sys_info_md])
        
        return app

def launch_app():
    """Launch the Gradio app."""
    app = GradioApp()
    interface = app.create_interface()
    interface.launch(
        share=False,
        server_name="0.0.0.0",  # listen on all interfaces
        server_port=7860,
        show_error=True,
        mcp_server=True,  # expose the app over MCP; requires the gradio[mcp] extra
    )
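
# Assumed entry point so the module can be run directly; adjust if this
# project launches the app from elsewhere.
if __name__ == "__main__":
    launch_app()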