"""
OktoBLAS Benchmark Chart Generator
==================================
Generates comparison charts with REAL benchmark data.

Run: python generate_benchmark_chart.py
"""

import matplotlib.pyplot as plt
import numpy as np


# ---------------------------------------------------------------------------
# Benchmark results (hard-coded measurements; every list in a dict is
# index-aligned with that dict's 'modes' / 'operations' labels)
# ---------------------------------------------------------------------------

# Training run. 'speed' is plotted as examples/sec; 'speedup' is the factor
# relative to the first (baseline) entry.
# NOTE(review): 'time' looks like wall-clock seconds for the whole run --
# confirm the unit; it is not plotted anywhere in this script.
quick_test = {
    'modes': ['PyTorch FP32\n(Baseline)', 'OktoBLAS FP16\n(Tensor Cores)'],
    'time': [1.97, 1.07],
    'speed': [50.8, 93.7],
    'speedup': [1.0, 1.85],
}

# Matrix-operation timings in milliseconds; 'speedup' is the factor relative
# to the first (baseline) entry.
speed_test = {
    'modes': ['PyTorch FP32\n(Baseline)', 'OktoBLAS FP16\n(Tensor Cores)', 'OktoBLAS TURBO\n(Fused)'],
    'time_ms': [9.73, 4.86, 3.63],
    'speedup': [1.0, 2.0, 2.68],
}

# Per-kernel throughput for both implementations (plotted on a TFLOPS axis).
gemm_data = {
    'operations': ['FP16 GEMM\n1024', 'FP16 GEMM\n2048', 'Fused\nAttention'],
    'pytorch': [23.3, 34.6, 0.28],
    'oktoblas': [29.1, 35.1, 0.96],
}

# ---------------------------------------------------------------------------
# Figure setup: one dark-themed 14x10 inch figure holding a 2x2 grid of panels
# ---------------------------------------------------------------------------
plt.style.use('dark_background')
fig = plt.figure(figsize=(14, 10))

fig.suptitle(
    'OktoBLAS Performance Benchmark\nby OktoSeek',
    fontsize=18, fontweight='bold', color='#00ff88', y=0.98,
)

# One bar colour per implementation, shared by all panels.
pytorch_color = '#ff6b6b'
oktoblas_color = '#4ecdc4'
turbo_color = '#ffd93d'

# ---------------------------------------------------------------------------
# Panel 1 (top-left): training throughput -- higher is better
# ---------------------------------------------------------------------------
ax1 = fig.add_subplot(2, 2, 1)
x = np.arange(len(quick_test['modes']))
colors = [pytorch_color, oktoblas_color]
bars = ax1.bar(x, quick_test['speed'], color=colors, alpha=0.85,
               edgecolor='white', linewidth=2)

ax1.set_ylabel('Speed (examples/sec)', fontsize=12, fontweight='bold')
# Plain-text title: the decorative glyph that used to lead it was corrupted
# beyond recovery and showed up as a stray Greek letter.
ax1.set_title('Training Speed (100 examples)\n(Higher is Better)',
              fontsize=13, fontweight='bold', pad=10)
ax1.set_xticks(x)
ax1.set_xticklabels(quick_test['modes'], fontsize=10)
ax1.set_ylim(0, 120)  # headroom above the tallest bar for its two-line label
ax1.grid(True, alpha=0.2, axis='y')

# Label each bar with its value; non-baseline bars also get the gain in %.
for bar, val, speedup in zip(bars, quick_test['speed'], quick_test['speedup']):
    label = f'{val:.1f} ex/s'
    if speedup > 1:
        label += f'\n(+{(speedup - 1) * 100:.0f}%)'
    ax1.annotate(label,
                 xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
                 ha='center', va='bottom', fontsize=10, fontweight='bold',
                 color='white')

# ---------------------------------------------------------------------------
# Panel 2 (top-right): matrix-op latency -- lower is better
# ---------------------------------------------------------------------------
ax2 = fig.add_subplot(2, 2, 2)
x = np.arange(len(speed_test['modes']))
colors = [pytorch_color, oktoblas_color, turbo_color]
bars = ax2.bar(x, speed_test['time_ms'], color=colors, alpha=0.85,
               edgecolor='white', linewidth=2)

ax2.set_ylabel('Time (ms)', fontsize=12, fontweight='bold')
# '\u26a1' is the high-voltage (lightning) sign; written as an escape so the
# title survives a re-save of this file in a non-UTF-8 encoding.
ax2.set_title('\u26a1 Matrix Ops Speed\n(Lower is Better)',
              fontsize=13, fontweight='bold', pad=10)
ax2.set_xticks(x)
ax2.set_xticklabels(speed_test['modes'], fontsize=9)
ax2.set_ylim(0, 12)  # headroom above the tallest bar for its label
ax2.grid(True, alpha=0.2, axis='y')

# Label each bar with its time; non-baseline bars also get the speedup factor.
for bar, val, speedup in zip(bars, speed_test['time_ms'], speed_test['speedup']):
    label = f'{val:.2f}ms'
    if speedup > 1:
        label += f'\n({speedup:.2f}x)'
    ax2.annotate(label,
                 xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
                 ha='center', va='bottom', fontsize=9, fontweight='bold',
                 color='white')

# ---------------------------------------------------------------------------
# Panel 3 (bottom-left): per-kernel throughput, grouped bars -- higher is better
# ---------------------------------------------------------------------------
ax3 = fig.add_subplot(2, 2, 3)
x_gemm = np.arange(len(gemm_data['operations']))
width = 0.35  # width of one bar; each group holds two bars side by side

bars1 = ax3.bar(x_gemm - width / 2, gemm_data['pytorch'], width,
                label='PyTorch', color=pytorch_color, alpha=0.85,
                edgecolor='white', linewidth=1.5)
bars2 = ax3.bar(x_gemm + width / 2, gemm_data['oktoblas'], width,
                label='OktoBLAS', color=oktoblas_color, alpha=0.85,
                edgecolor='white', linewidth=1.5)

ax3.set_ylabel('TFLOPS', fontsize=12, fontweight='bold')
# Plain-text title: the decorative glyph that used to lead it was corrupted
# beyond recovery and showed up as a stray Greek letter.
ax3.set_title('GEMM Kernel Performance\n(Higher is Better)',
              fontsize=13, fontweight='bold', pad=10)
ax3.set_xticks(x_gemm)
ax3.set_xticklabels(gemm_data['operations'], fontsize=9)
ax3.legend(loc='upper left', fontsize=10)
ax3.grid(True, alpha=0.2, axis='y')

# Above each OktoBLAS bar that beats its PyTorch counterpart, show the
# relative gain in percent.
for pos, p, o in zip(x_gemm, gemm_data['pytorch'], gemm_data['oktoblas']):
    speedup = (o - p) / p * 100
    if speedup > 0:
        ax3.annotate(f'+{speedup:.0f}%',
                     xy=(pos + width / 2, o),
                     ha='center', va='bottom', fontsize=9, color='#00ff88',
                     fontweight='bold')

# ---------------------------------------------------------------------------
# Panel 4 (bottom-right): boxed text summary instead of a plot
# ---------------------------------------------------------------------------
ax4 = fig.add_subplot(2, 2, 4)
ax4.axis('off')

# The frame is assembled from escapes and computed padding rather than typed
# by hand: the box-drawing characters cannot be corrupted by a re-save in
# another encoding, and the right border always lines up.
_BOX_WIDTH = 50                      # characters between the side borders
_H, _V = '\u2550', '\u2551'          # double horizontal / double vertical
_RULE = '\u2500' * 44                # light rule under each section heading


def _summary_row(text=''):
    """Return *text* indented two columns and framed by the side borders."""
    return _V + ('  ' + text).ljust(_BOX_WIDTH) + _V


def _metric_row(name, value, note):
    """Return a framed 'name: value note' row with fixed column positions."""
    return _summary_row((name + ':').ljust(17) + value.ljust(11) + note)


def _bullet_row(name, gain):
    """Return a framed bullet row of the form '* name: gain'."""
    return _summary_row('\u2022 ' + (name + ':').ljust(17) + gain)


# Section headings are plain text: wide pictographs would break the
# fixed-width alignment of the frame.
# The figures repeat the values in quick_test / speed_test / gemm_data and
# must be updated together with them.
_summary_lines = [
    '\u2554' + _H * _BOX_WIDTH + '\u2557',                    # top border
    _V + 'OktoBLAS BENCHMARK SUMMARY'.center(_BOX_WIDTH) + _V,
    '\u2560' + _H * _BOX_WIDTH + '\u2563',                    # header divider
    _summary_row(),
    _summary_row('TRAINING SPEED (100 examples)'),
    _summary_row(_RULE),
    _metric_row('PyTorch FP32', '50.8 ex/s', '(baseline)'),
    _metric_row('OktoBLAS FP16', '93.7 ex/s', '(+85% faster)'),
    _summary_row(),
    _summary_row('MATRIX OPS SPEED'),
    _summary_row(_RULE),
    _metric_row('PyTorch FP32', '9.73 ms', '(baseline)'),
    _metric_row('OktoBLAS FP16', '4.86 ms', '(2.00x faster)'),
    _metric_row('OktoBLAS TURBO', '3.63 ms', '(2.68x faster)'),
    _summary_row(),
    _summary_row('SPEEDUP SUMMARY'),
    _summary_row(_RULE),
    _bullet_row('Training', '+85% faster'),
    _bullet_row('Matrix Ops', '+100% faster'),
    _bullet_row('TURBO Mode', '+168% faster'),
    _bullet_row('FP16 GEMM 1024', '+25% TFLOPS'),
    _bullet_row('Fused Attention', '+243% TFLOPS'),
    _summary_row(),
    '\u255a' + _H * _BOX_WIDTH + '\u255d',                    # bottom border
]
# Leading and trailing newline kept so the box stays vertically centred.
summary_text = '\n' + '\n'.join(_summary_lines) + '\n'

# Monospace font keeps the frame columns aligned.
ax4.text(0.5, 0.5, summary_text, transform=ax4.transAxes, fontsize=9,
         verticalalignment='center', horizontalalignment='center',
         fontfamily='monospace', color='white',
         bbox=dict(boxstyle='round,pad=0.5', facecolor='#1a1a2e',
                   edgecolor='#4ecdc4', linewidth=2))

# ---------------------------------------------------------------------------
# Layout, save to disk, and report on the console
# ---------------------------------------------------------------------------
# Reserve a strip at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.02, 1, 0.95])

# Single source for the file name so the saved path and the message agree.
output_path = 'benchmark_comparison.png'
plt.savefig(output_path, dpi=150, facecolor='#0d0d0d',
            edgecolor='none', bbox_inches='tight', pad_inches=0.3)
# '\u2705' is the check-mark emoji, written as an escape so the literal
# cannot be split or corrupted by a re-save in another encoding.
print(f"\u2705 Saved: {output_path}")

# Headline figures are read from the benchmark dicts (best speedup of each
# test) so the console output cannot drift from the plotted data.
print("\nChart generated with REAL benchmark data!")
print(f" Training: {max(quick_test['speedup']):.2f}x faster")
print(f" Matrix Ops: {max(speed_test['speedup']):.2f}x faster")