# oktoblast / docs / generate_benchmark_chart.py
# OktoSeek's picture
# Upload 20 files
# 8046910 verified
"""
OktoBLAS Benchmark Chart Generator
==================================
Generates comparison charts with REAL benchmark data
Run: python generate_benchmark_chart.py
"""
import matplotlib.pyplot as plt
import numpy as np
# ============================================================
# REAL BENCHMARK DATA (December 2025)
# ============================================================
# Quick test: training run over 100 examples.
#   modes   - x-axis tick labels, one per configuration
#   time    - presumably seconds per run (not plotted anywhere; TODO confirm unit)
#   speed   - throughput in examples/sec (plotted in chart 1)
#   speedup - factor relative to the first (baseline) entry
quick_test = dict(
    modes=['PyTorch FP32\n(Baseline)', 'OktoBLAS FP16\n(Tensor Cores)'],
    time=[1.97, 1.07],
    speed=[50.8, 93.7],
    speedup=[1.0, 1.85],
)

# Speed test: matrix operations.
#   time_ms - elapsed time in milliseconds (plotted in chart 2)
#   speedup - factor relative to the first (baseline) entry
speed_test = dict(
    modes=[
        'PyTorch FP32\n(Baseline)',
        'OktoBLAS FP16\n(Tensor Cores)',
        'OktoBLAS TURBO\n(Fused)',
    ],
    time_ms=[9.73, 4.86, 3.63],
    speedup=[1.0, 2.0, 2.68],
)

# GEMM kernels: per-operation throughput in TFLOPS (plotted in chart 3).
# 'pytorch' and 'oktoblas' are parallel to 'operations'.
gemm_data = dict(
    operations=['FP16 GEMM\n1024', 'FP16 GEMM\n2048', 'Fused\nAttention'],
    pytorch=[23.3, 34.6, 0.28],
    oktoblas=[29.1, 35.1, 0.96],
)
# ============================================================
# CHART GENERATION
# ============================================================
# Bar colours used by the charts, in order: PyTorch, OktoBLAS, TURBO.
pytorch_color, oktoblas_color, turbo_color = '#ff6b6b', '#4ecdc4', '#ffd93d'

# Switch to the dark theme, then create the single figure that hosts the 2x2 grid.
plt.style.use('dark_background')
fig = plt.figure(figsize=(14, 10))
fig.suptitle(
    'OktoBLAS Performance Benchmark\nby OktoSeek',
    y=0.98,
    color='#00ff88',
    fontweight='bold',
    fontsize=18,
)
# ============================================================
# Chart 1: Training Speed (Top Left)
# ============================================================
# Bar chart of training throughput, one bar per entry in quick_test['modes'].
ax1 = fig.add_subplot(2, 2, 1)
x = np.arange(len(quick_test['modes']))
colors = [pytorch_color, oktoblas_color]
bars = ax1.bar(x, quick_test['speed'], color=colors, alpha=0.85, edgecolor='white', linewidth=2)
ax1.set_ylabel('Speed (examples/sec)', fontsize=12, fontweight='bold')
# The leading emoji was mis-encoded in the title literal; restored to U+1F4CA.
ax1.set_title('📊 Training Speed (100 examples)\n(Higher is Better)', fontsize=13, fontweight='bold', pad=10)
ax1.set_xticks(x)
ax1.set_xticklabels(quick_test['modes'], fontsize=10)
# Fixed upper limit leaves room above the tallest bar for its two-line label.
ax1.set_ylim(0, 120)
ax1.grid(True, alpha=0.2, axis='y')

# Label each bar with its value; non-baseline bars (speedup > 1) also get
# the percentage gain on a second line.
for bar, val, speedup in zip(bars, quick_test['speed'], quick_test['speedup']):
    label = f'{val:.1f} ex/s'
    if speedup > 1:
        label += f'\n(+{(speedup-1)*100:.0f}%)'
    ax1.annotate(label, xy=(bar.get_x() + bar.get_width()/2, bar.get_height()),
                 ha='center', va='bottom', fontsize=10, fontweight='bold', color='white')
# ============================================================
# Chart 2: Matrix Ops Speed (Top Right)
# ============================================================
# Bar chart of matrix-op latency, one bar per entry in speed_test['modes'].
ax2 = fig.add_subplot(2, 2, 2)
x = np.arange(len(speed_test['modes']))
colors = [pytorch_color, oktoblas_color, turbo_color]
bars = ax2.bar(x, speed_test['time_ms'], color=colors, alpha=0.85, edgecolor='white', linewidth=2)
ax2.set_ylabel('Time (ms)', fontsize=12, fontweight='bold')
# The leading symbol was mis-encoded in the title literal; restored to U+26A1.
ax2.set_title('⚡ Matrix Ops Speed\n(Lower is Better)', fontsize=13, fontweight='bold', pad=10)
ax2.set_xticks(x)
ax2.set_xticklabels(speed_test['modes'], fontsize=9)
# Fixed upper limit leaves room above the tallest bar for its label.
ax2.set_ylim(0, 12)
ax2.grid(True, alpha=0.2, axis='y')

# Label each bar with its time; non-baseline bars (speedup > 1) also get
# the speedup factor on a second line.
for bar, val, speedup in zip(bars, speed_test['time_ms'], speed_test['speedup']):
    label = f'{val:.2f}ms'
    if speedup > 1:
        label += f'\n({speedup:.2f}x)'
    ax2.annotate(label, xy=(bar.get_x() + bar.get_width()/2, bar.get_height()),
                 ha='center', va='bottom', fontsize=9, fontweight='bold', color='white')
# ============================================================
# Chart 3: GEMM Performance (Bottom Left)
# ============================================================
# Grouped bar chart: PyTorch vs OktoBLAS throughput for each GEMM operation.
ax3 = fig.add_subplot(2, 2, 3)
x_gemm = np.arange(len(gemm_data['operations']))
width = 0.35
# Each pair is centred on its tick: PyTorch half a bar left, OktoBLAS half a bar right.
bars1 = ax3.bar(x_gemm - width/2, gemm_data['pytorch'], width, label='PyTorch',
                color=pytorch_color, alpha=0.85, edgecolor='white', linewidth=1.5)
bars2 = ax3.bar(x_gemm + width/2, gemm_data['oktoblas'], width, label='OktoBLAS',
                color=oktoblas_color, alpha=0.85, edgecolor='white', linewidth=1.5)
ax3.set_ylabel('TFLOPS', fontsize=12, fontweight='bold')
# The leading emoji was mis-encoded in the title literal; restored to U+1F680.
ax3.set_title('🚀 GEMM Kernel Performance\n(Higher is Better)', fontsize=13, fontweight='bold', pad=10)
ax3.set_xticks(x_gemm)
ax3.set_xticklabels(gemm_data['operations'], fontsize=9)
ax3.legend(loc='upper left', fontsize=10)
ax3.grid(True, alpha=0.2, axis='y')

# Annotate the OktoBLAS bar with its percentage gain over PyTorch, but only
# where OktoBLAS is actually ahead.
for pos, p, o in zip(x_gemm, gemm_data['pytorch'], gemm_data['oktoblas']):
    gain_pct = (o - p) / p * 100
    if gain_pct > 0:
        ax3.annotate(f'+{gain_pct:.0f}%',
                     xy=(pos + width/2, o),
                     ha='center', va='bottom', fontsize=9, color='#00ff88', fontweight='bold')
# ============================================================
# Chart 4: Summary Box (Bottom Right)
# ============================================================
# Text-only panel: a framed summary of the figures plotted in the other charts.
ax4 = fig.add_subplot(2, 2, 4)
ax4.axis('off')

# The frame is assembled programmatically so that every row is padded to the
# same width (the right border lines up) and every number comes from the
# benchmark dicts instead of being a second hand-typed copy that can drift.
_box_width = 50          # interior width of the frame, in characters
_rule = '─' * 44         # underline drawn beneath each section heading

# Percentage gains derived from the stored speedup factors / TFLOPS values.
_train_gain = (quick_test['speedup'][1] - 1) * 100
_fp16_gain = (speed_test['speedup'][1] - 1) * 100
_turbo_gain = (speed_test['speedup'][2] - 1) * 100
_gemm_gain = [(o - p) / p * 100
              for p, o in zip(gemm_data['pytorch'], gemm_data['oktoblas'])]

_rows = [
    '',
    '🚀 TRAINING SPEED (100 examples)',
    _rule,
    f"PyTorch FP32: {quick_test['speed'][0]:.1f} ex/s (baseline)",
    f"OktoBLAS FP16: {quick_test['speed'][1]:.1f} ex/s (+{_train_gain:.0f}% faster)",
    '',
    '⚡ MATRIX OPS SPEED',
    _rule,
    f"PyTorch FP32: {speed_test['time_ms'][0]:.2f} ms (baseline)",
    f"OktoBLAS FP16: {speed_test['time_ms'][1]:.2f} ms ({speed_test['speedup'][1]:.2f}x faster)",
    f"OktoBLAS TURBO: {speed_test['time_ms'][2]:.2f} ms ({speed_test['speedup'][2]:.2f}x faster)",
    '',
    '🔥 SPEEDUP SUMMARY',
    _rule,
    f"• Training: +{_train_gain:.0f}% faster",
    f"• Matrix Ops: +{_fp16_gain:.0f}% faster",
    f"• TURBO Mode: +{_turbo_gain:.0f}% faster",
    # Indices 0 and 2 are the 'FP16 GEMM 1024' and 'Fused Attention' entries
    # of gemm_data['operations'].
    f"• FP16 GEMM 1024: +{_gemm_gain[0]:.0f}% TFLOPS",
    f"• Fused Attention: +{_gemm_gain[2]:.0f}% TFLOPS",
    '',
]

_framed = [
    '╔' + '═' * _box_width + '╗',
    '║' + 'OktoBLAS BENCHMARK SUMMARY'.center(_box_width) + '║',
    '╠' + '═' * _box_width + '╣',
]
# ljust pads by code point; rows containing emoji may still render slightly
# off because emoji glyphs are not guaranteed to be one cell wide.
_framed.extend('║' + ('  ' + row).ljust(_box_width) + '║' for row in _rows)
_framed.append('╚' + '═' * _box_width + '╝')

# Leading and trailing newlines mirror the original triple-quoted literal.
summary_text = '\n' + '\n'.join(_framed) + '\n'

ax4.text(0.5, 0.5, summary_text, transform=ax4.transAxes, fontsize=9,
         verticalalignment='center', horizontalalignment='center',
         fontfamily='monospace', color='white',
         bbox=dict(boxstyle='round,pad=0.5', facecolor='#1a1a2e',
                   edgecolor='#4ecdc4', linewidth=2))
# Fit the subplots into the rectangle below the top 5% of the figure
# (presumably kept free for the suptitle) and above a small bottom margin.
plt.tight_layout(rect=[0, 0.02, 1, 0.95])
# Save
plt.savefig('benchmark_comparison.png', dpi=150, facecolor='#0d0d0d',
            edgecolor='none', bbox_inches='tight', pad_inches=0.3)
# The emoji in the two messages below were mis-encoded; restored to U+2705 and U+1F4CA.
print("✅ Saved: benchmark_comparison.png")
print("\n📊 Chart generated with REAL benchmark data!")
# Report the headline speedups straight from the benchmark dicts so the
# console output cannot disagree with what was plotted.
print(f" Training: {quick_test['speedup'][-1]:.2f}x faster")
print(f" Matrix Ops: {max(speed_test['speedup']):.2f}x faster")