File size: 3,499 Bytes
"""Generate additional benchmarking plots."""
import json
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Directory that holds one '<model>_samples.json' file per benchmarked model.
RESULTS_DIR = '/Users/abhdey/Documents/My LLM/Research & Experiment/serving-results'
# Directory the generated PNG plots are written into.
OUTPUT_DIR = '/Users/abhdey/Documents/My LLM/Research & Experiment/TinyLLMExperiment/serving-results'

# Parallel sequences: entry i of each list describes the same model
# (file-name stem, legend label, scatter colour).
models = ['tinystories_10m', 'tinystories_7m', 'tinystories_5m', 'tinystories_2_5m']
labels = ['10M', '7M', '5M', '2.5M']
colors = ['#0969da', '#e5383b', '#2d6a4f', '#9b5de5']

# Maps each legend label to the list of per-sample 'metrics' dicts read from
# that model's samples file, in file order.
data = {}
for model, label in zip(models, labels):
    # JSON text is UTF-8; pass the encoding explicitly so decoding does not
    # depend on the platform's default locale encoding.
    with open(f'{RESULTS_DIR}/{model}_samples.json', encoding='utf-8') as f:
        samples = json.load(f)
    data[label] = [s['metrics'] for s in samples]
# Plot 4: Tokens Generated vs Repetition Rate
# One scatter series per model: x is the sample's token count, y is its
# repetition rate scaled by 100 for the percent axis.
fig, ax = plt.subplots(figsize=(10, 6))
for label, color in zip(labels, colors):
    metrics = data[label]
    token_counts = [m['tokens_generated'] for m in metrics]
    repetition_pct = [m['repetition_rate'] * 100 for m in metrics]
    ax.scatter(token_counts, repetition_pct, c=color, alpha=0.6, s=30, label=label)

# NOTE(review): the title states 100 samples per model; this is not derived
# from the loaded data — confirm it matches the sample files.
ax.set_title('Tokens Generated vs Repetition Rate (100 samples per model)', fontsize=13)
ax.set_xlabel('Tokens Generated', fontsize=12)
ax.set_ylabel('Repetition Rate (%)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11)

fig.tight_layout()
fig.savefig(f'{OUTPUT_DIR}/plot4_tokens_vs_repetition.png', dpi=150)
plt.close(fig)
print("Plot 4 saved: tokens vs repetition")
# Plot 5: Tokens Generated vs Vocab Diversity
# One scatter series per model: x is the sample's token count, y is its
# 'unique_tokens' metric scaled by 100.
# NOTE(review): the y axis is labelled as a percentage, which presumes
# 'unique_tokens' is a 0-1 ratio; if it is a raw count the axis label is
# misleading — confirm against the sample schema.
fig, ax = plt.subplots(figsize=(10, 6))
for label, color in zip(labels, colors):
    metrics = data[label]
    token_counts = [m['tokens_generated'] for m in metrics]
    diversity_pct = [m['unique_tokens'] * 100 for m in metrics]
    ax.scatter(token_counts, diversity_pct, c=color, alpha=0.6, s=30, label=label)

# NOTE(review): the title states 100 samples per model; this is not derived
# from the loaded data — confirm it matches the sample files.
ax.set_title('Tokens Generated vs Vocab Diversity (100 samples per model)', fontsize=13)
ax.set_xlabel('Tokens Generated', fontsize=12)
ax.set_ylabel('Vocab Diversity (%)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11)

fig.tight_layout()
fig.savefig(f'{OUTPUT_DIR}/plot5_tokens_vs_diversity.png', dpi=150)
plt.close(fig)
print("Plot 5 saved: tokens vs diversity")
# Plot 6: Avg Token Probability vs Coherence Length
# One scatter series per model: x is the sample's average token probability,
# y is its coherence length; both are plotted unscaled.
fig, ax = plt.subplots(figsize=(10, 6))
for label, color in zip(labels, colors):
    metrics = data[label]
    avg_probs = [m['avg_token_prob'] for m in metrics]
    coherence_lengths = [m['coherence_length'] for m in metrics]
    ax.scatter(avg_probs, coherence_lengths, c=color, alpha=0.6, s=30, label=label)

# NOTE(review): the title states 100 samples per model; this is not derived
# from the loaded data — confirm it matches the sample files.
ax.set_title('Token Confidence vs Coherence Length (100 samples per model)', fontsize=13)
ax.set_xlabel('Avg Token Probability', fontsize=12)
ax.set_ylabel('Coherence Length', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11)

fig.tight_layout()
fig.savefig(f'{OUTPUT_DIR}/plot6_confidence_vs_coherence.png', dpi=150)
plt.close(fig)
print("Plot 6 saved: confidence vs coherence")
# Plot 9: TPS vs Total Latency
# One scatter series per model: x is tokens/sec, y is total latency in ms.
# The first sample of every model is dropped as the cold-start request.
fig, ax = plt.subplots(figsize=(10, 6))
for label, color in zip(labels, colors):
    warm_metrics = data[label][1:]  # skip cold start
    tps_values = [m['tps'] for m in warm_metrics]
    latencies_ms = [m['total_latency_ms'] for m in warm_metrics]
    ax.scatter(tps_values, latencies_ms, c=color, alpha=0.6, s=30, label=label)

# NOTE(review): the title states 99 samples per model (100 loaded minus the
# cold start); this is not derived from the loaded data — confirm.
ax.set_title('TPS vs Total Latency (99 samples per model, cold start excluded)', fontsize=13)
ax.set_xlabel('TPS (tokens/sec)', fontsize=12)
ax.set_ylabel('Total Latency (ms)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11)

fig.tight_layout()
fig.savefig(f'{OUTPUT_DIR}/plot9_tps_vs_latency.png', dpi=150)
plt.close(fig)
print("Plot 9 saved: tps vs latency")

# Final summary line once every figure has been written.
print(f"\nAll 4 plots saved to: {OUTPUT_DIR}")
|