| """TinyLM Benchmarking Calculator — Averages 9 metrics from serving results.""" |
| import json |
| import statistics |
| import os |
|
|
# Directory holding the per-model sample files; it sits next to this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
RESULTS_DIR = _SCRIPT_DIR + '/serving-results'

# Model identifiers (used to build the sample file names) and the short
# column labels shown for them. The two lists are paired by position.
models = [
    'tinystories_10m',
    'tinystories_7m',
    'tinystories_5m',
    'tinystories_2_5m',
]
labels = [
    '10M',
    '7M',
    '5M',
    '2.5M',
]

# (metric key inside each sample's "metrics" dict, human-readable name).
keys = [
    ('ttft_ms',          'TTFT (ms)'),
    ('tps',              'TPS (tokens/sec)'),
    ('total_latency_ms', 'Total Latency (ms)'),
    ('tokens_generated', 'Tokens Generated'),
    ('perplexity',       'Perplexity'),
    ('avg_token_prob',   'Avg Token Probability'),
    ('repetition_rate',  'Repetition Rate (%)'),
    ('coherence_length', 'Coherence Length'),
    ('unique_tokens',    'Vocab Diversity (%)'),
]
|
|
# Summary statistics per model: results[label][metric_key] is a dict with
# 'mean', 'median', 'min' and 'max' computed over every sample in that
# model's "<model>_samples.json" file.
results = {}
for model, label in zip(models, labels):
    filepath = f'{RESULTS_DIR}/{model}_samples.json'
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default text encoding.
    with open(filepath, encoding='utf-8') as f:
        samples = json.load(f)
    if not samples:
        # statistics.mean() would raise the same exception type further down,
        # but without saying which file was empty.
        raise statistics.StatisticsError(f'no samples found in {filepath}')

    # Each sample carries its measurements under the "metrics" entry.
    metrics = [s['metrics'] for s in samples]

    summary = {}
    for key, _ in keys:
        vals = [m[key] for m in metrics]
        summary[key] = {
            'mean': statistics.mean(vals),
            'median': statistics.median(vals),
            'min': min(vals),
            'max': max(vals),
        }
    results[label] = summary
|
|
| |
# Print a table of per-model means: one row per metric, one column per label.
# Columns are derived from `labels` rather than being hard-coded, so the table
# follows the configured model list. For the four default labels the output is
# identical to the previous fixed-width version.
header = f"{'Metric':<30}" + "".join(f" {label:>10}" for label in labels)
print(f"\n{header}")
# 72 characters for four columns (as before), 11 more per additional column.
print("=" * (28 + 11 * len(labels)))

# Metrics whose means are multiplied by 100 and shown with a '%' suffix.
percent_keys = {'repetition_rate', 'unique_tokens'}
# Decimal places for metrics that do not use the default of one.
precision = {'tps': 0, 'avg_token_prob': 4}

for key, name in keys:
    means = [results[label][key]['mean'] for label in labels]
    if key in percent_keys:
        cells = "".join(f" {m * 100:>9.1f}%" for m in means)
    else:
        digits = precision.get(key, 1)
        cells = "".join(f" {m:>10.{digits}f}" for m in means)
    # NOTE(review): the row label occupies 29 characters while the header
    # reserves 30 for 'Metric' -- confirm whether a second leading space was
    # intended here.
    print(f" {name:<28}{cells}")
|
|
| |
# Write the summary to disk, re-keyed by human-readable metric name so the
# JSON file is self-describing: output[label][display_name] -> stats dict.
output = {}
for label in labels:
    per_model = results[label]
    output[label] = {name: per_model[key] for key, name in keys}

output_path = f'{RESULTS_DIR}/benchmarking_summary.json'
serialized = json.dumps(output, indent=2)
with open(output_path, 'w') as f:
    f.write(serialized)
print(f"\nSaved: {output_path}")
|
|