deycoding
/

deycoding-tiny-language-model

Text Generation

Model card Files Files and versions

deycoding-tiny-language-model / benchmarking-scripts /calculate-benchmarks.py

deycoding's picture

Upload folder using huggingface_hub

fb753d5 verified about 23 hours ago

History Blame Contribute Delete

2.34 kB

	"""TinyLM Benchmarking Calculator — Averages 9 metrics from serving results."""
	import json
	import statistics
	import os

	# Directory holding the per-model ``*_samples.json`` inputs. It is resolved
	# relative to this script, so the current working directory does not matter.
	_script_dir = os.path.dirname(os.path.abspath(__file__))
	RESULTS_DIR = f'{_script_dir}/serving-results'

	# (file-name stem, table column label) for every benchmarked model, in the
	# order the columns are printed.
	_model_table = [
	    ('tinystories_10m', '10M'),
	    ('tinystories_7m', '7M'),
	    ('tinystories_5m', '5M'),
	    ('tinystories_2_5m', '2.5M'),
	]
	models = [stem for stem, _ in _model_table]
	labels = [tag for _, tag in _model_table]

	# (key looked up in each sample's 'metrics' mapping, human-readable row name).
	keys = [
	    ('ttft_ms', 'TTFT (ms)'),
	    ('tps', 'TPS (tokens/sec)'),
	    ('total_latency_ms', 'Total Latency (ms)'),
	    ('tokens_generated', 'Tokens Generated'),
	    ('perplexity', 'Perplexity'),
	    ('avg_token_prob', 'Avg Token Probability'),
	    ('repetition_rate', 'Repetition Rate (%)'),
	    ('coherence_length', 'Coherence Length'),
	    ('unique_tokens', 'Vocab Diversity (%)'),
	]

	# Build ``results[label][metric_key] -> {'mean', 'median', 'min', 'max'}``
	# from each model's ``<model>_samples.json`` file in RESULTS_DIR.
	results = {}
	for model, label in zip(models, labels):
	    filepath = f'{RESULTS_DIR}/{model}_samples.json'
	    # JSON text is UTF-8; pass the encoding explicitly so parsing does not
	    # depend on the platform's default locale encoding.
	    with open(filepath, encoding='utf-8') as f:
	        samples = json.load(f)
	    # Each sample is expected to carry a 'metrics' mapping with every metric key.
	    metrics = [s['metrics'] for s in samples]
	    results[label] = {}
	    for key, _ in keys:
	        vals = [m[key] for m in metrics]
	        # NOTE: an empty sample list makes statistics.mean raise StatisticsError.
	        results[label][key] = {
	            'mean': statistics.mean(vals),
	            'median': statistics.median(vals),
	            'min': min(vals),
	            'max': max(vals),
	        }

	# Print table
	# One row per metric, one right-aligned 10-character column per model label,
	# showing the mean over that model's samples.
	header = f"{'Metric':<30} " + " ".join(f"{label:>10}" for label in labels)
	print(f"\n{header}")
	print("=" * 72)

	# Metrics shown as percentages: the stored mean is multiplied by 100 and
	# suffixed with '%'. NOTE(review): this assumes the stored values are
	# fractions in [0, 1] -- confirm against the script that writes the samples.
	_percent_keys = ('repetition_rate', 'unique_tokens')
	# Decimal places for metrics that do not use the default of one.
	_decimals = {'tps': 0, 'avg_token_prob': 4}

	for key, name in keys:
	    means = [results[label][key]['mean'] for label in labels]
	    if key in _percent_keys:
	        # Width 9 plus the trailing '%' keeps the column 10 characters wide.
	        cells = [f"{value * 100:>9.1f}%" for value in means]
	    else:
	        digits = _decimals.get(key, 1)
	        cells = [f"{value:>10.{digits}f}" for value in means]
	    print(f" {name:<28} " + " ".join(cells))

	# Save as JSON
	# The summary is keyed by model label, then by the metric's display name
	# (not its raw key); each entry holds that metric's mean/median/min/max.
	output = {}
	for label in labels:
	    per_metric = results[label]
	    output[label] = {name: per_metric[key] for key, name in keys}

	output_path = f'{RESULTS_DIR}/benchmarking_summary.json'
	with open(output_path, 'w') as fh:
	    json.dump(output, fh, indent=2)
	print(f"\nSaved: {output_path}")