# TrainingTime / app.py
import gradio as gr
# ------------------------
# GPU presets: peak throughput per dtype (units: TFLOPs)
# ------------------------
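# Values are rough peak-throughput figures for each precision; 0.0 means no preset is
# available for that dtype, in which case the app asks for a manual TFLOPs override.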
GPUS = {
    # Turing / consumer
    "RTX 2060": {"FP32": 6.50, "FP16": 13.00, "INT4": 0.0},
    "RTX 2060 12GB": {"FP32": 7.20, "FP16": 14.40, "INT4": 0.0},
    "RTX 2060 SUPER": {"FP32": 8.90, "FP16": 17.80, "INT4": 0.0},
    "RTX 2070": {"FP32": 8.90, "FP16": 16.00, "INT4": 0.0},
    "RTX 2070 SUPER": {"FP32": 9.10, "FP16": 18.20, "INT4": 0.0},
    "RTX 2080": {"FP32": 10.10, "FP16": 20.20, "INT4": 0.0},
    "RTX 2080 SUPER": {"FP32": 11.15, "FP16": 22.30, "INT4": 0.0},
    "RTX 2080 Ti": {"FP32": 13.45, "FP16": 26.90, "INT4": 544.0},
    # Ampere / consumer
    "RTX 3050": {"FP32": 9.10, "FP16": 18.20, "INT4": 0.0},
    "RTX 3060": {"FP32": 12.70, "FP16": 25.40, "INT4": 0.0},
    "RTX 3060 Ti": {"FP32": 16.20, "FP16": 32.40, "INT4": 0.0},
    "RTX 3070": {"FP32": 20.30, "FP16": 40.60, "INT4": 0.0},
    "RTX 3070 Ti": {"FP32": 22.30, "FP16": 44.60, "INT4": 0.0},
    "RTX 3080": {"FP32": 29.80, "FP16": 59.60, "INT4": 1248.0},
    "RTX 3080 Ti": {"FP32": 34.10, "FP16": 68.20, "INT4": 1248.0},
    "RTX 3090": {"FP32": 35.58, "FP16": 71.16, "INT4": 1248.0},
    "RTX 3090 Ti": {"FP32": 40.00, "FP16": 80.00, "INT4": 1248.0},
    # Ada Lovelace / consumer
    "RTX 4050": {"FP32": 16.90, "FP16": 33.80, "INT4": 0.0},
    "RTX 4060": {"FP32": 31.10, "FP16": 62.20, "INT4": 0.0},
    "RTX 4060 Ti": {"FP32": 45.60, "FP16": 91.20, "INT4": 0.0},
    "RTX 4070": {"FP32": 75.00, "FP16": 150.00, "INT4": 0.0},
    "RTX 4070 Ti": {"FP32": 92.20, "FP16": 184.40, "INT4": 0.0},
    "RTX 4080": {"FP32": 144.00, "FP16": 288.00, "INT4": 0.0},
    "RTX 4080 SUPER": {"FP32": 167.60, "FP16": 335.20, "INT4": 0.0},
    "RTX 4090": {"FP32": 201.00, "FP16": 402.00, "INT4": 1676.0},
    # Blackwell consumer (RTX 50xx series)
    "RTX 5050": {"FP32": 16.90, "FP16": 33.80, "INT4": 0.0},
    "RTX 5060": {"FP32": 31.10, "FP16": 62.20, "INT4": 0.0},
    "RTX 5060 Ti": {"FP32": 45.60, "FP16": 91.20, "INT4": 0.0},
    "RTX 5070": {"FP32": 75.00, "FP16": 150.00, "INT4": 0.0},
    "RTX 5070 Ti": {"FP32": 92.20, "FP16": 184.40, "INT4": 0.0},
    "RTX 5080": {"FP32": 144.00, "FP16": 288.00, "INT4": 0.0},
    "RTX 5090": {"FP32": 201.00, "FP16": 402.00, "INT4": 1676.0},
    # Data center / Tesla / A-series
    "Tesla T4": {"FP32": 8.10, "FP16": 65.13, "INT4": 0.0},
    "Tesla V100": {"FP32": 15.70, "FP16": 31.40, "INT4": 0.0},
    "NVIDIA A10": {"FP32": 31.20, "FP16": 62.40, "INT4": 0.0},
    "A100": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
    "A100 80GB": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
    # Hopper / Blackwell datacenter estimates
    "H100": {"FP32": 300.0, "FP16": 600.0, "INT4": 3000.0},
    "B100": {"FP32": 400.0, "FP16": 800.0, "INT4": 4000.0},
    "B200": {"FP32": 500.0, "FP16": 1000.0, "INT4": 5000.0},
    # AMD (kept for completeness)
    "RX 5500 XT": {"FP32": 5.20, "FP16": 10.40, "INT4": 0.0},
    "RX 5600 XT": {"FP32": 10.80, "FP16": 21.60, "INT4": 0.0},
    "RX 5700": {"FP32": 14.40, "FP16": 28.80, "INT4": 0.0},
    "RX 5700 XT": {"FP32": 16.20, "FP16": 32.40, "INT4": 0.0},
    "RX 6600": {"FP32": 17.90, "FP16": 35.80, "INT4": 0.0},
    "RX 6600 XT": {"FP32": 20.00, "FP16": 40.00, "INT4": 0.0},
    "RX 6700 XT": {"FP32": 23.00, "FP16": 46.00, "INT4": 0.0},
    "RX 6800": {"FP32": 30.00, "FP16": 60.00, "INT4": 0.0},
    "RX 6800 XT": {"FP32": 34.00, "FP16": 68.00, "INT4": 0.0},
    "RX 6900 XT": {"FP32": 40.00, "FP16": 80.00, "INT4": 0.0},
    "RX 7600": {"FP32": 25.00, "FP16": 50.00, "INT4": 0.0},
    "RX 7700 XT": {"FP32": 35.00, "FP16": 70.00, "INT4": 0.0},
    "RX 7900 XT": {"FP32": 40.00, "FP16": 80.00, "INT4": 0.0},
    "RX 7900 XTX": {"FP32": 61.10, "FP16": 122.20, "INT4": 0.0},
    # AMD MI / CDNA datacenter
    "MI50": {"FP32": 13.70, "FP16": 27.40, "INT4": 0.0},
    "MI100": {"FP32": 23.10, "FP16": 46.20, "INT4": 0.0},
    "MI200": {"FP32": 300.0, "FP16": 600.0, "INT4": 3000.0},
    "MI300": {"FP32": 400.0, "FP16": 800.0, "INT4": 4000.0},
    "MI355X": {"FP32": 157.0, "FP16": 2500.0, "INT4": 10000.0},
    # Hopper / Grace superchips
    "H200": {"FP32": 350.0, "FP16": 700.0, "INT4": 3500.0},
    "GH200": {"FP32": 300.0, "FP16": 600.0, "INT4": 3000.0},  # H100-class GPU + Grace CPU
    "GB10": {"FP32": 400.0, "FP16": 800.0, "INT4": 4000.0},   # dev module, Blackwell-class
    # Other datacenter GPUs (Ada / Ampere)
    "L20": {"FP32": 44.0, "FP16": 88.0, "INT4": 700.0},
    "A40": {"FP32": 37.4, "FP16": 74.8, "INT4": 600.0},
    "A2": {"FP32": 4.5, "FP16": 9.0, "INT4": 160.0},
    # RTX workstation GPUs (Ampere / Ada)
    "RTX A2000": {"FP32": 8.0, "FP16": 16.0, "INT4": 0.0},
    "RTX A4000": {"FP32": 19.2, "FP16": 38.4, "INT4": 0.0},
    "RTX A4500": {"FP32": 23.7, "FP16": 47.4, "INT4": 0.0},
    "RTX A5000": {"FP32": 27.8, "FP16": 55.6, "INT4": 0.0},
    "RTX A6000 Ada": {"FP32": 91.1, "FP16": 182.2, "INT4": 1450.0},
}
# ------------------------
# CSS / Theme variables
# ------------------------
CSS = r"""
:root { --bg:#071233; --card:#07112a; --accent:#2563eb; --text:#e8f0ff; --muted:#9fb6e8; }
body { background: var(--bg); color:var(--text); font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
.gradio-container { max-width: 920px; margin: 14px auto; padding: 12px; }
/* card */
.card { background: var(--card); border-radius:12px; padding:14px; box-shadow: 0 8px 26px rgba(2,6,23,0.5); border:1px solid rgba(255,255,255,0.03); }
/* accent and buttons */
.btn-theme { background:transparent; color:var(--accent); border:1px solid var(--accent); padding:8px 12px; border-radius:10px; cursor:pointer; }
.btn-theme:hover { background: rgba(255,255,255,0.02); }
/* result */
.result-box { background: linear-gradient(180deg, rgba(255,255,255,0.01), rgba(255,255,255,0.02)); border-radius:8px; padding:10px; border:1px solid rgba(255,255,255,0.03); color:var(--text); font-weight:600; }
/* small text */
.small-muted { color: var(--muted); font-size:0.92em; }
/* themes */
.theme-blue { --bg:#071233; --card:#07112a; --accent:#2563eb; --text:#e8f0ff; --muted:#9fb6e8; }
.theme-green{ --bg:#07120a; --card:#07120a; --accent:#16a34a; --text:#e8fff0; --muted:#9fe8b0; }
.theme-purple{ --bg:#120521; --card:#15061a; --accent:#8b5cf6; --text:#f2e8ff; --muted:#c9b8f6; }
/* minor Gradio element tweaks */
input[type="number"], .gradio-number { background: transparent; color: var(--text); border-radius:6px; }
/* theme button row */
.theme-btn-row { display:flex; gap:8px; align-items:center; }
"""
# ------------------------
# Core logic
# ------------------------
def estimate_time(params_m: float,
                  tokens_b: float,
                  selected_gpu: str,
                  dtype: str,
                  tf_override: float,
                  utilization_pct: float,
                  gpu_count: float):
    if params_m <= 0 or tokens_b <= 0:
        return "Enter positive values for parameters and tokens."
    if gpu_count is None or gpu_count <= 0:
        return "Enter a positive number of GPUs."

    params = params_m * 1e6
    tokens = tokens_b * 1e9

    # choose TFLOPs per GPU: a manual override takes priority over the preset
    if tf_override is not None and tf_override > 0:
        chosen_tf_per_gpu = float(tf_override)
        source = "manual override"
    else:
        try:
            chosen_tf_per_gpu = float(GPUS[selected_gpu].get(dtype, 0.0))
            source = f"preset ({selected_gpu} / {dtype})"
        except Exception:
            return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
    if chosen_tf_per_gpu <= 0:
        return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."

    # multiply by count and utilization -> FLOPs/sec
    total_tf = chosen_tf_per_gpu * float(gpu_count)
    gpu_flops_per_sec = total_tf * 1e12 * max(0.001, utilization_pct / 100.0)

    flops_total = 6 * params * tokens
    seconds = flops_total / gpu_flops_per_sec
    hours = seconds / 3600.0
    days = hours / 24.0

    seq_len = 2048.0
    steps = max(1.0, tokens / seq_len)
    flops_per_step = flops_total / steps if steps > 0 else 0.0

    # warnings for absurd counts
    warnings = []
    if gpu_count >= 10000:
        warnings.append("⚠️ Wow that's a lot of GPUs — are you sure? Check units (e.g., 8 not 800k).")
    if total_tf > 1e6:
        warnings.append("⚠️ Total TFLOPs exceed 1e6 TFLOPs (exaFLOPs scale) — results are rough estimates.")

    out = [
        "🔥 Roman's Training Time Estimator",
        "",
        f"Model params: {params_m:,.1f} M",
        f"Training tokens: {tokens_b:,.3f} B",
        f"Total training FLOPs (approx): {flops_total:.3e}",
        "",
        f"Hardware source: {source}",
        f"Per-GPU TFLOPs: {chosen_tf_per_gpu:.3f} TFLOPs",
        f"GPU count: {int(gpu_count):,}",
        f"Total effective TFLOPs (before utilization): {total_tf:,.3f} TFLOPs",
        f"Utilization: {utilization_pct:.0f}%",
        "",
        f"⏱️ Wall-clock estimate: {hours:,.2f} hours (~{days:,.2f} days)",
        f"Steps (rough, seq_len=2048): {steps:,.0f} steps",
        f"FLOPs / step (avg): {flops_per_step:.3e}",
    ]
    if warnings:
        out.append("")
        out.extend(warnings)
    if tf_override and tf_override > 0 and selected_gpu != "Custom":
        out.append("")
        out.append("⚠️ Note: you overrode the preset TFLOPs. Ensure the value is in TFLOPs (e.g., 150 for A100 FP16-like).")
    return "\n".join(out)
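# Illustrative sanity check (not executed): the 6 * params * tokens expression above is the
# common ~6*N*D rule of thumb for total training FLOPs. With the UI defaults (100 M params,
# 1 B tokens, the A100 80GB FP16 preset of 39 TFLOPs, 80% utilization, 1 GPU) this gives
# 6e17 / (39e12 * 0.8) ≈ 1.9e4 seconds, i.e. roughly 5.3 hours.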
def preset_tf_for_ui(selected_gpu: str, dtype: str):
    if selected_gpu in GPUS:
        return GPUS[selected_gpu].get(dtype, 0.0)
    return 0.0
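# e.g. preset_tf_for_ui("A100 80GB", "FP16") -> 39.0; unknown GPU/dtype combinations return 0.0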
# ------------------------
# Build UI
# ------------------------
# Inline HTML for theme buttons with client-side onclick handlers
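# Each button swaps the class on the <html> element; the .theme-* rules in CSS then
# rebind the colour variables, so the whole page restyles without a server round-trip.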
THEME_BUTTONS_HTML = """
<div class="theme-btn-row">
<button class="btn-theme" onclick="document.documentElement.className='theme-blue'">Blue</button>
<button class="btn-theme" onclick="document.documentElement.className='theme-green'">Green</button>
<button class="btn-theme" onclick="document.documentElement.className='theme-purple'">Purple</button>
</div>
"""
with gr.Blocks(css=CSS) as demo:
    # initial theme set (runs immediately on load)
    gr.HTML("<script>document.documentElement.className='theme-blue';</script>")

    with gr.Column(elem_classes="card"):
        with gr.Row():
            gr.Markdown("## 🧠 Roman’s Training Time Estimator")
            # render the theme buttons as raw HTML so onclick works client-side instantly
            gr.HTML(THEME_BUTTONS_HTML)

    with gr.Column(elem_classes="card"):
        gr.Markdown("### Model & Hardware")
        with gr.Row():
            params = gr.Slider(minimum=1, maximum=20000, value=100, step=0.1, label="Model Parameters (Millions)")
            tokens = gr.Number(value=1.0, label="Training Tokens (Billions)")
        with gr.Row():
            gpu_dropdown = gr.Dropdown(choices=list(GPUS.keys()), value="A100 80GB", label="GPU Preset (changes TFLOPs below)")
            dtype_dropdown = gr.Dropdown(choices=["FP32", "FP16", "INT4"], value="FP16", label="Training Precision / DType")
        with gr.Row():
            tf_override = gr.Number(value=preset_tf_for_ui("A100 80GB", "FP16"), label="GPU TFLOPs (teraFLOPs) — editable", precision=3)
            utilization = gr.Slider(minimum=1, maximum=100, value=80, step=1, label="Hardware Utilization (%) — realistic throughput")
        with gr.Row():
            gpu_count = gr.Number(value=1, label="GPU Count (how many of the chosen preset you have)", precision=0)

    with gr.Column(elem_classes="card"):
        gr.Markdown("### Estimate")
        result = gr.Textbox(lines=14, interactive=False, elem_classes="result-box", label="Result")
        run_btn = gr.Button("Estimate Training Time", elem_classes="btn-theme")

    # update the TFLOPs override whenever the GPU preset or dtype changes
    def _update_tf(selected_gpu, dtype):
        return gr.update(value=preset_tf_for_ui(selected_gpu, dtype))

    gpu_dropdown.change(_update_tf, inputs=[gpu_dropdown, dtype_dropdown], outputs=[tf_override])
    dtype_dropdown.change(_update_tf, inputs=[gpu_dropdown, dtype_dropdown], outputs=[tf_override])

    # Run button computes the estimate
    run_btn.click(estimate_time,
                  inputs=[params, tokens, gpu_dropdown, dtype_dropdown, tf_override, utilization, gpu_count],
                  outputs=[result])

    gr.HTML("<div class='small-muted'>Tip: GPU presets are TFLOPs per dtype. You can edit the TFLOPs number to override. Utilization reduces theoretical peak to realistic throughput.</div>")
    gr.HTML("<div class='small-muted'>Thanks to the contributions from Reality123b</div>")
# CSS is passed to gr.Blocks above; launch() does not accept a css argument
if __name__ == "__main__":
    demo.launch()
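# To run locally: install the gradio package (pip install gradio), then `python app.py`.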