import gradio as gr
import math
# ------------------------
# GPU presets: TFLOPs (units: TFLOPs)
# ------------------------
# Per-GPU theoretical peak throughput by precision, in teraFLOPs.
# A value of 0.0 (mostly under "INT4") means no figure is provided for that
# card; estimate_time() treats 0.0 as "unavailable" and asks the user to
# enter TFLOPs manually.
# NOTE(review): several figures look like rough estimates rather than spec
# values, and the RTX 50xx rows mirror the 40xx rows verbatim — confirm
# against official vendor spec sheets before relying on them.
GPUS = {
    # Turing / consumer
    "RTX 2060": {"FP32": 6.50, "FP16": 13.00, "INT4": 0.0},
    "RTX 2060 12GB": {"FP32": 7.20, "FP16": 14.40, "INT4": 0.0},
    "RTX 2060 SUPER": {"FP32": 8.90, "FP16": 17.80, "INT4": 0.0},
    "RTX 2070": {"FP32": 8.90, "FP16": 16.00, "INT4": 0.0},
    "RTX 2070 SUPER": {"FP32": 9.10, "FP16": 18.20, "INT4": 0.0},
    "RTX 2080": {"FP32": 10.10, "FP16": 20.20, "INT4": 0.0},
    "RTX 2080 SUPER": {"FP32": 11.15, "FP16": 22.30, "INT4": 0.0},
    "RTX 2080 Ti": {"FP32": 13.45, "FP16": 26.90, "INT4": 544.0},
    # Ampere / consumer
    "RTX 3050": {"FP32": 9.10, "FP16": 18.20, "INT4": 0.0},
    "RTX 3060": {"FP32": 12.70, "FP16": 25.40, "INT4": 0.0},
    "RTX 3060 Ti": {"FP32": 16.20, "FP16": 32.40, "INT4": 0.0},
    "RTX 3070": {"FP32": 20.30, "FP16": 40.60, "INT4": 0.0},
    "RTX 3070 Ti": {"FP32": 22.30, "FP16": 44.60, "INT4": 0.0},
    "RTX 3080": {"FP32": 29.80, "FP16": 59.60, "INT4": 1248.0},
    "RTX 3080 Ti": {"FP32": 34.10, "FP16": 68.20, "INT4": 1248.0},
    "RTX 3090": {"FP32": 35.58, "FP16": 71.16, "INT4": 1248.0},
    "RTX 3090 Ti": {"FP32": 40.00, "FP16": 80.00, "INT4": 1248.0},
    # Ada / Lovelace consumer
    "RTX 4050": {"FP32": 16.90, "FP16": 33.80, "INT4": 0.0},
    "RTX 4060": {"FP32": 31.10, "FP16": 62.20, "INT4": 0.0},
    "RTX 4060 Ti": {"FP32": 45.60, "FP16": 91.20, "INT4": 0.0},
    "RTX 4070": {"FP32": 75.00, "FP16": 150.00, "INT4": 0.0},
    "RTX 4070 Ti": {"FP32": 92.20, "FP16": 184.40, "INT4": 0.0},
    "RTX 4080": {"FP32":144.00, "FP16": 288.00, "INT4": 0.0},
    "RTX 4080 SUPER": {"FP32":167.60, "FP16": 335.20, "INT4": 0.0},
    "RTX 4090": {"FP32":201.00, "FP16": 402.00, "INT4":1676.0},
    # Blackwell consumer (RTX 50xx series)
    # NOTE(review): these duplicate the 40xx numbers above — placeholders?
    "RTX 5050": {"FP32": 16.90, "FP16": 33.80, "INT4": 0.0},
    "RTX 5060": {"FP32": 31.10, "FP16": 62.20, "INT4": 0.0},
    "RTX 5060 Ti": {"FP32": 45.60, "FP16": 91.20, "INT4": 0.0},
    "RTX 5070": {"FP32": 75.00, "FP16": 150.00, "INT4": 0.0},
    "RTX 5070 Ti": {"FP32": 92.20, "FP16": 184.40, "INT4": 0.0},
    "RTX 5080": {"FP32":144.00, "FP16": 288.00, "INT4": 0.0},
    "RTX 5090": {"FP32":201.00, "FP16": 402.00, "INT4":1676.0},
    # Data center / Tesla / A-series
    "Tesla T4": {"FP32": 8.10, "FP16": 65.13, "INT4": 0.0},
    "Tesla V100": {"FP32": 15.70, "FP16": 31.40, "INT4": 0.0},
    "NVIDIA A10": {"FP32": 31.20, "FP16": 62.40, "INT4": 0.0},
    "A100": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
    "A100 80GB": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
    # Hopper / Blackwell datacenter estimates
    "H100": {"FP32":300.0, "FP16": 600.0, "INT4":3000.0},
    "B100": {"FP32":400.0, "FP16": 800.0, "INT4":4000.0},
    "B200": {"FP32":500.0, "FP16":1000.0, "INT4":5000.0},
    # AMD (kept for completeness)
    "RX 5500 XT": {"FP32": 5.20, "FP16": 10.40, "INT4": 0.0},
    "RX 5600 XT": {"FP32": 10.80, "FP16": 21.60, "INT4": 0.0},
    "RX 5700": {"FP32": 14.40, "FP16": 28.80, "INT4": 0.0},
    "RX 5700 XT": {"FP32": 16.20, "FP16": 32.40, "INT4": 0.0},
    "RX 6600": {"FP32": 17.90, "FP16": 35.80, "INT4": 0.0},
    "RX 6600 XT": {"FP32": 20.00, "FP16": 40.00, "INT4": 0.0},
    "RX 6700 XT": {"FP32": 23.00, "FP16": 46.00, "INT4": 0.0},
    "RX 6800": {"FP32": 30.00, "FP16": 60.00, "INT4": 0.0},
    "RX 6800 XT": {"FP32": 34.00, "FP16": 68.00, "INT4": 0.0},
    "RX 6900 XT": {"FP32": 40.00, "FP16": 80.00, "INT4": 0.0},
    "RX 7600": {"FP32": 25.00, "FP16": 50.00, "INT4": 0.0},
    "RX 7700 XT": {"FP32": 35.00, "FP16": 70.00, "INT4": 0.0},
    "RX 7900 XT": {"FP32": 40.00, "FP16": 80.00, "INT4": 0.0},
    "RX 7900 XTX": {"FP32": 61.10, "FP16": 122.20, "INT4": 0.0},
    # AMD MI / CDNA datacenter
    "MI50": {"FP32": 13.70, "FP16": 27.40, "INT4": 0.0},
    "MI100": {"FP32": 23.10, "FP16": 46.20, "INT4": 0.0},
    "MI200": {"FP32": 300.0, "FP16": 600.0, "INT4":3000.0},
    "MI300": {"FP32": 400.0, "FP16": 800.0, "INT4":4000.0},
    "MI355X": {"FP32": 157, "FP16": 2500, "INT4": 10000},
    # Hopper / Grace superchips
    "H200": {"FP32": 350.0, "FP16": 700.0, "INT4": 3500.0},
    "GH200": {"FP32": 300.0, "FP16": 600.0, "INT4": 3000.0}, # H100-class GPU + Grace CPU
    "GB10": {"FP32": 400.0, "FP16": 800.0, "INT4": 4000.0}, # dev module, Blackwell-class
    # Ada Lovelace datacenter
    "L20": {"FP32": 44.0, "FP16": 88.0, "INT4": 700.0},
    "A40": {"FP32": 37.4, "FP16": 74.8, "INT4": 600.0},
    "A2": {"FP32": 4.5, "FP16": 9.0, "INT4": 160.0},
    # RTX Ada workstation GPUs
    "RTX A2000": {"FP32": 8.0, "FP16": 16.0, "INT4": 0.0},
    "RTX A4000": {"FP32": 19.2, "FP16": 38.4, "INT4": 0.0},
    "RTX A4500": {"FP32": 23.7, "FP16": 47.4, "INT4": 0.0},
    "RTX A5000": {"FP32": 27.8, "FP16": 55.6, "INT4": 0.0},
    "RTX A6000 Ada": {"FP32": 91.1, "FP16": 182.2, "INT4": 1450.0},
}
# ------------------------
# CSS / Theme variables
# ------------------------
# Dark UI built on CSS custom properties.  The .theme-* classes swap the
# palette client-side via the buttons in THEME_BUTTONS_HTML, which set
# document.documentElement.className.  The string content is runtime data
# (served to the browser) and must not be altered casually.
CSS = r"""
:root { --bg:#071233; --card:#07112a; --accent:#2563eb; --text:#e8f0ff; --muted:#9fb6e8; }
body { background: var(--bg); color:var(--text); font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
.gradio-container { max-width: 920px; margin: 14px auto; padding: 12px; }
/* card */
.card { background: var(--card); border-radius:12px; padding:14px; box-shadow: 0 8px 26px rgba(2,6,23,0.5); border:1px solid rgba(255,255,255,0.03); }
/* accent and buttons */
.btn-theme { background:transparent; color:var(--accent); border:1px solid var(--accent); padding:8px 12px; border-radius:10px; cursor:pointer; }
.btn-theme:hover { background: rgba(255,255,255,0.02); }
/* result */
.result-box { background: linear-gradient(180deg, rgba(255,255,255,0.01), rgba(255,255,255,0.02)); border-radius:8px; padding:10px; border:1px solid rgba(255,255,255,0.03); color:var(--text); font-weight:600; }
/* small text */
.small-muted { color: var(--muted); font-size:0.92em; }
/* themes */
.theme-blue { --bg:#071233; --card:#07112a; --accent:#2563eb; --text:#e8f0ff; --muted:#9fb6e8; }
.theme-green{ --bg:#07120a; --card:#07120a; --accent:#16a34a; --text:#e8fff0; --muted:#9fe8b0; }
.theme-purple{ --bg:#120521; --card:#15061a; --accent:#8b5cf6; --text:#f2e8ff; --muted:#c9b8f6; }
/* minor Gradio element tweaks */
input[type="number"], .gradio-number { background: transparent; color: var(--text); border-radius:6px; }
/* theme button row */
.theme-btn-row { display:flex; gap:8px; align-items:center; }
"""
# ------------------------
# Core logic
# ------------------------
def estimate_time(params_m: float,
                  tokens_b: float,
                  selected_gpu: str,
                  dtype: str,
                  tf_override: float,
                  utilization_pct: float,
                  gpu_count: float,
                  seq_len: float = 2048.0) -> str:
    """Estimate wall-clock training time using the ~6*N*D FLOPs rule of thumb.

    Args:
        params_m: Model size in millions of parameters.
        tokens_b: Training tokens in billions.
        selected_gpu: Key into the GPUS preset table (used only when
            ``tf_override`` is not positive).
        dtype: Precision key in the preset ("FP32", "FP16", "INT4").
        tf_override: Per-GPU TFLOPs; any value > 0 overrides the preset.
        utilization_pct: Realistic fraction of theoretical peak, in percent.
        gpu_count: Number of identical GPUs assumed to run in parallel.
        seq_len: Sequence length used only for the rough step-count line
            (default 2048, matching the previously hard-coded value).

    Returns:
        A multi-line human-readable report, or a short error message when
        inputs are missing or non-positive.
    """
    # gr.Number fields deliver None when cleared; guard before comparing so
    # the UI shows a message instead of raising TypeError on `None <= 0`.
    if params_m is None or tokens_b is None or params_m <= 0 or tokens_b <= 0:
        return "Enter positive values for parameters and tokens."
    if gpu_count is None or gpu_count <= 0:
        return "Enter a positive number of GPUs."
    params = params_m * 1e6
    tokens = tokens_b * 1e9
    # Choose per-GPU TFLOPs: a positive manual override wins over the preset.
    if tf_override is not None and tf_override > 0:
        chosen_tf_per_gpu = float(tf_override)
        source = "manual override"
    else:
        try:
            chosen_tf_per_gpu = float(GPUS[selected_gpu].get(dtype, 0.0))
            source = f"preset ({selected_gpu} / {dtype})"
        except (KeyError, TypeError, ValueError):
            # Unknown GPU name, selected_gpu=None, or a malformed preset value.
            return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
    if chosen_tf_per_gpu <= 0:
        # Covers presets whose dtype entry is 0.0 (e.g. cards with no INT4 figure).
        return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
    # Scale by GPU count and utilization -> effective FLOPs/sec.
    # Utilization is floored at 0.1% to avoid division by zero.
    total_tf = chosen_tf_per_gpu * float(gpu_count)
    gpu_flops_per_sec = total_tf * 1e12 * (max(0.001, utilization_pct / 100.0))
    # Standard transformer training estimate: ~6 FLOPs per parameter per token.
    flops_total = 6 * params * tokens
    seconds = flops_total / gpu_flops_per_sec
    hours = seconds / 3600.0
    days = hours / 24.0
    # Rough optimizer-step count; clamped to >= 1 so the division below is safe.
    steps = max(1.0, tokens / seq_len)
    flops_per_step = flops_total / steps
    # Sanity warnings for implausible hardware configurations.
    warnings = []
    if gpu_count >= 10000:
        warnings.append("⚠️ Wow that's a lot of GPUs — are you sure? Check units (e.g., 8 not 800k).")
    if total_tf > 1e6:
        warnings.append("⚠️ Total TFLOPs exceed 1e6 TFLOPs (exaFLOPs scale) — results are rough estimates.")
    out = [
        "🔥 Roman's Training Time Estimator",
        "",
        f"Model params: {params_m:,.1f} M",
        f"Training tokens: {tokens_b:,.3f} B",
        f"Total training FLOPs (approx): {flops_total:.3e}",
        "",
        f"Hardware source: {source}",
        f"Per-GPU TFLOPs: {chosen_tf_per_gpu:.3f} TFLOPs",
        f"GPU count: {int(gpu_count):,}",
        f"Total effective TFLOPs (before utilization): {total_tf:,.3f} TFLOPs",
        f"Utilization: {utilization_pct:.0f}%",
        "",
        f"⏱️ Wall-clock estimate: {hours:,.2f} hours (~{days:,.2f} days)",
        f"Steps (rough, seq_len={seq_len:.0f}): {steps:,.0f} steps",
        f"FLOPs / step (avg): {flops_per_step:.3e}",
    ]
    if warnings:
        out.append("")
        out.extend(warnings)
    # NOTE(review): "Custom" is not currently a dropdown choice, so this note
    # fires whenever the override is positive — confirm the intent.
    if tf_override and tf_override > 0 and selected_gpu != "Custom":
        out.append("")
        out.append("⚠️ Note: you overrode the preset TFLOPs. Ensure the value is in TFLOPs (e.g., 150 for A100 FP16-like).")
    return "\n".join(out)
def preset_tf_for_ui(selected_gpu: str, dtype: str) -> float:
    """Look up the preset TFLOPs for a GPU/dtype pair; 0.0 when unknown."""
    preset = GPUS.get(selected_gpu)
    return 0.0 if preset is None else preset.get(dtype, 0.0)
# ------------------------
# Build UI
# ------------------------
# Inline HTML for theme buttons with client-side onclick handlers.
# Each button swaps the className on <html>, which switches the CSS
# custom-property palette defined by the .theme-* rules in CSS above.
# The string is runtime markup; its content must stay byte-identical.
THEME_BUTTONS_HTML = """
<div class="theme-btn-row">
<button class="btn-theme" onclick="document.documentElement.className='theme-blue'">Blue</button>
<button class="btn-theme" onclick="document.documentElement.className='theme-green'">Green</button>
<button class="btn-theme" onclick="document.documentElement.className='theme-purple'">Purple</button>
</div>
"""
# Custom CSS must be supplied to the gr.Blocks() constructor; Blocks.launch()
# has no `css` parameter, so the previous `demo.launch(css=CSS)` never applied
# the stylesheet (and errors on recent Gradio versions).
with gr.Blocks(css=CSS) as demo:
    # initial theme set (runs immediately on load)
    # NOTE(review): recent Gradio versions sanitize <script> tags inside
    # gr.HTML, so this may be a no-op there; the :root defaults in CSS match
    # the blue theme, so the page still renders correctly either way.
    gr.HTML("<script>document.documentElement.className='theme-blue';</script>")
    with gr.Column(elem_classes="card"):
        with gr.Row():
            gr.Markdown("## 🧠 Roman’s Training Time Estimator")
            # render the theme buttons as raw HTML so onclick works client-side instantly
            gr.HTML(THEME_BUTTONS_HTML)
    with gr.Column(elem_classes="card"):
        gr.Markdown("### Model & Hardware")
        with gr.Row():
            params = gr.Slider(minimum=1, maximum=20000, value=100, step=0.1, label="Model Parameters (Millions)")
            tokens = gr.Number(value=1.0, label="Training Tokens (Billions)")
        with gr.Row():
            gpu_dropdown = gr.Dropdown(choices=list(GPUS.keys()), value="A100 80GB", label="GPU Preset (changes TFLOPs below)")
            dtype_dropdown = gr.Dropdown(choices=["FP32", "FP16", "INT4"], value="FP16", label="Training Precision / DType")
        with gr.Row():
            tf_override = gr.Number(value=preset_tf_for_ui("A100 80GB", "FP16"), label="GPU TFLOPs (teraFLOPs) — editable", precision=3)
            utilization = gr.Slider(minimum=1, maximum=100, value=80, step=1, label="Hardware Utilization (%) — realistic throughput")
        with gr.Row():
            gpu_count = gr.Number(value=1, label="GPU Count (how many of the chosen preset you have)", precision=0)
    with gr.Column(elem_classes="card"):
        gr.Markdown("### Estimate")
        result = gr.Textbox(lines=14, interactive=False, elem_classes="result-box", label="Result")
        run_btn = gr.Button("Estimate Training Time", elem_classes="btn-theme")

    # Keep the editable TFLOPs box in sync when the GPU preset or dtype changes.
    def _update_tf(selected_gpu, dtype):
        return gr.update(value=preset_tf_for_ui(selected_gpu, dtype))
    gpu_dropdown.change(_update_tf, inputs=[gpu_dropdown, dtype_dropdown], outputs=[tf_override])
    dtype_dropdown.change(_update_tf, inputs=[gpu_dropdown, dtype_dropdown], outputs=[tf_override])

    # Run button computes the estimate and writes the report into `result`.
    run_btn.click(estimate_time,
                  inputs=[params, tokens, gpu_dropdown, dtype_dropdown, tf_override, utilization, gpu_count],
                  outputs=[result])
    gr.HTML("<div class='small-muted'>Tip: GPU presets are TFLOPs per dtype. You can edit the TFLOPs number to override. Utilization reduces theoretical peak to realistic throughput.</div>")
    gr.HTML("<div class='small-muted'>Thanks to the contributions from Reality123b</div>")

# CSS is already attached to the Blocks instance above; launch() takes no css argument.
if __name__ == "__main__":
    demo.launch()