import math

import gradio as gr

# ------------------------
# GPU presets: peak per-GPU throughput, in TFLOPs
# ------------------------
GPUS = {
    # Turing / consumer
    "RTX 2060":        {"FP32": 6.50,   "FP16": 13.00,  "INT4": 0.0},
    "RTX 2060 12GB":   {"FP32": 7.20,   "FP16": 14.40,  "INT4": 0.0},
    "RTX 2060 SUPER":  {"FP32": 8.90,   "FP16": 17.80,  "INT4": 0.0},
    "RTX 2070":        {"FP32": 8.90,   "FP16": 16.00,  "INT4": 0.0},
    "RTX 2070 SUPER":  {"FP32": 9.10,   "FP16": 18.20,  "INT4": 0.0},
    "RTX 2080":        {"FP32": 10.10,  "FP16": 20.20,  "INT4": 0.0},
    "RTX 2080 SUPER":  {"FP32": 11.15,  "FP16": 22.30,  "INT4": 0.0},
    "RTX 2080 Ti":     {"FP32": 13.45,  "FP16": 26.90,  "INT4": 544.0},
    # Ampere / consumer
    "RTX 3050":        {"FP32": 9.10,   "FP16": 18.20,  "INT4": 0.0},
    "RTX 3060":        {"FP32": 12.70,  "FP16": 25.40,  "INT4": 0.0},
    "RTX 3060 Ti":     {"FP32": 16.20,  "FP16": 32.40,  "INT4": 0.0},
    "RTX 3070":        {"FP32": 20.30,  "FP16": 40.60,  "INT4": 0.0},
    "RTX 3070 Ti":     {"FP32": 22.30,  "FP16": 44.60,  "INT4": 0.0},
    "RTX 3080":        {"FP32": 29.80,  "FP16": 59.60,  "INT4": 1248.0},
    "RTX 3080 Ti":     {"FP32": 34.10,  "FP16": 68.20,  "INT4": 1248.0},
    "RTX 3090":        {"FP32": 35.58,  "FP16": 71.16,  "INT4": 1248.0},
    "RTX 3090 Ti":     {"FP32": 40.00,  "FP16": 80.00,  "INT4": 1248.0},
    # Ada Lovelace / consumer
    "RTX 4050":        {"FP32": 16.90,  "FP16": 33.80,  "INT4": 0.0},
    "RTX 4060":        {"FP32": 31.10,  "FP16": 62.20,  "INT4": 0.0},
    "RTX 4060 Ti":     {"FP32": 45.60,  "FP16": 91.20,  "INT4": 0.0},
    "RTX 4070":        {"FP32": 75.00,  "FP16": 150.00, "INT4": 0.0},
    "RTX 4070 Ti":     {"FP32": 92.20,  "FP16": 184.40, "INT4": 0.0},
    "RTX 4080":        {"FP32": 144.00, "FP16": 288.00, "INT4": 0.0},
    "RTX 4080 SUPER":  {"FP32": 167.60, "FP16": 335.20, "INT4": 0.0},
    "RTX 4090":        {"FP32": 201.00, "FP16": 402.00, "INT4": 1676.0},
    # Blackwell consumer (RTX 50xx series; values mirror the 40xx presets above)
    "RTX 5050":        {"FP32": 16.90,  "FP16": 33.80,  "INT4": 0.0},
    "RTX 5060":        {"FP32": 31.10,  "FP16": 62.20,  "INT4": 0.0},
    "RTX 5060 Ti":     {"FP32": 45.60,  "FP16": 91.20,  "INT4": 0.0},
    "RTX 5070":        {"FP32": 75.00,  "FP16": 150.00, "INT4": 0.0},
    "RTX 5070 Ti":     {"FP32": 92.20,  "FP16": 184.40, "INT4": 0.0},
    "RTX 5080":        {"FP32": 144.00, "FP16": 288.00, "INT4": 0.0},
    "RTX 5090":        {"FP32": 201.00, "FP16": 402.00, "INT4": 1676.0},
    # Data center / Tesla / A-series
    "Tesla T4":        {"FP32": 8.10,   "FP16": 65.13,  "INT4": 0.0},
    "Tesla V100":      {"FP32": 15.70,  "FP16": 31.40,  "INT4": 0.0},
    "NVIDIA A10":      {"FP32": 31.20,  "FP16": 62.40,  "INT4": 0.0},
    "A100":            {"FP32": 19.50,  "FP16": 39.00,  "INT4": 624.0},
    "A100 80GB":       {"FP32": 19.50,  "FP16": 39.00,  "INT4": 624.0},
    # Hopper / Blackwell datacenter estimates
    "H100":            {"FP32": 300.0,  "FP16": 600.0,  "INT4": 3000.0},
    "B100":            {"FP32": 400.0,  "FP16": 800.0,  "INT4": 4000.0},
    "B200":            {"FP32": 500.0,  "FP16": 1000.0, "INT4": 5000.0},
    # AMD (kept for completeness)
    "RX 5500 XT":      {"FP32": 5.20,   "FP16": 10.40,  "INT4": 0.0},
    "RX 5600 XT":      {"FP32": 10.80,  "FP16": 21.60,  "INT4": 0.0},
    "RX 5700":         {"FP32": 14.40,  "FP16": 28.80,  "INT4": 0.0},
    "RX 5700 XT":      {"FP32": 16.20,  "FP16": 32.40,  "INT4": 0.0},
    "RX 6600":         {"FP32": 17.90,  "FP16": 35.80,  "INT4": 0.0},
    "RX 6600 XT":      {"FP32": 20.00,  "FP16": 40.00,  "INT4": 0.0},
    "RX 6700 XT":      {"FP32": 23.00,  "FP16": 46.00,  "INT4": 0.0},
    "RX 6800":         {"FP32": 30.00,  "FP16": 60.00,  "INT4": 0.0},
    "RX 6800 XT":      {"FP32": 34.00,  "FP16": 68.00,  "INT4": 0.0},
    "RX 6900 XT":      {"FP32": 40.00,  "FP16": 80.00,  "INT4": 0.0},
    "RX 7600":         {"FP32": 25.00,  "FP16": 50.00,  "INT4": 0.0},
    "RX 7700 XT":      {"FP32": 35.00,  "FP16": 70.00,  "INT4": 0.0},
    "RX 7900 XT":      {"FP32": 40.00,  "FP16": 80.00,  "INT4": 0.0},
    "RX 7900 XTX":     {"FP32": 61.10,  "FP16": 122.20, "INT4": 0.0},
    # AMD MI / CDNA datacenter
    "MI50":            {"FP32": 13.70,  "FP16": 27.40,  "INT4": 0.0},
    "MI100":           {"FP32": 23.10,  "FP16": 46.20,  "INT4": 0.0},
    "MI200":           {"FP32": 300.0,  "FP16": 600.0,  "INT4": 3000.0},
    "MI300":           {"FP32": 400.0,  "FP16": 800.0,  "INT4": 4000.0},
    "MI355X":          {"FP32": 157.0,  "FP16": 2500.0, "INT4": 10000.0},
    # Hopper / Grace superchips
    "H200":            {"FP32": 350.0,  "FP16": 700.0,  "INT4": 3500.0},
    "GH200":           {"FP32": 300.0,  "FP16": 600.0,  "INT4": 3000.0},  # H100-class GPU + Grace CPU
    "GB10":            {"FP32": 400.0,  "FP16": 800.0,  "INT4": 4000.0},  # dev module, Blackwell-class
    # Datacenter (L20 is Ada; A40 and A2 are Ampere)
    "L20":             {"FP32": 44.0,   "FP16": 88.0,   "INT4": 700.0},
    "A40":             {"FP32": 37.4,   "FP16": 74.8,   "INT4": 600.0},
    "A2":              {"FP32": 4.5,    "FP16": 9.0,    "INT4": 160.0},
    # RTX workstation GPUs (Ampere, plus the Ada-generation A6000)
    "RTX A2000":       {"FP32": 8.0,    "FP16": 16.0,   "INT4": 0.0},
    "RTX A4000":       {"FP32": 19.2,   "FP16": 38.4,   "INT4": 0.0},
    "RTX A4500":       {"FP32": 23.7,   "FP16": 47.4,   "INT4": 0.0},
    "RTX A5000":       {"FP32": 27.8,   "FP16": 55.6,   "INT4": 0.0},
    "RTX A6000 Ada":   {"FP32": 91.1,   "FP16": 182.2,  "INT4": 1450.0},
}
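
# A minimal sketch of how the presets are meant to be read. This helper is
# illustrative only; `effective_tflops` is not part of the app, which does
# the same lookup inline in estimate_time below.
def effective_tflops(gpu: str, dtype: str = "FP16", count: int = 1,
                     utilization: float = 0.40) -> float:
    """Aggregate usable TFLOPs for `count` GPUs at a given utilization.

    Example: effective_tflops("A100", "FP16", count=8) -> 39.0 * 8 * 0.40 = 124.8
    """
    per_gpu = GPUS.get(gpu, {}).get(dtype, 0.0)  # 0.0 when the preset is missing
    return per_gpu * count * utilization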
# ------------------------
# CSS / Theme variables
# ------------------------
CSS = r"""
:root {
  --bg:#071233; --card:#07112a; --accent:#2563eb; --text:#e8f0ff; --muted:#9fb6e8;
}
body {
  background: var(--bg);
  color: var(--text);
  font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial;
}
.gradio-container { max-width: 920px; margin: 14px auto; padding: 12px; }

/* card */
.card {
  background: var(--card);
  border-radius: 12px;
  padding: 14px;
  box-shadow: 0 8px 26px rgba(2,6,23,0.5);
  border: 1px solid rgba(255,255,255,0.03);
}

/* accent and buttons */
.btn-theme {
  background: transparent;
  color: var(--accent);
  border: 1px solid var(--accent);
  padding: 8px 12px;
  border-radius: 10px;
  cursor: pointer;
}
.btn-theme:hover { background: rgba(255,255,255,0.02); }

/* result */
.result-box {
  background: linear-gradient(180deg, rgba(255,255,255,0.01), rgba(255,255,255,0.02));
  border-radius: 8px;
  padding: 10px;
  border: 1px solid rgba(255,255,255,0.03);
  color: var(--text);
  font-weight: 600;
}

/* small text */
.small-muted { color: var(--muted); font-size: 0.92em; }

/* themes */
.theme-blue   { --bg:#071233; --card:#07112a; --accent:#2563eb; --text:#e8f0ff; --muted:#9fb6e8; }
.theme-green  { --bg:#07120a; --card:#07120a; --accent:#16a34a; --text:#e8fff0; --muted:#9fe8b0; }
.theme-purple { --bg:#120521; --card:#15061a; --accent:#8b5cf6; --text:#f2e8ff; --muted:#c9b8f6; }

/* minor Gradio element tweaks */
input[type="number"], .gradio-number { background: transparent; color: var(--text); border-radius: 6px; }

/* theme button row */
.theme-btn-row { display: flex; gap: 8px; align-items: center; }
"""

# ------------------------
# Core logic
# ------------------------
def estimate_time(params_m: float, tokens_b: float, selected_gpu: str, dtype: str,
                  tf_override: float, utilization_pct: float, gpu_count: float):
    """Estimate wall-clock training time from the ~6 * params * tokens FLOPs rule of thumb."""
    if params_m <= 0 or tokens_b <= 0:
        return "Enter positive values for parameters and tokens."
    if gpu_count is None or gpu_count <= 0:
        return "Enter a positive number of GPUs."

    params = params_m * 1e6
    tokens = tokens_b * 1e9

    # Choose per-GPU TFLOPs: a manual override wins, otherwise fall back to the preset.
    if tf_override is not None and tf_override > 0:
        chosen_tf_per_gpu = float(tf_override)
        source = "manual override"
    else:
        try:
            chosen_tf_per_gpu = float(GPUS[selected_gpu].get(dtype, 0.0))
            source = f"preset ({selected_gpu} / {dtype})"
        except Exception:
            return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
    if chosen_tf_per_gpu <= 0:
        return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."

    # Scale by GPU count and utilization -> sustained FLOPs/sec.
    total_tf = chosen_tf_per_gpu * float(gpu_count)
    gpu_flops_per_sec = total_tf * 1e12 * max(0.001, utilization_pct / 100.0)

    flops_total = 6 * params * tokens  # ~6 FLOPs per parameter per token (forward + backward)
    seconds = flops_total / gpu_flops_per_sec
    hours = seconds / 3600.0
    days = hours / 24.0

    seq_len = 2048.0
    steps = max(1.0, tokens / seq_len)  # rough: one "step" per 2048-token sequence
    flops_per_step = flops_total / steps

    # Warnings for implausible inputs.
    warnings = []
    if gpu_count >= 10000:
        warnings.append("⚠️ Wow, that's a lot of GPUs; are you sure? Check units (e.g., 8, not 800k).")
    if total_tf > 1e6:
        warnings.append("⚠️ Total TFLOPs exceed 1e6 TFLOPs (exaFLOP scale); results are rough estimates.")

    out = [
        "🔥 Roman's Training Time Estimator",
        "",
        f"Model params: {params_m:,.1f} M",
        f"Training tokens: {tokens_b:,.3f} B",
        f"Total training FLOPs (approx): {flops_total:.3e}",
        "",
        f"Hardware source: {source}",
        f"Per-GPU TFLOPs: {chosen_tf_per_gpu:.3f} TFLOPs",
        f"GPU count: {int(gpu_count):,}",
        f"Total effective TFLOPs (before utilization): {total_tf:,.3f} TFLOPs",
        f"Utilization: {utilization_pct:.0f}%",
        "",
        f"⏱️ Wall-clock estimate: {hours:,.2f} hours (~{days:,.2f} days)",
        f"Steps (rough, seq_len=2048): {steps:,.0f} steps",
        f"FLOPs / step (avg): {flops_per_step:.3e}",
    ]
    if warnings:
        out.append("")
        out.extend(warnings)
    if tf_override and tf_override > 0 and selected_gpu != "Custom":
        out.append("")
        out.append("⚠️ Note: you overrode the preset TFLOPs. Ensure the value is per GPU, in TFLOPs (e.g., 39.0 matches the A100 FP16 preset).")
    return "\n".join(out)
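
# Worked example with the RTX 4070 preset above (illustrative numbers, not a
# benchmark): a 7,000 M-parameter model on 2,000 B tokens needs roughly
#   6 * 7e9 * 2e12 = 8.4e22 FLOPs;
# 8 GPUs at 150 TFLOPs (the FP16 preset) and 40% utilization sustain
#   8 * 150e12 * 0.40 = 4.8e14 FLOPs/s,
# so the wall-clock estimate is 8.4e22 / 4.8e14 ~ 1.75e8 s ~ 2,025 days.
# Equivalent call: estimate_time(7000, 2000, "RTX 4070", "FP16", 0.0, 40, 8)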
def preset_tf_for_ui(selected_gpu: str, dtype: str):
    """Return the preset TFLOPs for the UI fields, or 0.0 when GPU/dtype is unknown."""
    if selected_gpu in GPUS:
        return GPUS[selected_gpu].get(dtype, 0.0)
    return 0.0


# ------------------------
# Build UI
# ------------------------
# Inline HTML for theme buttons with client-side onclick handlers
THEME_BUTTONS_HTML = """