Spaces:

DataMuncher-Labs
/

TrainingTime

Paused

App Files Files Community

Roman190928 committed on Dec 30, 2025

Commit

1097568

verified ·

1 Parent(s): 0dd02c7

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -14

app.py CHANGED Viewed

@@ -36,7 +36,7 @@ GPUS = {
     "RTX 4080 SUPER":  {"FP32":167.60,  "FP16": 335.20,  "INT4":   0.0},
     "RTX 4090":        {"FP32":201.00,  "FP16": 402.00,  "INT4":1676.0},
-    # Blackwell consumer (RTX 50xx series—kept entries from your CSV)
     "RTX 5050":        {"FP32": 16.90,  "FP16":  33.80,  "INT4":   0.0},
     "RTX 5060":        {"FP32": 31.10,  "FP16":  62.20,  "INT4":   0.0},
     "RTX 5060 Ti":     {"FP32": 45.60,  "FP16":  91.20,  "INT4":   0.0},
@@ -52,7 +52,7 @@ GPUS = {
     "A100":            {"FP32": 19.50,  "FP16":  39.00,  "INT4": 624.0},
     "A100 80GB":       {"FP32": 19.50,  "FP16":  39.00,  "INT4": 624.0},
-    # Hopper / Blackwell datacenter estimates (kept as approximate / reported)
     "H100":            {"FP32":300.0,   "FP16": 600.0,   "INT4":3000.0},
     "B100":            {"FP32":400.0,   "FP16": 800.0,   "INT4":4000.0},
     "B200":            {"FP32":500.0,   "FP16":1000.0,   "INT4":5000.0},
@@ -78,12 +78,8 @@ GPUS = {
     "MI100":           {"FP32": 23.10,  "FP16":  46.20,  "INT4":   0.0},
     "MI200":           {"FP32":300.0,   "FP16": 600.0,   "INT4":3000.0},
     "MI300":           {"FP32":400.0,   "FP16": 800.0,   "INT4":4000.0},
-    # helper custom entry
-    #"Custom":          {"FP32":  1.00,  "FP16":   1.00,  "INT4":   1.0},
 }
 # ------------------------
 # CSS / Theme variables
 # ------------------------
@@ -125,27 +121,34 @@ def estimate_time(params_m: float,
                   selected_gpu: str,
                   dtype: str,
                   tf_override: float,
-                  utilization_pct: float):
     if params_m <= 0 or tokens_b <= 0:
         return "Enter positive values for parameters and tokens."
     params = params_m * 1e6
     tokens = tokens_b * 1e9
     if tf_override is not None and tf_override > 0:
-        chosen_tf = float(tf_override)
         source = "manual override"
     else:
         try:
-            chosen_tf = float(GPUS[selected_gpu].get(dtype, 0.0))
             source = f"preset ({selected_gpu} / {dtype})"
         except Exception:
             return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
-    if chosen_tf <= 0:
         return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
-    gpu_flops_per_sec = chosen_tf * 1e12 * (max(0.001, utilization_pct / 100.0))
     flops_total = 6 * params * tokens
     seconds = flops_total / gpu_flops_per_sec
@@ -156,6 +159,13 @@ def estimate_time(params_m: float,
     steps = max(1.0, tokens / seq_len)
     flops_per_step = flops_total / steps if steps > 0 else 0.0
     out = [
         f"🔥 Roman's Training Time Estimator",
         "",
@@ -164,13 +174,20 @@ def estimate_time(params_m: float,
         f"Total training FLOPs (approx): {flops_total:.3e}",
         "",
         f"Hardware source: {source}",
-        f"Effective TFLOPs used: {chosen_tf:.3f} TFLOPs (utilization {utilization_pct:.0f}%)",
         "",
         f"⏱️ Wall-clock estimate: {hours:,.2f} hours (~{days:,.2f} days)",
         f"Steps (rough, seq_len=2048): {steps:,.0f} steps",
         f"FLOPs / step (avg): {flops_per_step:.3e}",
     ]
     if tf_override and tf_override > 0 and selected_gpu != "Custom":
         out.append("")
         out.append("⚠️ Note: you overrode the preset TFLOPs. Ensure the value is in TFLOPs (e.g., 150 for A100 FP16-like).")
@@ -215,10 +232,12 @@ with gr.Blocks() as demo:
         with gr.Row():
             tf_override = gr.Number(value=preset_tf_for_ui("A100 80GB", "FP16"), label="GPU TFLOPs (teraFLOPs) — editable", precision=3)
             utilization = gr.Slider(minimum=1, maximum=100, value=80, step=1, label="Hardware Utilization (%) — realistic throughput")
     with gr.Column(elem_classes="card"):
         gr.Markdown("### Estimate")
-        result = gr.Textbox(lines=12, interactive=False, elem_classes="result-box", label="Result")
         run_btn = gr.Button("Estimate Training Time", elem_classes="btn-theme")
     # update TF override when gpu/dtype change
@@ -229,7 +248,7 @@ with gr.Blocks() as demo:
     # Run button computes estimate
     run_btn.click(estimate_time,
-                  inputs=[params, tokens, gpu_dropdown, dtype_dropdown, tf_override, utilization],
                   outputs=[result])
     gr.HTML("<div class='small-muted'>Tip: GPU presets are TFLOPs per dtype. You can edit the TFLOPs number to override. Utilization reduces theoretical peak to realistic throughput.</div>")

     "RTX 4080 SUPER":  {"FP32":167.60,  "FP16": 335.20,  "INT4":   0.0},
     "RTX 4090":        {"FP32":201.00,  "FP16": 402.00,  "INT4":1676.0},
+    # Blackwell consumer (RTX 50xx series)
     "RTX 5050":        {"FP32": 16.90,  "FP16":  33.80,  "INT4":   0.0},
     "RTX 5060":        {"FP32": 31.10,  "FP16":  62.20,  "INT4":   0.0},
     "RTX 5060 Ti":     {"FP32": 45.60,  "FP16":  91.20,  "INT4":   0.0},
     "A100":            {"FP32": 19.50,  "FP16":  39.00,  "INT4": 624.0},
     "A100 80GB":       {"FP32": 19.50,  "FP16":  39.00,  "INT4": 624.0},
+    # Hopper / Blackwell datacenter estimates
     "H100":            {"FP32":300.0,   "FP16": 600.0,   "INT4":3000.0},
     "B100":            {"FP32":400.0,   "FP16": 800.0,   "INT4":4000.0},
     "B200":            {"FP32":500.0,   "FP16":1000.0,   "INT4":5000.0},
     "MI100":           {"FP32": 23.10,  "FP16":  46.20,  "INT4":   0.0},
     "MI200":           {"FP32":300.0,   "FP16": 600.0,   "INT4":3000.0},
     "MI300":           {"FP32":400.0,   "FP16": 800.0,   "INT4":4000.0},
 }
 # ------------------------
 # CSS / Theme variables
 # ------------------------
                   selected_gpu: str,
                   dtype: str,
                   tf_override: float,
+                  utilization_pct: float,
+                  gpu_count: float):
     if params_m <= 0 or tokens_b <= 0:
         return "Enter positive values for parameters and tokens."
+    if gpu_count is None or gpu_count <= 0:
+        return "Enter a positive number of GPUs."
     params = params_m * 1e6
     tokens = tokens_b * 1e9
+    # choose TFLOPs per-GPU
     if tf_override is not None and tf_override > 0:
+        chosen_tf_per_gpu = float(tf_override)
         source = "manual override"
     else:
         try:
+            chosen_tf_per_gpu = float(GPUS[selected_gpu].get(dtype, 0.0))
             source = f"preset ({selected_gpu} / {dtype})"
         except Exception:
             return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
+    if chosen_tf_per_gpu <= 0:
         return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
+    # multiply by count and utilization -> FLOPs/sec
+    total_tf = chosen_tf_per_gpu * float(gpu_count)
+    gpu_flops_per_sec = total_tf * 1e12 * (max(0.001, utilization_pct / 100.0))
     flops_total = 6 * params * tokens
     seconds = flops_total / gpu_flops_per_sec
     steps = max(1.0, tokens / seq_len)
     flops_per_step = flops_total / steps if steps > 0 else 0.0
+    # warnings for absurd counts
+    warnings = []
+    if gpu_count >= 10000:
+        warnings.append("⚠️ Wow that's a lot of GPUs — are you sure? Check units (e.g., 8 not 800k).")
+    if total_tf > 1e6:
+        warnings.append("⚠️ Total TFLOPs exceed 1e6 TFLOPs (exaFLOPs scale) — results are rough estimates.")
     out = [
         f"🔥 Roman's Training Time Estimator",
         "",
         f"Total training FLOPs (approx): {flops_total:.3e}",
         "",
         f"Hardware source: {source}",
+        f"Per-GPU TFLOPs: {chosen_tf_per_gpu:.3f} TFLOPs",
+        f"GPU count: {int(gpu_count):,}",
+        f"Total effective TFLOPs (before utilization): {total_tf:,.3f} TFLOPs",
+        f"Utilization: {utilization_pct:.0f}%",
         "",
         f"⏱️ Wall-clock estimate: {hours:,.2f} hours (~{days:,.2f} days)",
         f"Steps (rough, seq_len=2048): {steps:,.0f} steps",
         f"FLOPs / step (avg): {flops_per_step:.3e}",
     ]
+    if warnings:
+        out.append("")
+        out.extend(warnings)
     if tf_override and tf_override > 0 and selected_gpu != "Custom":
         out.append("")
         out.append("⚠️ Note: you overrode the preset TFLOPs. Ensure the value is in TFLOPs (e.g., 150 for A100 FP16-like).")
         with gr.Row():
             tf_override = gr.Number(value=preset_tf_for_ui("A100 80GB", "FP16"), label="GPU TFLOPs (teraFLOPs) — editable", precision=3)
             utilization = gr.Slider(minimum=1, maximum=100, value=80, step=1, label="Hardware Utilization (%) — realistic throughput")
+        with gr.Row():
+            gpu_count = gr.Number(value=1, label="GPU Count (how many of the chosen preset you have)", precision=0)
     with gr.Column(elem_classes="card"):
         gr.Markdown("### Estimate")
+        result = gr.Textbox(lines=14, interactive=False, elem_classes="result-box", label="Result")
         run_btn = gr.Button("Estimate Training Time", elem_classes="btn-theme")
     # update TF override when gpu/dtype change
     # Run button computes estimate
     run_btn.click(estimate_time,
+                  inputs=[params, tokens, gpu_dropdown, dtype_dropdown, tf_override, utilization, gpu_count],
                   outputs=[result])
     gr.HTML("<div class='small-muted'>Tip: GPU presets are TFLOPs per dtype. You can edit the TFLOPs number to override. Utilization reduces theoretical peak to realistic throughput.</div>")