Roman190928 committed on
Commit
1097568
·
verified ·
1 Parent(s): 0dd02c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -14
app.py CHANGED
@@ -36,7 +36,7 @@ GPUS = {
36
  "RTX 4080 SUPER": {"FP32":167.60, "FP16": 335.20, "INT4": 0.0},
37
  "RTX 4090": {"FP32":201.00, "FP16": 402.00, "INT4":1676.0},
38
 
39
- # Blackwell consumer (RTX 50xx series—kept entries from your CSV)
40
  "RTX 5050": {"FP32": 16.90, "FP16": 33.80, "INT4": 0.0},
41
  "RTX 5060": {"FP32": 31.10, "FP16": 62.20, "INT4": 0.0},
42
  "RTX 5060 Ti": {"FP32": 45.60, "FP16": 91.20, "INT4": 0.0},
@@ -52,7 +52,7 @@ GPUS = {
52
  "A100": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
53
  "A100 80GB": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
54
 
55
- # Hopper / Blackwell datacenter estimates (kept as approximate / reported)
56
  "H100": {"FP32":300.0, "FP16": 600.0, "INT4":3000.0},
57
  "B100": {"FP32":400.0, "FP16": 800.0, "INT4":4000.0},
58
  "B200": {"FP32":500.0, "FP16":1000.0, "INT4":5000.0},
@@ -78,12 +78,8 @@ GPUS = {
78
  "MI100": {"FP32": 23.10, "FP16": 46.20, "INT4": 0.0},
79
  "MI200": {"FP32":300.0, "FP16": 600.0, "INT4":3000.0},
80
  "MI300": {"FP32":400.0, "FP16": 800.0, "INT4":4000.0},
81
-
82
- # helper custom entry
83
- #"Custom": {"FP32": 1.00, "FP16": 1.00, "INT4": 1.0},
84
  }
85
 
86
-
87
  # ------------------------
88
  # CSS / Theme variables
89
  # ------------------------
@@ -125,27 +121,34 @@ def estimate_time(params_m: float,
125
  selected_gpu: str,
126
  dtype: str,
127
  tf_override: float,
128
- utilization_pct: float):
 
129
  if params_m <= 0 or tokens_b <= 0:
130
  return "Enter positive values for parameters and tokens."
131
 
 
 
 
132
  params = params_m * 1e6
133
  tokens = tokens_b * 1e9
134
 
 
135
  if tf_override is not None and tf_override > 0:
136
- chosen_tf = float(tf_override)
137
  source = "manual override"
138
  else:
139
  try:
140
- chosen_tf = float(GPUS[selected_gpu].get(dtype, 0.0))
141
  source = f"preset ({selected_gpu} / {dtype})"
142
  except Exception:
143
  return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
144
 
145
- if chosen_tf <= 0:
146
  return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
147
 
148
- gpu_flops_per_sec = chosen_tf * 1e12 * (max(0.001, utilization_pct / 100.0))
 
 
149
 
150
  flops_total = 6 * params * tokens
151
  seconds = flops_total / gpu_flops_per_sec
@@ -156,6 +159,13 @@ def estimate_time(params_m: float,
156
  steps = max(1.0, tokens / seq_len)
157
  flops_per_step = flops_total / steps if steps > 0 else 0.0
158
 
 
 
 
 
 
 
 
159
  out = [
160
  f"🔥 Roman's Training Time Estimator",
161
  "",
@@ -164,13 +174,20 @@ def estimate_time(params_m: float,
164
  f"Total training FLOPs (approx): {flops_total:.3e}",
165
  "",
166
  f"Hardware source: {source}",
167
- f"Effective TFLOPs used: {chosen_tf:.3f} TFLOPs (utilization {utilization_pct:.0f}%)",
 
 
 
168
  "",
169
  f"⏱️ Wall-clock estimate: {hours:,.2f} hours (~{days:,.2f} days)",
170
  f"Steps (rough, seq_len=2048): {steps:,.0f} steps",
171
  f"FLOPs / step (avg): {flops_per_step:.3e}",
172
  ]
173
 
 
 
 
 
174
  if tf_override and tf_override > 0 and selected_gpu != "Custom":
175
  out.append("")
176
  out.append("⚠️ Note: you overrode the preset TFLOPs. Ensure the value is in TFLOPs (e.g., 150 for A100 FP16-like).")
@@ -215,10 +232,12 @@ with gr.Blocks() as demo:
215
  with gr.Row():
216
  tf_override = gr.Number(value=preset_tf_for_ui("A100 80GB", "FP16"), label="GPU TFLOPs (teraFLOPs) — editable", precision=3)
217
  utilization = gr.Slider(minimum=1, maximum=100, value=80, step=1, label="Hardware Utilization (%) — realistic throughput")
 
 
218
 
219
  with gr.Column(elem_classes="card"):
220
  gr.Markdown("### Estimate")
221
- result = gr.Textbox(lines=12, interactive=False, elem_classes="result-box", label="Result")
222
  run_btn = gr.Button("Estimate Training Time", elem_classes="btn-theme")
223
 
224
  # update TF override when gpu/dtype change
@@ -229,7 +248,7 @@ with gr.Blocks() as demo:
229
 
230
  # Run button computes estimate
231
  run_btn.click(estimate_time,
232
- inputs=[params, tokens, gpu_dropdown, dtype_dropdown, tf_override, utilization],
233
  outputs=[result])
234
 
235
  gr.HTML("<div class='small-muted'>Tip: GPU presets are TFLOPs per dtype. You can edit the TFLOPs number to override. Utilization reduces theoretical peak to realistic throughput.</div>")
 
36
  "RTX 4080 SUPER": {"FP32":167.60, "FP16": 335.20, "INT4": 0.0},
37
  "RTX 4090": {"FP32":201.00, "FP16": 402.00, "INT4":1676.0},
38
 
39
+ # Blackwell consumer (RTX 50xx series)
40
  "RTX 5050": {"FP32": 16.90, "FP16": 33.80, "INT4": 0.0},
41
  "RTX 5060": {"FP32": 31.10, "FP16": 62.20, "INT4": 0.0},
42
  "RTX 5060 Ti": {"FP32": 45.60, "FP16": 91.20, "INT4": 0.0},
 
52
  "A100": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
53
  "A100 80GB": {"FP32": 19.50, "FP16": 39.00, "INT4": 624.0},
54
 
55
+ # Hopper / Blackwell datacenter estimates
56
  "H100": {"FP32":300.0, "FP16": 600.0, "INT4":3000.0},
57
  "B100": {"FP32":400.0, "FP16": 800.0, "INT4":4000.0},
58
  "B200": {"FP32":500.0, "FP16":1000.0, "INT4":5000.0},
 
78
  "MI100": {"FP32": 23.10, "FP16": 46.20, "INT4": 0.0},
79
  "MI200": {"FP32":300.0, "FP16": 600.0, "INT4":3000.0},
80
  "MI300": {"FP32":400.0, "FP16": 800.0, "INT4":4000.0},
 
 
 
81
  }
82
 
 
83
  # ------------------------
84
  # CSS / Theme variables
85
  # ------------------------
 
121
  selected_gpu: str,
122
  dtype: str,
123
  tf_override: float,
124
+ utilization_pct: float,
125
+ gpu_count: float):
126
  if params_m <= 0 or tokens_b <= 0:
127
  return "Enter positive values for parameters and tokens."
128
 
129
+ if gpu_count is None or gpu_count <= 0:
130
+ return "Enter a positive number of GPUs."
131
+
132
  params = params_m * 1e6
133
  tokens = tokens_b * 1e9
134
 
135
+ # choose TFLOPs per-GPU
136
  if tf_override is not None and tf_override > 0:
137
+ chosen_tf_per_gpu = float(tf_override)
138
  source = "manual override"
139
  else:
140
  try:
141
+ chosen_tf_per_gpu = float(GPUS[selected_gpu].get(dtype, 0.0))
142
  source = f"preset ({selected_gpu} / {dtype})"
143
  except Exception:
144
  return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
145
 
146
+ if chosen_tf_per_gpu <= 0:
147
  return "Couldn't determine GPU TFLOPs. Pick a GPU or enter TFLOPs manually."
148
 
149
+ # multiply by count and utilization -> FLOPs/sec
150
+ total_tf = chosen_tf_per_gpu * float(gpu_count)
151
+ gpu_flops_per_sec = total_tf * 1e12 * (max(0.001, utilization_pct / 100.0))
152
 
153
  flops_total = 6 * params * tokens
154
  seconds = flops_total / gpu_flops_per_sec
 
159
  steps = max(1.0, tokens / seq_len)
160
  flops_per_step = flops_total / steps if steps > 0 else 0.0
161
 
162
+ # warnings for absurd counts
163
+ warnings = []
164
+ if gpu_count >= 10000:
165
+ warnings.append("⚠️ Wow that's a lot of GPUs — are you sure? Check units (e.g., 8 not 800k).")
166
+ if total_tf > 1e6:
167
+ warnings.append("⚠️ Total TFLOPs exceed 1e6 TFLOPs (exaFLOPs scale) — results are rough estimates.")
168
+
169
  out = [
170
  f"🔥 Roman's Training Time Estimator",
171
  "",
 
174
  f"Total training FLOPs (approx): {flops_total:.3e}",
175
  "",
176
  f"Hardware source: {source}",
177
+ f"Per-GPU TFLOPs: {chosen_tf_per_gpu:.3f} TFLOPs",
178
+ f"GPU count: {int(gpu_count):,}",
179
+ f"Total effective TFLOPs (before utilization): {total_tf:,.3f} TFLOPs",
180
+ f"Utilization: {utilization_pct:.0f}%",
181
  "",
182
  f"⏱️ Wall-clock estimate: {hours:,.2f} hours (~{days:,.2f} days)",
183
  f"Steps (rough, seq_len=2048): {steps:,.0f} steps",
184
  f"FLOPs / step (avg): {flops_per_step:.3e}",
185
  ]
186
 
187
+ if warnings:
188
+ out.append("")
189
+ out.extend(warnings)
190
+
191
  if tf_override and tf_override > 0 and selected_gpu != "Custom":
192
  out.append("")
193
  out.append("⚠️ Note: you overrode the preset TFLOPs. Ensure the value is in TFLOPs (e.g., 150 for A100 FP16-like).")
 
232
  with gr.Row():
233
  tf_override = gr.Number(value=preset_tf_for_ui("A100 80GB", "FP16"), label="GPU TFLOPs (teraFLOPs) — editable", precision=3)
234
  utilization = gr.Slider(minimum=1, maximum=100, value=80, step=1, label="Hardware Utilization (%) — realistic throughput")
235
+ with gr.Row():
236
+ gpu_count = gr.Number(value=1, label="GPU Count (how many of the chosen preset you have)", precision=0)
237
 
238
  with gr.Column(elem_classes="card"):
239
  gr.Markdown("### Estimate")
240
+ result = gr.Textbox(lines=14, interactive=False, elem_classes="result-box", label="Result")
241
  run_btn = gr.Button("Estimate Training Time", elem_classes="btn-theme")
242
 
243
  # update TF override when gpu/dtype change
 
248
 
249
  # Run button computes estimate
250
  run_btn.click(estimate_time,
251
+ inputs=[params, tokens, gpu_dropdown, dtype_dropdown, tf_override, utilization, gpu_count],
252
  outputs=[result])
253
 
254
  gr.HTML("<div class='small-muted'>Tip: GPU presets are TFLOPs per dtype. You can edit the TFLOPs number to override. Utilization reduces theoretical peak to realistic throughput.</div>")