3v324v23 Claude Opus 4.7 (1M context) committed on
Commit
862c42d
·
1 Parent(s): 1f383ce

Add Gradio app comparing cloud GPU vs API costs

Browse files

Interactive break-even calculator: editable model/cloud pricing presets,
k-token request sliders, "your workload" RPS slider with live cost
comparison, and a Plotly chart marking break-even crossings.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (3) hide show
  1. .gitignore +4 -0
  2. app.py +330 -0
  3. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ .gradio/
app.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import plotly.graph_objects as go
4
+
5
+
6
# API pricing presets: label -> (input $ per 1M tokens, output $ per 1M tokens).
# "Custom" maps to None, which apply_model_preset treats as "keep the current prices".
MODEL_PRESETS = {
    "DeepSeek V4 — OpenRouter (~90% cache)": (0.041, 0.87),
    "Claude Sonnet 4.6": (3.0, 15.0),
    "Claude Haiku 4.5": (1.0, 5.0),
    "Custom": None,
}

# Cloud GPU presets: provider label -> rows of [config name, $ per hour].
# "Custom" maps to None, which apply_cloud_preset treats as "keep the current table".
CLOUD_PRESETS = {
    "GMI Cloud": [["H200 × 8", 20.8], ["B200 × 8", 32.0], ["GB200 × 4", 32.0]],
    "Custom": None,
}

# Initial values for the UI controls; reset_all restores exactly these.
DEFAULT_MODEL = "DeepSeek V4 — OpenRouter (~90% cache)"
DEFAULT_CLOUD = "GMI Cloud"
DEFAULT_IN_K = 64.0  # input tokens per request, in thousands
DEFAULT_OUT_K = 4.0  # output tokens per request, in thousands
DEFAULT_RPS = 1.0  # planned requests per second

# Line colors for GPU configs on the chart; indexed modulo the list length.
GPU_COLORS = ["#2E86DE", "#10AC84", "#EE5253", "#8854D0", "#F79F1F", "#576574"]
# Color of the "your workload" vertical line, annotation and marker.
WORKLOAD_COLOR = "#9b59b6"
26
+
27
+
28
def cost_per_request(in_k: float, out_k: float, in_price: float, out_price: float) -> float:
    """Return the API cost, in dollars, of one request.

    Args:
        in_k: Input tokens per request, in thousands.
        out_k: Output tokens per request, in thousands.
        in_price: Input price in dollars per one million tokens.
        out_price: Output price in dollars per one million tokens.
    """
    # Each term is (token count) x (price per 1M tokens); divide once at the end.
    input_term = in_k * 1000 * in_price
    output_term = out_k * 1000 * out_price
    return (input_term + output_term) / 1_000_000
30
+
31
+
32
def parse_gpus(df):
    """Extract the usable ``(name, hourly_rate)`` pairs from the GPU table.

    Args:
        df: The GPU table, either a pandas DataFrame or a list of rows
            (``None`` is treated as an empty table). Column 0 is the config
            name and column 1 the price in dollars per hour.

    Returns:
        A list of ``(name, hourly)`` tuples. Rows are skipped when they have
        fewer than two cells, a blank or missing name, or an hourly rate that
        is non-numeric, non-finite, zero or negative.
    """
    import math

    if isinstance(df, pd.DataFrame):
        # Keep missing cells as None/NaN instead of filling them with 0:
        # a filled name cell would become the bogus config name "0".
        rows = df.values.tolist()
    else:
        rows = df or []

    out = []
    for row in rows:
        if not row or len(row) < 2:
            continue
        raw_name, raw_hourly = row[0], row[1]

        name_missing = (
            raw_name is None
            or raw_name is pd.NA
            or (isinstance(raw_name, float) and math.isnan(raw_name))
        )
        name = "" if name_missing else str(raw_name).strip()

        try:
            hourly = float(raw_hourly)
        except (TypeError, ValueError):
            continue

        # NaN and inf both pass a plain `hourly <= 0` check, so test finiteness first.
        if not name or not math.isfinite(hourly) or hourly <= 0:
            continue
        out.append((name, hourly))
    return out
50
+
51
+
52
def compute(in_price, out_price, in_k, out_k, gpu_df, planned_rps):
    """Recompute every output of the calculator from the current inputs.

    Args:
        in_price: Input price in dollars per one million tokens.
        out_price: Output price in dollars per one million tokens.
        in_k: Input tokens per request, in thousands.
        out_k: Output tokens per request, in thousands.
        gpu_df: GPU table (config name, dollars per hour), as accepted by
            ``parse_gpus``.
        planned_rps: Planned request rate in requests per second.

    Returns:
        A 4-tuple ``(headline, break_df, workload_df, fig)``: the headline
        Markdown, the break-even table, the workload comparison table and the
        Plotly figure. When the per-request cost is not positive or no GPU row
        is usable, both tables are empty and the figure is a placeholder.
    """
    # A cleared numeric field reaches the handler as None; treat it as 0 so the
    # empty-state path below is taken instead of raising TypeError on `None * 1000`.
    in_price, out_price, in_k, out_k, planned_rps = (
        0.0 if value is None else value
        for value in (in_price, out_price, in_k, out_k, planned_rps)
    )

    cpr = cost_per_request(in_k, out_k, in_price, out_price)
    gpus = parse_gpus(gpu_df)
    headline = _headline(cpr, in_k, out_k, in_price, out_price)

    if cpr <= 0 or not gpus:
        empty_break = pd.DataFrame(columns=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"])
        empty_workload = pd.DataFrame(columns=["Option", "$ / hour", "vs API"])
        return headline, empty_break, empty_workload, _empty_figure()

    # Break-even: the request rate at which API spend equals the flat GPU rate.
    break_rows = []
    max_rps = 0.0
    for name, hourly in gpus:
        rph = hourly / cpr
        rps = rph / 3600
        max_rps = max(max_rps, rps)
        break_rows.append({
            "GPU config": name,
            "$/hour": f"${hourly:,.2f}",
            "Break-even req/hr": f"{rph:,.0f}",
            "Break-even RPS": f"{rps:,.3f}",
        })
    break_df = pd.DataFrame(break_rows)

    # Workload comparison: hourly API spend at the planned rate vs each GPU config.
    api_hourly = planned_rps * 3600 * cpr
    workload_rows = [{
        "Option": "API",
        "$ / hour": f"${api_hourly:,.2f}",
        "vs API": "—",
    }]
    for name, hourly in gpus:
        diff = hourly - api_hourly
        if abs(diff) < 0.005:
            # Below half a cent the difference would display as $0.00.
            note = "break-even"
        elif diff < 0:
            note = f"−${abs(diff):,.2f}/hr cheaper than API"
        else:
            note = f"+${diff:,.2f}/hr pricier than API"
        workload_rows.append({
            "Option": name,
            "$ / hour": f"${hourly:,.2f}",
            "vs API": note,
        })
    workload_df = pd.DataFrame(workload_rows)

    # Extend the x axis past the furthest break-even point and the planned rate.
    x_max = max(max_rps * 1.6, planned_rps * 1.3, 0.1)
    fig = _build_figure(cpr, gpus, x_max, planned_rps)
    return headline, break_df, workload_df, fig
100
+
101
+
102
+ def _headline(cpr, in_k, out_k, in_price, out_price):
103
+ return (
104
+ f"### API cost per request: **${cpr:,.6f}** \n"
105
+ f"_({int(in_k * 1000):,} in × ${in_price}/1M + {int(out_k * 1000):,} out × ${out_price}/1M)_"
106
+ )
107
+
108
+
109
def _empty_figure():
    """Build a blank placeholder chart with a centered hint for the user."""
    hint = dict(
        text="Set positive values for tokens, prices, and at least one GPU row.",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
    )
    placeholder = go.Figure()
    placeholder.update_layout(template="plotly_white", height=480, annotations=[hint])
    return placeholder
118
+
119
+
120
def _build_figure(cpr, gpus, x_max, planned_rps):
    """Plot hourly API cost against the flat hourly cost of each GPU config.

    Args:
        cpr: API cost of one request, in dollars.
        gpus: ``(name, hourly_rate)`` pairs, one per GPU configuration.
        x_max: Right edge of the x axis, in requests per second.
        planned_rps: The planned request rate to mark on the chart.

    Returns:
        A Plotly figure holding the API cost line, one dashed horizontal line
        per GPU config, a marker at each break-even point that falls within
        the x range, and the planned-workload line and marker.
    """
    sample_count = 200
    rps_axis = [x_max * step / (sample_count - 1) for step in range(sample_count)]
    api_line = [rate * 3600 * cpr for rate in rps_axis]

    chart = go.Figure()
    api_trace = go.Scatter(
        x=rps_axis,
        y=api_line,
        mode="lines",
        name="API cost",
        line=dict(color="#222f3e", width=3),
        hovertemplate="RPS: %{x:.3f}<br>API $/hr: $%{y:,.2f}<extra></extra>",
    )
    chart.add_trace(api_trace)

    # Height of the workload line: 18% above the tallest thing drawn.
    priciest_gpu = max(rate for _, rate in gpus)
    y_top = max(api_line[-1], priciest_gpu) * 1.18

    palette_size = len(GPU_COLORS)
    for index, (label, rate) in enumerate(gpus):
        shade = GPU_COLORS[index % palette_size]

        # Renting costs the same at any request rate, so it is a flat line.
        flat_trace = go.Scatter(
            x=[0, x_max],
            y=[rate, rate],
            mode="lines",
            name=f"{label} (${rate:.2f}/hr)",
            line=dict(color=shade, width=2, dash="dash"),
            hovertemplate=f"{label}<br>$/hr: ${rate:,.2f}<extra></extra>",
        )
        chart.add_trace(flat_trace)

        even_per_hour = rate / cpr
        even_rps = even_per_hour / 3600
        if even_rps <= x_max:
            crossing_hover = (
                f"{label} break-even<br>"
                f"RPS: {even_rps:.3f}<br>"
                f"req/hr: {even_per_hour:,.0f}<br>"
                f"$/hr: ${rate:,.2f}<extra></extra>"
            )
            crossing_trace = go.Scatter(
                x=[even_rps],
                y=[rate],
                mode="markers+text",
                marker=dict(color=shade, size=11, line=dict(color="white", width=2)),
                text=[f" {even_rps:.3f} RPS"],
                textposition="middle right",
                textfont=dict(color=shade, size=12),
                showlegend=False,
                hovertemplate=crossing_hover,
            )
            chart.add_trace(crossing_trace)

    # Planned workload: dotted vertical line, label at its top, diamond on the API line.
    workload_api_cost = planned_rps * 3600 * cpr
    chart.add_shape(
        type="line",
        x0=planned_rps,
        x1=planned_rps,
        y0=0,
        y1=y_top,
        line=dict(color=WORKLOAD_COLOR, width=2, dash="dot"),
    )
    chart.add_annotation(
        x=planned_rps,
        y=y_top,
        text=f"your workload: {planned_rps:.2f} RPS",
        showarrow=False,
        font=dict(color=WORKLOAD_COLOR, size=12),
        yshift=8,
    )
    workload_trace = go.Scatter(
        x=[planned_rps],
        y=[workload_api_cost],
        mode="markers",
        marker=dict(
            color=WORKLOAD_COLOR,
            size=11,
            symbol="diamond",
            line=dict(color="white", width=2),
        ),
        name="Your workload (on API)",
        hovertemplate=f"At {planned_rps:.2f} RPS<br>API $/hr: ${workload_api_cost:,.2f}<extra></extra>",
    )
    chart.add_trace(workload_trace)

    chart.update_layout(
        template="plotly_white",
        height=480,
        margin=dict(l=60, r=30, t=70, b=50),
        xaxis=dict(title="Requests per second", range=[0, x_max]),
        yaxis=dict(title="$ / hour", rangemode="tozero"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
        title=dict(
            text="Cloud GPU $/hr vs API $/hr — where lines cross is break-even",
            font=dict(size=14),
        ),
    )
    return chart
191
+
192
+
193
def apply_model_preset(preset_name, cur_in, cur_out):
    """Return the (input, output) prices to show for the chosen model preset.

    A preset with no stored prices (such as "Custom") or an unknown name
    leaves the current prices untouched.
    """
    prices = MODEL_PRESETS.get(preset_name)
    if prices is not None:
        return prices[0], prices[1]
    return cur_in, cur_out
198
+
199
+
200
def apply_cloud_preset(preset_name, cur_df):
    """Return the GPU table to show for the chosen cloud preset.

    A preset with no stored rows (such as "Custom") or an unknown name
    leaves the current table untouched.
    """
    preset_rows = CLOUD_PRESETS.get(preset_name)
    return cur_df if preset_rows is None else preset_rows
205
+
206
+
207
def reset_all():
    """Return the default value for every resettable control.

    The tuple order is: model preset, cloud preset, input price, output price,
    input tokens (k), output tokens (k), GPU table, planned RPS.
    """
    default_prices = MODEL_PRESETS[DEFAULT_MODEL]
    default_gpu_rows = CLOUD_PRESETS[DEFAULT_CLOUD]
    return (
        DEFAULT_MODEL,
        DEFAULT_CLOUD,
        default_prices[0],
        default_prices[1],
        DEFAULT_IN_K,
        DEFAULT_OUT_K,
        default_gpu_rows,
        DEFAULT_RPS,
    )
218
+
219
+
220
# UI definition: inputs in the left column, results in the right column, then
# the event wiring that recomputes the outputs.
# NOTE(review): indentation was lost in the extracted source. The nesting below
# (reset button inside the left column, chart inside the right column, Markdown
# text indented inside its triple-quoted string) is reconstructed; confirm it
# against the repository.
with gr.Blocks(title="Cloud bills vs API bills") as demo:
    gr.Markdown(
        """
        # Cloud bills vs API bills
        At what request rate does renting GPUs beat paying per token?
        Drag the **Your workload** slider to see live cost at your planned scale.
        """
    )

    with gr.Row():
        # Left column: every editable input.
        with gr.Column(scale=1):
            gr.Markdown("### Model & API pricing")
            model_preset = gr.Dropdown(
                choices=list(MODEL_PRESETS.keys()),
                value=DEFAULT_MODEL,
                label="Model preset",
                info="Pick a preset or switch to Custom to enter your own prices.",
            )
            in_price = gr.Number(
                value=MODEL_PRESETS[DEFAULT_MODEL][0],
                label="Input $ / 1M tokens",
                precision=4,
                info="Effective input price (post-cache for OpenRouter-style providers).",
            )
            out_price = gr.Number(
                value=MODEL_PRESETS[DEFAULT_MODEL][1],
                label="Output $ / 1M tokens",
                precision=4,
            )

            gr.Markdown("### Request shape")
            # Both sliders are in thousands of tokens.
            in_tokens_k = gr.Slider(
                1, 256, value=DEFAULT_IN_K, step=1,
                label="Input tokens / request (k)",
                info="64 means 64,000 tokens. Slide for typical context size.",
            )
            out_tokens_k = gr.Slider(
                0.1, 32, value=DEFAULT_OUT_K, step=0.1,
                label="Output tokens / request (k)",
                info="4 means 4,000 tokens.",
            )

            gr.Markdown("### Cloud GPU rates")
            cloud_preset = gr.Dropdown(
                choices=list(CLOUD_PRESETS.keys()),
                value=DEFAULT_CLOUD,
                label="Cloud provider preset",
                info="Edit the table below to match your contract.",
            )
            # Editable table of (config name, $ per hour); compute() parses it with parse_gpus.
            gpu_df = gr.Dataframe(
                value=CLOUD_PRESETS[DEFAULT_CLOUD],
                headers=["Config", "$ / hour"],
                datatype=["str", "number"],
                column_count=(2, "fixed"),
                row_count=(3, "dynamic"),
                interactive=True,
            )

            reset_btn = gr.Button("↺ Reset to defaults", variant="secondary", size="sm")

        # Right column: workload slider plus the read-only outputs of compute().
        with gr.Column(scale=2):
            gr.Markdown("### Your workload")
            planned_rps = gr.Slider(
                0, 5, value=DEFAULT_RPS, step=0.05,
                label="Planned requests / second",
                info="What scale do you expect to run at? The dotted line on the chart marks this point.",
            )
            workload_table = gr.Dataframe(
                headers=["Option", "$ / hour", "vs API"],
                interactive=False,
                wrap=True,
            )

            gr.Markdown("### Break-even points")
            headline = gr.Markdown()
            break_table = gr.Dataframe(
                headers=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"],
                interactive=False,
                wrap=True,
            )

            chart = gr.Plot()

    # Order matters: these lists must match compute()'s parameter order and the
    # order of its returned tuple.
    inputs = [in_price, out_price, in_tokens_k, out_tokens_k, gpu_df, planned_rps]
    outputs = [headline, break_table, workload_table, chart]

    # Recompute all outputs whenever any single input changes.
    for c in inputs:
        c.change(compute, inputs=inputs, outputs=outputs)

    # Presets only write into the price fields / GPU table.
    model_preset.change(
        apply_model_preset,
        inputs=[model_preset, in_price, out_price],
        outputs=[in_price, out_price],
    )
    cloud_preset.change(
        apply_cloud_preset,
        inputs=[cloud_preset, gpu_df],
        outputs=[gpu_df],
    )

    # Order matters: must match the tuple returned by reset_all().
    reset_outputs = [model_preset, cloud_preset, in_price, out_price,
                     in_tokens_k, out_tokens_k, gpu_df, planned_rps]
    # Restore defaults, then recompute explicitly from the restored values.
    reset_btn.click(reset_all, outputs=reset_outputs).then(
        compute, inputs=inputs, outputs=outputs
    )

    # Populate the outputs once when the page first loads.
    demo.load(compute, inputs=inputs, outputs=outputs)


if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==6.14.0
2
+ plotly>=5.20
3
+ pandas>=2.0