Spaces:

andynoodles
/

CloudOrAPI

Sleeping

3v324v23 Claude Opus 4.7 (1M context) committed 29 days ago

Commit

862c42d

1 Parent(s): 1f383ce

Add Gradio app comparing cloud GPU vs API costs

Interactive break-even calculator: editable model/cloud pricing presets,
k-token request sliders, "your workload" RPS slider with live cost
comparison, and a Plotly chart marking break-even crossings.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (3) hide show

.gitignore +4 -0
app.py +330 -0
requirements.txt +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.venv/
+__pycache__/
+*.pyc
+.gradio/

app.py ADDED Viewed

	@@ -0,0 +1,330 @@

+import gradio as gr
+import pandas as pd
+import plotly.graph_objects as go
+# API pricing presets: label -> (input price, output price). The two values
+# seed the "Input $ / 1M tokens" and "Output $ / 1M tokens" fields.
+# None marks the "Custom" entry, which leaves the current field values alone.
+MODEL_PRESETS = {
+    "DeepSeek V4 — OpenRouter (~90% cache)": (0.041, 0.87),
+    "Claude Sonnet 4.6": (3.0, 15.0),
+    "Claude Haiku 4.5": (1.0, 5.0),
+    "Custom": None,
+}
+# Cloud GPU presets: label -> rows of [config name, $ / hour] for the GPU rate
+# table. None ("Custom") keeps whatever the table currently holds.
+CLOUD_PRESETS = {
+    "GMI Cloud": [["H200 × 8", 20.8], ["B200 × 8", 32.0], ["GB200 × 4", 32.0]],
+    "Custom": None,
+}
+# Initial selections and slider values; reset_all() restores these.
+DEFAULT_MODEL = "DeepSeek V4 — OpenRouter (~90% cache)"
+DEFAULT_CLOUD = "GMI Cloud"
+DEFAULT_IN_K = 64.0
+DEFAULT_OUT_K = 4.0
+DEFAULT_RPS = 1.0
+# Line colors for GPU configs on the chart; cycled when there are more GPU
+# rows than colors.
+GPU_COLORS = ["#2E86DE", "#10AC84", "#EE5253", "#8854D0", "#F79F1F", "#576574"]
+# Color of the "your workload" guide line, label, and marker.
+WORKLOAD_COLOR = "#9b59b6"
+def cost_per_request(in_k: float, out_k: float, in_price: float, out_price: float) -> float:
+    return (in_k * 1000 * in_price + out_k * 1000 * out_price) / 1_000_000
+def parse_gpus(df):
+    if isinstance(df, pd.DataFrame):
+        rows = df.fillna(0).values.tolist()
+    else:
+        rows = df or []
+    out = []
+    for row in rows:
+        if not row or len(row) < 2:
+            continue
+        name = str(row[0]).strip() if row[0] is not None else ""
+        try:
+            hourly = float(row[1])
+        except (TypeError, ValueError):
+            continue
+        if not name or hourly <= 0:
+            continue
+        out.append((name, hourly))
+    return out
+def compute(in_price, out_price, in_k, out_k, gpu_df, planned_rps):
+    cpr = cost_per_request(in_k, out_k, in_price, out_price)
+    gpus = parse_gpus(gpu_df)
+    headline = _headline(cpr, in_k, out_k, in_price, out_price)
+    if cpr <= 0 or not gpus:
+        empty_break = pd.DataFrame(columns=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"])
+        empty_workload = pd.DataFrame(columns=["Option", "$ / hour", "vs API"])
+        return headline, empty_break, empty_workload, _empty_figure()
+    break_rows = []
+    max_rps = 0.0
+    for name, hourly in gpus:
+        rph = hourly / cpr
+        rps = rph / 3600
+        max_rps = max(max_rps, rps)
+        break_rows.append({
+            "GPU config": name,
+            "$/hour": f"${hourly:,.2f}",
+            "Break-even req/hr": f"{rph:,.0f}",
+            "Break-even RPS": f"{rps:,.3f}",
+        })
+    break_df = pd.DataFrame(break_rows)
+    api_hourly = planned_rps * 3600 * cpr
+    workload_rows = [{
+        "Option": "API",
+        "$ / hour": f"${api_hourly:,.2f}",
+        "vs API": "—",
+    }]
+    for name, hourly in gpus:
+        diff = hourly - api_hourly
+        if abs(diff) < 0.005:
+            note = "break-even"
+        elif diff < 0:
+            note = f"−${abs(diff):,.2f}/hr cheaper than API"
+        else:
+            note = f"+${diff:,.2f}/hr pricier than API"
+        workload_rows.append({
+            "Option": name,
+            "$ / hour": f"${hourly:,.2f}",
+            "vs API": note,
+        })
+    workload_df = pd.DataFrame(workload_rows)
+    x_max = max(max_rps * 1.6, planned_rps * 1.3, 0.1)
+    fig = _build_figure(cpr, gpus, x_max, planned_rps)
+    return headline, break_df, workload_df, fig
+def _headline(cpr, in_k, out_k, in_price, out_price):
+    return (
+        f"### API cost per request: **${cpr:,.6f}**  \n"
+        f"_({int(in_k * 1000):,} in × ${in_price}/1M  +  {int(out_k * 1000):,} out × ${out_price}/1M)_"
+    )
+def _empty_figure():
+    fig = go.Figure()
+    fig.update_layout(
+        template="plotly_white",
+        height=480,
+        annotations=[dict(text="Set positive values for tokens, prices, and at least one GPU row.",
+                          xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)],
+    )
+    return fig
+def _build_figure(cpr, gpus, x_max, planned_rps):
+    n = 200
+    xs = [x_max * i / (n - 1) for i in range(n)]
+    api_costs = [r * 3600 * cpr for r in xs]
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(
+        x=xs, y=api_costs, mode="lines",
+        name="API cost",
+        line=dict(color="#222f3e", width=3),
+        hovertemplate="RPS: %{x:.3f}<br>API $/hr: $%{y:,.2f}<extra></extra>",
+    ))
+    y_max = max(api_costs[-1], max(h for _, h in gpus)) * 1.18
+    for i, (name, hourly) in enumerate(gpus):
+        color = GPU_COLORS[i % len(GPU_COLORS)]
+        fig.add_trace(go.Scatter(
+            x=[0, x_max], y=[hourly, hourly], mode="lines",
+            name=f"{name} (${hourly:.2f}/hr)",
+            line=dict(color=color, width=2, dash="dash"),
+            hovertemplate=f"{name}<br>$/hr: ${hourly:,.2f}<extra></extra>",
+        ))
+        rph = hourly / cpr
+        rps = rph / 3600
+        if rps <= x_max:
+            fig.add_trace(go.Scatter(
+                x=[rps], y=[hourly],
+                mode="markers+text",
+                marker=dict(color=color, size=11, line=dict(color="white", width=2)),
+                text=[f"  {rps:.3f} RPS"],
+                textposition="middle right",
+                textfont=dict(color=color, size=12),
+                showlegend=False,
+                hovertemplate=(
+                    f"{name} break-even<br>"
+                    f"RPS: {rps:.3f}<br>"
+                    f"req/hr: {rph:,.0f}<br>"
+                    f"$/hr: ${hourly:,.2f}<extra></extra>"
+                ),
+            ))
+    api_at = planned_rps * 3600 * cpr
+    fig.add_shape(type="line",
+                  x0=planned_rps, x1=planned_rps, y0=0, y1=y_max,
+                  line=dict(color=WORKLOAD_COLOR, width=2, dash="dot"))
+    fig.add_annotation(x=planned_rps, y=y_max,
+                       text=f"your workload: {planned_rps:.2f} RPS",
+                       showarrow=False,
+                       font=dict(color=WORKLOAD_COLOR, size=12),
+                       yshift=8)
+    fig.add_trace(go.Scatter(
+        x=[planned_rps], y=[api_at],
+        mode="markers",
+        marker=dict(color=WORKLOAD_COLOR, size=11, symbol="diamond",
+                    line=dict(color="white", width=2)),
+        name="Your workload (on API)",
+        hovertemplate=f"At {planned_rps:.2f} RPS<br>API $/hr: ${api_at:,.2f}<extra></extra>",
+    ))
+    fig.update_layout(
+        template="plotly_white",
+        height=480,
+        margin=dict(l=60, r=30, t=70, b=50),
+        xaxis=dict(title="Requests per second", range=[0, x_max]),
+        yaxis=dict(title="$ / hour", rangemode="tozero"),
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
+        title=dict(text="Cloud GPU $/hr vs API $/hr — where lines cross is break-even",
+                   font=dict(size=14)),
+    )
+    return fig
+def apply_model_preset(preset_name, cur_in, cur_out):
+    p = MODEL_PRESETS.get(preset_name)
+    if p is None:
+        return cur_in, cur_out
+    return p[0], p[1]
+def apply_cloud_preset(preset_name, cur_df):
+    p = CLOUD_PRESETS.get(preset_name)
+    if p is None:
+        return cur_df
+    return p
+def reset_all():
+    return (
+        DEFAULT_MODEL,
+        DEFAULT_CLOUD,
+        MODEL_PRESETS[DEFAULT_MODEL][0],
+        MODEL_PRESETS[DEFAULT_MODEL][1],
+        DEFAULT_IN_K,
+        DEFAULT_OUT_K,
+        CLOUD_PRESETS[DEFAULT_CLOUD],
+        DEFAULT_RPS,
+    )
+# Page layout and event wiring for the calculator.
+with gr.Blocks(title="Cloud bills vs API bills") as demo:
+    gr.Markdown(
+        """
+# Cloud bills vs API bills
+At what request rate does renting GPUs beat paying per token?
+Drag the **Your workload** slider to see live cost at your planned scale.
+        """
+    )
+    with gr.Row():
+        # Left column: API pricing, request shape, and GPU rate inputs.
+        with gr.Column(scale=1):
+            gr.Markdown("### Model & API pricing")
+            model_preset = gr.Dropdown(
+                choices=list(MODEL_PRESETS.keys()),
+                value=DEFAULT_MODEL,
+                label="Model preset",
+                info="Pick a preset or switch to Custom to enter your own prices.",
+            )
+            in_price = gr.Number(
+                value=MODEL_PRESETS[DEFAULT_MODEL][0],
+                label="Input $ / 1M tokens",
+                precision=4,
+                info="Effective input price (post-cache for OpenRouter-style providers).",
+            )
+            out_price = gr.Number(
+                value=MODEL_PRESETS[DEFAULT_MODEL][1],
+                label="Output $ / 1M tokens",
+                precision=4,
+            )
+            gr.Markdown("### Request shape")
+            # Both sliders are in thousands of tokens, matching cost_per_request().
+            in_tokens_k = gr.Slider(
+                1, 256, value=DEFAULT_IN_K, step=1,
+                label="Input tokens / request  (k)",
+                info="64 means 64,000 tokens. Slide for typical context size.",
+            )
+            out_tokens_k = gr.Slider(
+                0.1, 32, value=DEFAULT_OUT_K, step=0.1,
+                label="Output tokens / request  (k)",
+                info="4 means 4,000 tokens.",
+            )
+            gr.Markdown("### Cloud GPU rates")
+            cloud_preset = gr.Dropdown(
+                choices=list(CLOUD_PRESETS.keys()),
+                value=DEFAULT_CLOUD,
+                label="Cloud provider preset",
+                info="Edit the table below to match your contract.",
+            )
+            # Editable table of [config name, $ / hour] rows; parsed by parse_gpus().
+            gpu_df = gr.Dataframe(
+                value=CLOUD_PRESETS[DEFAULT_CLOUD],
+                headers=["Config", "$ / hour"],
+                datatype=["str", "number"],
+                column_count=(2, "fixed"),
+                row_count=(3, "dynamic"),
+                interactive=True,
+            )
+            reset_btn = gr.Button("↺ Reset to defaults", variant="secondary", size="sm")
+        # Right column: workload slider and every computed output.
+        with gr.Column(scale=2):
+            gr.Markdown("### Your workload")
+            planned_rps = gr.Slider(
+                0, 5, value=DEFAULT_RPS, step=0.05,
+                label="Planned requests / second",
+                info="What scale do you expect to run at? The dotted line on the chart marks this point.",
+            )
+            workload_table = gr.Dataframe(
+                headers=["Option", "$ / hour", "vs API"],
+                interactive=False,
+                wrap=True,
+            )
+            gr.Markdown("### Break-even points")
+            headline = gr.Markdown()
+            break_table = gr.Dataframe(
+                headers=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"],
+                interactive=False,
+                wrap=True,
+            )
+            chart = gr.Plot()
+    # Order matters: these lists must match compute()'s parameters and its
+    # returned tuple position by position.
+    inputs = [in_price, out_price, in_tokens_k, out_tokens_k, gpu_df, planned_rps]
+    outputs = [headline, break_table, workload_table, chart]
+    # Any change to an input recomputes all four outputs.
+    for c in inputs:
+        c.change(compute, inputs=inputs, outputs=outputs)
+    # Presets only write to the price fields / GPU table; no compute call is
+    # wired here directly.
+    model_preset.change(
+        apply_model_preset,
+        inputs=[model_preset, in_price, out_price],
+        outputs=[in_price, out_price],
+    )
+    cloud_preset.change(
+        apply_cloud_preset,
+        inputs=[cloud_preset, gpu_df],
+        outputs=[gpu_df],
+    )
+    # Order must match the tuple returned by reset_all().
+    reset_outputs = [model_preset, cloud_preset, in_price, out_price,
+                     in_tokens_k, out_tokens_k, gpu_df, planned_rps]
+    # Restore the defaults first, then recompute explicitly from them.
+    reset_btn.click(reset_all, outputs=reset_outputs).then(
+        compute, inputs=inputs, outputs=outputs
+    )
+    # Fill the outputs once when the page first loads.
+    demo.load(compute, inputs=inputs, outputs=outputs)
+if __name__ == "__main__":
+    demo.launch(theme=gr.themes.Soft())

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==6.14.0
+plotly>=5.20
+pandas>=2.0