# Exported from a Spaces file-viewer page (revision 8d104c2, file size: 8,130 bytes); viewer chrome and the line-number gutter removed.
# app.py
import gradio as gr
import math
from pathlib import Path
##
# -----------------------
# Config / presets
# -----------------------
# Rough conversion used for dataset-size estimates: 1B tokens ≈ 4.6 GB text.
GB_PER_B_TOKEN = 4.6

# One row per preset: (display name, parameter count, training tokens).
# Row order is preserved in MODEL_PRESETS, which feeds the preset dropdown.
_PRESET_ROWS = (
    ("MiMo-V2 Flash", 315_000_000_000, 27_000_000_000_000),
    ("NVIDIA-Nemotron-3-Nano 30B A3B Base", 30_000_000_000, 10_650_000_000_000),
    ("Kimi-K2-Instruct", 1_000_000_000_000, 15_500_000_000_000),
    ("Llama 4 Scout", 109_000_000_000, 40_000_000_000_000),
    ("Llama 4 Maverick", 400_000_000_000, 22_000_000_000_000),
    ("Llama 3.1 8B", 8_000_000_000, 15_000_000_000_000),
    ("Llama 3.1 70B", 70_000_000_000, 15_000_000_000_000),
    ("Llama 3.1 405B", 405_000_000_000, 15_000_000_000_000),
    ("Ling-1T", 1_000_000_000_000, 20_000_000_000_000),
    ("Ling-flash-2.0", 100_000_000_000, 20_000_000_000_000),
    ("Ling-mini-2.0", 16_000_000_000, 20_000_000_000_000),
    # NOTE(review): Phi-4 is usually cited as a 14B model — confirm the 16B figure.
    ("Phi 4", 16_000_000_000, 9_800_000_000_000),
    ("Phi 3.5 42B", 42_000_000_000, 4_900_000_000_000),
    ("Phi 1", 1_000_000_000, 54_000_000_000),
    ("Qwen3-235B-A22B", 235_000_000_000, 36_000_000_000_000),
    ("Qwen2.5-72B-Instruct", 72_000_000_000, 18_000_000_000_000),
    # NOTE(review): this token count equals the GPT-2 template value (40B),
    # which looks like a copy/paste leftover — confirm against the model card.
    ("Qwen2-57B-A14B-Instruct", 57_000_000_000, 40_000_000_000),
    # Template row: copy and edit to add a new preset.
    ("GPT-2 Small (124M)", 124_000_000, 40_000_000_000),
)

# Preset name -> {"params": total parameters, "tokens": training tokens}.
MODEL_PRESETS = {
    name: {"params": params, "tokens": tokens}
    for name, params, tokens in _PRESET_ROWS
}
# -----------------------
# Helpers (defensive)
# -----------------------
def as_positive_number(x):
    """Coerce a raw UI value into a finite, strictly positive float, or 0.

    Args:
        x: Value coming from an input widget; may be ``None`` (gr.Number
            returns None if empty), a blank string, a number, or a numeric
            string.

    Returns:
        ``float(x)`` when it is finite and greater than zero, otherwise ``0``.
        Callers treat ``0`` as "no usable value supplied".
    """
    if x is None:
        return 0
    if isinstance(x, str) and not x.strip():
        return 0
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Not a number (or too large to represent): treat as "not provided".
        return 0
    # NaN and +/-inf would blow up the int() conversions downstream, and
    # negatives are never meaningful for counts, so all collapse to 0.
    if not math.isfinite(value) or value <= 0:
        return 0
    return value
def preset_calc(preset_name, override_params, override_tokens):
    """Format the tokens-per-parameter ratio for a preset, rounded up.

    Args:
        preset_name: Key into ``MODEL_PRESETS``; unknown names fall back to
            an empty preset.
        override_params: Optional parameter count; used instead of the
            preset's value when it is greater than zero.
        override_tokens: Optional training-token count; used instead of the
            preset's value when it is greater than zero.

    Returns:
        A string such as ``"86 tokens / parameter"``, or ``"—"`` when either
        the parameter count or the token count ends up non-positive.
    """
    preset = MODEL_PRESETS.get(preset_name, {})
    param_override = as_positive_number(override_params)
    token_override = as_positive_number(override_tokens)

    # Start from the preset's numbers, then let positive overrides win.
    params = int(preset.get("params", 0) or 0)
    tokens = int(preset.get("tokens", 0) or 0)
    if param_override > 0:
        params = int(param_override)
    if token_override > 0:
        tokens = int(token_override)

    if params <= 0 or tokens <= 0:
        return "—"

    tokens_per_param = math.ceil(tokens / params)
    return f"{tokens_per_param:,} tokens / parameter"
def reverse_calc(params_in, tokens_per_param_in):
    """Turn a parameter count and a tokens/param ratio into dataset totals.

    Args:
        params_in: Model parameter count from the UI.
        tokens_per_param_in: Desired tokens-per-parameter ratio from the UI.

    Returns:
        A ``(tokens_text, size_text)`` pair of display strings. Both are
        ``"—"`` when either input is missing or not greater than zero.
    """
    param_count = as_positive_number(params_in)
    ratio = as_positive_number(tokens_per_param_in)
    if param_count <= 0 or ratio <= 0:
        placeholder = "—"
        return placeholder, placeholder

    # Whole tokens only; the size estimate is derived from the truncated count.
    total_tokens = int(param_count * ratio)
    total_gb = (total_tokens / 1e9) * GB_PER_B_TOKEN

    tokens_text = f"{total_tokens:,} tokens"
    size_text = f"{total_gb:.2f} GB of text"
    return tokens_text, size_text
# -----------------------
# Theme header generator
# -----------------------
def build_header_html(theme):
    """Render the page header banner for the selected theme.

    Args:
        theme: Theme name. ``"Neon"`` and ``"Cyber"`` have their own accent
            colour, subtitle and emoji; any other value uses the Dark look.

    Returns:
        An HTML snippet (inline-styled) containing the title and subtitle.
    """
    # theme name -> (accent colour, subtitle, emoji)
    looks = {
        "Neon": ("#00FFC6", "Neon mode: high voltage scaling", "⚡️"),
        "Cyber": ("#7C5CFF", "Cyber vibes, measured in tokens", "🛰️"),
    }
    dark_look = ("#F5C26B", "Scaling laws, but make it aesthetic.", "🧮")
    accent, subtitle, emoji = looks.get(theme, dark_look)

    # The template lines are kept flush-left so the emitted markup is unchanged.
    return f"""
<div style="text-align:center; padding:28px; margin-bottom:8px;">
<div style="display:inline-block; padding:18px 28px; border-radius:14px;
background:linear-gradient(90deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));
box-shadow: 0 6px 30px rgba(0,0,0,0.6);">
<div style="font-size:1.9rem; font-weight:700; color: {accent};">
{emoji} Roman’s Parameter ↔ Token Calculator
</div>
<div style="color: rgba(255,255,255,0.7); margin-top:6px;">{subtitle}</div>
</div>
</div>
"""
# -----------------------
# CSS (pass to launch)
# -----------------------
# Stylesheet for the whole app, handed to demo.launch(css=CSS) below.
# Defines the dark palette variables in :root and the helper classes the UI
# refers to: .card (via elem_classes on the groups), .mono and .big-output
# (via elem_classes on the read-only textboxes), and .small-muted (used in
# the inline Markdown hints).
CSS = """
:root{
--bg1: #0f1222;
--bg2: #111218;
--card: #151626;
--muted: rgba(255,255,255,0.65);
--mono: ui-monospace, SFMono-Regular, Menlo, monospace;
}
body { background: linear-gradient(180deg,var(--bg1), var(--bg2)); color: #e9eef8; }
.gradio-container { max-width: 980px; margin: 20px auto; }
.card { background: linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));
padding: 18px; border-radius: 12px; border: 1px solid rgba(255,255,255,0.03); margin-bottom: 18px; }
.mono input, .mono textarea, .mono .input_textbox { font-family: var(--mono); font-size:1.02rem; }
label { color: var(--muted); font-size:0.95rem; }
h1 { margin:0; padding:0; color: #fff; }
.gradio-row { gap: 12px; }
.small-muted { color: rgba(255,255,255,0.55); font-size:0.9rem; }
.big-output { font-family: var(--mono); font-size:1.05rem; background: rgba(0,0,0,0.12); padding:10px; border-radius:8px; }
"""
# -----------------------
# Build UI
# -----------------------
with gr.Blocks() as demo:
    # Header banner; re-rendered whenever the theme radio changes.
    header_html = gr.HTML(build_header_html("Dark"))

    # --- Preset calculator: preset (+ optional overrides) -> tokens/param ---
    with gr.Group(elem_classes="card"):
        gr.Markdown("### Model Preset Calculator")
        # The dropdown default must be one of its choices, so start on the
        # first preset (None if the table is ever emptied).
        default_preset = next(iter(MODEL_PRESETS), None)
        with gr.Row():
            preset = gr.Dropdown(
                choices=list(MODEL_PRESETS),
                value=default_preset,
                label="Model Preset",
            )
            # Seed the output so the ratio is visible before any change event.
            ratio_out = gr.Textbox(
                label="Tokens per Parameter (auto)",
                value=preset_calc(default_preset, 0, 0),
                interactive=False,
                elem_classes="mono big-output",
            )
        with gr.Row():
            override_params = gr.Number(label="Override Parameters (optional)", precision=0, value=0)
            override_tokens = gr.Number(label="Override Training Tokens (optional)", precision=0, value=0)

        # Any of the three inputs changing recomputes the ratio.
        preset_inputs = [preset, override_params, override_tokens]
        for component in preset_inputs:
            component.change(preset_calc, inputs=preset_inputs, outputs=ratio_out)

    # theme selector and small help row
    with gr.Row():
        theme_select = gr.Radio(["Dark", "Neon", "Cyber"], value="Dark", label="Theme", info="Change header flair")
        gr.Markdown("<div class='small-muted'>Tip: override values let you test alternate configs quickly.</div>")

    # --- Reverse calculator: params x tokens/param -> total tokens and GB ---
    with gr.Group(elem_classes="card"):
        gr.Markdown("### 🔁 Reverse Calculator (params → tokens)")
        default_params = 75_000_000
        default_tpp = 20.0
        # Pre-compute the outputs for the default inputs so the boxes are not
        # blank on first render.
        initial_tokens, initial_gb = reverse_calc(default_params, default_tpp)
        with gr.Row():
            params_in = gr.Number(label="Model Parameters", precision=0, value=default_params)
            tpp_in = gr.Number(label="Tokens per Parameter", precision=2, value=default_tpp)
        with gr.Row():
            total_tokens_out = gr.Textbox(
                label="Total Training Tokens",
                value=initial_tokens,
                interactive=False,
                elem_classes="mono big-output",
            )
            total_gb_out = gr.Textbox(
                label="Estimated Dataset Size",
                value=initial_gb,
                interactive=False,
                elem_classes="mono big-output",
            )

        reverse_inputs = [params_in, tpp_in]
        reverse_outputs = [total_tokens_out, total_gb_out]
        for component in reverse_inputs:
            component.change(reverse_calc, inputs=reverse_inputs, outputs=reverse_outputs)

    # footer
    with gr.Row():
        notes = gr.Markdown("<div class='small-muted'>1B tokens ≈ 4.6 GB. Chinchilla guidance ≈ 20 tokens/param.</div>")

    # theme change updates header HTML
    def on_theme_change(theme):
        """Return the header HTML for the newly selected theme."""
        return build_header_html(theme)

    theme_select.change(on_theme_change, inputs=[theme_select], outputs=[header_html])
# -----------------------
# Launch (css passed to launch)
# -----------------------
if __name__ == "__main__":
    # Options for starting the server when this file is run as a script.
    launch_options = {
        "share": True,  # request a public share link
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "show_error": True,
        "css": CSS,  # pass CSS here (Gradio 6.0+)
    }
    demo.launch(**launch_options)
# End of app.py