Roman190928 committed on
Commit
8d104c2
·
verified ·
1 Parent(s): fa0ee04

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -0
app.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import math
4
+ from pathlib import Path
5
+ ##
6
+ # -----------------------
7
+ # Config / presets
8
+ # -----------------------
9
GB_PER_B_TOKEN = 4.6  # 1B tokens ≈ 4.6 GB text

# Preset table: display name -> {"params": total parameter count,
#                                "tokens": pre-training token count}.
# Entry format for new rows:
#   "GPT-2 Small (124M)": {"params": 124_000_000, "tokens": 40_000_000_000},
MODEL_PRESETS = {
    "MiMo-V2 Flash": {"params": 315_000_000_000, "tokens": 27_000_000_000_000},

    "NVIDIA-Nemotron-3-Nano 30B A3B Base": {"params": 30_000_000_000, "tokens": 10_650_000_000_000},

    "Kimi-K2-Instruct": {"params": 1_000_000_000_000, "tokens": 15_500_000_000_000},

    "Llama 4 Scout": {"params": 109_000_000_000, "tokens": 40_000_000_000_000},
    "Llama 4 Maverick": {"params": 400_000_000_000, "tokens": 22_000_000_000_000},

    "Llama 3.1 8B": {"params": 8_000_000_000, "tokens": 15_000_000_000_000},
    "Llama 3.1 70B": {"params": 70_000_000_000, "tokens": 15_000_000_000_000},
    "Llama 3.1 405B": {"params": 405_000_000_000, "tokens": 15_000_000_000_000},

    "Ling-1T": {"params": 1_000_000_000_000, "tokens": 20_000_000_000_000},
    "Ling-flash-2.0": {"params": 100_000_000_000, "tokens": 20_000_000_000_000},
    "Ling-mini-2.0": {"params": 16_000_000_000, "tokens": 20_000_000_000_000},

    # NOTE(review): was 16B; the Phi-4 model card lists 14B parameters — confirm.
    "Phi 4": {"params": 14_000_000_000, "tokens": 9_800_000_000_000},
    "Phi 3.5 42B": {"params": 42_000_000_000, "tokens": 4_900_000_000_000},
    "Phi 1": {"params": 1_000_000_000, "tokens": 54_000_000_000},

    "Qwen3-235B-A22B": {"params": 235_000_000_000, "tokens": 36_000_000_000_000},
    "Qwen2.5-72B-Instruct": {"params": 72_000_000_000, "tokens": 18_000_000_000_000},
    # NOTE(review): was 40B tokens, identical to the GPT-2 template row, which
    # looks like a copy-paste slip. The Qwen2 technical report gives ~4.5T
    # tokens for the 57B-A14B MoE — confirm against the report.
    "Qwen2-57B-A14B-Instruct": {"params": 57_000_000_000, "tokens": 4_500_000_000_000},

    "GPT-2 Small (124M)": {"params": 124_000_000, "tokens": 40_000_000_000},
}
40
+
41
+ # -----------------------
42
+ # Helpers (defensive)
43
+ # -----------------------
44
def as_positive_number(x):
    """Coerce a UI value to a finite, strictly positive float, else 0.

    Args:
        x: Raw value from an input widget (number, numeric string, or None).

    Returns:
        ``float(x)`` when it is finite and greater than zero. Returns ``0``
        for None, blank strings, values that cannot be converted, NaN,
        infinities, and zero or negative numbers.
    """
    # An empty number field arrives as None; treat it as "not provided".
    if x is None:
        return 0
    if isinstance(x, str) and x.strip() == "":
        return 0
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Unparseable text, unsupported types, or ints too large for float.
        return 0
    # Callers pass the result to int(); NaN/inf would raise there, and
    # non-positive values are treated as "not provided" anyway.
    if not math.isfinite(value) or value <= 0:
        return 0
    return value
54
+
55
def preset_calc(preset_name, override_params, override_tokens):
    """Format the tokens-per-parameter ratio for a preset, rounded up.

    Args:
        preset_name: Key into MODEL_PRESETS; unknown names yield no base values.
        override_params: Optional parameter count replacing the preset value
            when it normalizes to a number greater than zero.
        override_tokens: Optional token count replacing the preset value
            when it normalizes to a number greater than zero.

    Returns:
        A string such as "20 tokens / parameter", or "—" when either the
        effective parameter count or token count is not positive.
    """
    entry = MODEL_PRESETS.get(preset_name, {})
    params_override = as_positive_number(override_params)
    tokens_override = as_positive_number(override_tokens)

    preset_params = int(entry.get("params", 0) or 0)
    preset_tokens = int(entry.get("tokens", 0) or 0)

    # An override only wins when it is strictly positive.
    if params_override > 0:
        params = int(params_override)
    else:
        params = preset_params

    if tokens_override > 0:
        tokens = int(tokens_override)
    else:
        tokens = preset_tokens

    if not (params > 0 and tokens > 0):
        return "—"

    tokens_per_param = math.ceil(tokens / params)
    return f"{tokens_per_param:,} tokens / parameter"
71
+
72
def reverse_calc(params_in, tokens_per_param_in):
    """Turn a parameter count and tokens/param ratio into dataset estimates.

    Args:
        params_in: Model parameter count from the UI.
        tokens_per_param_in: Desired tokens-per-parameter ratio from the UI.

    Returns:
        A pair of display strings: the total training tokens and the
        estimated text size in GB (using GB_PER_B_TOKEN). Both are "—"
        when either input normalizes to a non-positive number.
    """
    param_count = as_positive_number(params_in)
    ratio = as_positive_number(tokens_per_param_in)

    if param_count <= 0 or ratio <= 0:
        placeholder = "—"
        return placeholder, placeholder

    total_tokens = int(param_count * ratio)
    billions_of_tokens = total_tokens / 1e9
    total_gb = billions_of_tokens * GB_PER_B_TOKEN

    tokens_text = f"{total_tokens:,} tokens"
    size_text = f"{total_gb:.2f} GB of text"
    return tokens_text, size_text
81
+
82
+ # -----------------------
83
+ # Theme header generator
84
+ # -----------------------
85
def build_header_html(theme):
    """Render the page header HTML for the chosen theme.

    Args:
        theme: Theme name; "Neon" and "Cyber" have their own styling and
            any other value falls back to the Dark styling.

    Returns:
        An HTML snippet containing the title and a theme-specific subtitle,
        with the title coloured in the theme's accent colour.
    """
    # (name, (accent colour, subtitle, emoji)) for the non-default themes.
    themed_styles = (
        ("Neon", ("#00FFC6", "Neon mode: high voltage scaling", "⚡️")),
        ("Cyber", ("#7C5CFF", "Cyber vibes, measured in tokens", "🛰️")),
    )

    # Dark (default) is used whenever no named theme matches.
    accent, subtitle, emoji = "#F5C26B", "Scaling laws, but make it aesthetic.", "🧮"
    for name, style in themed_styles:
        if theme == name:
            accent, subtitle, emoji = style
            break

    return f"""
    <div style="text-align:center; padding:28px; margin-bottom:8px;">
      <div style="display:inline-block; padding:18px 28px; border-radius:14px;
                  background:linear-gradient(90deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));
                  box-shadow: 0 6px 30px rgba(0,0,0,0.6);">
        <div style="font-size:1.9rem; font-weight:700; color: {accent};">
          {emoji} &nbsp; Roman’s Parameter ↔ Token Calculator
        </div>
        <div style="color: rgba(255,255,255,0.7); margin-top:6px;">{subtitle}</div>
      </div>
    </div>
    """
112
+
113
+ # -----------------------
114
+ # CSS (pass to launch)
115
+ # -----------------------
116
# Custom stylesheet for the app; it is handed to demo.launch(css=CSS) below.
# The .card, .mono and .big-output classes are attached to components through
# elem_classes, and .small-muted is used by the inline HTML in Markdown blocks.
CSS = """
:root{
  --bg1: #0f1222;
  --bg2: #111218;
  --card: #151626;
  --muted: rgba(255,255,255,0.65);
  --mono: ui-monospace, SFMono-Regular, Menlo, monospace;
}
body { background: linear-gradient(180deg,var(--bg1), var(--bg2)); color: #e9eef8; }
.gradio-container { max-width: 980px; margin: 20px auto; }
.card { background: linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));
        padding: 18px; border-radius: 12px; border: 1px solid rgba(255,255,255,0.03); margin-bottom: 18px; }
.mono input, .mono textarea, .mono .input_textbox { font-family: var(--mono); font-size:1.02rem; }
label { color: var(--muted); font-size:0.95rem; }
h1 { margin:0; padding:0; color: #fff; }
.gradio-row { gap: 12px; }
.small-muted { color: rgba(255,255,255,0.55); font-size:0.9rem; }
.big-output { font-family: var(--mono); font-size:1.05rem; background: rgba(0,0,0,0.12); padding:10px; border-radius:8px; }
"""
135
+
136
+ # -----------------------
137
+ # Build UI
138
+ # -----------------------
139
with gr.Blocks() as demo:
    header_html = gr.HTML(build_header_html("Dark"))

    # The dropdown default must be one of its choices. The previous default
    # ("Your 75M Model") is not a key of MODEL_PRESETS, so start on the first
    # preset instead.
    preset_names = list(MODEL_PRESETS)
    default_preset = preset_names[0] if preset_names else None

    with gr.Group(elem_classes="card"):
        gr.Markdown("### Model Preset Calculator")
        with gr.Row():
            preset = gr.Dropdown(choices=preset_names, value=default_preset, label="Model Preset")
            ratio_out = gr.Textbox(label="Tokens per Parameter (auto)", interactive=False, elem_classes="mono big-output")
        with gr.Row():
            override_params = gr.Number(label="Override Parameters (optional)", precision=0, value=0)
            override_tokens = gr.Number(label="Override Training Tokens (optional)", precision=0, value=0)

        # Any of the three inputs changing recomputes the ratio.
        preset_inputs = [preset, override_params, override_tokens]
        for component in preset_inputs:
            component.change(preset_calc, inputs=preset_inputs, outputs=ratio_out)

    # theme selector and small help row
    with gr.Row():
        theme_select = gr.Radio(["Dark", "Neon", "Cyber"], value="Dark", label="Theme", info="Change header flair")
        gr.Markdown("<div class='small-muted'>Tip: override values let you test alternate configs quickly.</div>")

    # reverse calculator
    with gr.Group(elem_classes="card"):
        gr.Markdown("### 🔁 Reverse Calculator (params → tokens)")
        with gr.Row():
            params_in = gr.Number(label="Model Parameters", precision=0, value=75_000_000)
            tpp_in = gr.Number(label="Tokens per Parameter", precision=2, value=20.0)
        with gr.Row():
            total_tokens_out = gr.Textbox(label="Total Training Tokens", interactive=False, elem_classes="mono big-output")
            total_gb_out = gr.Textbox(label="Estimated Dataset Size", interactive=False, elem_classes="mono big-output")

        reverse_inputs = [params_in, tpp_in]
        reverse_outputs = [total_tokens_out, total_gb_out]
        for component in reverse_inputs:
            component.change(reverse_calc, inputs=reverse_inputs, outputs=reverse_outputs)

    # footer
    with gr.Row():
        gr.Markdown(
            f"<div class='small-muted'>1B tokens ≈ {GB_PER_B_TOKEN} GB. "
            "Chinchilla guidance ≈ 20 tokens/param.</div>"
        )

    # theme change updates header HTML
    def on_theme_change(theme):
        """Return the header HTML for the newly selected theme."""
        return build_header_html(theme)

    theme_select.change(on_theme_change, inputs=[theme_select], outputs=[header_html])

    # Change events only fire on user edits, so fill both calculators once
    # when the page loads; otherwise the outputs start out blank.
    demo.load(preset_calc, inputs=preset_inputs, outputs=ratio_out)
    demo.load(reverse_calc, inputs=reverse_inputs, outputs=reverse_outputs)
180
+
181
+ # -----------------------
182
+ # Launch (css passed to launch)
183
+ # -----------------------
184
if __name__ == "__main__":
    # Options for demo.launch(); css is passed here (Gradio 6.0+).
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "show_error": True,
        "css": CSS,
    }
    demo.launch(**launch_options)