Joseph Pollack committed
Commit 7c96057 · unverified · 1 Parent(s): c15f462

initial commit
README.md CHANGED
@@ -1,14 +1,40 @@
  ---
- title: Fr On Device
- emoji: 👁
+ title: Baguettotron vs Luth models
+ emoji: 🥖
  colorFrom: blue
  colorTo: indigo
  sdk: gradio
- sdk_version: 6.6.0
+ sdk_version: "4"
  app_file: app.py
  pinned: false
  license: mit
- short_description: fully subsidized versus non-subsidized fr understanding
+ short_description: All models, all outputs — apples-to-apples comparison by parameter size
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Baguettotron vs Luth models
+
+ Apples-to-apples comparison of **Baguettotron** (PleIAs, 321M) and **5 Luth models** (kurakurai, 0.4B–1.7B) from the [Luth Models collection](https://huggingface.co/collections/kurakurai/luth-models).
+
+ ## Features
+
+ - **All models, all outputs:** Each prompt runs through all 6 models; outputs appear in tabs grouped by parameter size.
+ - **Ultimate footprint:** Per-model disk size and VRAM estimates; combined footprint for all models.
+ - **Per-tier hyperparameters:** Temperature, max_tokens, top_p, top_k, repeat_penalty per size tier.
+ - **Transformers-only:** No quantization; all models run in BF16/FP16.
+
+ ## Size tiers
+
+ | Tier | Models |
+ |------|--------|
+ | ~0.3–0.4B (Small) | Baguettotron, Luth-LFM2-350M |
+ | ~0.6–0.7B (Medium) | Luth-0.6B-Instruct, Luth-LFM2-700M |
+ | ~1–2B (Large) | Luth-LFM2-1.2B, Luth-1.7B-Instruct |
+
+ ## Baguettotron EOS quirk
+
+ Baguettotron's tokenizer uses `"<|im_end>"` (no trailing pipe) for EOS. The app uses manual prompt formatting and stop sequences to avoid multi-token tokenization. See [quirk.md](quirk.md) for details.
+
+ ## Deployment
+
+ - **Hugging Face Spaces:** Set hardware to **Zero GPU** (or standard GPU). The app uses `@spaces.GPU` when available.
+ - **Local:** Run `python app.py`; requires a GPU with ~10 GB VRAM for all 6 models.
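
A quick way to confirm the EOS quirk described in the README above is to inspect how the tokenizer splits the two candidate strings. This is only an illustrative check, assuming the tokenizer downloads from the public `PleIAs/Baguettotron` repo; it is not part of the commit:

```python
from transformers import AutoTokenizer

# Illustrative check of the Baguettotron EOS quirk; not part of this commit.
tok = AutoTokenizer.from_pretrained("PleIAs/Baguettotron")

# The README says "<|im_end>" (no trailing pipe) is the EOS marker, so it should
# encode to a single id, while the usual ChatML "<|im_end|>" may split into pieces.
for candidate in ["<|im_end>", "<|im_end|>"]:
    ids = tok.encode(candidate, add_special_tokens=False)
    print(f"{candidate!r} -> {len(ids)} token(s): {ids}")

print("Declared EOS token:", tok.eos_token)
```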
__pycache__/app.cpython-313.pyc ADDED
Binary file (8.72 kB)

__pycache__/inference.cpython-313.pyc ADDED
Binary file (6.57 kB)

__pycache__/model_config.cpython-313.pyc ADDED
Binary file (4.76 kB)
app.py CHANGED
@@ -1,7 +1,258 @@
+ """
+ Baguettotron vs Luth models — Gradio comparison app.
+ All models, all outputs; tabbed by parameter size.
+ """
+
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"
-
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ from inference import run_all
+ from model_config import (
+     TIER_LABELS,
+     combined_footprint,
+     footprint_table_data,
+     get_models_by_tier,
+     MODEL_IDS,
+ )
+
+ # Optional: use @spaces.GPU for ZeroGPU deployment
+ try:
+     import spaces
+
+     GPU_DECORATOR = spaces.GPU
+ except ImportError:
+     GPU_DECORATOR = lambda f: f  # no-op when not on Spaces
+
+
+ def build_params_by_model(
+     temp_small: float,
+     max_tok_small: int,
+     top_p_small: float,
+     top_k_small: int,
+     rep_small: float,
+     temp_med: float,
+     max_tok_med: int,
+     top_p_med: float,
+     top_k_med: int,
+     rep_med: float,
+     temp_large: float,
+     max_tok_large: int,
+     top_p_large: float,
+     top_k_large: int,
+     rep_large: float,
+ ) -> dict[str, dict]:
+     """Build params dict keyed by model_id from tier-level controls."""
+     tier_params = {
+         "small": {
+             "temperature": temp_small,
+             "max_tokens": max_tok_small,
+             "top_p": top_p_small,
+             "top_k": top_k_small,
+             "repeat_penalty": rep_small,
+         },
+         "medium": {
+             "temperature": temp_med,
+             "max_tokens": max_tok_med,
+             "top_p": top_p_med,
+             "top_k": top_k_med,
+             "repeat_penalty": rep_med,
+         },
+         "large": {
+             "temperature": temp_large,
+             "max_tokens": max_tok_large,
+             "top_p": top_p_large,
+             "top_k": top_k_large,
+             "repeat_penalty": rep_large,
+         },
+     }
+     models_by_tier = get_models_by_tier()
+     params_by_model: dict[str, dict] = {}
+     for tier, models in models_by_tier.items():
+         p = tier_params[tier]
+         for m in models:
+             params_by_model[m.repo_id] = p.copy()
+     return params_by_model
+
+
+ @GPU_DECORATOR
+ def generate_all(
+     prompt: str,
+     temp_small: float,
+     max_tok_small: int,
+     top_p_small: float,
+     top_k_small: int,
+     rep_small: float,
+     temp_med: float,
+     max_tok_med: int,
+     top_p_med: float,
+     top_k_med: int,
+     rep_med: float,
+     temp_large: float,
+     max_tok_large: int,
+     top_p_large: float,
+     top_k_large: int,
+     rep_large: float,
+ ) -> tuple[str, str, str, str, str, str]:
+     """Run all 6 models, return outputs in tab order: small (2), medium (2), large (2)."""
+     if not prompt.strip():
+         return ("",) * 6
+
+     params = build_params_by_model(
+         temp_small,
+         max_tok_small,
+         top_p_small,
+         top_k_small,
+         rep_small,
+         temp_med,
+         max_tok_med,
+         top_p_med,
+         top_k_med,
+         rep_med,
+         temp_large,
+         max_tok_large,
+         top_p_large,
+         top_k_large,
+         rep_large,
+     )
+
+     results = run_all(prompt, params)
+
+     models_by_tier = get_models_by_tier()
+     outputs: list[str] = []
+     for tier in ["small", "medium", "large"]:
+         for m in models_by_tier[tier]:
+             outputs.append(results.get(m.repo_id, ""))
+
+     return tuple(outputs)
+
+
+ def create_ui():
+     total_disk, total_vram = combined_footprint()
+     footprint_md = f"""
+ **Combined footprint —** Total disk: {total_disk:,} MB | Total VRAM (est.): {total_vram:.2f} GB
+ """
+
+     with gr.Blocks(title="Baguettotron vs Luth models") as demo:
+         gr.Markdown("# Baguettotron vs Luth models")
+         gr.Markdown(
+             "All models, all outputs — apples-to-apples comparison by parameter size."
+         )
+
+         # Row 1: Footprint table
+         gr.Markdown("## Model footprint")
+         footprint_df = gr.Dataframe(
+             value=footprint_table_data(),
+             headers=["Model", "Params", "File size (MB)", "Est. VRAM (MB)"],
+             interactive=False,
+         )
+         gr.Markdown(footprint_md)
+
+         # Row 2: Per-tier hyperparameters
+         gr.Markdown("## Generation settings (by size tier)")
+         with gr.Accordion("~0.3–0.4B (Small)", open=False):
+             temp_small = gr.Slider(0, 2, value=0.7, label="Temperature")
+             max_tok_small = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
+             top_p_small = gr.Slider(0, 1, value=0.9, label="Top p")
+             top_k_small = gr.Number(value=40, label="Top k")
+             rep_small = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
+
+         with gr.Accordion("~0.6–0.7B (Medium)", open=False):
+             temp_med = gr.Slider(0, 2, value=0.7, label="Temperature")
+             max_tok_med = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
+             top_p_med = gr.Slider(0, 1, value=0.9, label="Top p")
+             top_k_med = gr.Number(value=40, label="Top k")
+             rep_med = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
+
+         with gr.Accordion("~1–2B (Large)", open=False):
+             temp_large = gr.Slider(0, 2, value=0.7, label="Temperature")
+             max_tok_large = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
+             top_p_large = gr.Slider(0, 1, value=0.9, label="Top p")
+             top_k_large = gr.Number(value=40, label="Top k")
+             rep_large = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
+
+         # Row 3: Prompt + Generate + tabbed outputs
+         gr.Markdown("## Live inference")
+         prompt_in = gr.Textbox(
+             label="Prompt",
+             placeholder="Enter your prompt here...",
+             lines=3,
+         )
+         gen_btn = gr.Button("Generate", variant="primary")
+
+         models_by_tier = get_models_by_tier()
+         with gr.Tabs():
+             with gr.Tab(TIER_LABELS["small"]):
+                 with gr.Row():
+                     out_baguettotron = gr.Textbox(
+                         label="Baguettotron (321M)",
+                         lines=12,
+                         max_lines=24,
+                     )
+                     out_luth_350 = gr.Textbox(
+                         label="Luth-LFM2-350M (0.4B)",
+                         lines=12,
+                         max_lines=24,
+                     )
+             with gr.Tab(TIER_LABELS["medium"]):
+                 with gr.Row():
+                     out_luth_06 = gr.Textbox(
+                         label="Luth-0.6B-Instruct",
+                         lines=12,
+                         max_lines=24,
+                     )
+                     out_luth_07 = gr.Textbox(
+                         label="Luth-LFM2-700M",
+                         lines=12,
+                         max_lines=24,
+                     )
+             with gr.Tab(TIER_LABELS["large"]):
+                 with gr.Row():
+                     out_luth_12 = gr.Textbox(
+                         label="Luth-LFM2-1.2B",
+                         lines=12,
+                         max_lines=24,
+                     )
+                     out_luth_17 = gr.Textbox(
+                         label="Luth-1.7B-Instruct",
+                         lines=12,
+                         max_lines=24,
+                     )
+
+         all_inputs = [
+             prompt_in,
+             temp_small,
+             max_tok_small,
+             top_p_small,
+             top_k_small,
+             rep_small,
+             temp_med,
+             max_tok_med,
+             top_p_med,
+             top_k_med,
+             rep_med,
+             temp_large,
+             max_tok_large,
+             top_p_large,
+             top_k_large,
+             rep_large,
+         ]
+         all_outputs = [
+             out_baguettotron,
+             out_luth_350,
+             out_luth_06,
+             out_luth_07,
+             out_luth_12,
+             out_luth_17,
+         ]
+
+         gen_btn.click(
+             fn=generate_all,
+             inputs=all_inputs,
+             outputs=all_outputs,
+         )
+
+     return demo
+
+
+ if __name__ == "__main__":
+     demo = create_ui()
+     demo.launch()
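
Because the Blocks UI wires every slider into `generate_all`, the quickest smoke test is to bypass the UI and call `run_all` from `inference.py` directly. A minimal sketch, assuming a machine with enough VRAM for all six models (the prompt text is arbitrary):

```python
# Minimal smoke test for the inference layer, bypassing the Gradio UI.
from inference import run_all
from model_config import MODEL_IDS

# Same parameter keys that build_params_by_model emits, applied uniformly here.
uniform = {"temperature": 0.7, "max_tokens": 128, "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.1}
params_by_model = {model_id: dict(uniform) for model_id in MODEL_IDS}

results = run_all("Explique la photosynthèse en deux phrases.", params_by_model)
for model_id, text in results.items():
    print(f"=== {model_id} ===\n{text}\n")
```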
inference.py ADDED
@@ -0,0 +1,144 @@
+ """
+ Parallel load and inference for all 6 models (Baguettotron + 5 Luth).
+ Baguettotron uses EOS-safe formatting: "<|im_end>" (no trailing pipe), stop=["<|im_end>", "</think>"].
+ """
+
+ import threading
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from typing import Any
+
+ import torch
+
+ from model_config import MODEL_IDS
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # In-memory cache: model_id -> (model, tokenizer)
+ _model_cache: dict[str, tuple[Any, Any]] = {}
+ _cache_lock = threading.Lock()
+
+ # Baguettotron repo_id for EOS quirk handling
+ BAGUETTOTRON_ID = "PleIAs/Baguettotron"
+
+
+ def _format_prompt_baguettotron(prompt: str) -> tuple[str, list[str]]:
+     """
+     Manual prompt build for Baguettotron. Uses "<|im_end>" (no trailing pipe)
+     per tokenizer; stop=["<|im_end>", "</think>"] for generation.
+     """
+     # Qwen-style: <|im_start|>user\n{content}<|im_end>\n<|im_start|>assistant\n<think>\n
+     text = f"<|im_start|>user\n{prompt}<|im_end>\n<|im_start|>assistant\n<think>\n"
+     stop = ["<|im_end>", "</think>"]
+     return text, stop
+
+
+ def _format_prompt_luth(prompt: str, tokenizer: Any) -> tuple[dict[str, Any], list[str] | None]:
+     """Use tokenizer's chat template for Luth models."""
+     messages = [{"role": "user", "content": prompt}]
+     inputs = tokenizer.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         tokenize=True,
+         return_tensors="pt",
+         return_dict=True,
+     )
+     return inputs, None  # no custom stop for Luth
+
+
+ def _get_device() -> str:
+     return "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ def _load_model(model_id: str, device: str | None = None) -> tuple[Any, Any]:
+     """Load model and tokenizer; cache by model_id."""
+     if device is None:
+         device = _get_device()
+     with _cache_lock:
+         if model_id in _model_cache:
+             return _model_cache[model_id]
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         torch_dtype="auto",
+         device_map="auto" if device == "cuda" else device,
+         trust_remote_code=True,
+     )
+     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+
+     with _cache_lock:
+         _model_cache[model_id] = (model, tokenizer)
+
+     return model, tokenizer
+
+
+ def _generate_one(
+     model_id: str,
+     prompt: str,
+     params: dict[str, Any],
+     device: str = "cuda",
+ ) -> tuple[str, str]:
+     """Load (or use cached) model, run inference, return (model_id, text)."""
+     model, tokenizer = _load_model(model_id, device)
+
+     device = next(model.parameters()).device
+     gen_kwargs: dict[str, Any] = {
+         "max_new_tokens": params.get("max_tokens", 256),
+         "temperature": params.get("temperature", 0.7),
+         "top_p": params.get("top_p", 0.9),
+         "top_k": params.get("top_k", 40),
+         "repetition_penalty": params.get("repeat_penalty", 1.1),
+         "do_sample": True,
+         "pad_token_id": tokenizer.eos_token_id or tokenizer.pad_token_id,
+     }
+
+     if model_id == BAGUETTOTRON_ID:
+         text_prompt, _stop = _format_prompt_baguettotron(prompt)
+         inputs = tokenizer(text_prompt, return_tensors="pt")
+         inputs = {k: v.to(device) for k, v in inputs.items()}
+     else:
+         inputs_dict, _ = _format_prompt_luth(prompt, tokenizer)
+         inputs = {k: v.to(device) for k, v in inputs_dict.items()}
+
+     outputs = model.generate(**inputs, **gen_kwargs)
+     input_len = inputs["input_ids"].shape[-1]
+     text = tokenizer.decode(outputs[0][input_len:], skip_special_tokens=True)
+
+     # Post-process: truncate at stop strings for Baguettotron
+     if model_id == BAGUETTOTRON_ID:
+         for s in ["<|im_end>", "</think>"]:
+             if s in text:
+                 text = text.split(s)[0].strip()
+
+     return model_id, text
+
+
+ def run_all(
+     prompt: str,
+     params_by_model: dict[str, dict[str, Any]],
+     device: str | None = None,
+     max_workers: int = 6,
+ ) -> dict[str, str]:
+     """
+     Load all 6 models in parallel, run all 6 inferences in parallel.
+     Returns dict {model_id: text}.
+     """
+     if device is None:
+         device = _get_device()
+     default_params = {
+         "temperature": 0.7,
+         "max_tokens": 256,
+         "top_p": 0.9,
+         "top_k": 40,
+         "repeat_penalty": 1.1,
+     }
+
+     def task(model_id: str):
+         p = {**default_params, **(params_by_model.get(model_id) or {})}
+         return _generate_one(model_id, prompt, p, device)
+
+     results: dict[str, str] = {}
+     with ThreadPoolExecutor(max_workers=max_workers) as ex:
+         futures = {ex.submit(task, mid): mid for mid in MODEL_IDS}
+         for fut in as_completed(futures):
+             model_id, text = fut.result()
+             results[model_id] = text
+
+     return results
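
The module above handles Baguettotron's stop strings by truncating the decoded text after generation finishes, which means the model may keep sampling past `<|im_end>` until `max_new_tokens` is reached. If that wasted compute matters, a custom `StoppingCriteria` can end decoding as soon as a stop string appears. The sketch below is an optional alternative, not what this commit ships:

```python
import torch
from transformers import StoppingCriteria, StoppingCriteriaList


class StopOnStrings(StoppingCriteria):
    """Stop generation once any stop string shows up in the newly generated text."""

    def __init__(self, tokenizer, stop_strings, prompt_len):
        self.tokenizer = tokenizer
        self.stop_strings = stop_strings
        self.prompt_len = prompt_len  # number of prompt tokens to skip when decoding

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Re-decodes the generated suffix each step; fine for short completions.
        text = self.tokenizer.decode(input_ids[0][self.prompt_len:], skip_special_tokens=False)
        return any(s in text for s in self.stop_strings)


# Hypothetical use inside _generate_one, in the Baguettotron branch:
# criteria = StoppingCriteriaList([
#     StopOnStrings(tokenizer, ["<|im_end>", "</think>"], inputs["input_ids"].shape[-1])
# ])
# outputs = model.generate(**inputs, **gen_kwargs, stopping_criteria=criteria)
```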
model_config.py ADDED
@@ -0,0 +1,143 @@
+ """
+ Model registry for Baguettotron vs Luth comparison app.
+ All 6 models with footprint data and size tiers for tab grouping.
+ """
+
+ from dataclasses import dataclass
+ from typing import Literal
+
+ SizeTier = Literal["small", "medium", "large"]
+
+
+ @dataclass
+ class ModelEntry:
+     repo_id: str
+     name: str
+     author: str
+     params: int
+     params_display: str
+     file_size_mb: int
+     vram_estimate_mb: int
+     size_tier: SizeTier
+     description: str
+     architecture: str = "decoder"
+     license: str = "apache-2.0"
+     model_card_url: str = ""
+
+
+ # Baguettotron: 321M, ~642 MB (BF16)
+ # Luth models: from HF safetensors metadata where available; else params * 2 bytes
+ MODELS: list[ModelEntry] = [
+     ModelEntry(
+         repo_id="PleIAs/Baguettotron",
+         name="Baguettotron",
+         author="PleIAs",
+         params=320_956_992,
+         params_display="321M",
+         file_size_mb=642,
+         vram_estimate_mb=642,
+         size_tier="small",
+         description="321M generalist reasoning model, SYNTH, 80 layers",
+         model_card_url="https://huggingface.co/PleIAs/Baguettotron",
+     ),
+     ModelEntry(
+         repo_id="kurakurai/Luth-LFM2-350M",
+         name="Luth-LFM2-350M",
+         author="kurakurai",
+         params=354_483_968,
+         params_display="0.4B",
+         file_size_mb=709,
+         vram_estimate_mb=709,
+         size_tier="small",
+         description="French fine-tuned LFM2-350M",
+         model_card_url="https://huggingface.co/kurakurai/Luth-LFM2-350M",
+     ),
+     ModelEntry(
+         repo_id="kurakurai/Luth-0.6B-Instruct",
+         name="Luth-0.6B-Instruct",
+         author="kurakurai",
+         params=600_000_000,
+         params_display="0.6B",
+         file_size_mb=1200,
+         vram_estimate_mb=1200,
+         size_tier="medium",
+         description="Luth 0.6B Instruct",
+         model_card_url="https://huggingface.co/kurakurai/Luth-0.6B-Instruct",
+     ),
+     ModelEntry(
+         repo_id="kurakurai/Luth-LFM2-700M",
+         name="Luth-LFM2-700M",
+         author="kurakurai",
+         params=700_000_000,
+         params_display="0.7B",
+         file_size_mb=1400,
+         vram_estimate_mb=1400,
+         size_tier="medium",
+         description="Luth LFM2 700M",
+         model_card_url="https://huggingface.co/kurakurai/Luth-LFM2-700M",
+     ),
+     ModelEntry(
+         repo_id="kurakurai/Luth-LFM2-1.2B",
+         name="Luth-LFM2-1.2B",
+         author="kurakurai",
+         params=1_200_000_000,
+         params_display="1.2B",
+         file_size_mb=2400,
+         vram_estimate_mb=2400,
+         size_tier="large",
+         description="Luth LFM2 1.2B",
+         model_card_url="https://huggingface.co/kurakurai/Luth-LFM2-1.2B",
+     ),
+     ModelEntry(
+         repo_id="kurakurai/Luth-1.7B-Instruct",
+         name="Luth-1.7B-Instruct",
+         author="kurakurai",
+         params=1_700_000_000,
+         params_display="1.7B",
+         file_size_mb=3400,
+         vram_estimate_mb=3400,
+         size_tier="large",
+         description="Luth 1.7B Instruct",
+         model_card_url="https://huggingface.co/kurakurai/Luth-1.7B-Instruct",
+     ),
+ ]
+
+ # Model IDs for inference (repo_id as key)
+ MODEL_IDS = [m.repo_id for m in MODELS]
+
+ # Group by size tier for tabs
+ TIER_ORDER: list[SizeTier] = ["small", "medium", "large"]
+ TIER_LABELS: dict[SizeTier, str] = {
+     "small": "~0.3–0.4B (Small)",
+     "medium": "~0.6–0.7B (Medium)",
+     "large": "~1–2B (Large)",
+ }
+
+
+ def get_models_by_tier() -> dict[SizeTier, list[ModelEntry]]:
+     out: dict[SizeTier, list[ModelEntry]] = {t: [] for t in TIER_ORDER}
+     for m in MODELS:
+         out[m.size_tier].append(m)
+     return out
+
+
+ def get_model_by_id(repo_id: str) -> ModelEntry | None:
+     for m in MODELS:
+         if m.repo_id == repo_id:
+             return m
+     return None
+
+
+ def footprint_table_data() -> list[list[str]]:
+     """Rows for gr.Dataframe: Model | Params | File size (MB) | Est. VRAM (MB)"""
+     return [
+         [m.name, m.params_display, str(m.file_size_mb), str(m.vram_estimate_mb)]
+         for m in MODELS
+     ]
+
+
+ def combined_footprint() -> tuple[int, float]:
+     """Total disk (MB) and total VRAM (GB) for all 6 models."""
+     total_disk = sum(m.file_size_mb for m in MODELS)
+     total_vram_mb = sum(m.vram_estimate_mb for m in MODELS)
+     return total_disk, total_vram_mb / 1024
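
The registry comment above notes that, where safetensors metadata isn't available, sizes fall back to a params × 2 bytes (BF16/FP16) estimate, and each entry reuses its file size as the VRAM estimate. A small helper showing that arithmetic (the function name is illustrative, not part of the module):

```python
def estimate_fp16_footprint_mb(params: int) -> int:
    """Rough BF16/FP16 footprint: 2 bytes per parameter, reported in (decimal) MB."""
    return round(params * 2 / 1_000_000)


# Baguettotron's 320,956,992 parameters give ~642 MB, matching the registry entry.
print(estimate_fp16_footprint_mb(320_956_992))  # 642
```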
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio>=4.0
+ transformers>=4.36
+ accelerate
+ safetensors
+ huggingface_hub
+ torch