Joseph Pollack committed
Commit 935bdc8 · unverified · 1 Parent(s): cfdc81c

adds real LFM and PleIAs numbers
Files changed (9)
  1. .gitignore +2 -0
  2. README.md +26 -2
  3. app.py +141 -135
  4. bundle_luth.py +357 -0
  5. download_bundles.py +248 -0
  6. inference.py +18 -9
  7. model_config.py +23 -0
  8. requirements-bundle.txt +6 -0
  9. ui_strings.py +44 -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
 __pycache__/
 *.py[cod]
 *$py.class
+luth_bundle_downloads
+luth_bundle_work
README.md CHANGED
@@ -18,8 +18,9 @@ Apples-to-apples comparison of **Baguettotron** (PleIAs, 321M) and **5 Luth mode
 ## Features
 
 - **All models, all outputs:** Each prompt runs through all 6 models; outputs appear in tabs grouped by parameter size.
-- **Ultimate footprint:** Per-model disk size and VRAM estimates; combined footprint for all models.
-- **Per-tier hyperparameters:** Temperature, max_tokens, top_p, top_k, repeat_penalty per size tier.
+- **System prompt:** Optional system prompt supported for both Baguettotron (Qwen-style) and Luth (chat template) model families.
+- **Ultimate footprint:** Per-model disk size and VRAM estimates; combined footprint for all models. A **GGUF & LEAP bundle** reference table lists PleIAs Baguettotron GGUF variants and Liquid LFM2 GGUF sizes (from [LEAP](https://leap.liquid.ai/models) / [PleIAs/Baguettotron-GGUF](https://huggingface.co/PleIAs/Baguettotron-GGUF)).
+- **Per-family generation settings:** Two columns (Baguettotron | Luth) with sensible defaults: Baguettotron tuned for reasoning (e.g. temp 0.5, 512 tokens); Luth for instruct (e.g. temp 0.7, repeat_penalty 1.05).
 - **Transformers-only:** No quantization; all models run in BF16/FP16.
 
 ## Size tiers
@@ -34,6 +35,29 @@ Apples-to-apples comparison of **Baguettotron** (PleIAs, 321M) and **5 Luth mode
 
 Baguettotron's tokenizer uses `"<|im_end>"` (no trailing pipe) for EOS. The app uses manual prompt formatting and stop sequences to avoid multi-token tokenization. See [quirk.md](quirk.md) for details.
 
+## Bundling Luth models (LEAP)
+
+The script `bundle_luth.py` downloads a Luth model, validates it for LEAP, creates a GGUF bundle on the device, and investigates the result (sizes, optional inference).
+
+```bash
+pip install -r requirements-bundle.txt
+leap-bundle login <api-key>  # from https://leap.liquid.ai/profile#/api-keys
+# Single model:
+python bundle_luth.py --model kurakurai/Luth-LFM2-350M
+# All 5 Luth models (download → validate → create → download GGUF → investigate):
+python bundle_luth.py --all
+```
+
+Options: `--all` (every Luth model in sequence; LEAP free tier = 5 requests/24h), `--work-dir`, `--quantization` (e.g. Q4_K_M, Q8_0; reserved for now), `--dry-run` (download + validate only, no create), `--skip-create`, and `--request-id <id>` to download an existing bundle.
+
+**Download and inspect bundles:** Use `download_bundles.py` to fetch completed bundle outputs by request ID. Per the Liquid AI docs, artifacts are `.gguf` (default) or `.bundle` (ExecuTorch). The script inspects both and can run a short inference on `.gguf`:
+
+```bash
+python download_bundles.py --list                  # list requests, download all completed
+python download_bundles.py --request-ids 1 2       # download specific IDs
+python download_bundles.py --inspect-only --infer  # inspect existing downloads and run inference
+```
+
 ## Deployment
 
 - **Hugging Face Spaces:** Set hardware to **Zero GPU** (or standard GPU). The app uses `@spaces.GPU` when available.
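
The `<|im_end>` quirk carried through this commit is why the app matches literal stop strings instead of a single EOS token id. A minimal, illustrative sketch of that trimming step (`trim_at_stop` is hypothetical, not a function in this repo):

```python
# Baguettotron's EOS is the literal string "<|im_end>" (no trailing pipe),
# which can tokenize into several tokens, so decoded text is cut at the
# first stop string rather than relying on an EOS token id.
STOPS = ["<|im_end>", "</think>"]

def trim_at_stop(text: str, stops: list[str] = STOPS) -> str:
    """Return text truncated at the earliest occurrence of any stop string."""
    cut = len(text)
    for s in stops:
        i = text.find(s)
        if i != -1:
            cut = min(cut, i)
    return text[:cut]

print(trim_at_stop("Bonjour !<|im_end>ignored"))  # -> "Bonjour !"
```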
app.py CHANGED
@@ -5,13 +5,46 @@ All models, all outputs; tabbed by parameter size.
 
 import gradio as gr
 
-from inference import run_all
+from inference import BAGUETTOTRON_ID, run_all
 from model_config import (
     TIER_LABELS,
     combined_footprint,
     footprint_table_data,
+    gguf_footprint_table_data,
     get_models_by_tier,
-    MODEL_IDS,
+    MODELS,
+)
+from ui_strings import (
+    BTN_GENERATE,
+    COL_BAGUETTOTRON_HEADING,
+    COL_LUTH_HEADING,
+    FOOTPRINT_GGUF_HEADERS,
+    FOOTPRINT_HEADERS,
+    FOOTPRINT_SUMMARY_TEMPLATE,
+    GGUF_LEAP_INTRO,
+    HEADING_FOOTPRINT,
+    HEADING_GGUF_LEAP,
+    HEADING_GENERATION,
+    HEADING_LIVE_INFERENCE,
+    INFO_REP_LUTH,
+    INFO_TEMP_BAGUETTOTRON,
+    LABEL_MAX_TOKENS,
+    LABEL_OUT_BAGUETTOTRON,
+    LABEL_OUT_LUTH_06,
+    LABEL_OUT_LUTH_07,
+    LABEL_OUT_LUTH_12,
+    LABEL_OUT_LUTH_17,
+    LABEL_OUT_LUTH_350,
+    LABEL_PROMPT,
+    LABEL_REPEAT_PENALTY,
+    LABEL_SYSTEM_PROMPT,
+    LABEL_TEMPERATURE,
+    LABEL_TOP_K,
+    LABEL_TOP_P,
+    PLACEHOLDER_PROMPT,
+    PLACEHOLDER_SYSTEM_PROMPT,
+    SUBTITLE,
+    TITLE,
 )
 
 # Optional: use @spaces.GPU for ZeroGPU deployment
@@ -24,97 +57,71 @@ except ImportError:
 
 
 def build_params_by_model(
-    temp_small: float,
-    max_tok_small: int,
-    top_p_small: float,
-    top_k_small: int,
-    rep_small: float,
-    temp_med: float,
-    max_tok_med: int,
-    top_p_med: float,
-    top_k_med: int,
-    rep_med: float,
-    temp_large: float,
-    max_tok_large: int,
-    top_p_large: float,
-    top_k_large: int,
-    rep_large: float,
+    temp_baguettotron: float,
+    max_tok_baguettotron: int,
+    top_p_baguettotron: float,
+    top_k_baguettotron: int,
+    rep_baguettotron: float,
+    temp_luth: float,
+    max_tok_luth: int,
+    top_p_luth: float,
+    top_k_luth: int,
+    rep_luth: float,
 ) -> dict[str, dict]:
-    """Build params dict keyed by model_id from tier-level controls."""
-    tier_params = {
-        "small": {
-            "temperature": temp_small,
-            "max_tokens": max_tok_small,
-            "top_p": top_p_small,
-            "top_k": top_k_small,
-            "repeat_penalty": rep_small,
-        },
-        "medium": {
-            "temperature": temp_med,
-            "max_tokens": max_tok_med,
-            "top_p": top_p_med,
-            "top_k": top_k_med,
-            "repeat_penalty": rep_med,
-        },
-        "large": {
-            "temperature": temp_large,
-            "max_tokens": max_tok_large,
-            "top_p": top_p_large,
-            "top_k": top_k_large,
-            "repeat_penalty": rep_large,
-        },
+    """Build params dict keyed by model_id from Baguettotron vs Luth controls."""
+    baguettotron_params = {
+        "temperature": temp_baguettotron,
+        "max_tokens": max_tok_baguettotron,
+        "top_p": top_p_baguettotron,
+        "top_k": top_k_baguettotron,
+        "repeat_penalty": rep_baguettotron,
+    }
+    luth_params = {
+        "temperature": temp_luth,
+        "max_tokens": max_tok_luth,
+        "top_p": top_p_luth,
+        "top_k": top_k_luth,
+        "repeat_penalty": rep_luth,
     }
-    models_by_tier = get_models_by_tier()
     params_by_model: dict[str, dict] = {}
-    for tier, models in models_by_tier.items():
-        p = tier_params[tier]
-        for m in models:
-            params_by_model[m.repo_id] = p.copy()
+    for m in MODELS:
+        params_by_model[m.repo_id] = (baguettotron_params if m.repo_id == BAGUETTOTRON_ID else luth_params).copy()
     return params_by_model
 
 
 @GPU_DECORATOR
 def generate_all(
     prompt: str,
-    temp_small: float,
-    max_tok_small: int,
-    top_p_small: float,
-    top_k_small: int,
-    rep_small: float,
-    temp_med: float,
-    max_tok_med: int,
-    top_p_med: float,
-    top_k_med: int,
-    rep_med: float,
-    temp_large: float,
-    max_tok_large: int,
-    top_p_large: float,
-    top_k_large: int,
-    rep_large: float,
+    system_prompt: str,
+    temp_baguettotron: float,
+    max_tok_baguettotron: int,
+    top_p_baguettotron: float,
+    top_k_baguettotron: int,
+    rep_baguettotron: float,
+    temp_luth: float,
+    max_tok_luth: int,
+    top_p_luth: float,
+    top_k_luth: int,
+    rep_luth: float,
 ) -> tuple[str, str, str, str, str, str]:
     """Run all 6 models, return outputs in tab order: small (2), medium (2), large (2)."""
     if not prompt.strip():
         return ("",) * 6
 
     params = build_params_by_model(
-        temp_small,
-        max_tok_small,
-        top_p_small,
-        top_k_small,
-        rep_small,
-        temp_med,
-        max_tok_med,
-        top_p_med,
-        top_k_med,
-        rep_med,
-        temp_large,
-        max_tok_large,
-        top_p_large,
-        top_k_large,
-        rep_large,
+        temp_baguettotron,
+        max_tok_baguettotron,
+        top_p_baguettotron,
+        top_k_baguettotron,
+        rep_baguettotron,
+        temp_luth,
+        max_tok_luth,
+        top_p_luth,
+        top_k_luth,
+        rep_luth,
     )
 
-    results = run_all(prompt, params)
+    results = run_all(prompt, params, system_prompt=system_prompt)
 
     models_by_tier = get_models_by_tier()
     outputs: list[str] = []
@@ -127,113 +134,112 @@
 
 def create_ui():
     total_disk, total_vram = combined_footprint()
-    footprint_md = f"""
-    **Combined footprint —** Total disk: {total_disk:,} MB | Total VRAM (est.): {total_vram:.2f} GB
-    """
+    footprint_md = FOOTPRINT_SUMMARY_TEMPLATE.format(total_disk=total_disk, total_vram=total_vram)
 
-    with gr.Blocks(title="Baguettotron vs Luth models") as demo:
-        gr.Markdown("# Baguettotron vs Luth models")
-        gr.Markdown(
-            "All models, all outputs — apples-to-apples comparison by parameter size."
-        )
+    with gr.Blocks(title=TITLE) as demo:
+        gr.Markdown(f"# {TITLE}")
+        gr.Markdown(SUBTITLE)
 
-        # Row 1: Footprint table
-        gr.Markdown("## Model footprint")
+        # Row 1: Footprint tables (transformers + GGUF/LEAP reference)
+        gr.Markdown(HEADING_FOOTPRINT)
         footprint_df = gr.Dataframe(
             value=footprint_table_data(),
-            headers=["Model", "Params", "File size (MB)", "Est. VRAM (MB)"],
+            headers=FOOTPRINT_HEADERS,
             interactive=False,
         )
         gr.Markdown(footprint_md)
+        gr.Markdown(HEADING_GGUF_LEAP)
+        gr.Markdown(GGUF_LEAP_INTRO)
+        gguf_footprint_df = gr.Dataframe(
+            value=gguf_footprint_table_data(),
+            headers=FOOTPRINT_GGUF_HEADERS,
+            interactive=False,
+        )
 
-        # Row 2: Per-tier hyperparameters
-        gr.Markdown("## Generation settings (by size tier)")
-        with gr.Accordion("~0.3–0.4B (Small)", open=False):
-            temp_small = gr.Slider(0, 2, value=0.7, label="Temperature")
-            max_tok_small = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
-            top_p_small = gr.Slider(0, 1, value=0.9, label="Top p")
-            top_k_small = gr.Number(value=40, label="Top k")
-            rep_small = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
-
-        with gr.Accordion("~0.6–0.7B (Medium)", open=False):
-            temp_med = gr.Slider(0, 2, value=0.7, label="Temperature")
-            max_tok_med = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
-            top_p_med = gr.Slider(0, 1, value=0.9, label="Top p")
-            top_k_med = gr.Number(value=40, label="Top k")
-            rep_med = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
-
-        with gr.Accordion("~1–2B (Large)", open=False):
-            temp_large = gr.Slider(0, 2, value=0.7, label="Temperature")
-            max_tok_large = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
-            top_p_large = gr.Slider(0, 1, value=0.9, label="Top p")
-            top_k_large = gr.Number(value=40, label="Top k")
-            rep_large = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
+        # Row 2: Generation settings — two columns (Baguettotron | Luth)
+        gr.Markdown(HEADING_GENERATION)
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown(COL_BAGUETTOTRON_HEADING)
+                temp_baguettotron = gr.Slider(0, 2, value=0.5, label=LABEL_TEMPERATURE, info=INFO_TEMP_BAGUETTOTRON)
+                max_tok_baguettotron = gr.Number(value=512, label=LABEL_MAX_TOKENS, minimum=64, maximum=2048)
+                top_p_baguettotron = gr.Slider(0, 1, value=0.9, label=LABEL_TOP_P)
+                top_k_baguettotron = gr.Number(value=40, label=LABEL_TOP_K)
+                rep_baguettotron = gr.Slider(1.0, 1.5, value=1.1, label=LABEL_REPEAT_PENALTY)
+            with gr.Column():
+                gr.Markdown(COL_LUTH_HEADING)
+                temp_luth = gr.Slider(0, 2, value=0.7, label=LABEL_TEMPERATURE)
+                max_tok_luth = gr.Number(value=256, label=LABEL_MAX_TOKENS, minimum=64, maximum=2048)
+                top_p_luth = gr.Slider(0, 1, value=0.9, label=LABEL_TOP_P)
+                top_k_luth = gr.Number(value=40, label=LABEL_TOP_K)
+                rep_luth = gr.Slider(1.0, 1.5, value=1.05, label=LABEL_REPEAT_PENALTY, info=INFO_REP_LUTH)
 
-        # Row 3: Prompt + Generate + tabbed outputs
-        gr.Markdown("## Live inference")
+        # Row 3: System prompt + User prompt + Generate + tabbed outputs
+        gr.Markdown(HEADING_LIVE_INFERENCE)
+        system_prompt_in = gr.Textbox(
+            label=LABEL_SYSTEM_PROMPT,
+            placeholder=PLACEHOLDER_SYSTEM_PROMPT,
+            lines=2,
+        )
         prompt_in = gr.Textbox(
-            label="Prompt",
-            placeholder="Enter your prompt here...",
+            label=LABEL_PROMPT,
+            placeholder=PLACEHOLDER_PROMPT,
            lines=3,
        )
-        gen_btn = gr.Button("Generate", variant="primary")
+        gen_btn = gr.Button(BTN_GENERATE, variant="primary")
 
        models_by_tier = get_models_by_tier()
        with gr.Tabs():
            with gr.Tab(TIER_LABELS["small"]):
                with gr.Row():
                    out_baguettotron = gr.Textbox(
-                        label="Baguettotron (321M)",
+                        label=LABEL_OUT_BAGUETTOTRON,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_350 = gr.Textbox(
-                        label="Luth-LFM2-350M (0.4B)",
+                        label=LABEL_OUT_LUTH_350,
                        lines=12,
                        max_lines=24,
                    )
            with gr.Tab(TIER_LABELS["medium"]):
                with gr.Row():
                    out_luth_06 = gr.Textbox(
-                        label="Luth-0.6B-Instruct",
+                        label=LABEL_OUT_LUTH_06,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_07 = gr.Textbox(
-                        label="Luth-LFM2-700M",
+                        label=LABEL_OUT_LUTH_07,
                        lines=12,
                        max_lines=24,
                    )
            with gr.Tab(TIER_LABELS["large"]):
                with gr.Row():
                    out_luth_12 = gr.Textbox(
-                        label="Luth-LFM2-1.2B",
+                        label=LABEL_OUT_LUTH_12,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_17 = gr.Textbox(
-                        label="Luth-1.7B-Instruct",
+                        label=LABEL_OUT_LUTH_17,
                        lines=12,
                        max_lines=24,
                    )
 
        all_inputs = [
            prompt_in,
-            temp_small,
-            max_tok_small,
-            top_p_small,
-            top_k_small,
-            rep_small,
-            temp_med,
-            max_tok_med,
-            top_p_med,
-            top_k_med,
-            rep_med,
-            temp_large,
-            max_tok_large,
-            top_p_large,
-            top_k_large,
-            rep_large,
+            system_prompt_in,
+            temp_baguettotron,
+            max_tok_baguettotron,
+            top_p_baguettotron,
+            top_k_baguettotron,
+            rep_baguettotron,
+            temp_luth,
+            max_tok_luth,
+            top_p_luth,
+            top_k_luth,
+            rep_luth,
        ]
        all_outputs = [
            out_baguettotron,
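
The reworked `build_params_by_model` above replaces three tier dicts with two family dicts selected by `BAGUETTOTRON_ID`. A rough usage sketch, assuming `MODELS` contains `PleIAs/Baguettotron` plus the five `kurakurai/Luth-*` repo IDs used elsewhere in this commit (values are the app's defaults):

```python
# Illustrative call; positional order matches the function signature.
params = build_params_by_model(
    0.5, 512, 0.9, 40, 1.1,   # Baguettotron: temp, max_tokens, top_p, top_k, repeat_penalty
    0.7, 256, 0.9, 40, 1.05,  # Luth family
)
assert params["PleIAs/Baguettotron"]["temperature"] == 0.5
assert params["kurakurai/Luth-LFM2-350M"]["repeat_penalty"] == 1.05
# Each model gets its own dict because of the .copy() per repo_id.
assert params["PleIAs/Baguettotron"] is not params["kurakurai/Luth-LFM2-350M"]
```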
bundle_luth.py ADDED
@@ -0,0 +1,357 @@
+#!/usr/bin/env python3
+"""
+Bundle a Luth model on this device with LEAP (leap-bundle) and investigate the result.
+
+Per Liquid AI docs: leap-bundle create produces .gguf (default) or .bundle (--executorch).
+We inspect both artifact types.
+
+Steps:
+1. Download the Luth model from Hugging Face to a local directory.
+2. Validate the directory with leap-bundle validate.
+3. Create a bundle with leap-bundle create (requires LEAP auth).
+4. Poll until the bundle is completed, then download the output.
+5. Investigate: report file sizes (.gguf / .bundle) and optionally run inference on .gguf.
+
+Requires: pip install leap-bundle huggingface_hub
+LEAP auth: leap-bundle login <api-key> (from https://leap.liquid.ai/profile#/api-keys)
+"""
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+
+def _leap_env() -> dict[str, str]:
+    """Environment for leap-bundle subprocess so UTF-8 is used (avoids Windows cp1252 + checkmark)."""
+    env = os.environ.copy()
+    env["PYTHONUTF8"] = "1"
+    return env
+
+
+# Luth model repo IDs (LFM2-based are most likely LEAP-compatible)
+LUTH_REPOS = [
+    "kurakurai/Luth-LFM2-350M",
+    "kurakurai/Luth-LFM2-700M",
+    "kurakurai/Luth-LFM2-1.2B",
+    "kurakurai/Luth-0.6B-Instruct",
+    "kurakurai/Luth-1.7B-Instruct",
+]
+
+DEFAULT_REPO = LUTH_REPOS[0]
+DEFAULT_WORK_DIR = Path("./luth_bundle_work")
+DEFAULT_QUANTIZATION = "Q4_K_M"
+POLL_INTERVAL_SEC = 60
+POLL_MAX_MINUTES = 30
+
+
+def run(cmd: list[str], capture: bool = True, cwd: Path | None = None) -> subprocess.CompletedProcess:
+    """Run a command; raise on non-zero exit unless capture is False."""
+    kwargs = {
+        "cwd": str(cwd) if cwd else None,
+        "text": True,
+        "encoding": "utf-8",
+        "errors": "replace",
+        "env": _leap_env(),
+    }
+    if capture:
+        kwargs["capture_output"] = True
+    r = subprocess.run(cmd, **kwargs)
+    if r.returncode != 0 and capture:
+        raise RuntimeError(f"Command failed: {' '.join(cmd)}\nstdout: {r.stdout}\nstderr: {r.stderr}")
+    return r
+
+
+def has_leap_bundle() -> bool:
+    try:
+        run(["leap-bundle", "--version"], capture=True)
+        return True
+    except (FileNotFoundError, RuntimeError):
+        return False
+
+
+def download_model(repo_id: str, work_dir: Path) -> Path:
+    """Download Hugging Face model to work_dir/models/<repo_slug>. Returns path to model dir."""
+    try:
+        from huggingface_hub import snapshot_download
+    except ImportError:
+        raise SystemExit("Install huggingface_hub: pip install huggingface_hub")
+    slug = repo_id.replace("/", "--")
+    dest = work_dir / "models" / slug
+    dest.mkdir(parents=True, exist_ok=True)
+    print(f"Downloading {repo_id} to {dest} ...")
+    snapshot_download(repo_id=repo_id, local_dir=str(dest))
+    return dest
+
+
+def validate_bundle(model_path: Path) -> bool:
+    """Run leap-bundle validate. Returns True if valid."""
+    # run() raises on non-zero exit, so catch that instead of checking
+    # returncode (which is always 0 when run() returns).
+    try:
+        run(["leap-bundle", "validate", str(model_path)], capture=True)
+        return True
+    except RuntimeError as e:
+        print(e, file=sys.stderr)
+        return False
+
+
+def _parse_request_id(out: str) -> str | None:
+    """Parse request_id from JSON output; API may return integer or string."""
+    try:
+        # Handle single line or multi-line JSON
+        data = json.loads(out.strip())
+        rid = data.get("request_id")
+        if rid is not None:
+            return str(rid)
+    except (json.JSONDecodeError, TypeError):
+        pass
+    match = re.search(r'"request_id"\s*:\s*("([^"]+)"|(\d+))', out)
+    if match:
+        return match.group(2) or match.group(3)
+    return None
+
+
+def create_bundle(model_path: Path, work_dir: Path) -> tuple[str | None, str | None]:
+    """Run leap-bundle create --json. Returns (request_id, pending_id).
+    On success: (request_id, None). On 'pending request' error: (None, pending_id). Else: (None, None).
+    """
+    r = subprocess.run(
+        ["leap-bundle", "create", str(model_path), "--json"],
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+        cwd=work_dir,
+        env=_leap_env(),
+    )
+    out = (r.stdout or r.stderr or "").strip()
+    if r.returncode != 0:
+        print("Create failed:", out or f"exit code {r.returncode}")
+        pending_id = _parse_pending_request_id(out)
+        if pending_id:
+            return None, pending_id
+        if "login" in out.lower() or "authenticat" in out.lower():
+            print("Run: leap-bundle login <api-key> (get key from https://leap.liquid.ai/profile#/api-keys)")
+        return None, None
+    # Parse request_id (API can return {"request_id": 1, "status": "success"})
+    rid = _parse_request_id(out)
+    if rid:
+        return rid, None
+    if "already exists" in out or "exists" in out:
+        print("Bundle request already exists for this model (same hash). Check leap-bundle list.")
+        return None, None
+    print("Create output:", out)
+    return None, None
+
+
+def _parse_pending_request_id(out: str) -> str | None:
+    """Extract pending request ID from error message."""
+    match = re.search(r"pending request\s*\(ID:\s*(\d+)\)", out, re.IGNORECASE)
+    return match.group(1) if match else None
+
+
+def get_request_status(request_id: str) -> str:
+    """Get status of a bundle request. Returns status string."""
+    r = subprocess.run(
+        ["leap-bundle", "list", str(request_id)],
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+        env=_leap_env(),
+    )
+    out = (r.stdout or r.stderr or "").lower()
+    if "completed" in out:
+        return "completed"
+    if "failed" in out:
+        return "failed"
+    if "processing" in out or "upload" in out or "pending" in out:
+        return "processing"
+    return "unknown"
+
+
+def wait_for_bundle(request_id: str) -> bool:
+    """Poll until completed or failed. Returns True if completed."""
+    deadline = time.monotonic() + POLL_MAX_MINUTES * 60
+    while time.monotonic() < deadline:
+        status = get_request_status(request_id)
+        print(f"  Status: {status}")
+        if status == "completed":
+            return True
+        if status == "failed":
+            print("Bundle request failed. Run: leap-bundle list", request_id)
+            return False
+        time.sleep(POLL_INTERVAL_SEC)
+    print("Timed out waiting for bundle.")
+    return False
+
+
+# Per Liquid AI docs: create output is .gguf (default) or .bundle (--executorch)
+BUNDLE_EXTENSIONS = (".gguf", ".bundle")
+
+
+def _find_bundle_artifact(work_dir: Path) -> Path | None:
+    """Return first .gguf or .bundle file under work_dir or cwd."""
+    for d in [work_dir, Path.cwd()]:
+        for ext in BUNDLE_EXTENSIONS:
+            for f in d.glob(f"*{ext}"):
+                return f
+    return None
+
+
+def download_bundle(request_id: str, work_dir: Path) -> Path | None:
+    """Run leap-bundle download <request_id>. Returns path to downloaded bundle artifact if found."""
+    # Use subprocess directly: run() raises on failure, but here we want to
+    # fall back to searching work_dir for an artifact even if the CLI errors.
+    r = subprocess.run(
+        ["leap-bundle", "download", request_id],
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+        cwd=str(work_dir),
+        env=_leap_env(),
+    )
+    artifact = _find_bundle_artifact(work_dir)
+    if artifact is None and r.returncode != 0:
+        err = (r.stderr or r.stdout or "")
+        if "signed_url" in err:
+            print("  (LEAP download failed: 'signed_url' – try later: python download_bundles.py --request-ids", request_id + ")", file=sys.stderr)
+    return artifact
+
+
+def investigate(bundle_path: Path | None, model_path: Path) -> None:
+    """Report sizes for source dir and bundle artifact (.gguf or .bundle); run inference only on .gguf."""
+    print("\n--- Investigation ---")
+    if model_path.exists():
+        total = sum(f.stat().st_size for f in model_path.rglob("*") if f.is_file())
+        print(f"  Source model dir: {model_path} total size: {total / (1024**2):.1f} MB")
+    if bundle_path and bundle_path.exists():
+        size_mb = bundle_path.stat().st_size / (1024**2)
+        kind = "GGUF" if bundle_path.suffix == ".gguf" else "ExecuTorch (.bundle)"
+        print(f"  Bundle file: {bundle_path} size: {size_mb:.1f} MB [{kind}]")
+        if bundle_path.suffix == ".gguf":
+            try:
+                from llama_cpp import Llama
+                print("  Running short inference (llama_cpp)...")
+                llm = Llama(model_path=str(bundle_path), n_ctx=256, verbose=False)
+                out = llm("Bonjour, dis-moi une phrase courte en français.\n", max_tokens=32, temperature=0.3)
+                text = out["choices"][0]["text"].strip()
+                print(f"  Sample output: {text[:200]}")
+            except ImportError:
+                print("  (Install llama-cpp-python to run a sample inference on the GGUF)")
+        else:
+            print("  (ExecuTorch .bundle; use LEAP SDK for inference)")
+    else:
+        print("  No bundle file (.gguf or .bundle) found to inspect.")
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(
+        description="Bundle a Luth model with LEAP and investigate the result.",
+        epilog="Requires: pip install leap-bundle huggingface_hub. Auth: leap-bundle login <api-key>",
+    )
+    p.add_argument(
+        "--model",
+        default=DEFAULT_REPO,
+        choices=LUTH_REPOS,
+        help="Luth model repo ID (default: %(default)s); ignored if --all",
+    )
+    p.add_argument(
+        "--all",
+        action="store_true",
+        help="Bundle and inspect every Luth model in sequence (5 models; LEAP free tier = 5 requests/24h)",
+    )
+    p.add_argument(
+        "--work-dir",
+        type=Path,
+        default=DEFAULT_WORK_DIR,
+        help="Working directory for download and bundle output (default: %(default)s)",
+    )
+    p.add_argument(
+        "--quantization",
+        default=DEFAULT_QUANTIZATION,
+        help="(Reserved; current leap-bundle create has no --quantization option)",
+    )
+    p.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Only download and validate; do not create or download bundle",
+    )
+    p.add_argument(
+        "--skip-create",
+        action="store_true",
+        help="Skip bundle create (use existing local model dir only); still run investigate",
+    )
+    p.add_argument(
+        "--request-id",
+        type=str,
+        metavar="ID",
+        help="If bundle already created, download by request ID and then investigate",
+    )
+    args = p.parse_args()
+
+    args.work_dir = args.work_dir.resolve()
+    args.work_dir.mkdir(parents=True, exist_ok=True)
+
+    if not has_leap_bundle():
+        print("leap-bundle CLI not found. Install: pip install leap-bundle", file=sys.stderr)
+        return 1
+
+    models_to_run = LUTH_REPOS if args.all else [args.model]
+    if args.all and args.request_id:
+        print("--request-id is ignored when using --all.", file=sys.stderr)
+        args.request_id = None
+    if args.all:
+        print(f"Running for all {len(models_to_run)} Luth models: {', '.join(models_to_run)}")
+        print("Note: LEAP free tier allows 5 bundle requests per 24h.\n")
+
+    exit_code = 0
+    for repo_id in models_to_run:
+        print(f"\n{'='*60}\n  {repo_id}\n{'='*60}")
+        try:
+            # 1. Download
+            model_path = download_model(repo_id, args.work_dir)
+
+            # 2. Validate
+            print("Validating directory for LEAP bundle...")
+            if not validate_bundle(model_path):
+                print("Validation failed. Fix the model directory and retry.", file=sys.stderr)
+                exit_code = 1
+                continue
+            print("Validation passed.")
+
+            if args.dry_run:
+                investigate(None, model_path)
+                continue
+
+            gguf_path: Path | None = None
+
+            if args.request_id and not args.all:
+                # Download existing bundle by ID (single-model only)
+                print(f"Downloading bundle request {args.request_id}...")
+                gguf_path = download_bundle(args.request_id, args.work_dir)
+            elif not args.skip_create:
+                # 3. Create bundle (LEAP allows only one pending request; wait for it if needed)
+                request_id: str | None = None
+                pending_id: str | None = None
+                print("Creating bundle...")
+                request_id, pending_id = create_bundle(model_path, args.work_dir)
+                if pending_id:
+                    print(f"Waiting for previous bundle request {pending_id} to complete...")
+                    if wait_for_bundle(pending_id):
+                        download_bundle(pending_id, args.work_dir)
+                    print("Retrying create for this model...")
+                    request_id, pending_id = create_bundle(model_path, args.work_dir)
+                    if pending_id:
+                        print("Still pending; skipping create for this model.", file=sys.stderr)
+                        request_id = None
+                if request_id:
+                    # 4. Wait and download
+                    print(f"Waiting for bundle request {request_id} (poll every {POLL_INTERVAL_SEC}s)...")
+                    if wait_for_bundle(request_id):
+                        gguf_path = download_bundle(request_id, args.work_dir)
+                elif not pending_id:
+                    print("No new request created. Use --request-id <id> to download an existing bundle.")
+            else:
+                print("Skipping bundle create (--skip-create).")
+
+            # 5. Investigate
+            investigate(gguf_path, model_path)
+        except Exception as e:
+            print(f"Error processing {repo_id}: {e}", file=sys.stderr)
+            exit_code = 1
+
+    return exit_code
+
+
+if __name__ == "__main__":
+    sys.exit(main())
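
The fragile parts of `bundle_luth.py` are its two output parsers, since `leap-bundle create --json` can return a request ID as an integer or a string, or fail with a plain-text "pending request" message. A quick sanity check (the sample strings below are invented, not captured CLI output):

```python
from bundle_luth import _parse_request_id, _parse_pending_request_id

# JSON with an integer ID, JSON with a string ID, and non-JSON noise.
assert _parse_request_id('{"request_id": 7, "status": "success"}') == "7"
assert _parse_request_id('{"request_id": "abc-123"}') == "abc-123"
assert _parse_request_id("no json here") is None

# The pending-request regex only fires on the "(ID: N)" pattern.
assert _parse_pending_request_id("You have a pending request (ID: 42)") == "42"
assert _parse_pending_request_id("all clear") is None
```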
download_bundles.py ADDED
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+"""
+Download LEAP bundle outputs by request ID and inspect the files.
+
+Per Liquid AI docs: leap-bundle create produces .gguf (default, GGUF) or
+.bundle (ExecuTorch, with --executorch). This script finds and inspects both.
+
+Uses leap-bundle list (--json per request) and leap-bundle download
+with --output-path. Reports all bundle artifacts (.gguf, .bundle) and
+optionally runs short inference on .gguf (llama-cpp).
+
+Requires: pip install leap-bundle
+LEAP auth: leap-bundle login <api-key>
+"""
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+
+def _leap_env() -> dict[str, str]:
+    env = os.environ.copy()
+    env["PYTHONUTF8"] = "1"
+    return env
+
+
+def run(cmd: list[str], capture: bool = True, cwd: Path | None = None) -> subprocess.CompletedProcess:
+    kwargs = {
+        "cwd": str(cwd) if cwd else None,
+        "text": True,
+        "encoding": "utf-8",
+        "errors": "replace",
+        "env": _leap_env(),
+    }
+    if capture:
+        kwargs["capture_output"] = True
+    return subprocess.run(cmd, **kwargs)
+
+
+def list_request(request_id: str) -> dict | None:
+    """Get details for one request; returns parsed JSON or None."""
+    r = run(["leap-bundle", "list", str(request_id), "--json"], capture=True)
+    if r.returncode != 0:
+        return None
+    out = (r.stdout or "").strip()
+    try:
+        return json.loads(out)
+    except json.JSONDecodeError:
+        return None
+
+
+def list_all_request_ids() -> list[str]:
+    """Run leap-bundle list (no id) and parse table for request IDs. Returns list of ID strings."""
+    r = run(["leap-bundle", "list"], capture=True)
+    out = (r.stdout or r.stderr or "")
+    ids: list[str] = []
+    # Table rows: first column is often the ID (integer)
+    for line in out.splitlines():
+        parts = line.split()
+        if parts and parts[0].isdigit():
+            ids.append(parts[0])
+    # Fallback: any line with a pipe or spaces and a leading number (rich table)
+    if not ids:
+        for line in out.splitlines():
+            m = re.search(r"[\|\s](\d{1,6})[\|\s]", line)
+            if m:
+                ids.append(m.group(1))
+    # Fallback: JSON-like "request_id": N or "id": N
+    if not ids:
+        for m in re.finditer(r'"(?:request_id|id)"\s*:\s*(\d+)', out):
+            ids.append(m.group(1))
+    return list(dict.fromkeys(ids))
+
+
+def get_status(data: dict) -> str:
+    """Extract status from list request JSON."""
+    s = (data.get("status") or data.get("Status") or "").lower()
+    return s
+
+
+def download_bundle(request_id: str, output_path: Path) -> tuple[bool, str]:
+    """Run leap-bundle download <id> --output-path <dir>. Returns (success, stderr_or_empty)."""
+    output_path.mkdir(parents=True, exist_ok=True)
+    r = run(
+        ["leap-bundle", "download", str(request_id), "--output-path", str(output_path)],
+        capture=True,
+    )
+    err = (r.stderr or r.stdout or "").strip()
+    return r.returncode == 0, err
+
+
+# Per Liquid AI docs: create produces .gguf (default) or .bundle (--executorch)
+BUNDLE_EXTENSIONS = (".gguf", ".bundle")
+
+
+def find_bundle_files(root: Path) -> list[Path]:
+    """Return all LEAP bundle artifact files (.gguf, .bundle) under root."""
+    out: list[Path] = []
+    for ext in BUNDLE_EXTENSIONS:
+        out.extend(root.rglob(f"*{ext}"))
+    return sorted(out)
+
+
+def inspect_file(path: Path, run_inference: bool = False, root: Path | None = None) -> None:
+    """Print path, size, type; run short inference only for .gguf (llama-cpp)."""
+    size_mb = path.stat().st_size / (1024**2)
+    try:
+        disp = path.relative_to(root) if root else path
+    except ValueError:
+        disp = path
+    kind = "GGUF" if path.suffix == ".gguf" else "ExecuTorch (.bundle)"
+    print(f"  {disp}  {size_mb:.1f} MB  [{kind}]")
+    if run_inference and path.suffix == ".gguf":
+        try:
+            from llama_cpp import Llama
+            print("    Running short inference (llama-cpp)...")
+            llm = Llama(model_path=str(path), n_ctx=256, verbose=False)
+            out = llm("Bonjour, une phrase en français.\n", max_tokens=24, temperature=0.3)
+            text = (out["choices"][0]["text"] or "").strip()
+            print(f"    -> {text[:150]}")
+        except ImportError:
+            print("    (Install llama-cpp-python to run inference)")
+        except Exception as e:
+            print(f"    Inference error: {e}")
+    elif run_inference and path.suffix == ".bundle":
+        print("    (ExecuTorch .bundle; inference via LEAP SDK, not llama-cpp)")
+
+
+def main() -> int:
+    p = argparse.ArgumentParser(
+        description="Download LEAP bundle outputs by request ID and inspect files (.gguf or .bundle per Liquid AI docs).",
+        epilog="Requires: leap-bundle (pip install leap-bundle). Auth: leap-bundle login <api-key>",
+    )
+    p.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path("./luth_bundle_downloads"),
+        help="Directory to download each bundle into (default: ./luth_bundle_downloads)",
+    )
+    p.add_argument(
+        "--request-ids",
+        type=str,
+        nargs="*",
+        metavar="ID",
+        help="Bundle request IDs to download (e.g. 1 2 3)",
+    )
+    p.add_argument(
+        "--from-file",
+        type=Path,
+        metavar="FILE",
+        help="Text file with one request ID per line",
+    )
+    p.add_argument(
+        "--list",
+        action="store_true",
+        help="Run leap-bundle list and download all completed requests",
+    )
+    p.add_argument(
+        "--infer",
+        action="store_true",
+        help="Run a short inference on each downloaded GGUF (requires llama-cpp-python)",
+    )
+    p.add_argument(
+        "--inspect-only",
+        action="store_true",
+        help="Only inspect existing bundle files (.gguf, .bundle) under --output-dir; do not download",
+    )
+    args = p.parse_args()
+
+    args.output_dir = args.output_dir.resolve()
+    request_ids: list[str] = []
+
+    if args.inspect_only:
+        args.output_dir.mkdir(parents=True, exist_ok=True)
+        bundles = find_bundle_files(args.output_dir)
+        print(f"Inspecting {len(bundles)} bundle file(s) (.gguf / .bundle) under {args.output_dir}\n")
+        for f in bundles:
+            inspect_file(f, run_inference=args.infer, root=args.output_dir)
+        return 0
+
+    if args.list:
+        print("Fetching bundle request list...")
+        request_ids = list_all_request_ids()
+        if not request_ids:
+            print("No request IDs found from list.", file=sys.stderr)
+            print("If you have existing bundle requests (e.g. from bundle_luth.py --all), run:", file=sys.stderr)
+            print("  python download_bundles.py --request-ids 1 2 3 4 5", file=sys.stderr)
+            return 1
+        print(f"Found {len(request_ids)} request(s): {request_ids}")
+    else:
+        if args.request_ids:
+            request_ids.extend(args.request_ids)
+        if args.from_file:
+            if not args.from_file.exists():
+                print(f"File not found: {args.from_file}", file=sys.stderr)
+                return 1
+            for line in args.from_file.read_text(encoding="utf-8", errors="replace").splitlines():
+                rid = line.strip()
+                if rid and rid.isdigit():
+                    request_ids.append(rid)
+        if not request_ids:
+            print("Provide --request-ids, --from-file, or --list.", file=sys.stderr)
+            return 1
+
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+    downloaded: list[Path] = []
+
+    for rid in request_ids:
+        print(f"\n--- Request ID {rid} ---")
+        info = list_request(rid)
+        status = get_status(info) if info else ""
+        if status:
+            print(f"  Status: {status}")
+            if "completed" not in status and "complete" not in status:
+                print("  Skipping (not completed).")
+                continue
+        else:
+            print("  (Status unknown; attempting download.)")
+        dest = args.output_dir / f"request_{rid}"
+        print(f"  Downloading to {dest} ...")
+        ok, err = download_bundle(rid, dest)
+        if ok:
+            for f in find_bundle_files(dest):
+                downloaded.append(f)
+                kind = "GGUF" if f.suffix == ".gguf" else ".bundle"
+                print(f"  Downloaded: {f.name} ({f.stat().st_size / (1024**2):.1f} MB) [{kind}]")
+        else:
+            print("  Download failed.", file=sys.stderr)
+            if "signed_url" in err:
+                print("  (LEAP CLI/API 'signed_url' error – try again later or check LEAP status.)", file=sys.stderr)
+
+    print("\n" + "=" * 60)
+    print("Inspection summary (bundle artifacts: .gguf / .bundle)")
+    print("=" * 60)
+    all_bundles = find_bundle_files(args.output_dir)
+    for f in all_bundles:
+        inspect_file(f, run_inference=args.infer, root=args.output_dir)
+    if not all_bundles:
+        print("  No bundle files (.gguf or .bundle) found.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
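
`list_all_request_ids` has to scrape whatever table format `leap-bundle list` prints, so it tries three heuristics in order: a leading integer per row, a number between separators, then JSON-like keys. The snippet below exercises only the first heuristic on an invented table; real CLI output may differ:

```python
# Hypothetical table text; only the leading integer per row matters here.
sample = """\
ID   MODEL                       STATUS
1    kurakurai/Luth-LFM2-350M    completed
2    kurakurai/Luth-LFM2-700M    processing
"""
ids = [ln.split()[0] for ln in sample.splitlines() if ln.split() and ln.split()[0].isdigit()]
assert ids == ["1", "2"]
```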
inference.py CHANGED
@@ -19,20 +19,27 @@ _cache_lock = __import__("threading").Lock()
 BAGUETTOTRON_ID = "PleIAs/Baguettotron"
 
 
-def _format_prompt_baguettotron(prompt: str) -> tuple[str, list[str]]:
+def _format_prompt_baguettotron(prompt: str, system_prompt: str = "") -> tuple[str, list[str]]:
     """
     Manual prompt build for Baguettotron. Uses "<|im_end>" (no trailing pipe)
     per tokenizer; stop=["<|im_end>", "</think>"] for generation.
+    Qwen-style: system (optional) + user + assistant.
     """
-    # Qwen-style: <|im_start|>user\n{content}<|im_end>\n<|im_start|>assistant\n<think>\n
-    text = f"<|im_start|>user\n{prompt}<|im_end>\n<|im_start|>assistant\n<think>\n"
+    parts: list[str] = []
+    if system_prompt.strip():
+        parts.append(f"<|im_start|>system\n{system_prompt.strip()}<|im_end>\n")
+    parts.append(f"<|im_start|>user\n{prompt}<|im_end>\n<|im_start|>assistant\n<think>\n")
+    text = "".join(parts)
     stop = ["<|im_end>", "</think>"]
     return text, stop
 
 
-def _format_prompt_luth(prompt: str, tokenizer: Any) -> tuple[dict[str, Any], list[str] | None]:
-    """Use tokenizer's chat template for Luth models."""
-    messages = [{"role": "user", "content": prompt}]
+def _format_prompt_luth(prompt: str, tokenizer: Any, system_prompt: str = "") -> tuple[dict[str, Any], list[str] | None]:
+    """Use tokenizer's chat template for Luth models. Supports optional system message."""
+    messages: list[dict[str, str]] = []
+    if system_prompt.strip():
+        messages.append({"role": "system", "content": system_prompt.strip()})
+    messages.append({"role": "user", "content": prompt})
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -74,6 +81,7 @@ def _generate_one(
     prompt: str,
     params: dict[str, Any],
     device: str = "cuda",
+    system_prompt: str = "",
 ) -> tuple[str, str]:
     """Load (or use cached) model, run inference, return (model_id, text)."""
     model, tokenizer = _load_model(model_id, device)
@@ -95,10 +103,10 @@ def _generate_one(
     }
 
     if model_id == BAGUETTOTRON_ID:
-        text_prompt, _stop = _format_prompt_baguettotron(prompt)
+        text_prompt, _stop = _format_prompt_baguettotron(prompt, system_prompt)
         inputs = tokenizer(text_prompt, return_tensors="pt")
     else:
-        inputs = _format_prompt_luth(prompt, tokenizer)[0]
+        inputs = _format_prompt_luth(prompt, tokenizer, system_prompt)[0]
 
     # Move to device (input_ids/attention_mask are int; no dtype cast needed)
     inputs = {k: v.to(device) for k, v in inputs.items()}
@@ -126,6 +134,7 @@ def run_all(
     params_by_model: dict[str, dict[str, Any]],
     device: str | None = None,
     max_workers: int = 6,
+    system_prompt: str = "",
 ) -> dict[str, str]:
     """
     Load all 6 models in parallel, run all 6 inferences in parallel.
@@ -143,7 +152,7 @@ def run_all(
 
     def task(model_id: str):
         p = {**default_params, **(params_by_model.get(model_id) or {})}
-        return _generate_one(model_id, prompt, p, device)
+        return _generate_one(model_id, prompt, p, device, system_prompt)
 
     results: dict[str, str] = {}
     with ThreadPoolExecutor(max_workers=max_workers) as ex:
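
Given the definitions in this diff, the new system-prompt path produces exactly the following Baguettotron prompt text (worked example; the French strings are arbitrary, and importing `inference` assumes the app's requirements are installed):

```python
from inference import _format_prompt_baguettotron

text, stop = _format_prompt_baguettotron("Quelle heure est-il ?", system_prompt="Réponds en français.")
assert text == (
    "<|im_start|>system\nRéponds en français.<|im_end>\n"
    "<|im_start|>user\nQuelle heure est-il ?<|im_end>\n"
    "<|im_start|>assistant\n<think>\n"
)
assert stop == ["<|im_end>", "</think>"]
```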
model_config.py CHANGED
@@ -141,3 +141,26 @@ def combined_footprint() -> tuple[int, float]:
     total_disk = sum(m.file_size_mb for m in MODELS)
     total_vram_mb = sum(m.vram_estimate_mb for m in MODELS)
     return total_disk, total_vram_mb / 1024
+
+
+# --- GGUF / LEAP bundle reference ---
+# Baguettotron: PleIAs/Baguettotron-GGUF (Hugging Face)
+# LFM2 / Luth: actual LEAP bundle outputs (leap-bundle create + download)
+GGUF_REFERENCE_ROWS: list[list[str]] = [
+    # Model/Variant | Params | File size (MB) | Source
+    ["Baguettotron Q4_0", "321M", "202", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q4_K_M", "321M", "240", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q5_K_M", "321M", "257", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q8_0", "321M", "344", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron BF16", "321M", "644", "PleIAs/Baguettotron-GGUF"],
+    ["LFM2-350M Q4_K_M", "0.4B", "219", "LEAP bundle (Luth-LFM2-350M)"],
+    ["LFM2-700M Q4_K_M", "0.7B", "447", "LEAP bundle (Luth-LFM2-700M)"],
+    ["LFM2-1.2B Q4_K_M", "1.2B", "697", "LEAP bundle (Luth-LFM2-1.2B)"],
+    ["Luth-0.6B-Instruct (Qwen3) Q4_K_M", "0.6B", "378", "LEAP bundle"],
+    ["Luth-1.7B-Instruct (Qwen3) Q4_K_M", "1.7B", "1,056", "LEAP bundle"],
+]
+
+
+def gguf_footprint_table_data() -> list[list[str]]:
+    """Rows for GGUF/LEAP reference table: Model/Variant | Params | File size (MB) | Source."""
+    return GGUF_REFERENCE_ROWS
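
One thing to watch in `GGUF_REFERENCE_ROWS`: file sizes are display strings (note the comma in "1,056"), so any arithmetic over the table needs a cast first. For example, totalling the five Q4_K_M LEAP bundle sizes:

```python
from model_config import GGUF_REFERENCE_ROWS

# Strings -> MB ints; the comma in "1,056" must be stripped before int().
q4_leap = [r for r in GGUF_REFERENCE_ROWS if "Q4_K_M" in r[0] and "LEAP bundle" in r[3]]
total_mb = sum(int(r[2].replace(",", "")) for r in q4_leap)
print(f"Q4_K_M LEAP bundles: {total_mb:,} MB")  # 219 + 447 + 697 + 378 + 1056 = 2,797 MB
```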
requirements-bundle.txt ADDED
@@ -0,0 +1,6 @@
+# For bundle_luth.py: bundle Luth models with LEAP and investigate
+leap-bundle
+huggingface_hub
+# Optional: run a short inference on the downloaded GGUF
+# llama-cpp-python
+hf_xet
ui_strings.py ADDED
@@ -0,0 +1,44 @@
+"""
+UI text strings for the Baguettotron vs Luth Gradio app.
+Centralized for reuse and easier i18n.
+"""
+
+# App identity
+TITLE = "Baguettotron vs Luth models"
+SUBTITLE = "All models, all outputs — apples-to-apples comparison by parameter size."
+
+# Footprint section
+HEADING_FOOTPRINT = "## Model footprint"
+FOOTPRINT_HEADERS = ["Model", "Params", "File size (MB)", "Est. VRAM (MB)"]
+FOOTPRINT_SUMMARY_TEMPLATE = "**Combined footprint —** Total disk: {total_disk:,} MB | Total VRAM (est.): {total_vram:.2f} GB"
+HEADING_GGUF_LEAP = "### GGUF & LEAP bundle sizes (reference)"
+GGUF_LEAP_INTRO = "PleIAs Baguettotron GGUF variants (Hugging Face) and Liquid LFM2 GGUF sizes (LEAP model library). Download with `leap-bundle download <model>` or from the links below."
+FOOTPRINT_GGUF_HEADERS = ["Model / Variant", "Params", "File size (MB)", "Source"]
+
+# Generation settings
+HEADING_GENERATION = "## Generation settings (by model family)"
+COL_BAGUETTOTRON_HEADING = "**Baguettotron (321M)** — *reasoning*"
+COL_LUTH_HEADING = "**Luth models (0.4B–1.7B)** — *instruct*"
+LABEL_TEMPERATURE = "Temperature"
+LABEL_MAX_TOKENS = "Max tokens"
+LABEL_TOP_P = "Top p"
+LABEL_TOP_K = "Top k"
+LABEL_REPEAT_PENALTY = "Repeat penalty"
+INFO_TEMP_BAGUETTOTRON = "Lower for more deterministic reasoning"
+INFO_REP_LUTH = "Luth/LFM2 often use ~1.05"
+
+# Live inference
+HEADING_LIVE_INFERENCE = "## Live inference"
+LABEL_SYSTEM_PROMPT = "System prompt (optional)"
+PLACEHOLDER_SYSTEM_PROMPT = "e.g. You are a helpful assistant that answers in French."
+LABEL_PROMPT = "Prompt"
+PLACEHOLDER_PROMPT = "Enter your prompt here..."
+BTN_GENERATE = "Generate"
+
+# Output textbox labels (per model)
+LABEL_OUT_BAGUETTOTRON = "Baguettotron (321M)"
+LABEL_OUT_LUTH_350 = "Luth-LFM2-350M (0.4B)"
+LABEL_OUT_LUTH_06 = "Luth-0.6B-Instruct"
+LABEL_OUT_LUTH_07 = "Luth-LFM2-700M"
+LABEL_OUT_LUTH_12 = "Luth-LFM2-1.2B"
+LABEL_OUT_LUTH_17 = "Luth-1.7B-Instruct"
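
Of these strings, only `FOOTPRINT_SUMMARY_TEMPLATE` carries format fields, and `create_ui` in app.py fills them by keyword, so the names must stay `total_disk` and `total_vram`. A quick check (numbers invented):

```python
from ui_strings import FOOTPRINT_SUMMARY_TEMPLATE

md = FOOTPRINT_SUMMARY_TEMPLATE.format(total_disk=9876, total_vram=11.5)
assert "9,876 MB" in md  # {total_disk:,} adds the thousands separator
assert "11.50 GB" in md  # {total_vram:.2f} fixes two decimals
```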