Joseph Pollack committed: adds real LFM and PleIAs numbers
Files changed:
- .gitignore              +2   -0
- README.md               +26  -2
- app.py                  +141 -135
- bundle_luth.py          +357 -0
- download_bundles.py     +248 -0
- inference.py            +18  -9
- model_config.py         +23  -0
- requirements-bundle.txt +6   -0
- ui_strings.py           +44  -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
 __pycache__/
 *.py[cod]
 *$py.class
+luth_bundle_downloads
+luth_bundle_work
README.md CHANGED
@@ -18,8 +18,9 @@ Apples-to-apples comparison of **Baguettotron** (PleIAs, 321M) and **5 Luth models**
 ## Features
 
 - **All models, all outputs:** Each prompt runs through all 6 models; outputs appear in tabs grouped by parameter size.
-- **
-- **Per-
+- **System prompt:** Optional system prompt supported for both Baguettotron (Qwen-style) and Luth (chat template) model families.
+- **Ultimate footprint:** Per-model disk size and VRAM estimates; combined footprint for all models. A **GGUF & LEAP bundle** reference table lists PleIAs Baguettotron GGUF variants and Liquid LFM2 GGUF sizes (from [LEAP](https://leap.liquid.ai/models) / [PleIAs/Baguettotron-GGUF](https://huggingface.co/PleIAs/Baguettotron-GGUF)).
+- **Per-family generation settings:** Two columns (Baguettotron | Luth) with sensible defaults: Baguettotron tuned for reasoning (e.g. temp 0.5, 512 tokens); Luth for instruct (e.g. temp 0.7, repeat_penalty 1.05).
 - **Transformers-only:** No quantization; all models run in BF16/FP16.
 
 ## Size tiers
@@ -34,6 +35,29 @@ Apples-to-apples comparison of **Baguettotron** (PleIAs, 321M) and **5 Luth models**
 
 Baguettotron's tokenizer uses `"<|im_end>"` (no trailing pipe) for EOS. The app uses manual prompt formatting and stop sequences to avoid multi-token tokenization. See [quirk.md](quirk.md) for details.
 
+## Bundling Luth models (LEAP)
+
+The script `bundle_luth.py` downloads a Luth model, validates it for LEAP, creates a GGUF bundle on the device, and investigates the result (sizes, optional inference).
+
+```bash
+pip install -r requirements-bundle.txt
+leap-bundle login <api-key>  # from https://leap.liquid.ai/profile#/api-keys
+# Single model:
+python bundle_luth.py --model kurakurai/Luth-LFM2-350M
+# All 5 Luth models (download → validate → create → download GGUF → investigate):
+python bundle_luth.py --all
+```
+
+Options: `--all` (every Luth model in sequence; LEAP free tier = 5 requests/24h), `--work-dir`, `--quantization` (e.g. Q4_K_M, Q8_0), `--dry-run` (download + validate only, no create), `--skip-create`, `--request-id <id>` to download an existing bundle.
+
+**Download and inspect bundles:** Use `download_bundles.py` to fetch completed bundle outputs by request ID. Per Liquid AI docs, artifacts are `.gguf` (default) or `.bundle` (ExecuTorch). The script inspects both and can run a short inference on `.gguf`:
+
+```bash
+python download_bundles.py --list                  # list requests, download all completed
+python download_bundles.py --request-ids 1 2       # download specific IDs
+python download_bundles.py --inspect-only --infer  # inspect existing downloads and run inference
+```
+
 ## Deployment
 
 - **Hugging Face Spaces:** Set hardware to **Zero GPU** (or standard GPU). The app uses `@spaces.GPU` when available.
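The EOS quirk called out above is easy to verify directly. A minimal sketch, assuming `transformers` is installed and the Hub is reachable; the comments describe the expected splits rather than captured output:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("PleIAs/Baguettotron")

# Standard ChatML ends turns with "<|im_end|>", but this tokenizer's EOS is
# "<|im_end>" (no trailing pipe). The ChatML form is therefore split into
# several ordinary tokens, which is why the app formats prompts manually and
# matches stop strings instead of relying on a single EOS token id.
print(tok.tokenize("<|im_end|>"))  # expected: multiple pieces
print(tok.tokenize("<|im_end>"))   # expected: one special token
```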
app.py CHANGED
@@ -5,13 +5,46 @@ All models, all outputs; tabbed by parameter size.
 
 import gradio as gr
 
-from inference import run_all
+from inference import BAGUETTOTRON_ID, run_all
 from model_config import (
     TIER_LABELS,
     combined_footprint,
     footprint_table_data,
+    gguf_footprint_table_data,
     get_models_by_tier,
+    MODELS,
+)
+from ui_strings import (
+    BTN_GENERATE,
+    COL_BAGUETTOTRON_HEADING,
+    COL_LUTH_HEADING,
+    FOOTPRINT_GGUF_HEADERS,
+    FOOTPRINT_HEADERS,
+    FOOTPRINT_SUMMARY_TEMPLATE,
+    GGUF_LEAP_INTRO,
+    HEADING_FOOTPRINT,
+    HEADING_GGUF_LEAP,
+    HEADING_GENERATION,
+    HEADING_LIVE_INFERENCE,
+    INFO_REP_LUTH,
+    INFO_TEMP_BAGUETTOTRON,
+    LABEL_MAX_TOKENS,
+    LABEL_OUT_BAGUETTOTRON,
+    LABEL_OUT_LUTH_06,
+    LABEL_OUT_LUTH_07,
+    LABEL_OUT_LUTH_12,
+    LABEL_OUT_LUTH_17,
+    LABEL_OUT_LUTH_350,
+    LABEL_PROMPT,
+    LABEL_REPEAT_PENALTY,
+    LABEL_SYSTEM_PROMPT,
+    LABEL_TEMPERATURE,
+    LABEL_TOP_K,
+    LABEL_TOP_P,
+    PLACEHOLDER_PROMPT,
+    PLACEHOLDER_SYSTEM_PROMPT,
+    SUBTITLE,
+    TITLE,
 )
 
 # Optional: use @spaces.GPU for ZeroGPU deployment
@@ -24,97 +57,71 @@ except ImportError:
 
 
 def build_params_by_model(
-    temp_large: float,
-    max_tok_large: int,
-    top_p_large: float,
-    top_k_large: int,
-    rep_large: float,
+    temp_baguettotron: float,
+    max_tok_baguettotron: int,
+    top_p_baguettotron: float,
+    top_k_baguettotron: int,
+    rep_baguettotron: float,
+    temp_luth: float,
+    max_tok_luth: int,
+    top_p_luth: float,
+    top_k_luth: int,
+    rep_luth: float,
 ) -> dict[str, dict]:
-    """Build params dict keyed by model_id from
-            "repeat_penalty": rep_med,
-        },
-        "large": {
-            "temperature": temp_large,
-            "max_tokens": max_tok_large,
-            "top_p": top_p_large,
-            "top_k": top_k_large,
-            "repeat_penalty": rep_large,
-        },
+    """Build params dict keyed by model_id from Baguettotron vs Luth controls."""
+    baguettotron_params = {
+        "temperature": temp_baguettotron,
+        "max_tokens": max_tok_baguettotron,
+        "top_p": top_p_baguettotron,
+        "top_k": top_k_baguettotron,
+        "repeat_penalty": rep_baguettotron,
+    }
+    luth_params = {
+        "temperature": temp_luth,
+        "max_tokens": max_tok_luth,
+        "top_p": top_p_luth,
+        "top_k": top_k_luth,
+        "repeat_penalty": rep_luth,
     }
-    models_by_tier = get_models_by_tier()
     params_by_model: dict[str, dict] = {}
-    for m in models:
-        params_by_model[m.repo_id] = p.copy()
+    for m in MODELS:
+        params_by_model[m.repo_id] = (baguettotron_params if m.repo_id == BAGUETTOTRON_ID else luth_params).copy()
     return params_by_model
 
 
 @GPU_DECORATOR
 def generate_all(
     prompt: str,
-    max_tok_large: int,
-    top_p_large: float,
-    top_k_large: int,
-    rep_large: float,
+    system_prompt: str,
+    temp_baguettotron: float,
+    max_tok_baguettotron: int,
+    top_p_baguettotron: float,
+    top_k_baguettotron: int,
+    rep_baguettotron: float,
+    temp_luth: float,
+    max_tok_luth: int,
+    top_p_luth: float,
+    top_k_luth: int,
+    rep_luth: float,
 ) -> tuple[str, str, str, str, str, str]:
     """Run all 6 models, return outputs in tab order: small (2), medium (2), large (2)."""
     if not prompt.strip():
         return ("",) * 6
 
     params = build_params_by_model(
-        temp_large,
-        max_tok_large,
-        top_p_large,
-        top_k_large,
-        rep_large,
+        temp_baguettotron,
+        max_tok_baguettotron,
+        top_p_baguettotron,
+        top_k_baguettotron,
+        rep_baguettotron,
+        temp_luth,
+        max_tok_luth,
+        top_p_luth,
+        top_k_luth,
+        rep_luth,
    )
 
-    results = run_all(prompt, params)
+    results = run_all(prompt, params, system_prompt=system_prompt)
 
     models_by_tier = get_models_by_tier()
     outputs: list[str] = []
@@ -127,113 +134,112 @@ def generate_all(
 
 def create_ui():
     total_disk, total_vram = combined_footprint()
-    footprint_md =
-    **Combined footprint —** Total disk: {total_disk:,} MB | Total VRAM (est.): {total_vram:.2f} GB
-    """
+    footprint_md = FOOTPRINT_SUMMARY_TEMPLATE.format(total_disk=total_disk, total_vram=total_vram)
 
-    with gr.Blocks(title=
-        gr.Markdown("#
-        gr.Markdown(
-            "All models, all outputs — apples-to-apples comparison by parameter size."
-        )
+    with gr.Blocks(title=TITLE) as demo:
+        gr.Markdown(f"# {TITLE}")
+        gr.Markdown(SUBTITLE)
 
-        # Row 1: Footprint
-        gr.Markdown(
+        # Row 1: Footprint tables (transformers + GGUF/LEAP reference)
+        gr.Markdown(HEADING_FOOTPRINT)
         footprint_df = gr.Dataframe(
             value=footprint_table_data(),
-            headers=
+            headers=FOOTPRINT_HEADERS,
             interactive=False,
         )
         gr.Markdown(footprint_md)
+        gr.Markdown(HEADING_GGUF_LEAP)
+        gr.Markdown(GGUF_LEAP_INTRO)
+        gguf_footprint_df = gr.Dataframe(
+            value=gguf_footprint_table_data(),
+            headers=FOOTPRINT_GGUF_HEADERS,
+            interactive=False,
+        )
 
-        # Row 2:
-        gr.Markdown(
-        with gr.
-            temp_large = gr.Slider(0, 2, value=0.7, label="Temperature")
-            max_tok_large = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
-            top_p_large = gr.Slider(0, 1, value=0.9, label="Top p")
-            top_k_large = gr.Number(value=40, label="Top k")
-            rep_large = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
+        # Row 2: Generation settings — two columns (Baguettotron | Luth)
+        gr.Markdown(HEADING_GENERATION)
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown(COL_BAGUETTOTRON_HEADING)
+                temp_baguettotron = gr.Slider(0, 2, value=0.5, label=LABEL_TEMPERATURE, info=INFO_TEMP_BAGUETTOTRON)
+                max_tok_baguettotron = gr.Number(value=512, label=LABEL_MAX_TOKENS, minimum=64, maximum=2048)
+                top_p_baguettotron = gr.Slider(0, 1, value=0.9, label=LABEL_TOP_P)
+                top_k_baguettotron = gr.Number(value=40, label=LABEL_TOP_K)
+                rep_baguettotron = gr.Slider(1.0, 1.5, value=1.1, label=LABEL_REPEAT_PENALTY)
+            with gr.Column():
+                gr.Markdown(COL_LUTH_HEADING)
+                temp_luth = gr.Slider(0, 2, value=0.7, label=LABEL_TEMPERATURE)
+                max_tok_luth = gr.Number(value=256, label=LABEL_MAX_TOKENS, minimum=64, maximum=2048)
+                top_p_luth = gr.Slider(0, 1, value=0.9, label=LABEL_TOP_P)
+                top_k_luth = gr.Number(value=40, label=LABEL_TOP_K)
+                rep_luth = gr.Slider(1.0, 1.5, value=1.05, label=LABEL_REPEAT_PENALTY, info=INFO_REP_LUTH)
 
-        # Row 3:
-        gr.Markdown(
+        # Row 3: System prompt + User prompt + Generate + tabbed outputs
+        gr.Markdown(HEADING_LIVE_INFERENCE)
+        system_prompt_in = gr.Textbox(
+            label=LABEL_SYSTEM_PROMPT,
+            placeholder=PLACEHOLDER_SYSTEM_PROMPT,
+            lines=2,
+        )
         prompt_in = gr.Textbox(
-            label=
-            placeholder=
+            label=LABEL_PROMPT,
+            placeholder=PLACEHOLDER_PROMPT,
             lines=3,
         )
-        gen_btn = gr.Button(
+        gen_btn = gr.Button(BTN_GENERATE, variant="primary")
 
         models_by_tier = get_models_by_tier()
         with gr.Tabs():
            with gr.Tab(TIER_LABELS["small"]):
                with gr.Row():
                    out_baguettotron = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_BAGUETTOTRON,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_350 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_350,
                        lines=12,
                        max_lines=24,
                    )
            with gr.Tab(TIER_LABELS["medium"]):
                with gr.Row():
                    out_luth_06 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_06,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_07 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_07,
                        lines=12,
                        max_lines=24,
                    )
            with gr.Tab(TIER_LABELS["large"]):
                with gr.Row():
                    out_luth_12 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_12,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_17 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_17,
                        lines=12,
                        max_lines=24,
                    )
 
        all_inputs = [
            prompt_in,
-            max_tok_large,
-            top_p_large,
-            top_k_large,
-            rep_large,
+            system_prompt_in,
+            temp_baguettotron,
+            max_tok_baguettotron,
+            top_p_baguettotron,
+            top_k_baguettotron,
+            rep_baguettotron,
+            temp_luth,
+            max_tok_luth,
+            top_p_luth,
+            top_k_luth,
+            rep_luth,
        ]
        all_outputs = [
            out_baguettotron,
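The hunk context above references `GPU_DECORATOR` and an `except ImportError:` branch that sit outside the changed lines. For orientation, this is the conventional optional-ZeroGPU pattern such a block implements (a sketch of an assumed shape, not the file's verbatim code):

```python
try:
    import spaces  # present on Hugging Face ZeroGPU Spaces

    GPU_DECORATOR = spaces.GPU
except ImportError:
    def GPU_DECORATOR(fn):  # no-op fallback when the spaces package is absent
        return fn
```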
bundle_luth.py ADDED
@@ -0,0 +1,357 @@
#!/usr/bin/env python3
"""
Bundle a Luth model on this device with LEAP (leap-bundle) and investigate the result.

Per Liquid AI docs: leap-bundle create produces .gguf (default) or .bundle (--executorch).
We inspect both artifact types.

Steps:
1. Download the Luth model from Hugging Face to a local directory.
2. Validate the directory with leap-bundle validate.
3. Create a bundle with leap-bundle create (requires LEAP auth).
4. Poll until the bundle is completed, then download the output.
5. Investigate: report file sizes (.gguf / .bundle) and optionally run inference on .gguf.

Requires: pip install leap-bundle huggingface_hub
LEAP auth: leap-bundle login <api-key> (from https://leap.liquid.ai/profile#/api-keys)
"""

import argparse
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path


def _leap_env() -> dict[str, str]:
    """Environment for leap-bundle subprocess so UTF-8 is used (avoids Windows cp1252 + checkmark)."""
    env = os.environ.copy()
    env["PYTHONUTF8"] = "1"
    return env


# Luth model repo IDs (LFM2-based are most likely LEAP-compatible)
LUTH_REPOS = [
    "kurakurai/Luth-LFM2-350M",
    "kurakurai/Luth-LFM2-700M",
    "kurakurai/Luth-LFM2-1.2B",
    "kurakurai/Luth-0.6B-Instruct",
    "kurakurai/Luth-1.7B-Instruct",
]

DEFAULT_REPO = LUTH_REPOS[0]
DEFAULT_WORK_DIR = Path("./luth_bundle_work")
DEFAULT_QUANTIZATION = "Q4_K_M"
POLL_INTERVAL_SEC = 60
POLL_MAX_MINUTES = 30


def run(cmd: list[str], capture: bool = True, cwd: Path | None = None) -> subprocess.CompletedProcess:
    """Run a command; raise on non-zero exit unless capture is False."""
    kwargs = {
        "cwd": str(cwd) if cwd else None,
        "text": True,
        "encoding": "utf-8",
        "errors": "replace",
        "env": _leap_env(),
    }
    if capture:
        kwargs["capture_output"] = True
    r = subprocess.run(cmd, **kwargs)
    if r.returncode != 0 and capture:
        raise RuntimeError(f"Command failed: {' '.join(cmd)}\nstdout: {r.stdout}\nstderr: {r.stderr}")
    return r


def has_leap_bundle() -> bool:
    try:
        run(["leap-bundle", "--version"], capture=True)
        return True
    except (FileNotFoundError, RuntimeError):
        return False


def download_model(repo_id: str, work_dir: Path) -> Path:
    """Download Hugging Face model to work_dir/models/<repo_slug>. Returns path to model dir."""
    try:
        from huggingface_hub import snapshot_download
    except ImportError:
        raise SystemExit("Install huggingface_hub: pip install huggingface_hub")
    slug = repo_id.replace("/", "--")
    dest = work_dir / "models" / slug
    dest.mkdir(parents=True, exist_ok=True)
    print(f"Downloading {repo_id} to {dest} ...")
    snapshot_download(repo_id=repo_id, local_dir=str(dest))
    return dest

def validate_bundle(model_path: Path) -> bool:
    """Run leap-bundle validate. Returns True if valid."""
    # run() raises RuntimeError on non-zero exit, so translate failure into False here.
    try:
        run(["leap-bundle", "validate", str(model_path)], capture=True)
        return True
    except RuntimeError:
        return False


def _parse_request_id(out: str) -> str | None:
    """Parse request_id from JSON output; API may return integer or string."""
    try:
        # Handle single line or multi-line JSON
        data = json.loads(out.strip())
        rid = data.get("request_id")
        if rid is not None:
            return str(rid)
    except (json.JSONDecodeError, TypeError):
        pass
    match = re.search(r'"request_id"\s*:\s*("([^"]+)"|(\d+))', out)
    if match:
        return match.group(2) or match.group(3)
    return None


def create_bundle(model_path: Path, work_dir: Path) -> tuple[str | None, str | None]:
    """Run leap-bundle create --json. Returns (request_id, pending_id).

    On success: (request_id, None). On 'pending request' error: (None, pending_id). Else: (None, None).
    """
    r = subprocess.run(
        ["leap-bundle", "create", str(model_path), "--json"],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        cwd=work_dir,
        env=_leap_env(),
    )
    out = (r.stdout or r.stderr or "").strip()
    if r.returncode != 0:
        print("Create failed:", out or f"exit code {r.returncode}")
        pending_id = _parse_pending_request_id(out)
        if pending_id:
            return None, pending_id
        if "login" in out.lower() or "authenticat" in out.lower():
            print("Run: leap-bundle login <api-key> (get key from https://leap.liquid.ai/profile#/api-keys)")
        return None, None
    # Parse request_id (API can return {"request_id": 1, "status": "success"})
    rid = _parse_request_id(out)
    if rid:
        return rid, None
    if "already exists" in out or "exists" in out:
        print("Bundle request already exists for this model (same hash). Check leap-bundle list.")
        return None, None
    print("Create output:", out)
    return None, None


def _parse_pending_request_id(out: str) -> str | None:
    """Extract pending request ID from error message."""
    match = re.search(r"pending request\s*\(ID:\s*(\d+)\)", out, re.IGNORECASE)
    return match.group(1) if match else None


def get_request_status(request_id: str) -> str:
    """Get status of a bundle request. Returns status string."""
    r = subprocess.run(
        ["leap-bundle", "list", str(request_id)],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        env=_leap_env(),
    )
    out = (r.stdout or r.stderr or "").lower()
    if "completed" in out:
        return "completed"
    if "failed" in out:
        return "failed"
    if "processing" in out or "upload" in out or "pending" in out:
        return "processing"
    return "unknown"


def wait_for_bundle(request_id: str) -> bool:
    """Poll until completed or failed. Returns True if completed."""
    deadline = time.monotonic() + POLL_MAX_MINUTES * 60
    while time.monotonic() < deadline:
        status = get_request_status(request_id)
        print(f"  Status: {status}")
        if status == "completed":
            return True
        if status == "failed":
            print("Bundle request failed. Run: leap-bundle list", request_id)
            return False
        time.sleep(POLL_INTERVAL_SEC)
    print("Timed out waiting for bundle.")
    return False


# Per Liquid AI docs: create output is .gguf (default) or .bundle (--executorch)
BUNDLE_EXTENSIONS = (".gguf", ".bundle")


def _find_bundle_artifact(work_dir: Path) -> Path | None:
    """Return first .gguf or .bundle file under work_dir or cwd."""
    for d in [work_dir, Path.cwd()]:
        for ext in BUNDLE_EXTENSIONS:
            for f in d.glob(f"*{ext}"):
                return f
    return None

def download_bundle(request_id: str, work_dir: Path) -> Path | None:
    """Run leap-bundle download <request_id>. Returns path to downloaded bundle artifact if found."""
    # run() raises on failure; catch it so a transient download error doesn't abort the loop.
    try:
        run(["leap-bundle", "download", request_id], capture=True, cwd=work_dir)
    except RuntimeError as e:
        if "signed_url" in str(e):
            print("  (LEAP download failed: 'signed_url' – try later: python download_bundles.py --request-ids", request_id + ")", file=sys.stderr)
        else:
            print(f"  Download failed: {e}", file=sys.stderr)
    return _find_bundle_artifact(work_dir)


def investigate(bundle_path: Path | None, model_path: Path) -> None:
    """Report sizes for source dir and bundle artifact (.gguf or .bundle); run inference only on .gguf."""
    print("\n--- Investigation ---")
    if model_path.exists():
        total = sum(f.stat().st_size for f in model_path.rglob("*") if f.is_file())
        print(f"  Source model dir: {model_path} total size: {total / (1024**2):.1f} MB")
    if bundle_path and bundle_path.exists():
        size_mb = bundle_path.stat().st_size / (1024**2)
        kind = "GGUF" if bundle_path.suffix == ".gguf" else "ExecuTorch (.bundle)"
        print(f"  Bundle file: {bundle_path} size: {size_mb:.1f} MB [{kind}]")
        if bundle_path.suffix == ".gguf":
            try:
                from llama_cpp import Llama
                print("  Running short inference (llama_cpp)...")
                llm = Llama(model_path=str(bundle_path), n_ctx=256, verbose=False)
                out = llm("Bonjour, dis-moi une phrase courte en français.\n", max_tokens=32, temperature=0.3)
                text = out["choices"][0]["text"].strip()
                print(f"  Sample output: {text[:200]}")
            except ImportError:
                print("  (Install llama-cpp-python to run a sample inference on the GGUF)")
        else:
            print("  (ExecuTorch .bundle; use LEAP SDK for inference)")
    else:
        print("  No bundle file (.gguf or .bundle) found to inspect.")


def main() -> int:
    p = argparse.ArgumentParser(
        description="Bundle a Luth model with LEAP and investigate the result.",
        epilog="Requires: pip install leap-bundle huggingface_hub. Auth: leap-bundle login <api-key>",
    )
    p.add_argument(
        "--model",
        default=DEFAULT_REPO,
        choices=LUTH_REPOS,
        help="Luth model repo ID (default: %(default)s); ignored if --all",
    )
    p.add_argument(
        "--all",
        action="store_true",
        help="Bundle and inspect every Luth model in sequence (5 models; LEAP free tier = 5 requests/24h)",
    )
    p.add_argument(
        "--work-dir",
        type=Path,
        default=DEFAULT_WORK_DIR,
        help="Working directory for download and bundle output (default: %(default)s)",
    )
    p.add_argument(
        "--quantization",
        default=DEFAULT_QUANTIZATION,
        help="(Reserved; current leap-bundle create has no --quantization option)",
    )
    p.add_argument(
        "--dry-run",
        action="store_true",
        help="Only download and validate; do not create or download bundle",
    )
    p.add_argument(
        "--skip-create",
        action="store_true",
        help="Skip bundle create (use existing local model dir only); still run investigate",
    )
    p.add_argument(
        "--request-id",
        type=str,
        metavar="ID",
        help="If bundle already created, download by request ID and then investigate",
    )
    args = p.parse_args()

    args.work_dir = args.work_dir.resolve()
    args.work_dir.mkdir(parents=True, exist_ok=True)

    if not has_leap_bundle():
        print("leap-bundle CLI not found. Install: pip install leap-bundle", file=sys.stderr)
        return 1

    models_to_run = LUTH_REPOS if args.all else [args.model]
    if args.all and args.request_id:
        print("--request-id is ignored when using --all.", file=sys.stderr)
        args.request_id = None
    if args.all:
        print(f"Running for all {len(models_to_run)} Luth models: {', '.join(models_to_run)}")
        print("Note: LEAP free tier allows 5 bundle requests per 24h.\n")

    exit_code = 0
    for repo_id in models_to_run:
        print(f"\n{'='*60}\n  {repo_id}\n{'='*60}")
        try:
            # 1. Download
            model_path = download_model(repo_id, args.work_dir)

            # 2. Validate
            print("Validating directory for LEAP bundle...")
            if not validate_bundle(model_path):
                print("Validation failed. Fix the model directory and retry.", file=sys.stderr)
                exit_code = 1
                continue
            print("Validation passed.")

            if args.dry_run:
                investigate(None, model_path)
                continue

            gguf_path: Path | None = None

            if args.request_id and not args.all:
                # Download existing bundle by ID (single-model only)
                print(f"Downloading bundle request {args.request_id}...")
                gguf_path = download_bundle(args.request_id, args.work_dir)
            elif not args.skip_create:
                # 3. Create bundle (LEAP allows only one pending request; wait for it if needed)
                request_id: str | None = None
                pending_id: str | None = None
                print("Creating bundle...")
                request_id, pending_id = create_bundle(model_path, args.work_dir)
                if pending_id:
                    print(f"Waiting for previous bundle request {pending_id} to complete...")
                    if wait_for_bundle(pending_id):
                        download_bundle(pending_id, args.work_dir)
                    print("Retrying create for this model...")
                    request_id, pending_id = create_bundle(model_path, args.work_dir)
                    if pending_id:
                        print("Still pending; skipping create for this model.", file=sys.stderr)
                        request_id = None
                if request_id:
                    # 4. Wait and download
                    print(f"Waiting for bundle request {request_id} (poll every {POLL_INTERVAL_SEC}s)...")
                    if wait_for_bundle(request_id):
                        gguf_path = download_bundle(request_id, args.work_dir)
                elif not pending_id:
                    print("No new request created. Use --request-id <id> to download an existing bundle.")
            else:
                print("Skipping bundle create (--skip-create).")

            # 5. Investigate
            investigate(gguf_path, model_path)
        except Exception as e:
            print(f"Error processing {repo_id}: {e}", file=sys.stderr)
            exit_code = 1

    return exit_code


if __name__ == "__main__":
    sys.exit(main())
download_bundles.py ADDED
@@ -0,0 +1,248 @@
#!/usr/bin/env python3
"""
Download LEAP bundle outputs by request ID and inspect the files.

Per Liquid AI docs: leap-bundle create produces .gguf (default, GGUF) or
.bundle (ExecuTorch, with --executorch). This script finds and inspects both.

Uses leap-bundle list (--json per request) and leap-bundle download
with --output-path. Reports all bundle artifacts (.gguf, .bundle) and
optionally runs short inference on .gguf (llama-cpp).

Requires: pip install leap-bundle
LEAP auth: leap-bundle login <api-key>
"""

import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path


def _leap_env() -> dict[str, str]:
    env = os.environ.copy()
    env["PYTHONUTF8"] = "1"
    return env


def run(cmd: list[str], capture: bool = True, cwd: Path | None = None) -> subprocess.CompletedProcess:
    kwargs = {
        "cwd": str(cwd) if cwd else None,
        "text": True,
        "encoding": "utf-8",
        "errors": "replace",
        "env": _leap_env(),
    }
    if capture:
        kwargs["capture_output"] = True
    return subprocess.run(cmd, **kwargs)


def list_request(request_id: str) -> dict | None:
    """Get details for one request; returns parsed JSON or None."""
    r = run(["leap-bundle", "list", str(request_id), "--json"], capture=True)
    if r.returncode != 0:
        return None
    out = (r.stdout or "").strip()
    try:
        return json.loads(out)
    except json.JSONDecodeError:
        return None


def list_all_request_ids() -> list[str]:
    """Run leap-bundle list (no id) and parse table for request IDs. Returns list of ID strings."""
    r = run(["leap-bundle", "list"], capture=True)
    out = (r.stdout or r.stderr or "")
    ids: list[str] = []
    # Table rows: first column is often the ID (integer)
    for line in out.splitlines():
        parts = line.split()
        if parts and parts[0].isdigit():
            ids.append(parts[0])
    # Fallback: any line with a pipe or spaces and a leading number (rich table)
    if not ids:
        for line in out.splitlines():
            m = re.search(r"[\|\s](\d{1,6})[\|\s]", line)
            if m:
                ids.append(m.group(1))
    # Fallback: JSON-like "request_id": N or "id": N
    if not ids:
        for m in re.finditer(r'"(?:request_id|id)"\s*:\s*(\d+)', out):
            ids.append(m.group(1))
    return list(dict.fromkeys(ids))


def get_status(data: dict) -> str:
    """Extract status from list request JSON."""
    s = (data.get("status") or data.get("Status") or "").lower()
    return s


def download_bundle(request_id: str, output_path: Path) -> tuple[bool, str]:
    """Run leap-bundle download <id> --output-path <dir>. Returns (success, stderr_or_empty)."""
    output_path.mkdir(parents=True, exist_ok=True)
    r = run(
        ["leap-bundle", "download", str(request_id), "--output-path", str(output_path)],
        capture=True,
    )
    err = (r.stderr or r.stdout or "").strip()
    return r.returncode == 0, err


# Per Liquid AI docs: create produces .gguf (default) or .bundle (--executorch)
BUNDLE_EXTENSIONS = (".gguf", ".bundle")


def find_bundle_files(root: Path) -> list[Path]:
    """Return all LEAP bundle artifact files (.gguf, .bundle) under root."""
    out: list[Path] = []
    for ext in BUNDLE_EXTENSIONS:
        out.extend(root.rglob(f"*{ext}"))
    return sorted(out)


def inspect_file(path: Path, run_inference: bool = False, root: Path | None = None) -> None:
    """Print path, size, type; run short inference only for .gguf (llama-cpp)."""
    size_mb = path.stat().st_size / (1024**2)
    try:
        disp = path.relative_to(root) if root else path
    except ValueError:
        disp = path
    kind = "GGUF" if path.suffix == ".gguf" else "ExecuTorch (.bundle)"
    print(f"  {disp}  {size_mb:.1f} MB  [{kind}]")
    if run_inference and path.suffix == ".gguf":
        try:
            from llama_cpp import Llama
            print("    Running short inference (llama-cpp)...")
            llm = Llama(model_path=str(path), n_ctx=256, verbose=False)
            out = llm("Bonjour, une phrase en français.\n", max_tokens=24, temperature=0.3)
            text = (out["choices"][0]["text"] or "").strip()
            print(f"    -> {text[:150]}")
        except ImportError:
            print("    (Install llama-cpp-python to run inference)")
        except Exception as e:
            print(f"    Inference error: {e}")
    elif run_inference and path.suffix == ".bundle":
        print("    (ExecuTorch .bundle; inference via LEAP SDK, not llama-cpp)")


def main() -> int:
    p = argparse.ArgumentParser(
        description="Download LEAP bundle outputs by request ID and inspect files (.gguf or .bundle per Liquid AI docs).",
        epilog="Requires: leap-bundle (pip install leap-bundle). Auth: leap-bundle login <api-key>",
    )
    p.add_argument(
        "--output-dir",
        type=Path,
        default=Path("./luth_bundle_downloads"),
        help="Directory to download each bundle into (default: ./luth_bundle_downloads)",
    )
    p.add_argument(
        "--request-ids",
        type=str,
        nargs="*",
        metavar="ID",
        help="Bundle request IDs to download (e.g. 1 2 3)",
    )
    p.add_argument(
        "--from-file",
        type=Path,
        metavar="FILE",
        help="Text file with one request ID per line",
    )
    p.add_argument(
        "--list",
        action="store_true",
        help="Run leap-bundle list and download all completed requests",
    )
    p.add_argument(
        "--infer",
        action="store_true",
        help="Run a short inference on each downloaded GGUF (requires llama-cpp-python)",
    )
    p.add_argument(
        "--inspect-only",
        action="store_true",
        help="Only inspect existing bundle files (.gguf, .bundle) under --output-dir; do not download",
    )
    args = p.parse_args()

    args.output_dir = args.output_dir.resolve()
    request_ids: list[str] = []

    if args.inspect_only:
        args.output_dir.mkdir(parents=True, exist_ok=True)
        bundles = find_bundle_files(args.output_dir)
        print(f"Inspecting {len(bundles)} bundle file(s) (.gguf / .bundle) under {args.output_dir}\n")
        for f in bundles:
            inspect_file(f, run_inference=args.infer, root=args.output_dir)
        return 0

    if args.list:
        print("Fetching bundle request list...")
        request_ids = list_all_request_ids()
        if not request_ids:
            print("No request IDs found from list.", file=sys.stderr)
            print("If you have existing bundle requests (e.g. from bundle_luth.py --all), run:", file=sys.stderr)
            print("  python download_bundles.py --request-ids 1 2 3 4 5", file=sys.stderr)
            return 1
        print(f"Found {len(request_ids)} request(s): {request_ids}")
    else:
        if args.request_ids:
            request_ids.extend(args.request_ids)
        if args.from_file:
            if not args.from_file.exists():
                print(f"File not found: {args.from_file}", file=sys.stderr)
                return 1
            for line in args.from_file.read_text(encoding="utf-8", errors="replace").splitlines():
                rid = line.strip()
                if rid and rid.isdigit():
                    request_ids.append(rid)
        if not request_ids:
            print("Provide --request-ids, --from-file, or --list.", file=sys.stderr)
            return 1

    args.output_dir.mkdir(parents=True, exist_ok=True)
    downloaded: list[Path] = []

    for rid in request_ids:
        print(f"\n--- Request ID {rid} ---")
        info = list_request(rid)
        status = get_status(info) if info else ""
        if status:
            print(f"  Status: {status}")
            if "completed" not in status and "complete" not in status:
                print("  Skipping (not completed).")
                continue
        else:
            print("  (Status unknown; attempting download.)")
        dest = args.output_dir / f"request_{rid}"
        print(f"  Downloading to {dest} ...")
        ok, err = download_bundle(rid, dest)
        if ok:
            for f in find_bundle_files(dest):
                downloaded.append(f)
                kind = "GGUF" if f.suffix == ".gguf" else ".bundle"
                print(f"  Downloaded: {f.name} ({f.stat().st_size / (1024**2):.1f} MB) [{kind}]")
        else:
            print("  Download failed.", file=sys.stderr)
            if "signed_url" in err:
                print("  (LEAP CLI/API 'signed_url' error – try again later or check LEAP status.)", file=sys.stderr)

    print("\n" + "=" * 60)
    print("Inspection summary (bundle artifacts: .gguf / .bundle)")
    print("=" * 60)
    all_bundles = find_bundle_files(args.output_dir)
    for f in all_bundles:
        inspect_file(f, run_inference=args.infer, root=args.output_dir)
    if not all_bundles:
        print("  No bundle files (.gguf or .bundle) found.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
inference.py CHANGED
@@ -19,20 +19,27 @@ _cache_lock = __import__("threading").Lock()
 BAGUETTOTRON_ID = "PleIAs/Baguettotron"
 
 
-def _format_prompt_baguettotron(prompt: str) -> tuple[str, list[str]]:
+def _format_prompt_baguettotron(prompt: str, system_prompt: str = "") -> tuple[str, list[str]]:
     """
     Manual prompt build for Baguettotron. Uses "<|im_end>" (no trailing pipe)
     per tokenizer; stop=["<|im_end>", "</think>"] for generation.
+    Qwen-style: system (optional) + user + assistant.
     """
+    parts: list[str] = []
+    if system_prompt.strip():
+        parts.append(f"<|im_start|>system\n{system_prompt.strip()}<|im_end>\n")
+    parts.append(f"<|im_start|>user\n{prompt}<|im_end>\n<|im_start|>assistant\n<think>\n")
+    text = "".join(parts)
     stop = ["<|im_end>", "</think>"]
     return text, stop
 
 
-def _format_prompt_luth(prompt: str, tokenizer: Any) -> tuple[dict[str, Any], list[str] | None]:
-    """Use tokenizer's chat template for Luth models."""
-    messages
+def _format_prompt_luth(prompt: str, tokenizer: Any, system_prompt: str = "") -> tuple[dict[str, Any], list[str] | None]:
+    """Use tokenizer's chat template for Luth models. Supports optional system message."""
+    messages: list[dict[str, str]] = []
+    if system_prompt.strip():
+        messages.append({"role": "system", "content": system_prompt.strip()})
+    messages.append({"role": "user", "content": prompt})
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -74,6 +81,7 @@ def _generate_one(
     prompt: str,
     params: dict[str, Any],
     device: str = "cuda",
+    system_prompt: str = "",
 ) -> tuple[str, str]:
     """Load (or use cached) model, run inference, return (model_id, text)."""
     model, tokenizer = _load_model(model_id, device)
@@ -95,10 +103,10 @@ def _generate_one(
     }
 
     if model_id == BAGUETTOTRON_ID:
-        text_prompt, _stop = _format_prompt_baguettotron(prompt)
+        text_prompt, _stop = _format_prompt_baguettotron(prompt, system_prompt)
         inputs = tokenizer(text_prompt, return_tensors="pt")
     else:
-        inputs = _format_prompt_luth(prompt, tokenizer)[0]
+        inputs = _format_prompt_luth(prompt, tokenizer, system_prompt)[0]
 
     # Move to device (input_ids/attention_mask are int; no dtype cast needed)
     inputs = {k: v.to(device) for k, v in inputs.items()}
@@ -126,6 +134,7 @@ def run_all(
     params_by_model: dict[str, dict[str, Any]],
     device: str | None = None,
     max_workers: int = 6,
+    system_prompt: str = "",
 ) -> dict[str, str]:
     """
     Load all 6 models, run all 6 inferences in parallel.
@@ -143,7 +152,7 @@ def run_all(
 
     def task(model_id: str):
         p = {**default_params, **(params_by_model.get(model_id) or {})}
-        return _generate_one(model_id, prompt, p, device)
+        return _generate_one(model_id, prompt, p, device, system_prompt)
 
     results: dict[str, str] = {}
     with ThreadPoolExecutor(max_workers=max_workers) as ex:
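Taken together, these changes thread one optional system prompt through both prompt formats. A usage sketch against the new `run_all` signature (repo ids are the ones this app uses; parameter values and the prompt are illustrative, and models left out of the dict fall back to `run_all`'s defaults):

```python
from inference import run_all

params_by_model = {
    "PleIAs/Baguettotron": {
        "temperature": 0.5, "max_tokens": 512,
        "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.1,
    },
    "kurakurai/Luth-LFM2-350M": {
        "temperature": 0.7, "max_tokens": 256,
        "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.05,
    },
}
results = run_all(
    "Explique la photosynthèse en deux phrases.",
    params_by_model,
    system_prompt="You are a helpful assistant that answers in French.",
)
for model_id, text in results.items():
    print(model_id, "->", text[:100])
```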
model_config.py CHANGED
@@ -141,3 +141,26 @@ def combined_footprint() -> tuple[int, float]:
     total_disk = sum(m.file_size_mb for m in MODELS)
     total_vram_mb = sum(m.vram_estimate_mb for m in MODELS)
     return total_disk, total_vram_mb / 1024
+
+
+# --- GGUF / LEAP bundle reference ---
+# Baguettotron: PleIAs/Baguettotron-GGUF (Hugging Face)
+# LFM2 / Luth: actual LEAP bundle outputs (leap-bundle create + download)
+GGUF_REFERENCE_ROWS: list[list[str]] = [
+    # Model/Variant | Params | File size (MB) | Source
+    ["Baguettotron Q4_0", "321M", "202", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q4_K_M", "321M", "240", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q5_K_M", "321M", "257", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q8_0", "321M", "344", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron BF16", "321M", "644", "PleIAs/Baguettotron-GGUF"],
+    ["LFM2-350M Q4_K_M", "0.4B", "219", "LEAP bundle (Luth-LFM2-350M)"],
+    ["LFM2-700M Q4_K_M", "0.7B", "447", "LEAP bundle (Luth-LFM2-700M)"],
+    ["LFM2-1.2B Q4_K_M", "1.2B", "697", "LEAP bundle (Luth-LFM2-1.2B)"],
+    ["Luth-0.6B-Instruct (Qwen3) Q4_K_M", "0.6B", "378", "LEAP bundle"],
+    ["Luth-1.7B-Instruct (Qwen3) Q4_K_M", "1.7B", "1,056", "LEAP bundle"],
+]
+
+
+def gguf_footprint_table_data() -> list[list[str]]:
+    """Rows for GGUF/LEAP reference table: Model/Variant | Params | File size (MB) | Source."""
+    return GGUF_REFERENCE_ROWS
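The new rows also make the quantization savings concrete; for example (plain arithmetic on the table values above):

```python
bf16_mb, q4_mb = 644, 240  # Baguettotron BF16 vs Q4_K_M from GGUF_REFERENCE_ROWS
print(f"Q4_K_M is {bf16_mb / q4_mb:.1f}x smaller than BF16")  # -> 2.7x
```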
requirements-bundle.txt ADDED
@@ -0,0 +1,6 @@
# For bundle_luth.py: bundle Luth models with LEAP and investigate
leap-bundle
huggingface_hub
# Optional: run a short inference on the downloaded GGUF
# llama-cpp-python
hf_xet
ui_strings.py ADDED
@@ -0,0 +1,44 @@
"""
UI text strings for the Baguettotron vs Luth Gradio app.
Centralized for reuse and easier i18n.
"""

# App identity
TITLE = "Baguettotron vs Luth models"
SUBTITLE = "All models, all outputs — apples-to-apples comparison by parameter size."

# Footprint section
HEADING_FOOTPRINT = "## Model footprint"
FOOTPRINT_HEADERS = ["Model", "Params", "File size (MB)", "Est. VRAM (MB)"]
FOOTPRINT_SUMMARY_TEMPLATE = "**Combined footprint —** Total disk: {total_disk:,} MB | Total VRAM (est.): {total_vram:.2f} GB"
HEADING_GGUF_LEAP = "### GGUF & LEAP bundle sizes (reference)"
GGUF_LEAP_INTRO = "PleIAs Baguettotron GGUF variants (Hugging Face) and Liquid LFM2 GGUF sizes (LEAP model library). Download with `leap-bundle download <model>` or from the links below."
FOOTPRINT_GGUF_HEADERS = ["Model / Variant", "Params", "File size (MB)", "Source"]

# Generation settings
HEADING_GENERATION = "## Generation settings (by model family)"
COL_BAGUETTOTRON_HEADING = "**Baguettotron (321M)** — *reasoning*"
COL_LUTH_HEADING = "**Luth models (0.4B–1.7B)** — *instruct*"
LABEL_TEMPERATURE = "Temperature"
LABEL_MAX_TOKENS = "Max tokens"
LABEL_TOP_P = "Top p"
LABEL_TOP_K = "Top k"
LABEL_REPEAT_PENALTY = "Repeat penalty"
INFO_TEMP_BAGUETTOTRON = "Lower for more deterministic reasoning"
INFO_REP_LUTH = "Luth/LFM2 often use ~1.05"

# Live inference
HEADING_LIVE_INFERENCE = "## Live inference"
LABEL_SYSTEM_PROMPT = "System prompt (optional)"
PLACEHOLDER_SYSTEM_PROMPT = "e.g. You are a helpful assistant that answers in French."
LABEL_PROMPT = "Prompt"
PLACEHOLDER_PROMPT = "Enter your prompt here..."
BTN_GENERATE = "Generate"

# Output textbox labels (per model)
LABEL_OUT_BAGUETTOTRON = "Baguettotron (321M)"
LABEL_OUT_LUTH_350 = "Luth-LFM2-350M (0.4B)"
LABEL_OUT_LUTH_06 = "Luth-0.6B-Instruct"
LABEL_OUT_LUTH_07 = "Luth-LFM2-700M"
LABEL_OUT_LUTH_12 = "Luth-LFM2-1.2B"
LABEL_OUT_LUTH_17 = "Luth-1.7B-Instruct"