Joseph Pollack committed: adds real LFM and PleIAs numbers
Files changed:
- .gitignore              +2   -0
- README.md               +26  -2
- app.py                  +141 -135
- bundle_luth.py          +357 -0
- download_bundles.py     +248 -0
- inference.py            +18  -9
- model_config.py         +23  -0
- requirements-bundle.txt +6   -0
- ui_strings.py           +44  -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
 __pycache__/
 *.py[cod]
 *$py.class
+luth_bundle_downloads
+luth_bundle_work
README.md CHANGED
@@ -18,8 +18,9 @@ Apples-to-apples comparison of **Baguettotron** (PleIAs, 321M) and **5 Luth models**
 ## Features
 
 - **All models, all outputs:** Each prompt runs through all 6 models; outputs appear in tabs grouped by parameter size.
-- **
-- **Per-
+- **System prompt:** Optional system prompt supported for both Baguettotron (Qwen-style) and Luth (chat template) model families.
+- **Ultimate footprint:** Per-model disk size and VRAM estimates; combined footprint for all models. A **GGUF & LEAP bundle** reference table lists PleIAs Baguettotron GGUF variants and Liquid LFM2 GGUF sizes (from [LEAP](https://leap.liquid.ai/models) / [PleIAs/Baguettotron-GGUF](https://huggingface.co/PleIAs/Baguettotron-GGUF)).
+- **Per-family generation settings:** Two columns (Baguettotron | Luth) with sensible defaults: Baguettotron tuned for reasoning (e.g. temp 0.5, 512 tokens); Luth for instruct (e.g. temp 0.7, repeat_penalty 1.05).
 - **Transformers-only:** No quantization; all models run in BF16/FP16.
 
 ## Size tiers
@@ -34,6 +35,29 @@ Apples-to-apples comparison of **Baguettotron** (PleIAs, 321M) and **5 Luth models**
 
 Baguettotron's tokenizer uses `"<|im_end>"` (no trailing pipe) for EOS. The app uses manual prompt formatting and stop sequences to avoid multi-token tokenization. See [quirk.md](quirk.md) for details.
 
+## Bundling Luth models (LEAP)
+
+The script `bundle_luth.py` downloads a Luth model, validates it for LEAP, creates a GGUF bundle on the device, and investigates the result (sizes, optional inference).
+
+```bash
+pip install -r requirements-bundle.txt
+leap-bundle login <api-key>  # from https://leap.liquid.ai/profile#/api-keys
+# Single model:
+python bundle_luth.py --model kurakurai/Luth-LFM2-350M
+# All 5 Luth models (download → validate → create → download GGUF → investigate):
+python bundle_luth.py --all
+```
+
+Options: `--all` (every Luth model in sequence; LEAP free tier = 5 requests/24h), `--work-dir`, `--quantization` (e.g. Q4_K_M, Q8_0), `--dry-run` (download + validate only, no create), `--skip-create`, `--request-id <id>` to download an existing bundle.
+
+**Download and inspect bundles:** Use `download_bundles.py` to fetch completed bundle outputs by request ID. Per Liquid AI docs, artifacts are `.gguf` (default) or `.bundle` (ExecuTorch). The script inspects both and can run a short inference on `.gguf`:
+
+```bash
+python download_bundles.py --list                  # list requests, download all completed
+python download_bundles.py --request-ids 1 2       # download specific IDs
+python download_bundles.py --inspect-only --infer  # inspect existing downloads and run inference
+```
+
 ## Deployment
 
 - **Hugging Face Spaces:** Set hardware to **Zero GPU** (or standard GPU). The app uses `@spaces.GPU` when available.
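The EOS quirk called out above is easy to verify directly. A minimal sketch, assuming `transformers` is installed and the Hub is reachable; the comments describe the expected splits rather than captured output:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("PleIAs/Baguettotron")

# Standard ChatML ends turns with "<|im_end|>", but this tokenizer's EOS is
# "<|im_end>" (no trailing pipe). The ChatML form is therefore split into
# several ordinary tokens, which is why the app formats prompts manually and
# matches stop strings instead of relying on a single EOS token id.
print(tok.tokenize("<|im_end|>"))  # expected: multiple pieces
print(tok.tokenize("<|im_end>"))   # expected: one special token
```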
app.py CHANGED
@@ -5,13 +5,46 @@ All models, all outputs; tabbed by parameter size.
 
 import gradio as gr
 
-from inference import run_all
+from inference import BAGUETTOTRON_ID, run_all
 from model_config import (
     TIER_LABELS,
     combined_footprint,
     footprint_table_data,
+    gguf_footprint_table_data,
     get_models_by_tier,
+    MODELS,
+)
+from ui_strings import (
+    BTN_GENERATE,
+    COL_BAGUETTOTRON_HEADING,
+    COL_LUTH_HEADING,
+    FOOTPRINT_GGUF_HEADERS,
+    FOOTPRINT_HEADERS,
+    FOOTPRINT_SUMMARY_TEMPLATE,
+    GGUF_LEAP_INTRO,
+    HEADING_FOOTPRINT,
+    HEADING_GGUF_LEAP,
+    HEADING_GENERATION,
+    HEADING_LIVE_INFERENCE,
+    INFO_REP_LUTH,
+    INFO_TEMP_BAGUETTOTRON,
+    LABEL_MAX_TOKENS,
+    LABEL_OUT_BAGUETTOTRON,
+    LABEL_OUT_LUTH_06,
+    LABEL_OUT_LUTH_07,
+    LABEL_OUT_LUTH_12,
+    LABEL_OUT_LUTH_17,
+    LABEL_OUT_LUTH_350,
+    LABEL_PROMPT,
+    LABEL_REPEAT_PENALTY,
+    LABEL_SYSTEM_PROMPT,
+    LABEL_TEMPERATURE,
+    LABEL_TOP_K,
+    LABEL_TOP_P,
+    PLACEHOLDER_PROMPT,
+    PLACEHOLDER_SYSTEM_PROMPT,
+    SUBTITLE,
+    TITLE,
 )
 
 # Optional: use @spaces.GPU for ZeroGPU deployment
@@ -24,97 +57,71 @@ except ImportError:
 
 
 def build_params_by_model(
-    temp_large: float,
-    max_tok_large: int,
-    top_p_large: float,
-    top_k_large: int,
-    rep_large: float,
+    temp_baguettotron: float,
+    max_tok_baguettotron: int,
+    top_p_baguettotron: float,
+    top_k_baguettotron: int,
+    rep_baguettotron: float,
+    temp_luth: float,
+    max_tok_luth: int,
+    top_p_luth: float,
+    top_k_luth: int,
+    rep_luth: float,
 ) -> dict[str, dict]:
-    """Build params dict keyed by model_id from
-            "repeat_penalty": rep_med,
-        },
-        "large": {
-            "temperature": temp_large,
-            "max_tokens": max_tok_large,
-            "top_p": top_p_large,
-            "top_k": top_k_large,
-            "repeat_penalty": rep_large,
-        },
+    """Build params dict keyed by model_id from Baguettotron vs Luth controls."""
+    baguettotron_params = {
+        "temperature": temp_baguettotron,
+        "max_tokens": max_tok_baguettotron,
+        "top_p": top_p_baguettotron,
+        "top_k": top_k_baguettotron,
+        "repeat_penalty": rep_baguettotron,
+    }
+    luth_params = {
+        "temperature": temp_luth,
+        "max_tokens": max_tok_luth,
+        "top_p": top_p_luth,
+        "top_k": top_k_luth,
+        "repeat_penalty": rep_luth,
     }
-    models_by_tier = get_models_by_tier()
     params_by_model: dict[str, dict] = {}
-    for m in models:
-        params_by_model[m.repo_id] = p.copy()
+    for m in MODELS:
+        params_by_model[m.repo_id] = (baguettotron_params if m.repo_id == BAGUETTOTRON_ID else luth_params).copy()
     return params_by_model
 
 
 @GPU_DECORATOR
 def generate_all(
     prompt: str,
-    max_tok_large: int,
-    top_p_large: float,
-    top_k_large: int,
-    rep_large: float,
+    system_prompt: str,
+    temp_baguettotron: float,
+    max_tok_baguettotron: int,
+    top_p_baguettotron: float,
+    top_k_baguettotron: int,
+    rep_baguettotron: float,
+    temp_luth: float,
+    max_tok_luth: int,
+    top_p_luth: float,
+    top_k_luth: int,
+    rep_luth: float,
 ) -> tuple[str, str, str, str, str, str]:
     """Run all 6 models, return outputs in tab order: small (2), medium (2), large (2)."""
     if not prompt.strip():
         return ("",) * 6
 
     params = build_params_by_model(
-        temp_large,
-        max_tok_large,
-        top_p_large,
-        top_k_large,
-        rep_large,
+        temp_baguettotron,
+        max_tok_baguettotron,
+        top_p_baguettotron,
+        top_k_baguettotron,
+        rep_baguettotron,
+        temp_luth,
+        max_tok_luth,
+        top_p_luth,
+        top_k_luth,
+        rep_luth,
    )
 
-    results = run_all(prompt, params)
+    results = run_all(prompt, params, system_prompt=system_prompt)
 
     models_by_tier = get_models_by_tier()
     outputs: list[str] = []
@@ -127,113 +134,112 @@ def generate_all(
 
 def create_ui():
     total_disk, total_vram = combined_footprint()
-    footprint_md =
-    **Combined footprint —** Total disk: {total_disk:,} MB | Total VRAM (est.): {total_vram:.2f} GB
-    """
+    footprint_md = FOOTPRINT_SUMMARY_TEMPLATE.format(total_disk=total_disk, total_vram=total_vram)
 
-    with gr.Blocks(title=
-        gr.Markdown("#
-        gr.Markdown(
-            "All models, all outputs — apples-to-apples comparison by parameter size."
-        )
+    with gr.Blocks(title=TITLE) as demo:
+        gr.Markdown(f"# {TITLE}")
+        gr.Markdown(SUBTITLE)
 
-        # Row 1: Footprint
-        gr.Markdown(
+        # Row 1: Footprint tables (transformers + GGUF/LEAP reference)
+        gr.Markdown(HEADING_FOOTPRINT)
         footprint_df = gr.Dataframe(
             value=footprint_table_data(),
-            headers=
+            headers=FOOTPRINT_HEADERS,
             interactive=False,
         )
         gr.Markdown(footprint_md)
+        gr.Markdown(HEADING_GGUF_LEAP)
+        gr.Markdown(GGUF_LEAP_INTRO)
+        gguf_footprint_df = gr.Dataframe(
+            value=gguf_footprint_table_data(),
+            headers=FOOTPRINT_GGUF_HEADERS,
+            interactive=False,
+        )
 
-        # Row 2:
-        gr.Markdown(
-        with gr.
-            temp_large = gr.Slider(0, 2, value=0.7, label="Temperature")
-            max_tok_large = gr.Number(value=256, label="Max tokens", minimum=64, maximum=2048)
-            top_p_large = gr.Slider(0, 1, value=0.9, label="Top p")
-            top_k_large = gr.Number(value=40, label="Top k")
-            rep_large = gr.Slider(1.0, 1.5, value=1.1, label="Repeat penalty")
+        # Row 2: Generation settings — two columns (Baguettotron | Luth)
+        gr.Markdown(HEADING_GENERATION)
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown(COL_BAGUETTOTRON_HEADING)
+                temp_baguettotron = gr.Slider(0, 2, value=0.5, label=LABEL_TEMPERATURE, info=INFO_TEMP_BAGUETTOTRON)
+                max_tok_baguettotron = gr.Number(value=512, label=LABEL_MAX_TOKENS, minimum=64, maximum=2048)
+                top_p_baguettotron = gr.Slider(0, 1, value=0.9, label=LABEL_TOP_P)
+                top_k_baguettotron = gr.Number(value=40, label=LABEL_TOP_K)
+                rep_baguettotron = gr.Slider(1.0, 1.5, value=1.1, label=LABEL_REPEAT_PENALTY)
+            with gr.Column():
+                gr.Markdown(COL_LUTH_HEADING)
+                temp_luth = gr.Slider(0, 2, value=0.7, label=LABEL_TEMPERATURE)
+                max_tok_luth = gr.Number(value=256, label=LABEL_MAX_TOKENS, minimum=64, maximum=2048)
+                top_p_luth = gr.Slider(0, 1, value=0.9, label=LABEL_TOP_P)
+                top_k_luth = gr.Number(value=40, label=LABEL_TOP_K)
+                rep_luth = gr.Slider(1.0, 1.5, value=1.05, label=LABEL_REPEAT_PENALTY, info=INFO_REP_LUTH)
 
-        # Row 3:
-        gr.Markdown(
+        # Row 3: System prompt + User prompt + Generate + tabbed outputs
+        gr.Markdown(HEADING_LIVE_INFERENCE)
+        system_prompt_in = gr.Textbox(
+            label=LABEL_SYSTEM_PROMPT,
+            placeholder=PLACEHOLDER_SYSTEM_PROMPT,
+            lines=2,
+        )
         prompt_in = gr.Textbox(
-            label=
-            placeholder=
+            label=LABEL_PROMPT,
+            placeholder=PLACEHOLDER_PROMPT,
             lines=3,
         )
-        gen_btn = gr.Button(
+        gen_btn = gr.Button(BTN_GENERATE, variant="primary")
 
         models_by_tier = get_models_by_tier()
         with gr.Tabs():
            with gr.Tab(TIER_LABELS["small"]):
                with gr.Row():
                    out_baguettotron = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_BAGUETTOTRON,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_350 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_350,
                        lines=12,
                        max_lines=24,
                    )
            with gr.Tab(TIER_LABELS["medium"]):
                with gr.Row():
                    out_luth_06 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_06,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_07 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_07,
                        lines=12,
                        max_lines=24,
                    )
            with gr.Tab(TIER_LABELS["large"]):
                with gr.Row():
                    out_luth_12 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_12,
                        lines=12,
                        max_lines=24,
                    )
                    out_luth_17 = gr.Textbox(
-                        label=
+                        label=LABEL_OUT_LUTH_17,
                        lines=12,
                        max_lines=24,
                    )
 
        all_inputs = [
            prompt_in,
-            max_tok_large,
-            top_p_large,
-            top_k_large,
-            rep_large,
+            system_prompt_in,
+            temp_baguettotron,
+            max_tok_baguettotron,
+            top_p_baguettotron,
+            top_k_baguettotron,
+            rep_baguettotron,
+            temp_luth,
+            max_tok_luth,
+            top_p_luth,
+            top_k_luth,
+            rep_luth,
        ]
        all_outputs = [
            out_baguettotron,
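The hunk context above references `GPU_DECORATOR` and an `except ImportError:` branch that sit outside the changed lines. For orientation, this is the conventional optional-ZeroGPU pattern such a block implements (a sketch of an assumed shape, not the file's verbatim code):

```python
try:
    import spaces  # present on Hugging Face ZeroGPU Spaces

    GPU_DECORATOR = spaces.GPU
except ImportError:
    def GPU_DECORATOR(fn):  # no-op fallback when the spaces package is absent
        return fn
```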
bundle_luth.py ADDED
@@ -0,0 +1,357 @@
#!/usr/bin/env python3
"""
Bundle a Luth model on this device with LEAP (leap-bundle) and investigate the result.

Per Liquid AI docs: leap-bundle create produces .gguf (default) or .bundle (--executorch).
We inspect both artifact types.

Steps:
1. Download the Luth model from Hugging Face to a local directory.
2. Validate the directory with leap-bundle validate.
3. Create a bundle with leap-bundle create (requires LEAP auth).
4. Poll until the bundle is completed, then download the output.
5. Investigate: report file sizes (.gguf / .bundle) and optionally run inference on .gguf.

Requires: pip install leap-bundle huggingface_hub
LEAP auth: leap-bundle login <api-key> (from https://leap.liquid.ai/profile#/api-keys)
"""

import argparse
import json
import os
import re
import subprocess
import sys
import time
from pathlib import Path


def _leap_env() -> dict[str, str]:
    """Environment for leap-bundle subprocess so UTF-8 is used (avoids Windows cp1252 + checkmark)."""
    env = os.environ.copy()
    env["PYTHONUTF8"] = "1"
    return env


# Luth model repo IDs (LFM2-based are most likely LEAP-compatible)
LUTH_REPOS = [
    "kurakurai/Luth-LFM2-350M",
    "kurakurai/Luth-LFM2-700M",
    "kurakurai/Luth-LFM2-1.2B",
    "kurakurai/Luth-0.6B-Instruct",
    "kurakurai/Luth-1.7B-Instruct",
]

DEFAULT_REPO = LUTH_REPOS[0]
DEFAULT_WORK_DIR = Path("./luth_bundle_work")
DEFAULT_QUANTIZATION = "Q4_K_M"
POLL_INTERVAL_SEC = 60
POLL_MAX_MINUTES = 30


def run(cmd: list[str], capture: bool = True, cwd: Path | None = None) -> subprocess.CompletedProcess:
    """Run a command; raise on non-zero exit unless capture is False."""
    kwargs = {
        "cwd": str(cwd) if cwd else None,
        "text": True,
        "encoding": "utf-8",
        "errors": "replace",
        "env": _leap_env(),
    }
    if capture:
        kwargs["capture_output"] = True
    r = subprocess.run(cmd, **kwargs)
    if r.returncode != 0 and capture:
        raise RuntimeError(f"Command failed: {' '.join(cmd)}\nstdout: {r.stdout}\nstderr: {r.stderr}")
    return r


def has_leap_bundle() -> bool:
    try:
        run(["leap-bundle", "--version"], capture=True)
        return True
    except (FileNotFoundError, RuntimeError):
        return False


def download_model(repo_id: str, work_dir: Path) -> Path:
    """Download Hugging Face model to work_dir/models/<repo_slug>. Returns path to model dir."""
    try:
        from huggingface_hub import snapshot_download
    except ImportError:
        raise SystemExit("Install huggingface_hub: pip install huggingface_hub")
    slug = repo_id.replace("/", "--")
    dest = work_dir / "models" / slug
    dest.mkdir(parents=True, exist_ok=True)
    print(f"Downloading {repo_id} to {dest} ...")
    snapshot_download(repo_id=repo_id, local_dir=str(dest))
    return dest

def validate_bundle(model_path: Path) -> bool:
    """Run leap-bundle validate. Returns True if valid."""
    # run() raises RuntimeError on non-zero exit, so translate failure into False here.
    try:
        run(["leap-bundle", "validate", str(model_path)], capture=True)
        return True
    except RuntimeError:
        return False


def _parse_request_id(out: str) -> str | None:
    """Parse request_id from JSON output; API may return integer or string."""
    try:
        # Handle single line or multi-line JSON
        data = json.loads(out.strip())
        rid = data.get("request_id")
        if rid is not None:
            return str(rid)
    except (json.JSONDecodeError, TypeError):
        pass
    match = re.search(r'"request_id"\s*:\s*("([^"]+)"|(\d+))', out)
    if match:
        return match.group(2) or match.group(3)
    return None


def create_bundle(model_path: Path, work_dir: Path) -> tuple[str | None, str | None]:
    """Run leap-bundle create --json. Returns (request_id, pending_id).

    On success: (request_id, None). On 'pending request' error: (None, pending_id). Else: (None, None).
    """
    r = subprocess.run(
        ["leap-bundle", "create", str(model_path), "--json"],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        cwd=work_dir,
        env=_leap_env(),
    )
    out = (r.stdout or r.stderr or "").strip()
    if r.returncode != 0:
        print("Create failed:", out or f"exit code {r.returncode}")
        pending_id = _parse_pending_request_id(out)
        if pending_id:
            return None, pending_id
        if "login" in out.lower() or "authenticat" in out.lower():
            print("Run: leap-bundle login <api-key> (get key from https://leap.liquid.ai/profile#/api-keys)")
        return None, None
    # Parse request_id (API can return {"request_id": 1, "status": "success"})
    rid = _parse_request_id(out)
    if rid:
        return rid, None
    if "already exists" in out or "exists" in out:
        print("Bundle request already exists for this model (same hash). Check leap-bundle list.")
        return None, None
    print("Create output:", out)
    return None, None


def _parse_pending_request_id(out: str) -> str | None:
    """Extract pending request ID from error message."""
    match = re.search(r"pending request\s*\(ID:\s*(\d+)\)", out, re.IGNORECASE)
    return match.group(1) if match else None


def get_request_status(request_id: str) -> str:
    """Get status of a bundle request. Returns status string."""
    r = subprocess.run(
        ["leap-bundle", "list", str(request_id)],
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        env=_leap_env(),
    )
    out = (r.stdout or r.stderr or "").lower()
    if "completed" in out:
        return "completed"
    if "failed" in out:
        return "failed"
    if "processing" in out or "upload" in out or "pending" in out:
        return "processing"
    return "unknown"


def wait_for_bundle(request_id: str) -> bool:
    """Poll until completed or failed. Returns True if completed."""
    deadline = time.monotonic() + POLL_MAX_MINUTES * 60
    while time.monotonic() < deadline:
        status = get_request_status(request_id)
        print(f"  Status: {status}")
        if status == "completed":
            return True
        if status == "failed":
            print("Bundle request failed. Run: leap-bundle list", request_id)
            return False
        time.sleep(POLL_INTERVAL_SEC)
    print("Timed out waiting for bundle.")
    return False


# Per Liquid AI docs: create output is .gguf (default) or .bundle (--executorch)
BUNDLE_EXTENSIONS = (".gguf", ".bundle")


def _find_bundle_artifact(work_dir: Path) -> Path | None:
    """Return first .gguf or .bundle file under work_dir or cwd."""
    for d in [work_dir, Path.cwd()]:
        for ext in BUNDLE_EXTENSIONS:
            for f in d.glob(f"*{ext}"):
                return f
    return None

def download_bundle(request_id: str, work_dir: Path) -> Path | None:
    """Run leap-bundle download <request_id>. Returns path to downloaded bundle artifact if found."""
    # run() raises on failure; catch it so a transient download error doesn't abort the loop.
    try:
        run(["leap-bundle", "download", request_id], capture=True, cwd=work_dir)
    except RuntimeError as e:
        if "signed_url" in str(e):
            print("  (LEAP download failed: 'signed_url' – try later: python download_bundles.py --request-ids", request_id + ")", file=sys.stderr)
        else:
            print(f"  Download failed: {e}", file=sys.stderr)
    return _find_bundle_artifact(work_dir)


def investigate(bundle_path: Path | None, model_path: Path) -> None:
    """Report sizes for source dir and bundle artifact (.gguf or .bundle); run inference only on .gguf."""
    print("\n--- Investigation ---")
    if model_path.exists():
        total = sum(f.stat().st_size for f in model_path.rglob("*") if f.is_file())
        print(f"  Source model dir: {model_path} total size: {total / (1024**2):.1f} MB")
    if bundle_path and bundle_path.exists():
        size_mb = bundle_path.stat().st_size / (1024**2)
        kind = "GGUF" if bundle_path.suffix == ".gguf" else "ExecuTorch (.bundle)"
        print(f"  Bundle file: {bundle_path} size: {size_mb:.1f} MB [{kind}]")
        if bundle_path.suffix == ".gguf":
            try:
                from llama_cpp import Llama
                print("  Running short inference (llama_cpp)...")
                llm = Llama(model_path=str(bundle_path), n_ctx=256, verbose=False)
                out = llm("Bonjour, dis-moi une phrase courte en français.\n", max_tokens=32, temperature=0.3)
                text = out["choices"][0]["text"].strip()
                print(f"  Sample output: {text[:200]}")
            except ImportError:
                print("  (Install llama-cpp-python to run a sample inference on the GGUF)")
        else:
            print("  (ExecuTorch .bundle; use LEAP SDK for inference)")
    else:
        print("  No bundle file (.gguf or .bundle) found to inspect.")


def main() -> int:
    p = argparse.ArgumentParser(
        description="Bundle a Luth model with LEAP and investigate the result.",
        epilog="Requires: pip install leap-bundle huggingface_hub. Auth: leap-bundle login <api-key>",
    )
    p.add_argument(
        "--model",
        default=DEFAULT_REPO,
        choices=LUTH_REPOS,
        help="Luth model repo ID (default: %(default)s); ignored if --all",
    )
    p.add_argument(
        "--all",
        action="store_true",
        help="Bundle and inspect every Luth model in sequence (5 models; LEAP free tier = 5 requests/24h)",
    )
    p.add_argument(
        "--work-dir",
        type=Path,
        default=DEFAULT_WORK_DIR,
        help="Working directory for download and bundle output (default: %(default)s)",
    )
    p.add_argument(
        "--quantization",
        default=DEFAULT_QUANTIZATION,
        help="(Reserved; current leap-bundle create has no --quantization option)",
    )
    p.add_argument(
        "--dry-run",
        action="store_true",
        help="Only download and validate; do not create or download bundle",
    )
    p.add_argument(
        "--skip-create",
        action="store_true",
        help="Skip bundle create (use existing local model dir only); still run investigate",
    )
    p.add_argument(
        "--request-id",
        type=str,
        metavar="ID",
        help="If bundle already created, download by request ID and then investigate",
    )
    args = p.parse_args()

    args.work_dir = args.work_dir.resolve()
    args.work_dir.mkdir(parents=True, exist_ok=True)

    if not has_leap_bundle():
        print("leap-bundle CLI not found. Install: pip install leap-bundle", file=sys.stderr)
        return 1

    models_to_run = LUTH_REPOS if args.all else [args.model]
    if args.all and args.request_id:
        print("--request-id is ignored when using --all.", file=sys.stderr)
        args.request_id = None
    if args.all:
        print(f"Running for all {len(models_to_run)} Luth models: {', '.join(models_to_run)}")
        print("Note: LEAP free tier allows 5 bundle requests per 24h.\n")

    exit_code = 0
    for repo_id in models_to_run:
        print(f"\n{'='*60}\n  {repo_id}\n{'='*60}")
        try:
            # 1. Download
            model_path = download_model(repo_id, args.work_dir)

            # 2. Validate
            print("Validating directory for LEAP bundle...")
            if not validate_bundle(model_path):
                print("Validation failed. Fix the model directory and retry.", file=sys.stderr)
                exit_code = 1
                continue
            print("Validation passed.")

            if args.dry_run:
                investigate(None, model_path)
                continue

            gguf_path: Path | None = None

            if args.request_id and not args.all:
                # Download existing bundle by ID (single-model only)
                print(f"Downloading bundle request {args.request_id}...")
                gguf_path = download_bundle(args.request_id, args.work_dir)
            elif not args.skip_create:
                # 3. Create bundle (LEAP allows only one pending request; wait for it if needed)
                request_id: str | None = None
                pending_id: str | None = None
                print("Creating bundle...")
                request_id, pending_id = create_bundle(model_path, args.work_dir)
                if pending_id:
                    print(f"Waiting for previous bundle request {pending_id} to complete...")
                    if wait_for_bundle(pending_id):
                        download_bundle(pending_id, args.work_dir)
                    print("Retrying create for this model...")
                    request_id, pending_id = create_bundle(model_path, args.work_dir)
                    if pending_id:
                        print("Still pending; skipping create for this model.", file=sys.stderr)
                        request_id = None
                if request_id:
                    # 4. Wait and download
                    print(f"Waiting for bundle request {request_id} (poll every {POLL_INTERVAL_SEC}s)...")
                    if wait_for_bundle(request_id):
                        gguf_path = download_bundle(request_id, args.work_dir)
                elif not pending_id:
                    print("No new request created. Use --request-id <id> to download an existing bundle.")
            else:
                print("Skipping bundle create (--skip-create).")

            # 5. Investigate
            investigate(gguf_path, model_path)
        except Exception as e:
            print(f"Error processing {repo_id}: {e}", file=sys.stderr)
            exit_code = 1

    return exit_code


if __name__ == "__main__":
    sys.exit(main())
download_bundles.py ADDED
@@ -0,0 +1,248 @@
#!/usr/bin/env python3
"""
Download LEAP bundle outputs by request ID and inspect the files.

Per Liquid AI docs: leap-bundle create produces .gguf (default, GGUF) or
.bundle (ExecuTorch, with --executorch). This script finds and inspects both.

Uses leap-bundle list (--json per request) and leap-bundle download
with --output-path. Reports all bundle artifacts (.gguf, .bundle) and
optionally runs short inference on .gguf (llama-cpp).

Requires: pip install leap-bundle
LEAP auth: leap-bundle login <api-key>
"""

import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path


def _leap_env() -> dict[str, str]:
    env = os.environ.copy()
    env["PYTHONUTF8"] = "1"
    return env


def run(cmd: list[str], capture: bool = True, cwd: Path | None = None) -> subprocess.CompletedProcess:
    kwargs = {
        "cwd": str(cwd) if cwd else None,
        "text": True,
        "encoding": "utf-8",
        "errors": "replace",
        "env": _leap_env(),
    }
    if capture:
        kwargs["capture_output"] = True
    return subprocess.run(cmd, **kwargs)


def list_request(request_id: str) -> dict | None:
    """Get details for one request; returns parsed JSON or None."""
    r = run(["leap-bundle", "list", str(request_id), "--json"], capture=True)
    if r.returncode != 0:
        return None
    out = (r.stdout or "").strip()
    try:
        return json.loads(out)
    except json.JSONDecodeError:
        return None


def list_all_request_ids() -> list[str]:
    """Run leap-bundle list (no id) and parse table for request IDs. Returns list of ID strings."""
    r = run(["leap-bundle", "list"], capture=True)
    out = (r.stdout or r.stderr or "")
    ids: list[str] = []
    # Table rows: first column is often the ID (integer)
    for line in out.splitlines():
        parts = line.split()
        if parts and parts[0].isdigit():
            ids.append(parts[0])
    # Fallback: any line with a pipe or spaces and a leading number (rich table)
    if not ids:
        for line in out.splitlines():
            m = re.search(r"[\|\s](\d{1,6})[\|\s]", line)
            if m:
                ids.append(m.group(1))
    # Fallback: JSON-like "request_id": N or "id": N
    if not ids:
        for m in re.finditer(r'"(?:request_id|id)"\s*:\s*(\d+)', out):
            ids.append(m.group(1))
    return list(dict.fromkeys(ids))


def get_status(data: dict) -> str:
    """Extract status from list request JSON."""
    s = (data.get("status") or data.get("Status") or "").lower()
    return s


def download_bundle(request_id: str, output_path: Path) -> tuple[bool, str]:
    """Run leap-bundle download <id> --output-path <dir>. Returns (success, stderr_or_empty)."""
    output_path.mkdir(parents=True, exist_ok=True)
    r = run(
        ["leap-bundle", "download", str(request_id), "--output-path", str(output_path)],
        capture=True,
    )
    err = (r.stderr or r.stdout or "").strip()
    return r.returncode == 0, err


# Per Liquid AI docs: create produces .gguf (default) or .bundle (--executorch)
BUNDLE_EXTENSIONS = (".gguf", ".bundle")


def find_bundle_files(root: Path) -> list[Path]:
    """Return all LEAP bundle artifact files (.gguf, .bundle) under root."""
    out: list[Path] = []
    for ext in BUNDLE_EXTENSIONS:
        out.extend(root.rglob(f"*{ext}"))
    return sorted(out)


def inspect_file(path: Path, run_inference: bool = False, root: Path | None = None) -> None:
    """Print path, size, type; run short inference only for .gguf (llama-cpp)."""
    size_mb = path.stat().st_size / (1024**2)
    try:
        disp = path.relative_to(root) if root else path
    except ValueError:
        disp = path
    kind = "GGUF" if path.suffix == ".gguf" else "ExecuTorch (.bundle)"
    print(f"  {disp}  {size_mb:.1f} MB  [{kind}]")
    if run_inference and path.suffix == ".gguf":
        try:
            from llama_cpp import Llama
            print("    Running short inference (llama-cpp)...")
            llm = Llama(model_path=str(path), n_ctx=256, verbose=False)
            out = llm("Bonjour, une phrase en français.\n", max_tokens=24, temperature=0.3)
            text = (out["choices"][0]["text"] or "").strip()
            print(f"    -> {text[:150]}")
        except ImportError:
            print("    (Install llama-cpp-python to run inference)")
        except Exception as e:
            print(f"    Inference error: {e}")
    elif run_inference and path.suffix == ".bundle":
        print("    (ExecuTorch .bundle; inference via LEAP SDK, not llama-cpp)")


def main() -> int:
    p = argparse.ArgumentParser(
        description="Download LEAP bundle outputs by request ID and inspect files (.gguf or .bundle per Liquid AI docs).",
        epilog="Requires: leap-bundle (pip install leap-bundle). Auth: leap-bundle login <api-key>",
    )
    p.add_argument(
        "--output-dir",
        type=Path,
        default=Path("./luth_bundle_downloads"),
        help="Directory to download each bundle into (default: ./luth_bundle_downloads)",
    )
    p.add_argument(
        "--request-ids",
        type=str,
        nargs="*",
        metavar="ID",
        help="Bundle request IDs to download (e.g. 1 2 3)",
    )
    p.add_argument(
        "--from-file",
        type=Path,
        metavar="FILE",
        help="Text file with one request ID per line",
    )
    p.add_argument(
        "--list",
        action="store_true",
        help="Run leap-bundle list and download all completed requests",
    )
    p.add_argument(
        "--infer",
        action="store_true",
        help="Run a short inference on each downloaded GGUF (requires llama-cpp-python)",
    )
    p.add_argument(
        "--inspect-only",
        action="store_true",
        help="Only inspect existing bundle files (.gguf, .bundle) under --output-dir; do not download",
    )
    args = p.parse_args()

    args.output_dir = args.output_dir.resolve()
    request_ids: list[str] = []

    if args.inspect_only:
        args.output_dir.mkdir(parents=True, exist_ok=True)
        bundles = find_bundle_files(args.output_dir)
        print(f"Inspecting {len(bundles)} bundle file(s) (.gguf / .bundle) under {args.output_dir}\n")
        for f in bundles:
            inspect_file(f, run_inference=args.infer, root=args.output_dir)
        return 0

    if args.list:
        print("Fetching bundle request list...")
        request_ids = list_all_request_ids()
        if not request_ids:
            print("No request IDs found from list.", file=sys.stderr)
            print("If you have existing bundle requests (e.g. from bundle_luth.py --all), run:", file=sys.stderr)
            print("  python download_bundles.py --request-ids 1 2 3 4 5", file=sys.stderr)
            return 1
        print(f"Found {len(request_ids)} request(s): {request_ids}")
    else:
        if args.request_ids:
            request_ids.extend(args.request_ids)
        if args.from_file:
            if not args.from_file.exists():
                print(f"File not found: {args.from_file}", file=sys.stderr)
                return 1
            for line in args.from_file.read_text(encoding="utf-8", errors="replace").splitlines():
                rid = line.strip()
                if rid and rid.isdigit():
                    request_ids.append(rid)
        if not request_ids:
            print("Provide --request-ids, --from-file, or --list.", file=sys.stderr)
            return 1

    args.output_dir.mkdir(parents=True, exist_ok=True)
    downloaded: list[Path] = []

    for rid in request_ids:
        print(f"\n--- Request ID {rid} ---")
        info = list_request(rid)
        status = get_status(info) if info else ""
        if status:
            print(f"  Status: {status}")
            if "completed" not in status and "complete" not in status:
                print("  Skipping (not completed).")
                continue
        else:
            print("  (Status unknown; attempting download.)")
        dest = args.output_dir / f"request_{rid}"
        print(f"  Downloading to {dest} ...")
        ok, err = download_bundle(rid, dest)
        if ok:
            for f in find_bundle_files(dest):
                downloaded.append(f)
                kind = "GGUF" if f.suffix == ".gguf" else ".bundle"
                print(f"  Downloaded: {f.name} ({f.stat().st_size / (1024**2):.1f} MB) [{kind}]")
        else:
            print("  Download failed.", file=sys.stderr)
            if "signed_url" in err:
                print("  (LEAP CLI/API 'signed_url' error – try again later or check LEAP status.)", file=sys.stderr)

    print("\n" + "=" * 60)
    print("Inspection summary (bundle artifacts: .gguf / .bundle)")
    print("=" * 60)
    all_bundles = find_bundle_files(args.output_dir)
    for f in all_bundles:
        inspect_file(f, run_inference=args.infer, root=args.output_dir)
    if not all_bundles:
        print("  No bundle files (.gguf or .bundle) found.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
inference.py CHANGED
@@ -19,20 +19,27 @@ _cache_lock = __import__("threading").Lock()
 BAGUETTOTRON_ID = "PleIAs/Baguettotron"
 
 
-def _format_prompt_baguettotron(prompt: str) -> tuple[str, list[str]]:
+def _format_prompt_baguettotron(prompt: str, system_prompt: str = "") -> tuple[str, list[str]]:
     """
     Manual prompt build for Baguettotron. Uses "<|im_end>" (no trailing pipe)
     per tokenizer; stop=["<|im_end>", "</think>"] for generation.
+    Qwen-style: system (optional) + user + assistant.
     """
+    parts: list[str] = []
+    if system_prompt.strip():
+        parts.append(f"<|im_start|>system\n{system_prompt.strip()}<|im_end>\n")
+    parts.append(f"<|im_start|>user\n{prompt}<|im_end>\n<|im_start|>assistant\n<think>\n")
+    text = "".join(parts)
     stop = ["<|im_end>", "</think>"]
     return text, stop
 
 
-def _format_prompt_luth(prompt: str, tokenizer: Any) -> tuple[dict[str, Any], list[str] | None]:
-    """Use tokenizer's chat template for Luth models."""
-    messages
+def _format_prompt_luth(prompt: str, tokenizer: Any, system_prompt: str = "") -> tuple[dict[str, Any], list[str] | None]:
+    """Use tokenizer's chat template for Luth models. Supports optional system message."""
+    messages: list[dict[str, str]] = []
+    if system_prompt.strip():
+        messages.append({"role": "system", "content": system_prompt.strip()})
+    messages.append({"role": "user", "content": prompt})
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -74,6 +81,7 @@ def _generate_one(
     prompt: str,
     params: dict[str, Any],
     device: str = "cuda",
+    system_prompt: str = "",
 ) -> tuple[str, str]:
     """Load (or use cached) model, run inference, return (model_id, text)."""
     model, tokenizer = _load_model(model_id, device)
@@ -95,10 +103,10 @@ def _generate_one(
     }
 
     if model_id == BAGUETTOTRON_ID:
-        text_prompt, _stop = _format_prompt_baguettotron(prompt)
+        text_prompt, _stop = _format_prompt_baguettotron(prompt, system_prompt)
         inputs = tokenizer(text_prompt, return_tensors="pt")
     else:
-        inputs = _format_prompt_luth(prompt, tokenizer)[0]
+        inputs = _format_prompt_luth(prompt, tokenizer, system_prompt)[0]
 
     # Move to device (input_ids/attention_mask are int; no dtype cast needed)
     inputs = {k: v.to(device) for k, v in inputs.items()}
@@ -126,6 +134,7 @@ def run_all(
     params_by_model: dict[str, dict[str, Any]],
     device: str | None = None,
     max_workers: int = 6,
+    system_prompt: str = "",
 ) -> dict[str, str]:
     """
     Load all 6 models, run all 6 inferences in parallel.
@@ -143,7 +152,7 @@ def run_all(
 
     def task(model_id: str):
         p = {**default_params, **(params_by_model.get(model_id) or {})}
-        return _generate_one(model_id, prompt, p, device)
+        return _generate_one(model_id, prompt, p, device, system_prompt)
 
     results: dict[str, str] = {}
     with ThreadPoolExecutor(max_workers=max_workers) as ex:
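Taken together, these changes thread one optional system prompt through both prompt formats. A usage sketch against the new `run_all` signature (repo ids are the ones this app uses; parameter values and the prompt are illustrative, and models left out of the dict fall back to `run_all`'s defaults):

```python
from inference import run_all

params_by_model = {
    "PleIAs/Baguettotron": {
        "temperature": 0.5, "max_tokens": 512,
        "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.1,
    },
    "kurakurai/Luth-LFM2-350M": {
        "temperature": 0.7, "max_tokens": 256,
        "top_p": 0.9, "top_k": 40, "repeat_penalty": 1.05,
    },
}
results = run_all(
    "Explique la photosynthèse en deux phrases.",
    params_by_model,
    system_prompt="You are a helpful assistant that answers in French.",
)
for model_id, text in results.items():
    print(model_id, "->", text[:100])
```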
model_config.py CHANGED
@@ -141,3 +141,26 @@ def combined_footprint() -> tuple[int, float]:
     total_disk = sum(m.file_size_mb for m in MODELS)
     total_vram_mb = sum(m.vram_estimate_mb for m in MODELS)
     return total_disk, total_vram_mb / 1024
+
+
+# --- GGUF / LEAP bundle reference ---
+# Baguettotron: PleIAs/Baguettotron-GGUF (Hugging Face)
+# LFM2 / Luth: actual LEAP bundle outputs (leap-bundle create + download)
+GGUF_REFERENCE_ROWS: list[list[str]] = [
+    # Model/Variant | Params | File size (MB) | Source
+    ["Baguettotron Q4_0", "321M", "202", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q4_K_M", "321M", "240", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q5_K_M", "321M", "257", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron Q8_0", "321M", "344", "PleIAs/Baguettotron-GGUF"],
+    ["Baguettotron BF16", "321M", "644", "PleIAs/Baguettotron-GGUF"],
+    ["LFM2-350M Q4_K_M", "0.4B", "219", "LEAP bundle (Luth-LFM2-350M)"],
+    ["LFM2-700M Q4_K_M", "0.7B", "447", "LEAP bundle (Luth-LFM2-700M)"],
+    ["LFM2-1.2B Q4_K_M", "1.2B", "697", "LEAP bundle (Luth-LFM2-1.2B)"],
+    ["Luth-0.6B-Instruct (Qwen3) Q4_K_M", "0.6B", "378", "LEAP bundle"],
+    ["Luth-1.7B-Instruct (Qwen3) Q4_K_M", "1.7B", "1,056", "LEAP bundle"],
+]
+
+
+def gguf_footprint_table_data() -> list[list[str]]:
+    """Rows for GGUF/LEAP reference table: Model/Variant | Params | File size (MB) | Source."""
+    return GGUF_REFERENCE_ROWS
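The new rows also make the quantization savings concrete; for example (plain arithmetic on the table values above):

```python
bf16_mb, q4_mb = 644, 240  # Baguettotron BF16 vs Q4_K_M from GGUF_REFERENCE_ROWS
print(f"Q4_K_M is {bf16_mb / q4_mb:.1f}x smaller than BF16")  # -> 2.7x
```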
requirements-bundle.txt ADDED
@@ -0,0 +1,6 @@
# For bundle_luth.py: bundle Luth models with LEAP and investigate
leap-bundle
huggingface_hub
# Optional: run a short inference on the downloaded GGUF
# llama-cpp-python
hf_xet
ui_strings.py ADDED
@@ -0,0 +1,44 @@
"""
UI text strings for the Baguettotron vs Luth Gradio app.
Centralized for reuse and easier i18n.
"""

# App identity
TITLE = "Baguettotron vs Luth models"
SUBTITLE = "All models, all outputs — apples-to-apples comparison by parameter size."

# Footprint section
HEADING_FOOTPRINT = "## Model footprint"
FOOTPRINT_HEADERS = ["Model", "Params", "File size (MB)", "Est. VRAM (MB)"]
FOOTPRINT_SUMMARY_TEMPLATE = "**Combined footprint —** Total disk: {total_disk:,} MB | Total VRAM (est.): {total_vram:.2f} GB"
HEADING_GGUF_LEAP = "### GGUF & LEAP bundle sizes (reference)"
GGUF_LEAP_INTRO = "PleIAs Baguettotron GGUF variants (Hugging Face) and Liquid LFM2 GGUF sizes (LEAP model library). Download with `leap-bundle download <model>` or from the links below."
FOOTPRINT_GGUF_HEADERS = ["Model / Variant", "Params", "File size (MB)", "Source"]

# Generation settings
HEADING_GENERATION = "## Generation settings (by model family)"
COL_BAGUETTOTRON_HEADING = "**Baguettotron (321M)** — *reasoning*"
COL_LUTH_HEADING = "**Luth models (0.4B–1.7B)** — *instruct*"
LABEL_TEMPERATURE = "Temperature"
LABEL_MAX_TOKENS = "Max tokens"
LABEL_TOP_P = "Top p"
LABEL_TOP_K = "Top k"
LABEL_REPEAT_PENALTY = "Repeat penalty"
INFO_TEMP_BAGUETTOTRON = "Lower for more deterministic reasoning"
INFO_REP_LUTH = "Luth/LFM2 often use ~1.05"

# Live inference
HEADING_LIVE_INFERENCE = "## Live inference"
LABEL_SYSTEM_PROMPT = "System prompt (optional)"
PLACEHOLDER_SYSTEM_PROMPT = "e.g. You are a helpful assistant that answers in French."
LABEL_PROMPT = "Prompt"
PLACEHOLDER_PROMPT = "Enter your prompt here..."
BTN_GENERATE = "Generate"

# Output textbox labels (per model)
LABEL_OUT_BAGUETTOTRON = "Baguettotron (321M)"
LABEL_OUT_LUTH_350 = "Luth-LFM2-350M (0.4B)"
LABEL_OUT_LUTH_06 = "Luth-0.6B-Instruct"
LABEL_OUT_LUTH_07 = "Luth-LFM2-700M"
LABEL_OUT_LUTH_12 = "Luth-LFM2-1.2B"
LABEL_OUT_LUTH_17 = "Luth-1.7B-Instruct"