Spaces:

build-small-hackathon
/

tiny-press

Running on Zero

App Files Files Community

sriharsha-cr committed 4 days ago

Commit

ca8a415

1 Parent(s): 6ea3105

Default model changes

Browse files

Files changed (2) hide show

config.py +39 -3
ui/compress_tab.py +8 -0

config.py CHANGED Viewed

@@ -1,18 +1,54 @@
 import os
 # Model settings
-LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
 EMBEDDER_MODEL = os.getenv("EMBEDDER_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
 # Curated <32B open-weight causal LMs for local inference (shown in the UI dropdown).
 AVAILABLE_MODELS = [
-    "Qwen/Qwen2.5-1.5B-Instruct",
     "Qwen/Qwen2.5-0.5B-Instruct",
     "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
-    "meta-llama/Llama-3.2-1B-Instruct",
 ]
 # Curated sentence-transformer embedding models for quality scoring.
 AVAILABLE_EMBEDDER_MODELS = [
     "sentence-transformers/all-MiniLM-L6-v2",

 import os
 # Model settings
+LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
 EMBEDDER_MODEL = os.getenv("EMBEDDER_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
 # Curated <32B open-weight causal LMs for local inference (shown in the UI dropdown).
 AVAILABLE_MODELS = [
     "Qwen/Qwen2.5-0.5B-Instruct",
+    "HuggingFaceTB/SmolLM2-135M-Instruct",
+    "HuggingFaceTB/SmolLM2-360M-Instruct",
+    "Qwen/Qwen2.5-1.5B-Instruct",
+    "meta-llama/Llama-3.2-1B-Instruct",
     "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
 ]
+MODEL_INFO = {
+    "HuggingFaceTB/SmolLM2-135M-Instruct": (
+        "⚡ **Fastest · 135M params** — Loads in ~10 s.  \n"
+        "Good for exploring the interface. May produce looser compressions; "
+        "switch up for high-fidelity output."
+    ),
+    "HuggingFaceTB/SmolLM2-360M-Instruct": (
+        "🚀 **Fast · 360M params** — Loads in ~20 s.  \n"
+        "Noticeably better compression than 135M with a small speed trade-off."
+    ),
+    "Qwen/Qwen2.5-0.5B-Instruct": (
+        "⚡ **Fast · 500M params · Default** — Loads in ~25 s.  \n"
+        "Strong instruction-following for its size; reliably respects token budgets. "
+        "Best balance of speed and quality."
+    ),
+    "meta-llama/Llama-3.2-1B-Instruct": (
+        "🚀 **Fast · 1B params** — Loads in ~40 s.  \n"
+        "Meta's smallest Llama; good general-purpose compression. "
+        "Requires accepting the Llama licence on HF Hub."
+    ),
+    "Qwen/Qwen2.5-1.5B-Instruct": (
+        "⚖️ **Balanced · 1.5B params** — Loads in ~60 s.  \n"
+        "Solid all-rounder; strong semantic fidelity and budget adherence."
+    ),
+    "HuggingFaceTB/SmolLM2-1.7B-Instruct": (
+        "⚖️ **Balanced · 1.7B params** — Loads in ~60 s.  \n"
+        "Designed for edge inference; efficient on CPU."
+    ),
+    "microsoft/Phi-3.5-mini-instruct": (
+        "🏆 **Best quality · 3.8B params** — Loads in ~2 min.  \n"
+        "Strongest reasoning and fidelity in this list. GPU strongly recommended."
+    ),
+}
 # Curated sentence-transformer embedding models for quality scoring.
 AVAILABLE_EMBEDDER_MODELS = [
     "sentence-transformers/all-MiniLM-L6-v2",

ui/compress_tab.py CHANGED Viewed

@@ -11,6 +11,10 @@ from core.tokenizer_utils import count_tokens, get_token_strings
 from db.store import save_run, update_feedback, update_feedback_comment
 from models.model_loader import get_current_model_id, get_current_tokenizer_id, switch_llm, switch_embedder, get_current_embedder_id
 # ── token colour palette (10 soft pastels, cycles) ───────────────────────────
 _PALETTE = [
@@ -211,6 +215,9 @@ def build_compress_tab(run_store) -> gr.Tab:
                 label="Compression Model",
                 allow_custom_value=True,
             )
             load_model_btn = gr.Button("Load Model", variant="secondary")
             model_status = gr.Textbox(
                 label="Model Status",
@@ -302,6 +309,7 @@ def build_compress_tab(run_store) -> gr.Tab:
         target_slider.change(fn=compression_status, **_status_args)
         load_model_btn.click(fn=load_model, inputs=[model_dropdown], outputs=[model_status])
         embedder_dropdown.change(fn=on_embedder_change, inputs=[embedder_dropdown], outputs=[embedder_info_panel])
         load_embedder_btn.click(fn=load_embedder, inputs=[embedder_dropdown], outputs=[embedder_status])
         compress_btn.click(

 from db.store import save_run, update_feedback, update_feedback_comment
 from models.model_loader import get_current_model_id, get_current_tokenizer_id, switch_llm, switch_embedder, get_current_embedder_id
+def on_model_change(model_id: str) -> str:
+    return config.MODEL_INFO.get(model_id, "")
 # ── token colour palette (10 soft pastels, cycles) ───────────────────────────
 _PALETTE = [
                 label="Compression Model",
                 allow_custom_value=True,
             )
+            model_info_panel = gr.Markdown(
+                value=config.MODEL_INFO.get(config.LLM_MODEL, "")
+            )
             load_model_btn = gr.Button("Load Model", variant="secondary")
             model_status = gr.Textbox(
                 label="Model Status",
         target_slider.change(fn=compression_status, **_status_args)
         load_model_btn.click(fn=load_model, inputs=[model_dropdown], outputs=[model_status])
+        model_dropdown.change(fn=on_model_change, inputs=[model_dropdown], outputs=[model_info_panel])
         embedder_dropdown.change(fn=on_embedder_change, inputs=[embedder_dropdown], outputs=[embedder_info_panel])
         load_embedder_btn.click(fn=load_embedder, inputs=[embedder_dropdown], outputs=[embedder_status])
         compress_btn.click(