sriharsha-cr committed on
Commit
ca8a415
·
1 Parent(s): 6ea3105

Default model changes

Browse files
Files changed (2) hide show
  1. config.py +39 -3
  2. ui/compress_tab.py +8 -0
config.py CHANGED
@@ -1,18 +1,54 @@
1
  import os
2
 
3
  # Model settings
4
- LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
5
  EMBEDDER_MODEL = os.getenv("EMBEDDER_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
6
 
7
  # Curated <32B open-weight causal LMs for local inference (shown in the UI dropdown).
8
  AVAILABLE_MODELS = [
9
- "Qwen/Qwen2.5-1.5B-Instruct",
10
  "Qwen/Qwen2.5-0.5B-Instruct",
 
 
 
 
11
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
12
  "microsoft/Phi-3.5-mini-instruct",
13
- "meta-llama/Llama-3.2-1B-Instruct",
14
  ]
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Curated sentence-transformer embedding models for quality scoring.
17
  AVAILABLE_EMBEDDER_MODELS = [
18
  "sentence-transformers/all-MiniLM-L6-v2",
 
1
  import os
2
 
3
  # Model settings
4
+ LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
5
  EMBEDDER_MODEL = os.getenv("EMBEDDER_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
6
 
7
  # Curated <32B open-weight causal LMs for local inference (shown in the UI dropdown).
8
  AVAILABLE_MODELS = [
 
9
  "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "HuggingFaceTB/SmolLM2-135M-Instruct",
11
+ "HuggingFaceTB/SmolLM2-360M-Instruct",
12
+ "Qwen/Qwen2.5-1.5B-Instruct",
13
+ "meta-llama/Llama-3.2-1B-Instruct",
14
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
15
  "microsoft/Phi-3.5-mini-instruct",
 
16
  ]
17
 
18
+ MODEL_INFO = {
19
+ "HuggingFaceTB/SmolLM2-135M-Instruct": (
20
+ "⚡ **Fastest · 135M params** — Loads in ~10 s. \n"
21
+ "Good for exploring the interface. May produce looser compressions; "
22
+ "switch up for high-fidelity output."
23
+ ),
24
+ "HuggingFaceTB/SmolLM2-360M-Instruct": (
25
+ "🚀 **Fast · 360M params** — Loads in ~20 s. \n"
26
+ "Noticeably better compression than 135M with a small speed trade-off."
27
+ ),
28
+ "Qwen/Qwen2.5-0.5B-Instruct": (
29
+ "⚡ **Fast · 500M params · Default** — Loads in ~25 s. \n"
30
+ "Strong instruction-following for its size; reliably respects token budgets. "
31
+ "Best balance of speed and quality."
32
+ ),
33
+ "meta-llama/Llama-3.2-1B-Instruct": (
34
+ "🚀 **Fast · 1B params** — Loads in ~40 s. \n"
35
+ "Meta's smallest Llama; good general-purpose compression. "
36
+ "Requires accepting the Llama licence on HF Hub."
37
+ ),
38
+ "Qwen/Qwen2.5-1.5B-Instruct": (
39
+ "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
40
+ "Solid all-rounder; strong semantic fidelity and budget adherence."
41
+ ),
42
+ "HuggingFaceTB/SmolLM2-1.7B-Instruct": (
43
+ "⚖️ **Balanced · 1.7B params** — Loads in ~60 s. \n"
44
+ "Designed for edge inference; efficient on CPU."
45
+ ),
46
+ "microsoft/Phi-3.5-mini-instruct": (
47
+ "🏆 **Best quality · 3.8B params** — Loads in ~2 min. \n"
48
+ "Strongest reasoning and fidelity in this list. GPU strongly recommended."
49
+ ),
50
+ }
51
+
52
  # Curated sentence-transformer embedding models for quality scoring.
53
  AVAILABLE_EMBEDDER_MODELS = [
54
  "sentence-transformers/all-MiniLM-L6-v2",
ui/compress_tab.py CHANGED
@@ -11,6 +11,10 @@ from core.tokenizer_utils import count_tokens, get_token_strings
11
  from db.store import save_run, update_feedback, update_feedback_comment
12
  from models.model_loader import get_current_model_id, get_current_tokenizer_id, switch_llm, switch_embedder, get_current_embedder_id
13
 
 
 
 
 
14
  # ── token colour palette (10 soft pastels, cycles) ───────────────────────────
15
 
16
  _PALETTE = [
@@ -211,6 +215,9 @@ def build_compress_tab(run_store) -> gr.Tab:
211
  label="Compression Model",
212
  allow_custom_value=True,
213
  )
 
 
 
214
  load_model_btn = gr.Button("Load Model", variant="secondary")
215
  model_status = gr.Textbox(
216
  label="Model Status",
@@ -302,6 +309,7 @@ def build_compress_tab(run_store) -> gr.Tab:
302
  target_slider.change(fn=compression_status, **_status_args)
303
 
304
  load_model_btn.click(fn=load_model, inputs=[model_dropdown], outputs=[model_status])
 
305
  embedder_dropdown.change(fn=on_embedder_change, inputs=[embedder_dropdown], outputs=[embedder_info_panel])
306
  load_embedder_btn.click(fn=load_embedder, inputs=[embedder_dropdown], outputs=[embedder_status])
307
  compress_btn.click(
 
11
  from db.store import save_run, update_feedback, update_feedback_comment
12
  from models.model_loader import get_current_model_id, get_current_tokenizer_id, switch_llm, switch_embedder, get_current_embedder_id
13
 
14
+
15
+ def on_model_change(model_id: str) -> str:
16
+ return config.MODEL_INFO.get(model_id, "")
17
+
18
  # ── token colour palette (10 soft pastels, cycles) ───────────────────────────
19
 
20
  _PALETTE = [
 
215
  label="Compression Model",
216
  allow_custom_value=True,
217
  )
218
+ model_info_panel = gr.Markdown(
219
+ value=config.MODEL_INFO.get(config.LLM_MODEL, "")
220
+ )
221
  load_model_btn = gr.Button("Load Model", variant="secondary")
222
  model_status = gr.Textbox(
223
  label="Model Status",
 
309
  target_slider.change(fn=compression_status, **_status_args)
310
 
311
  load_model_btn.click(fn=load_model, inputs=[model_dropdown], outputs=[model_status])
312
+ model_dropdown.change(fn=on_model_change, inputs=[model_dropdown], outputs=[model_info_panel])
313
  embedder_dropdown.change(fn=on_embedder_change, inputs=[embedder_dropdown], outputs=[embedder_info_panel])
314
  load_embedder_btn.click(fn=load_embedder, inputs=[embedder_dropdown], outputs=[embedder_status])
315
  compress_btn.click(