imtrt004 committed on
Commit
ab16882
·
1 Parent(s): 6780118

fix: update model

Browse files
Files changed (1) hide show
  1. model/loader.py +3 -2
model/loader.py CHANGED
@@ -5,8 +5,9 @@ All speeds measured on 2 vCPU / 16 GB RAM (HF Free Tier).
5
  Model options (set LLM_MODEL env var in HF Space to switch, no redeploy needed):
6
  #1 TinyLlama/TinyLlama-1.1B-Chat-v1.0 ~1 GB 40-60 tok/s Apache 2.0 demos, prototypes
7
  #2 Qwen/Qwen3-0.6B ~0.5 GB 45-55 tok/s Apache 2.0 speed-critical, Think mode
8
- #3 meta-llama/Llama-3.2-1B-Instruct [DEF] ~1.5 GB 35-50 tok/s Community 128K ctx, long-context
9
  #4 HuggingFaceTB/SmolLM2-1.7B-Instruct ~2 GB 25-35 tok/s Apache 2.0 good quality/size ratio
 
10
  #5 Qwen/Qwen2.5-1.5B-Instruct ~2 GB 25-40 tok/s Apache 2.0 multilingual, 32K ctx
11
  #6 stabilityai/stablelm-2-zephyr-1_6b ~2 GB 25-40 tok/s MIT DPO-tuned chat feel
12
  #7 Qwen/Qwen2.5-Coder-1.5B-Instruct ~2 GB 25-40 tok/s Apache 2.0 code completion/review
@@ -34,7 +35,7 @@ warnings.filterwarnings(
34
  category=FutureWarning,
35
  )
36
 
37
- MODEL_ID = os.environ.get("LLM_MODEL", "meta-llama/Llama-3.2-1B-Instruct")
38
 
39
  # Models that need trust_remote_code=True (custom architectures)
40
  _TRUST_REMOTE_CODE_MODELS = (
 
5
  Model options (set LLM_MODEL env var in HF Space to switch, no redeploy needed):
6
  #1 TinyLlama/TinyLlama-1.1B-Chat-v1.0 ~1 GB 40-60 tok/s Apache 2.0 demos, prototypes
7
  #2 Qwen/Qwen3-0.6B ~0.5 GB 45-55 tok/s Apache 2.0 speed-critical, Think mode
8
+ #3 meta-llama/Llama-3.2-1B-Instruct ~1.5 GB 35-50 tok/s Community 128K ctx, long-context (needs HF_TOKEN)
9
  #4 HuggingFaceTB/SmolLM2-1.7B-Instruct ~2 GB 25-35 tok/s Apache 2.0 good quality/size ratio
10
+ #5 HuggingFaceTB/SmolLM2-360M-Instruct [DEF] ~0.4 GB 60-80 tok/s Apache 2.0 fastest, no token needed
11
  #5 Qwen/Qwen2.5-1.5B-Instruct ~2 GB 25-40 tok/s Apache 2.0 multilingual, 32K ctx
12
  #6 stabilityai/stablelm-2-zephyr-1_6b ~2 GB 25-40 tok/s MIT DPO-tuned chat feel
13
  #7 Qwen/Qwen2.5-Coder-1.5B-Instruct ~2 GB 25-40 tok/s Apache 2.0 code completion/review
 
35
  category=FutureWarning,
36
  )
37
 
38
+ MODEL_ID = os.environ.get("LLM_MODEL", "HuggingFaceTB/SmolLM2-360M-Instruct")
39
 
40
  # Models that need trust_remote_code=True (custom architectures)
41
  _TRUST_REMOTE_CODE_MODELS = (