Spaces:
Running
Running
feat: Add ERNIE 21B IQ2_XS variant - more stable quantization
Browse files- Added ernie_21b_thinking_q2 model with IQ2_XS (2-bit) quantization
- Keeps original TQ1_0 variant for comparison/testing
- IQ2_XS should fix generation failures caused by experimental TQ1_0
- Same repo (unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF), different quant file
app.py
CHANGED
|
@@ -218,6 +218,20 @@ AVAILABLE_MODELS = {
|
|
| 218 |
"repeat_penalty": 1.15,
|
| 219 |
},
|
| 220 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
}
|
| 222 |
|
| 223 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
@@ -272,6 +286,8 @@ def load_model(model_key: str = None) -> Tuple[Llama, str]:
|
|
| 272 |
n_gpu_layers=0, # CPU only
|
| 273 |
verbose=False,
|
| 274 |
seed=1337,
|
|
|
|
|
|
|
| 275 |
)
|
| 276 |
|
| 277 |
current_model_key = model_key
|
|
|
|
| 218 |
"repeat_penalty": 1.15,
|
| 219 |
},
|
| 220 |
},
|
| 221 |
+
"ernie_21b_thinking_q2": {
|
| 222 |
+
"name": "ERNIE-4.5 21B Thinking Q2 (128K Context)",
|
| 223 |
+
"repo_id": "unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF",
|
| 224 |
+
"filename": "*IQ2_XS.gguf",
|
| 225 |
+
"max_context": 131072,
|
| 226 |
+
"default_temperature": 0.6,
|
| 227 |
+
"supports_toggle": False,
|
| 228 |
+
"inference_settings": {
|
| 229 |
+
"temperature": 0.3,
|
| 230 |
+
"top_p": 0.9,
|
| 231 |
+
"top_k": 30,
|
| 232 |
+
"repeat_penalty": 1.15,
|
| 233 |
+
},
|
| 234 |
+
},
|
| 235 |
}
|
| 236 |
|
| 237 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
|
|
| 286 |
n_gpu_layers=0, # CPU only
|
| 287 |
verbose=False,
|
| 288 |
seed=1337,
|
| 289 |
+
type_v=2,  # NOTE(review): llama-cpp-python kwarg is `type_v`, not `v_type` — unknown kwargs are silently ignored
|
| 290 |
+
type_k=2,  # NOTE(review): kwarg is `type_k`, not `k_type`; 2 = GGML_TYPE_Q4_0 KV cache — confirm intent
|
| 291 |
)
|
| 292 |
|
| 293 |
current_model_key = model_key
|