Spaces:
Running
Running
feat: Add two non-reasoning model variants
- Added ERNIE-4.5-21B-A3B-PT (21B, non-thinking) from unsloth
- Standard inference settings (temp 0.7, no thinking mode)
- Added Qwen3-30B-A3B-Instruct (30B, non-thinking) from unsloth
- Standard instruct settings (temp 0.6, no thinking mode)
- Both use TQ1_0 quantization
- Models ordered by parameter count (21B and 30B sections now have variants)
app.py
CHANGED
|
@@ -192,15 +192,29 @@ AVAILABLE_MODELS = {
|
|
| 192 |
},
|
| 193 |
"granite4_tiny_q3": {
|
| 194 |
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 195 |
-
"repo_id": "
|
| 196 |
"filename": "*Q3_K_M.gguf",
|
| 197 |
"max_context": 131072,
|
| 198 |
-
"default_temperature": 0.
|
| 199 |
"supports_toggle": False,
|
| 200 |
"inference_settings": {
|
| 201 |
-
"temperature": 0.
|
| 202 |
-
"top_p":
|
| 203 |
-
"top_k":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
"repeat_penalty": 1.1,
|
| 205 |
},
|
| 206 |
},
|
|
@@ -246,6 +260,20 @@ AVAILABLE_MODELS = {
|
|
| 246 |
"repeat_penalty": 1.0,
|
| 247 |
},
|
| 248 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
}
|
| 250 |
|
| 251 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
|
|
| 192 |
},
|
| 193 |
"granite4_tiny_q3": {
|
| 194 |
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 195 |
+
"repo_id": "ibm-research/granite-4.0-Tiny-7B-Instruct-GGUF",
|
| 196 |
"filename": "*Q3_K_M.gguf",
|
| 197 |
"max_context": 131072,
|
| 198 |
+
"default_temperature": 0.7,
|
| 199 |
"supports_toggle": False,
|
| 200 |
"inference_settings": {
|
| 201 |
+
"temperature": 0.7,
|
| 202 |
+
"top_p": 0.9,
|
| 203 |
+
"top_k": 40,
|
| 204 |
+
"repeat_penalty": 1.1,
|
| 205 |
+
},
|
| 206 |
+
},
|
| 207 |
+
"ernie_21b_pt_q1": {
|
| 208 |
+
"name": "ERNIE-4.5 21B PT (128K Context)",
|
| 209 |
+
"repo_id": "unsloth/ERNIE-4.5-21B-A3B-PT-GGUF",
|
| 210 |
+
"filename": "*TQ1_0.gguf",
|
| 211 |
+
"max_context": 131072,
|
| 212 |
+
"default_temperature": 0.7,
|
| 213 |
+
"supports_toggle": False,
|
| 214 |
+
"inference_settings": {
|
| 215 |
+
"temperature": 0.7,
|
| 216 |
+
"top_p": 0.9,
|
| 217 |
+
"top_k": 40,
|
| 218 |
"repeat_penalty": 1.1,
|
| 219 |
},
|
| 220 |
},
|
|
|
|
| 260 |
"repeat_penalty": 1.0,
|
| 261 |
},
|
| 262 |
},
|
| 263 |
+
"qwen3_30b_instruct_q1": {
|
| 264 |
+
"name": "Qwen3 30B Instruct (256K Context)",
|
| 265 |
+
"repo_id": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
|
| 266 |
+
"filename": "*TQ1_0.gguf",
|
| 267 |
+
"max_context": 262144,
|
| 268 |
+
"default_temperature": 0.6,
|
| 269 |
+
"supports_toggle": False,
|
| 270 |
+
"inference_settings": {
|
| 271 |
+
"temperature": 0.6,
|
| 272 |
+
"top_p": 0.95,
|
| 273 |
+
"top_k": 20,
|
| 274 |
+
"repeat_penalty": 1.0,
|
| 275 |
+
},
|
| 276 |
+
},
|
| 277 |
}
|
| 278 |
|
| 279 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|