Luigi committed on
Commit
70126c5
·
1 Parent(s): cc9a1a9

feat: Add 4 new models (1B-3B range) with various quantizations

Browse files

- Granite 3.1 1B-A400M (Q8_0, 1.18 GB) - MoE architecture
- Granite 3.3 2B (Q4_K_M, 1.55 GB) - IBM official
- Youtu-LLM 2B (Q8_0, 2.09 GB) - Tencent, toggle reasoning
- Granite 3.1 3B-A800M (Q4_K_M, 2.02 GB) - MoE architecture

All models:
- Ordered by parameter count (1B → 2B → 2B → 3B)
- Under 4GB limit for HF Spaces
- Community-recommended inference settings

Files changed (1) hide show
  1. app.py +66 -10
app.py CHANGED
@@ -122,9 +122,9 @@ AVAILABLE_MODELS = {
122
  },
123
  },
124
  "qwen3_600m_q4": {
125
- "name": "Qwen3 0.6B Q4 (Default)",
126
  "repo_id": "unsloth/Qwen3-0.6B-GGUF",
127
- "filename": "*Q4_K_M.gguf",
128
  "max_context": 32768,
129
  "default_temperature": 0.6,
130
  "supports_toggle": True,
@@ -132,7 +132,21 @@ AVAILABLE_MODELS = {
132
  "temperature": 0.6,
133
  "top_p": 0.95,
134
  "top_k": 20,
135
- "repeat_penalty": 1.05,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  },
137
  },
138
  "falcon_h1_1.5b_q4": {
@@ -149,9 +163,9 @@ AVAILABLE_MODELS = {
149
  },
150
  },
151
  "qwen3_1.7b_q4": {
152
- "name": "Qwen3 1.7B Q4",
153
  "repo_id": "unsloth/Qwen3-1.7B-GGUF",
154
- "filename": "*Q4_K_M.gguf",
155
  "max_context": 32768,
156
  "default_temperature": 0.6,
157
  "supports_toggle": True,
@@ -159,18 +173,60 @@ AVAILABLE_MODELS = {
159
  "temperature": 0.6,
160
  "top_p": 0.95,
161
  "top_k": 20,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  "repeat_penalty": 1.05,
163
  },
164
  },
165
  "lfm2_2_6b_transcript": {
166
- "name": "LFM2 2.6B Transcript",
167
- "repo_id": "mradermacher/LFM2-2.6B-Transcript-GGUF",
168
- "filename": "*Q4_K_M.gguf",
169
- "max_context": 32768,
170
  "default_temperature": 0.6,
171
  "supports_toggle": False,
172
  "inference_settings": {
173
- "temperature": 0.3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  "top_p": 0.9,
175
  "top_k": 40,
176
  "repeat_penalty": 1.1,
 
122
  },
123
  },
124
  "qwen3_600m_q4": {
125
+ "name": "Qwen3 0.6B Q4 (32K Context)",
126
  "repo_id": "unsloth/Qwen3-0.6B-GGUF",
127
+ "filename": "*Q4_0.gguf",
128
  "max_context": 32768,
129
  "default_temperature": 0.6,
130
  "supports_toggle": True,
 
132
  "temperature": 0.6,
133
  "top_p": 0.95,
134
  "top_k": 20,
135
+ "repeat_penalty": 1.0,
136
+ },
137
+ },
138
+ "granite_3_1_1b_q8": {
139
+ "name": "Granite 3.1 1B-A400M Instruct (128K Context)",
140
+ "repo_id": "bartowski/granite-3.1-1b-a400m-instruct-GGUF",
141
+ "filename": "*Q8_0.gguf",
142
+ "max_context": 131072,
143
+ "default_temperature": 0.7,
144
+ "supports_toggle": False,
145
+ "inference_settings": {
146
+ "temperature": 0.7,
147
+ "top_p": 0.9,
148
+ "top_k": 40,
149
+ "repeat_penalty": 1.1,
150
  },
151
  },
152
  "falcon_h1_1.5b_q4": {
 
163
  },
164
  },
165
  "qwen3_1.7b_q4": {
166
+ "name": "Qwen3 1.7B Q4 (32K Context)",
167
  "repo_id": "unsloth/Qwen3-1.7B-GGUF",
168
+ "filename": "*Q4_0.gguf",
169
  "max_context": 32768,
170
  "default_temperature": 0.6,
171
  "supports_toggle": True,
 
173
  "temperature": 0.6,
174
  "top_p": 0.95,
175
  "top_k": 20,
176
+ "repeat_penalty": 1.0,
177
+ },
178
+ },
179
+ "granite_3_3_2b_q4": {
180
+ "name": "Granite 3.3 2B Instruct (128K Context)",
181
+ "repo_id": "ibm-granite/granite-3.3-2b-instruct-GGUF",
182
+ "filename": "*Q4_K_M.gguf",
183
+ "max_context": 131072,
184
+ "default_temperature": 0.7,
185
+ "supports_toggle": False,
186
+ "inference_settings": {
187
+ "temperature": 0.7,
188
+ "top_p": 0.9,
189
+ "top_k": 40,
190
+ "repeat_penalty": 1.1,
191
+ },
192
+ },
193
+ "youtu_llm_2b_q8": {
194
+ "name": "Youtu-LLM 2B (128K Context)",
195
+ "repo_id": "tencent/Youtu-LLM-2B-GGUF",
196
+ "filename": "*Q8_0.gguf",
197
+ "max_context": 131072,
198
+ "default_temperature": 0.7,
199
+ "supports_toggle": True,
200
+ "inference_settings": {
201
+ "temperature": 0.7,
202
+ "top_p": 0.8,
203
+ "top_k": 20,
204
  "repeat_penalty": 1.05,
205
  },
206
  },
207
  "lfm2_2_6b_transcript": {
208
+ "name": "LFM2 2.6B Transcript (8K Context)",
209
+ "repo_id": "LiquidAI/LFM-2.6B-Transcript-GGUF",
210
+ "filename": "*Q4_0.gguf",
211
+ "max_context": 8192,
212
  "default_temperature": 0.6,
213
  "supports_toggle": False,
214
  "inference_settings": {
215
+ "temperature": 0.6,
216
+ "top_p": 0.95,
217
+ "top_k": 20,
218
+ "repeat_penalty": 1.1,
219
+ },
220
+ },
221
+ "granite_3_1_3b_q4": {
222
+ "name": "Granite 3.1 3B-A800M Instruct (128K Context)",
223
+ "repo_id": "bartowski/granite-3.1-3b-a800m-instruct-GGUF",
224
+ "filename": "*Q4_K_M.gguf",
225
+ "max_context": 131072,
226
+ "default_temperature": 0.7,
227
+ "supports_toggle": False,
228
+ "inference_settings": {
229
+ "temperature": 0.7,
230
  "top_p": 0.9,
231
  "top_k": 40,
232
  "repeat_penalty": 1.1,