Spaces:
Sleeping
Sleeping
Use 4-bit quantized models for faster inference
Browse files- gemmademo/_model.py +8 -8
gemmademo/_model.py
CHANGED
|
@@ -23,29 +23,29 @@ class LlamaCppGemmaModel:
|
|
| 23 |
AVAILABLE_MODELS: Dict[str, Dict] = {
|
| 24 |
"gemma-2b": {
|
| 25 |
"model_path": "models/gemma-2b.gguf",
|
| 26 |
-
"repo_id": "
|
| 27 |
-
"filename": "gemma-2b.gguf", # update to the actual filename
|
| 28 |
"description": "2B parameters, base model",
|
| 29 |
"type": "base",
|
| 30 |
},
|
| 31 |
"gemma-2b-it": {
|
| 32 |
"model_path": "models/gemma-2b-it.gguf",
|
| 33 |
-
"repo_id": "
|
| 34 |
-
"filename": "gemma-2b-it.gguf", # update to the actual filename
|
| 35 |
"description": "2B parameters, instruction-tuned",
|
| 36 |
"type": "instruct",
|
| 37 |
},
|
| 38 |
"gemma-7b-it": {
|
| 39 |
"model_path": "models/gemma-7b-it.gguf",
|
| 40 |
-
"repo_id": "
|
| 41 |
-
"filename": "gemma-7b-it.gguf", # update to the actual filename
|
| 42 |
"description": "7B parameters, instruction-tuned",
|
| 43 |
"type": "instruct",
|
| 44 |
},
|
| 45 |
"gemma-7b-gguf": {
|
| 46 |
"model_path": "models/gemma-7b.gguf",
|
| 47 |
-
"repo_id": "
|
| 48 |
-
"filename": "gemma-7b.gguf", # updated filename for GGUF model
|
| 49 |
"description": "7B parameters in GGUF format",
|
| 50 |
"type": "base",
|
| 51 |
},
|
|
|
|
| 23 |
AVAILABLE_MODELS: Dict[str, Dict] = {
|
| 24 |
"gemma-2b": {
|
| 25 |
"model_path": "models/gemma-2b.gguf",
|
| 26 |
+
"repo_id": "rahuldshetty/gemma-2b-gguf-quantized", # update to the actual repo id
|
| 27 |
+
"filename": "gemma-2b-Q4_K_M.gguf", # update to the actual filename
|
| 28 |
"description": "2B parameters, base model",
|
| 29 |
"type": "base",
|
| 30 |
},
|
| 31 |
"gemma-2b-it": {
|
| 32 |
"model_path": "models/gemma-2b-it.gguf",
|
| 33 |
+
"repo_id": "MaziyarPanahi/gemma-2b-it-GGUF", # update to the actual repo id
|
| 34 |
+
"filename": "gemma-2b-it.Q4_K_M.gguf", # update to the actual filename
|
| 35 |
"description": "2B parameters, instruction-tuned",
|
| 36 |
"type": "instruct",
|
| 37 |
},
|
| 38 |
"gemma-7b-it": {
|
| 39 |
"model_path": "models/gemma-7b-it.gguf",
|
| 40 |
+
"repo_id": "MaziyarPanahi/gemma-7b-GGUF", # update to the actual repo id
|
| 41 |
+
"filename": "gemma-7b.Q4_K_M.gguf", # update to the actual filename
|
| 42 |
"description": "7B parameters, instruction-tuned",
|
| 43 |
"type": "instruct",
|
| 44 |
},
|
| 45 |
"gemma-7b-gguf": {
|
| 46 |
"model_path": "models/gemma-7b.gguf",
|
| 47 |
+
"repo_id": "rahuldshetty/gemma-7b-it-gguf-quantized", # repository for the GGUF model
|
| 48 |
+
"filename": "gemma-7b-it-Q4_K_M.gguf", # updated filename for GGUF model
|
| 49 |
"description": "7B parameters in GGUF format",
|
| 50 |
"type": "base",
|
| 51 |
},
|