Spaces:
Sleeping
Sleeping
Remove unused model imports and comments
Browse files
- app.py +1 -1
- gemmademo/_model.py +6 -7
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from gemmademo import LlamaCppGemmaModel, GradioChat
|
| 3 |
|
| 4 |
def main():
|
| 5 |
# Model and task selection
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from gemmademo import LlamaCppGemmaModel, GradioChat
|
| 3 |
|
| 4 |
def main():
|
| 5 |
# Model and task selection
|
gemmademo/_model.py
CHANGED
|
@@ -23,25 +23,24 @@ class LlamaCppGemmaModel:
|
|
| 23 |
_model_cache = {}
|
| 24 |
|
| 25 |
AVAILABLE_MODELS: Dict[str, Dict] = {
|
| 26 |
-
# Does the job well.
|
| 27 |
"gemma-3b": {
|
| 28 |
"model_path": "models/gemma-3-1b-it-Q5_K_M.gguf",
|
| 29 |
-
"repo_id": "bartowski/google_gemma-3-1b-it-GGUF",
|
| 30 |
"filename": "google_gemma-3-1b-it-Q5_K_M.gguf", # Better quantization
|
| 31 |
"description": "3B parameters, instruction-tuned (Q5_K_M)",
|
| 32 |
"type": "instruct",
|
| 33 |
},
|
| 34 |
"gemma-2b": {
|
| 35 |
"model_path": "models/gemma-2b-it.gguf",
|
| 36 |
-
"repo_id": "MaziyarPanahi/gemma-2b-it-GGUF",
|
| 37 |
-
"filename": "gemma-2b-it.Q4_K_M.gguf",
|
| 38 |
"description": "2B parameters, instruction-tuned",
|
| 39 |
"type": "instruct",
|
| 40 |
},
|
| 41 |
"gemma-7b": {
|
| 42 |
"model_path": "models/gemma-7b-it.gguf",
|
| 43 |
-
"repo_id": "rahuldshetty/gemma-7b-it-gguf-quantized",
|
| 44 |
-
"filename": "gemma-7b-it-Q4_K_M.gguf",
|
| 45 |
"description": "7B parameters in GGUF format",
|
| 46 |
"type": "base",
|
| 47 |
},
|
|
@@ -109,7 +108,7 @@ class LlamaCppGemmaModel:
|
|
| 109 |
n_ctx=n_ctx,
|
| 110 |
n_gpu_layers=n_gpu_layers,
|
| 111 |
n_batch=8,
|
| 112 |
-
verbose=False,
|
| 113 |
)
|
| 114 |
|
| 115 |
# Cache the model for future use
|
|
|
|
| 23 |
_model_cache = {}
|
| 24 |
|
| 25 |
AVAILABLE_MODELS: Dict[str, Dict] = {
|
|
|
|
| 26 |
"gemma-3b": {
|
| 27 |
"model_path": "models/gemma-3-1b-it-Q5_K_M.gguf",
|
| 28 |
+
"repo_id": "bartowski/google_gemma-3-1b-it-GGUF",
|
| 29 |
"filename": "google_gemma-3-1b-it-Q5_K_M.gguf", # Better quantization
|
| 30 |
"description": "3B parameters, instruction-tuned (Q5_K_M)",
|
| 31 |
"type": "instruct",
|
| 32 |
},
|
| 33 |
"gemma-2b": {
|
| 34 |
"model_path": "models/gemma-2b-it.gguf",
|
| 35 |
+
"repo_id": "MaziyarPanahi/gemma-2b-it-GGUF",
|
| 36 |
+
"filename": "gemma-2b-it.Q4_K_M.gguf",
|
| 37 |
"description": "2B parameters, instruction-tuned",
|
| 38 |
"type": "instruct",
|
| 39 |
},
|
| 40 |
"gemma-7b": {
|
| 41 |
"model_path": "models/gemma-7b-it.gguf",
|
| 42 |
+
"repo_id": "rahuldshetty/gemma-7b-it-gguf-quantized",
|
| 43 |
+
"filename": "gemma-7b-it-Q4_K_M.gguf",
|
| 44 |
"description": "7B parameters in GGUF format",
|
| 45 |
"type": "base",
|
| 46 |
},
|
|
|
|
| 108 |
n_ctx=n_ctx,
|
| 109 |
n_gpu_layers=n_gpu_layers,
|
| 110 |
n_batch=8,
|
| 111 |
+
verbose=False,
|
| 112 |
)
|
| 113 |
|
| 114 |
# Cache the model for future use
|