aadya1762 committed on
Commit
5ca1c38
·
1 Parent(s): 309f62f

Use 4-bit quantized models for faster inference

Browse files
Files changed (1) hide show
  1. gemmademo/_model.py +8 -8
gemmademo/_model.py CHANGED
@@ -23,29 +23,29 @@ class LlamaCppGemmaModel:
23
  AVAILABLE_MODELS: Dict[str, Dict] = {
24
  "gemma-2b": {
25
  "model_path": "models/gemma-2b.gguf",
26
- "repo_id": "google/gemma-2b", # update to the actual repo id
27
- "filename": "gemma-2b.gguf", # update to the actual filename
28
  "description": "2B parameters, base model",
29
  "type": "base",
30
  },
31
  "gemma-2b-it": {
32
  "model_path": "models/gemma-2b-it.gguf",
33
- "repo_id": "google/gemma-2b-it", # update to the actual repo id
34
- "filename": "gemma-2b-it.gguf", # update to the actual filename
35
  "description": "2B parameters, instruction-tuned",
36
  "type": "instruct",
37
  },
38
  "gemma-7b-it": {
39
  "model_path": "models/gemma-7b-it.gguf",
40
- "repo_id": "google/gemma-7b-it", # update to the actual repo id
41
- "filename": "gemma-7b-it.gguf", # update to the actual filename
42
  "description": "7B parameters, instruction-tuned",
43
  "type": "instruct",
44
  },
45
  "gemma-7b-gguf": {
46
  "model_path": "models/gemma-7b.gguf",
47
- "repo_id": "google/gemma-7b-GGUF", # repository for the GGUF model
48
- "filename": "gemma-7b.gguf", # updated filename for GGUF model
49
  "description": "7B parameters in GGUF format",
50
  "type": "base",
51
  },
 
23
  AVAILABLE_MODELS: Dict[str, Dict] = {
24
  "gemma-2b": {
25
  "model_path": "models/gemma-2b.gguf",
26
+ "repo_id": "rahuldshetty/gemma-2b-gguf-quantized", # update to the actual repo id
27
+ "filename": "gemma-2b-Q4_K_M.gguf", # update to the actual filename
28
  "description": "2B parameters, base model",
29
  "type": "base",
30
  },
31
  "gemma-2b-it": {
32
  "model_path": "models/gemma-2b-it.gguf",
33
+ "repo_id": "MaziyarPanahi/gemma-2b-it-GGUF", # update to the actual repo id
34
+ "filename": "gemma-2b-it.Q4_K_M.gguf", # update to the actual filename
35
  "description": "2B parameters, instruction-tuned",
36
  "type": "instruct",
37
  },
38
  "gemma-7b-it": {
39
  "model_path": "models/gemma-7b-it.gguf",
40
+ "repo_id": "MaziyarPanahi/gemma-7b-GGUF", # update to the actual repo id
41
+ "filename": "gemma-7b.Q4_K_M.gguf", # update to the actual filename
42
  "description": "7B parameters, instruction-tuned",
43
  "type": "instruct",
44
  },
45
  "gemma-7b-gguf": {
46
  "model_path": "models/gemma-7b.gguf",
47
+ "repo_id": "rahuldshetty/gemma-7b-it-gguf-quantized", # repository for the GGUF model
48
+ "filename": "gemma-7b-it-Q4_K_M.gguf", # updated filename for GGUF model
49
  "description": "7B parameters in GGUF format",
50
  "type": "base",
51
  },