aadya1762 committed on
Commit
0304bfe
·
1 Parent(s): 28295c6

Add Gemma3-1B Quantized Model

Browse files
Files changed (2) hide show
  1. gemmademo/_chat.py +1 -1
  2. gemmademo/_model.py +13 -3
gemmademo/_chat.py CHANGED
@@ -17,7 +17,7 @@ class GradioChat:
17
  def __init__(self, model_options: list[str], task_options: list[str]):
18
  self.model_options = model_options
19
  self.task_options = task_options
20
- self.current_model_name = "gemma-2b-it" # Default model
21
  self.current_task_name = "Question Answering" # Default task
22
 
23
  self.model = self._load_model(self.current_model_name)
 
17
  def __init__(self, model_options: list[str], task_options: list[str]):
18
  self.model_options = model_options
19
  self.task_options = task_options
20
+ self.current_model_name = "gemma-3b" # Default model
21
  self.current_task_name = "Question Answering" # Default task
22
 
23
  self.model = self._load_model(self.current_model_name)
gemmademo/_model.py CHANGED
@@ -20,6 +20,13 @@ class LlamaCppGemmaModel:
20
  """
21
 
22
  AVAILABLE_MODELS: Dict[str, Dict] = {
 
 
 
 
 
 
 
23
  "gemma-2b": {
24
  "model_path": "models/gemma-2b.gguf",
25
  "repo_id": "rahuldshetty/gemma-2b-gguf-quantized", # update to the actual repo id
@@ -50,7 +57,7 @@ class LlamaCppGemmaModel:
50
  },
51
  }
52
 
53
- def __init__(self, name: str = "gemma-2b"):
54
  """
55
  Initialize the model instance.
56
 
@@ -96,9 +103,12 @@ class LlamaCppGemmaModel:
96
  if downloaded_path != model_path:
97
  os.rename(downloaded_path, model_path)
98
 
 
 
99
  self.model = Llama(
100
  model_path=model_path,
101
- n_threads=os.cpu_count(),
 
102
  n_ctx=n_ctx,
103
  n_gpu_layers=n_gpu_layers,
104
  n_batch=8,
@@ -106,7 +116,7 @@ class LlamaCppGemmaModel:
106
  return self
107
 
108
  def generate_response(
109
- self, prompt: str, max_tokens: int = 512, temperature: float = 0.7
110
  ):
111
  """
112
  Generate a response using the llama.cpp model.
 
20
  """
21
 
22
  AVAILABLE_MODELS: Dict[str, Dict] = {
23
+ "gemma-3b": {
24
+ "model_path": "models/gemma3.gguf",
25
+ "repo_id": "unsloth/gemma-3-1b-it-GGUF", # update to the actual repo id
26
+ "filename": "gemma-3-1b-it-Q3_K_M.gguf",
27
+ "description": "1B parameters, instruction-tuned model",
28
+ "type": "base",
29
+ },
30
  "gemma-2b": {
31
  "model_path": "models/gemma-2b.gguf",
32
  "repo_id": "rahuldshetty/gemma-2b-gguf-quantized", # update to the actual repo id
 
57
  },
58
  }
59
 
60
+ def __init__(self, name: str = "gemma-3b"):
61
  """
62
  Initialize the model instance.
63
 
 
103
  if downloaded_path != model_path:
104
  os.rename(downloaded_path, model_path)
105
 
106
+ _threads = os.cpu_count()
107
+
108
  self.model = Llama(
109
  model_path=model_path,
110
+ n_threads=_threads,
111
+ n_threads_batch=_threads,
112
  n_ctx=n_ctx,
113
  n_gpu_layers=n_gpu_layers,
114
  n_batch=8,
 
116
  return self
117
 
118
  def generate_response(
119
+ self, prompt: str, max_tokens: int = 512, temperature: float = 0.1
120
  ):
121
  """
122
  Generate a response using the llama.cpp model.