aadya1762 committed
Commit 8200fa2 · Parent(s): f318a82
Files changed (1)
  1. gemmademo/_model.py +5 -7
gemmademo/_model.py CHANGED
@@ -51,9 +51,7 @@ class LlamaCppGemmaModel:
         },
     }
 
-    def __init__(
-        self, name: str = "gemma-2b", max_tokens: int = 512, temperature: float = 0.7
-    ):
+    def __init__(self, name: str = "gemma-2b"):
         """
         Initialize the model instance.
 
@@ -62,8 +60,6 @@ class LlamaCppGemmaModel:
         """
         self.name = name
         self.model = None  # Instance of Llama from llama.cpp
-        self.max_tokens = max_tokens
-        self.temperature = temperature
 
     def load_model(self, n_ctx: int = 2048, n_gpu_layers: int = 0):
         """
@@ -118,6 +114,8 @@ class LlamaCppGemmaModel:
     def generate_response(
         self,
         prompt: str,
+        max_tokens: int = 512,
+        temperature: float = 0.7,
     ) -> str:
         """
         Generate a response using the llama.cpp model.
@@ -137,8 +135,8 @@ class LlamaCppGemmaModel:
         # Call the llama.cpp model with the provided parameters.
         response = self.model(
             prompt,
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
+            max_tokens=max_tokens,
+            temperature=temperature,
         )
         generated_text = response["choices"][0]["text"]
         return generated_text.strip()
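
For context, a minimal usage sketch of the API after this commit: the import path, class name, and method signatures are taken from the diff above, while the model name, prompts, and the assumption that load_model() resolves weights from the configured name are illustrative only.

# Usage sketch. gemmademo._model and LlamaCppGemmaModel come from the diff;
# the prompts and parameter values below are assumed examples, not the demo's
# actual entry point.
from gemmademo._model import LlamaCppGemmaModel

model = LlamaCppGemmaModel(name="gemma-2b")
model.load_model(n_ctx=2048, n_gpu_layers=0)  # defaults shown in the diff; 0 GPU layers = CPU-only

# Generation settings are now supplied per call rather than at construction,
# so one loaded model can serve requests with different sampling settings.
concise = model.generate_response("What is Gemma?", max_tokens=64, temperature=0.2)
creative = model.generate_response("Write a haiku about llama.cpp.", max_tokens=128, temperature=0.9)
print(concise)
print(creative)

Moving max_tokens and temperature out of __init__ means the model no longer has to be re-instantiated (and reloaded) just to change sampling settings, at the cost of slightly longer call sites.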