fix
gemmademo/_model.py  +5 -7
@@ -51,9 +51,7 @@ class LlamaCppGemmaModel:
         },
     }
 
-    def __init__(
-        self, name: str = "gemma-2b", max_tokens: int = 512, temperature: float = 0.7
-    ):
+    def __init__(self, name: str = "gemma-2b",):
         """
         Initialize the model instance.
 
@@ -62,8 +60,6 @@ class LlamaCppGemmaModel:
         """
         self.name = name
         self.model = None  # Instance of Llama from llama.cpp
-        self.max_tokens = max_tokens
-        self.temperature = temperature
 
     def load_model(self, n_ctx: int = 2048, n_gpu_layers: int = 0):
         """
@@ -118,6 +114,8 @@ class LlamaCppGemmaModel:
     def generate_response(
         self,
         prompt: str,
+        max_tokens: int = 512,
+        temperature: float = 0.7,
     ) -> str:
         """
         Generate a response using the llama.cpp model.
@@ -137,8 +135,8 @@ class LlamaCppGemmaModel:
         # Call the llama.cpp model with the provided parameters.
         response = self.model(
             prompt,
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
+            max_tokens=max_tokens,
+            temperature=temperature,
         )
         generated_text = response["choices"][0]["text"]
         return generated_text.strip()
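Net effect of the commit: max_tokens and temperature move from constructor state to per-call keyword arguments of generate_response, so a single loaded model can serve requests with different sampling settings. A minimal usage sketch under that reading (the import path is inferred from the file name gemmademo/_model.py; the prompt text and parameter values below are illustrative only):

from gemmademo._model import LlamaCppGemmaModel

# Construct the wrapper; sampling settings are no longer passed here.
model = LlamaCppGemmaModel(name="gemma-2b")

# Load the underlying llama.cpp model (CPU-only with these defaults).
model.load_model(n_ctx=2048, n_gpu_layers=0)

# Each call can now choose its own max_tokens / temperature.
reply = model.generate_response(
    "Explain what a context window is.",
    max_tokens=256,
    temperature=0.2,
)
print(reply)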