Piotr Zalewski committed on
copied READMEs be like
README.md CHANGED

@@ -62,7 +62,7 @@ from transformers import pipeline
 
 pipe = pipeline(
     "text-generation",
-    model="google/gemma-2-
+    model="google/gemma-2-27b-it",
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda",  # replace with "mps" to run on a Mac device
 )
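
For reference, a minimal sketch of the corrected pipeline snippet in a runnable form; the chat-style `messages` input, the generation call, and the way the assistant reply is pulled out of `outputs` are assumptions based on the standard `transformers` text-generation pipeline, not part of this hunk:

```python
import torch
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="google/gemma-2-27b-it",
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",  # replace with "mps" to run on a Mac device
)

# chat-format input; the pipeline applies the model's chat template automatically
messages = [{"role": "user", "content": "Who are you? Please, answer in pirate-speak."}]

outputs = pipe(messages, max_new_tokens=256)
assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
print(assistant_response)
```
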
@@ -84,9 +84,9 @@ print(assistant_response)
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     device_map="auto",
     torch_dtype=torch.bfloat16,
 )
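
A sketch of how the corrected single-GPU snippet likely continues; the example prompt and the `generate`/`decode` calls are assumptions, everything else mirrors the hunk:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# example prompt (an assumption, not taken from the diff)
input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

outputs = model.generate(**input_ids, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))
```
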
@@ -122,9 +122,9 @@ You can also use `float32` if you skip the dtype, but no precision increase will
 # pip install accelerate
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     device_map="auto",
 )
 
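
The hunk header notes that skipping the dtype falls back to `float32` with no precision gain; a small sketch to illustrate the point, where the `model.dtype` check is an assumption about how one would verify it:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    device_map="auto",
)

# without an explicit torch_dtype the bfloat16 checkpoint is upcast to float32,
# which roughly doubles memory use without adding precision
print(model.dtype)  # torch.float32
```
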
@@ -142,7 +142,7 @@ for running Gemma 2 through a command line interface, or CLI. Follow the [instal
 for getting started, then launch the CLI through the following command:
 
 ```shell
-local-gemma --model
+local-gemma --model 27b --preset speed
 ```
 
 #### Quantized Versions through `bitsandbytes`
@@ -158,9 +158,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     quantization_config=quantization_config,
 )
 
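
A sketch of the 8-bit snippet taken to a runnable end-to-end form; the memory-footprint check and the generation call are assumptions added for illustration:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    quantization_config=quantization_config,
)

# int8 weights roughly halve the memory needed versus bfloat16
print(f"{model.get_memory_footprint() / 1e9:.1f} GB")

inputs = tokenizer("Write me a poem about Machine Learning.", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))
```
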
@@ -183,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     quantization_config=quantization_config,
 )
 
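
The 4-bit variant only changes the quantization flag; a brief sketch, where `bnb_4bit_compute_dtype` is an optional bitsandbytes setting added here as an assumption rather than something shown in the diff:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit weights, with matmuls computed in bfloat16 (the compute dtype is an assumption)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    quantization_config=quantization_config,
)
```
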
@@ -220,8 +220,8 @@ import torch
 torch.set_float32_matmul_precision("high")
 
 # load the model + tokenizer
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
-model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
+model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-27b-it", torch_dtype=torch.bfloat16)
 model.to("cuda")
 
 # apply the torch compile transformation
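
The hunk stops right at the "# apply the torch compile transformation" comment; a hedged sketch of how that step is usually completed, where the `reduce-overhead` mode, `fullgraph=True`, and the example prompt are assumptions drawn from the common transformers compile recipe rather than from this diff:

```python
import torch
from transformers import AutoTokenizer, Gemma2ForCausalLM

torch.set_float32_matmul_precision("high")

# load the model + tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-27b-it", torch_dtype=torch.bfloat16)
model.to("cuda")

# apply the torch compile transformation to the forward pass
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

# the first few generations trigger compilation and are slow; later calls reuse the compiled graph
inputs = tokenizer("The theory of special relativity states ", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))
```
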
@@ -271,7 +271,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "google/gemma-2-
+model_id = "google/gemma-2-27b-it"
 dtype = torch.bfloat16
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
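
Finally, a sketch of how the `model_id`/`dtype` snippet plausibly continues with the model's chat template; the `apply_chat_template` usage and the generation settings are assumptions for illustration:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "google/gemma-2-27b-it"
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=dtype)

# format a single-turn conversation with the model's built-in chat template
chat = [{"role": "user", "content": "Write a hello world program"}]
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

# the template already adds <bos>, so skip the tokenizer's special tokens
inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=150)
print(tokenizer.decode(outputs[0]))
```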