Piotr Zalewski committed on
copied READMEs be like
README.md CHANGED

@@ -62,7 +62,7 @@ from transformers import pipeline
 
 pipe = pipeline(
     "text-generation",
-    model="google/gemma-2-
+    model="google/gemma-2-27b-it",
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda",  # replace with "mps" to run on a Mac device
 )
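
For reference, a minimal sketch of the corrected pipeline snippet in a runnable form; the chat-style `messages` input, the generation call, and the way the assistant reply is pulled out of `outputs` are assumptions based on the standard `transformers` text-generation pipeline, not part of this hunk:

```python
import torch
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="google/gemma-2-27b-it",
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",  # replace with "mps" to run on a Mac device
)

# chat-format input; the pipeline applies the model's chat template automatically
messages = [{"role": "user", "content": "Who are you? Please, answer in pirate-speak."}]

outputs = pipe(messages, max_new_tokens=256)
assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
print(assistant_response)
```
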
@@ -84,9 +84,9 @@ print(assistant_response)
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     device_map="auto",
     torch_dtype=torch.bfloat16,
 )
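
A sketch of how the corrected single-GPU snippet likely continues; the example prompt and the `generate`/`decode` calls are assumptions, everything else mirrors the hunk:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# example prompt (an assumption, not taken from the diff)
input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

outputs = model.generate(**input_ids, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))
```
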
@@ -122,9 +122,9 @@ You can also use `float32` if you skip the dtype, but no precision increase will
 # pip install accelerate
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     device_map="auto",
 )
 
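
The hunk header notes that skipping the dtype falls back to `float32` with no precision gain; a small sketch to illustrate the point, where the `model.dtype` check is an assumption about how one would verify it:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    device_map="auto",
)

# without an explicit torch_dtype the bfloat16 checkpoint is upcast to float32,
# which roughly doubles memory use without adding precision
print(model.dtype)  # torch.float32
```
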
@@ -142,7 +142,7 @@ for running Gemma 2 through a command line interface, or CLI. Follow the [instal
 for getting started, then launch the CLI through the following command:
 
 ```shell
-local-gemma --model
+local-gemma --model 27b --preset speed
 ```
 
 #### Quantized Versions through `bitsandbytes`
@@ -158,9 +158,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     quantization_config=quantization_config,
 )
 
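
A sketch of the 8-bit snippet taken to a runnable end-to-end form; the memory-footprint check and the generation call are assumptions added for illustration:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    quantization_config=quantization_config,
)

# int8 weights roughly halve the memory needed versus bfloat16
print(f"{model.get_memory_footprint() / 1e9:.1f} GB")

inputs = tokenizer("Write me a poem about Machine Learning.", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))
```
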
@@ -183,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-27b-it",
     quantization_config=quantization_config,
 )
 
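
The 4-bit variant only changes the quantization flag; a brief sketch, where `bnb_4bit_compute_dtype` is an optional bitsandbytes setting added here as an assumption rather than something shown in the diff:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit weights, with matmuls computed in bfloat16 (the compute dtype is an assumption)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    quantization_config=quantization_config,
)
```
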
@@ -220,8 +220,8 @@ import torch
 torch.set_float32_matmul_precision("high")
 
 # load the model + tokenizer
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
-model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
+model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-27b-it", torch_dtype=torch.bfloat16)
 model.to("cuda")
 
 # apply the torch compile transformation
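
The hunk stops right at the "# apply the torch compile transformation" comment; a hedged sketch of how that step is usually completed, where the `reduce-overhead` mode, `fullgraph=True`, and the example prompt are assumptions drawn from the common transformers compile recipe rather than from this diff:

```python
import torch
from transformers import AutoTokenizer, Gemma2ForCausalLM

torch.set_float32_matmul_precision("high")

# load the model + tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-27b-it", torch_dtype=torch.bfloat16)
model.to("cuda")

# apply the torch compile transformation to the forward pass
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

# the first few generations trigger compilation and are slow; later calls reuse the compiled graph
inputs = tokenizer("The theory of special relativity states ", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0]))
```
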
@@ -271,7 +271,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "google/gemma-2-
+model_id = "google/gemma-2-27b-it"
 dtype = torch.bfloat16
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
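
Finally, a sketch of how the `model_id`/`dtype` snippet plausibly continues with the model's chat template; the `apply_chat_template` usage and the generation settings are assumptions for illustration:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "google/gemma-2-27b-it"
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=dtype)

# format a single-turn conversation with the model's built-in chat template
chat = [{"role": "user", "content": "Write a hello world program"}]
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

# the template already adds <bos>, so skip the tokenizer's special tokens
inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=150)
print(tokenizer.decode(outputs[0]))
```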