Update README.md
README.md CHANGED
````diff
@@ -38,7 +38,7 @@ pip install transformers
 ```python
 # pip install git+https://github.com/huggingface/transformers.git # TODO: merge PR to main
 from transformers import AutoModelForCausalLM, AutoTokenizer
-checkpoint = "HuggingFaceTB/SmolLM-
+checkpoint = "HuggingFaceTB/SmolLM-1.7B"
 device = "cuda" # for GPU usage or "cpu" for CPU usage
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
````
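For reference, a minimal end-to-end sketch of what the updated snippet runs as, assuming the standard encode/generate/decode pattern: the prompt string is borrowed from the quantization hunk further down, and `print(tokenizer.decode(outputs[0]))` appears as a context line in the next hunk, so only the intermediate `model.generate` call is inferred.

```python
# Sketch: full basic-usage flow for the updated checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "HuggingFaceTB/SmolLM-1.7B"
device = "cuda"  # for GPU usage or "cpu" for CPU usage

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

# Prompt reused from the quantization example below.
inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to(device)
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```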
```diff
@@ -53,7 +53,7 @@ print(tokenizer.decode(outputs[0]))
 # pip install accelerate
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-checkpoint = "HuggingFaceTB/SmolLM-
+checkpoint = "HuggingFaceTB/SmolLM-1.7B"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 # for fp16 use `torch_dtype=torch.float16` instead
 model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto", torch_dtype=torch.bfloat16)
```
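The next hunk carries `Memory footprint: 3422.76 MB` as diff context, so the README evidently reports the loaded model's memory use. A sketch of how that figure can be reproduced: `get_memory_footprint()` is an existing `transformers` method that returns bytes, but the exact print formatting here is an assumption.

```python
# Sketch: load in bfloat16 (as in the hunk above) and report memory use.
import torch
from transformers import AutoModelForCausalLM

checkpoint = "HuggingFaceTB/SmolLM-1.7B"
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, device_map="auto", torch_dtype=torch.bfloat16
)
# get_memory_footprint() returns bytes; the MB conversion/format is assumed.
print(f"Memory footprint: {model.get_memory_footprint() / 1e6:.2f} MB")
```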
```diff
@@ -74,7 +74,7 @@ Memory footprint: 3422.76 MB
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # to use 4bit use `load_in_4bit=True` instead
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-checkpoint = "HuggingFaceTB/SmolLM-
+checkpoint = "HuggingFaceTB/SmolLM-1.7B"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config)
 inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to("cuda")
```
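The `load_in_4bit=True` comment in this hunk points at a variant the diff doesn't show. A sketch of that 4-bit path, assuming everything but the `BitsAndBytesConfig` flag stays the same; like the 8-bit version it requires `bitsandbytes` and a CUDA device, since the inputs are moved to `"cuda"`.

```python
# Sketch: 4-bit variant of the quantization example above.
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_4bit=True)  # 8bit -> 4bit
checkpoint = "HuggingFaceTB/SmolLM-1.7B"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config)

inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to("cuda")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```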