Update README.md
Browse files
README.md
CHANGED
|
@@ -85,14 +85,17 @@ import hf_olmo
|
|
| 85 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 86 |
olmo = AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-Instruct")
|
| 87 |
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-7B-Instruct")
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
# optional verifying cuda
|
| 91 |
# inputs = {k: v.to('cuda') for k,v in inputs.items()}
|
| 92 |
# olmo = olmo.to('cuda')
|
| 93 |
-
response = olmo.generate(
|
| 94 |
print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
|
| 95 |
-
>> '
|
| 96 |
```
|
| 97 |
Alternatively, with the pipeline abstraction:
|
| 98 |
```python
|
|
@@ -100,8 +103,8 @@ import hf_olmo
|
|
| 100 |
|
| 101 |
from transformers import pipeline
|
| 102 |
olmo_pipe = pipeline("text-generation", model="allenai/OLMo-7B-Instruct")
|
| 103 |
-
print(olmo_pipe("
|
| 104 |
-
>> '
|
| 105 |
```
|
| 106 |
|
| 107 |
Or, you can make this slightly faster by quantizing the model, e.g. `AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-Instruct", torch_dtype=torch.float16, load_in_8bit=True)` (requires `bitsandbytes`).
|
|
|
|
| 85 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 86 |
olmo = AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-Instruct")
|
| 87 |
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-7B-Instruct")
|
| 88 |
+
chat = [
|
| 89 |
+
{ "role": "user", "content": "What is language modeling?" },
|
| 90 |
+
]
|
| 91 |
+
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
|
| 92 |
+
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
|
| 93 |
# optional verifying cuda
|
| 94 |
# inputs = {k: v.to('cuda') for k,v in inputs.items()}
|
| 95 |
# olmo = olmo.to('cuda')
|
| 96 |
+
response = olmo.generate(input_ids=inputs.to(olmo.device), max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
|
| 97 |
print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
|
| 98 |
+
>> '<|user|>\nWhat is language modeling?\n<|assistant|>\nLanguage modeling is a type of natural language processing (NLP) task or machine learning task that...'
|
| 99 |
```
|
| 100 |
Alternatively, with the pipeline abstraction:
|
| 101 |
```python
|
|
|
|
| 103 |
|
| 104 |
from transformers import pipeline
|
| 105 |
olmo_pipe = pipeline("text-generation", model="allenai/OLMo-7B-Instruct")
|
| 106 |
+
print(olmo_pipe("What is language modeling?"))
|
| 107 |
+
>> [{'generated_text': 'What is language modeling?\nLanguage modeling is a type of natural language processing (NLP) task...'}]
|
| 108 |
```
|
| 109 |
|
| 110 |
Or, you can make this slightly faster by quantizing the model, e.g. `AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-Instruct", torch_dtype=torch.float16, load_in_8bit=True)` (requires `bitsandbytes`).
|