Text Generation
Transformers
Safetensors
English
olmo3
conversational

Update inference examples to use the correct chat template

#12
by mario-sanz - opened
Files changed (1) hide show
  1. README.md +8 -8
README.md CHANGED
@@ -45,13 +45,13 @@ You can use OLMo with the standard HuggingFace transformers library:
45
  from transformers import AutoModelForCausalLM, AutoTokenizer
46
  olmo = AutoModelForCausalLM.from_pretrained("allenai/Olmo-3-32B-Think")
47
  tokenizer = AutoTokenizer.from_pretrained("allenai/Olmo-3-32B-Think")
48
- message = ["Who would win in a fight - a dinosaur or a cow named Moo Moo?"]
49
- inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
50
  # optional verifying cuda
51
  # inputs = {k: v.to('cuda') for k,v in inputs.items()}
52
  # olmo = olmo.to('cuda')
53
  response = olmo.generate(**inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
54
- print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
55
  >> '<think>Okay, so the question is who would win in a fight...'
56
  ```
57
 
@@ -186,8 +186,8 @@ model = AutoModelForCausalLM.from_pretrained(
186
  device_map="auto",
187
  )
188
 
189
- prompt = "Who would win in a fight - a dinosaur or a cow named MooMoo?"
190
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
191
 
192
  outputs = model.generate(
193
  **inputs,
@@ -196,7 +196,7 @@ outputs = model.generate(
196
  max_new_tokens=32768,
197
  )
198
 
199
- print(tokenizer.decode(outputs[0], skip_special_tokens=True))
200
  ```
201
 
202
  ### vLLM Example
@@ -212,8 +212,8 @@ sampling_params = SamplingParams(
212
  max_tokens=32768,
213
  )
214
 
215
- prompt = "Who would win in a fight - a dinosaur or a cow named MooMoo?"
216
- outputs = llm.generate(prompt, sampling_params)
217
  print(outputs[0].outputs[0].text)
218
  ```
219
 
 
45
  from transformers import AutoModelForCausalLM, AutoTokenizer
46
  olmo = AutoModelForCausalLM.from_pretrained("allenai/Olmo-3-32B-Think")
47
  tokenizer = AutoTokenizer.from_pretrained("allenai/Olmo-3-32B-Think")
48
+ message = [{"role": "user", "content": "Who would win in a fight - a dinosaur or a cow named Moo Moo?"}]
49
+ inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors='pt', return_dict=True)
50
  # optional verifying cuda
51
  # inputs = {k: v.to('cuda') for k,v in inputs.items()}
52
  # olmo = olmo.to('cuda')
53
  response = olmo.generate(**inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
54
+ print(tokenizer.decode(response[0][inputs.input_ids.shape[1]:], skip_special_tokens=True))
55
  >> '<think>Okay, so the question is who would win in a fight...'
56
  ```
57
 
 
186
  device_map="auto",
187
  )
188
 
189
+ message = [{"role": "user", "content": "Who would win in a fight - a dinosaur or a cow named Moo Moo?"}]
190
+ inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors='pt', return_dict=True).to(model.device)
191
 
192
  outputs = model.generate(
193
  **inputs,
 
196
  max_new_tokens=32768,
197
  )
198
 
199
+ print(tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True))
200
  ```
201
 
202
  ### vLLM Example
 
212
  max_tokens=32768,
213
  )
214
 
215
+ message = [{"role": "user", "content": "Who would win in a fight - a dinosaur or a cow named Moo Moo?"}]
216
+ outputs = llm.chat(message, sampling_params)
217
  print(outputs[0].outputs[0].text)
218
  ```
219