reaperdoesntknow committed
Commit 4c9b4a0 · verified · 1 Parent(s): 9bcae30

Update README.md

Files changed (1): README.md (+10, −6)
README.md CHANGED
@@ -29,15 +29,19 @@ This variant uses Yarn based Rope Scaling with 1:1 Ratio from max_position_embed
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-model_id = "reaperdoesntknow/Qemma-sft"
-tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+model_id = "reaperdoesntknow/Qemma-redux"
+tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).eval()
 
-messages = [{"role": "user", "content": "Explain finite-scale discrepancy Δ_r in one paragraph."}]
-inputs = tok.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
-out = model.generate(inputs, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9)
-print(tok.decode(out[0], skip_special_tokens=True))
+text = "I notice that the sum involves the absolute values of three linear expressions of x."
+inputs = tokenizer(text, return_tensors="pt", max_length=64, padding='max_length', truncation=True)
+inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+with torch.no_grad():
+    model.eval()
+    outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True, min_length=32)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 
 ```
 
 ## What’s inside
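
The hunk context above notes that this variant uses YaRN-based RoPE scaling at a 1:1 ratio from `max_position_embeddings`. As a minimal sketch, assuming the conventional transformers `rope_scaling` layout (the exact keys and values for this repo are not shown in the commit and should be checked against its `config.json`):

```python
# Minimal sketch, not taken from the commit: a YaRN RoPE-scaling block as
# transformers commonly expresses it. A 1:1 ratio corresponds to factor 1.0,
# i.e. the scaled window equals the original max_position_embeddings.
from transformers import AutoConfig

model_id = "reaperdoesntknow/Qemma-redux"
config = AutoConfig.from_pretrained(model_id)

# Assumed shape of the rope_scaling dict; newer transformers versions use
# the "rope_type" key where older ones used "type".
config.rope_scaling = {
    "rope_type": "yarn",
    "factor": 1.0,  # 1:1 ratio, no extension past the original window
    "original_max_position_embeddings": config.max_position_embeddings,
}
```

Passing this config via `AutoModelForCausalLM.from_pretrained(model_id, config=config)` would apply the override, provided the underlying architecture supports YaRN.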