update quickstart

#13
by hevans - opened
Files changed (2)
  1. README.md +11 -9
  2. generation_config.json +1 -0
README.md CHANGED
@@ -104,7 +104,6 @@ The following example demonstrates how to load the model, enable Reasoning Mode,
 
 ```python
 import re
-import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # 1. Configure Model
@@ -123,23 +122,27 @@ prompt = "Hello"
 messages = [{"role": "user", "content": prompt}]
 
 # Use apply_chat_template to construct input; set enable_thinking=True to activate Reasoning Mode
-input_ids = tokenizer.apply_chat_template(
-    messages,
-    tokenize=True,
-    add_generation_prompt=True,
-    return_tensors="pt",
+input_text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
     enable_thinking=True
-).to(model.device)
+)
+
+model_inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
+print("Input prepared. Starting generation...")
 
 # 4. Generate Response
 outputs = model.generate(
-    input_ids,
+    **model_inputs,
     max_new_tokens=512,
     do_sample=True,
     temperature=1.0,
+    top_k=20,
     top_p=0.95,
     repetition_penalty=1.05
 )
+print("Generation complete!")
 
 # 5. Parse Results
 full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -161,7 +164,6 @@ thought, final_answer = parse_reasoning(full_response)
 
 print(f"\n{'='*20} Thought Process {'='*20}\n{thought}")
 print(f"\n{'='*20} Final Answer {'='*20}\n{final_answer}")
-
 ```
 
 ### 3. Key Configuration Details
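For convenience, here is roughly how the updated quickstart reads once this patch is applied. The diff only covers the changed region, so the repo id and the `from_pretrained` setup lines below are placeholders, not part of this PR:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Placeholder repo id -- the diff does not show the actual model name
model_name = "your-org/your-model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

prompt = "Hello"
messages = [{"role": "user", "content": prompt}]

# Render the chat template to text first (tokenize=False), then tokenize
# separately so model_inputs carries both input_ids and attention_mask
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True
)
model_inputs = tokenizer([input_text], return_tensors="pt").to(model.device)

outputs = model.generate(
    **model_inputs,
    max_new_tokens=512,
    do_sample=True,
    temperature=1.0,
    top_k=20,
    top_p=0.95,
    repetition_penalty=1.05
)
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
```

Note that the two-step tokenization means `generate()` receives an explicit `attention_mask` via `**model_inputs`, avoiding the attention-mask warning the old single-tensor `input_ids` path could trigger.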
generation_config.json CHANGED
@@ -2,6 +2,7 @@
   "_from_model_config": true,
   "bos_token_id": 128000,
   "eos_token_id": 128001,
+  "pad_token_id": 128001,
   "do_sample": true,
   "temperature": 1.0,
   "top_k": 20,