Files changed (1)
  1. README.md +11 -9
README.md CHANGED
@@ -100,7 +100,6 @@ The following example demonstrates how to load the model, enable Reasoning Mode,
100
 
101
  ```python
102
  import re
103
- import torch
104
  from transformers import AutoTokenizer, AutoModelForCausalLM
105
 
106
  # 1. Configure Model
@@ -119,17 +118,19 @@ prompt = "Hello"
119
  messages = [{"role": "user", "content": prompt}]
120
 
121
  # Use apply_chat_template to construct input; set enable_thinking=True to activate Reasoning Mode
122
- input_ids = tokenizer.apply_chat_template(
123
- messages,
124
- tokenize=True,
125
- add_generation_prompt=True,
126
- return_tensors="pt",
127
  enable_thinking=True
128
- ).to(model.device)
 
 
 
129
 
130
  # 4. Generate Response
131
  outputs = model.generate(
132
- input_ids,
133
  max_new_tokens=512,
134
  do_sample=True,
135
  temperature=1.0,
@@ -137,6 +138,8 @@ outputs = model.generate(
137
  repetition_penalty=1.05
138
  )
139
 
 
 
140
  # 5. Parse Results
141
  full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
142
 
@@ -157,7 +160,6 @@ thought, final_answer = parse_reasoning(full_response)
157
 
158
  print(f"\n{'='*20} Thought Process {'='*20}\n{thought}")
159
  print(f"\n{'='*20} Final Answer {'='*20}\n{final_answer}")
160
-
161
  ```
162
 
163
  ### 3. Key Configuration Details
 
100
 
101
  ```python
102
  import re
 
103
  from transformers import AutoTokenizer, AutoModelForCausalLM
104
 
105
  # 1. Configure Model
 
118
  messages = [{"role": "user", "content": prompt}]
119
 
120
  # Use apply_chat_template to construct input; set enable_thinking=True to activate Reasoning Mode
121
+ inputs = tokenizer.apply_chat_template(
122
+ messages,
123
+ tokenize=False,
124
+ add_generation_prompt=True,
 
125
  enable_thinking=True
126
+ )
127
+ input_ids = tokenizer(inputs, return_tensors="pt").to(model.device)
128
+
129
+ print("Input prepared. Starting generation...")
130
 
131
  # 4. Generate Response
132
  outputs = model.generate(
133
+ **input_ids,
134
  max_new_tokens=512,
135
  do_sample=True,
136
  temperature=1.0,
 
138
  repetition_penalty=1.05
139
  )
140
 
141
+ print("Generation complete!")
142
+
143
  # 5. Parse Results
144
  full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
145
 
 
160
 
161
  print(f"\n{'='*20} Thought Process {'='*20}\n{thought}")
162
  print(f"\n{'='*20} Final Answer {'='*20}\n{final_answer}")
 
163
  ```
164
 
165
  ### 3. Key Configuration Details