mindchain committed on
Commit
0168a3e
·
verified ·
1 Parent(s): 61cc0c7

Fix NoneType.shape error: device handling, CPU optimizer, safe tensor ops

Browse files
Files changed (1) hide show
  1. train_arithmetic.py +19 -7
train_arithmetic.py CHANGED
@@ -97,21 +97,32 @@ def test_base_model(model, tokenizer, n_samples=20):
97
  print("\n" + "="*70)
98
  print("📊 TESTING BASE MODEL PERFORMANCE")
99
  print("="*70)
100
-
101
  test_samples = generate_arithmetic_samples(n_samples)
102
  correct = 0
103
-
104
  model.eval()
105
  with torch.no_grad():
106
  for i, sample in enumerate(test_samples):
107
- inputs = tokenizer(sample['prompt'], return_tensors='pt').to(model.device)
 
 
 
 
 
108
  outputs = model.generate(
109
  **inputs,
110
  max_new_tokens=20,
111
  do_sample=False,
112
  temperature=1.0
113
  )
114
- response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
 
 
 
 
 
 
115
 
116
  # Extract answer
117
  numbers = re.findall(r'-?\d+\.?\d*', response)
@@ -157,7 +168,6 @@ def main():
157
  model = AutoModelForCausalLM.from_pretrained(
158
  BASE_MODEL,
159
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
160
- device_map="auto" if torch.cuda.is_available() else None
161
  )
162
 
163
  # Test base model first
@@ -170,6 +180,7 @@ def main():
170
  print(f"✅ {len(train_dataset)} training samples\n")
171
 
172
  # GRPO Config
 
173
  training_args = GRPOConfig(
174
  output_dir="./outputs",
175
  max_steps=MAX_STEPS,
@@ -179,12 +190,13 @@ def main():
179
  beta=0.0, # No KL penalty for this task
180
  bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
181
  fp16=False,
182
- gradient_checkpointing=True,
183
- optim="adamw_8bit",
184
  logging_steps=1,
185
  save_steps=MAX_STEPS, # Save at end
186
  push_to_hub=False, # We'll push manually
187
  report_to="none",
 
188
  )
189
 
190
  print("🚀 Starting GRPO Training...")
 
97
  print("\n" + "="*70)
98
  print("📊 TESTING BASE MODEL PERFORMANCE")
99
  print("="*70)
100
+
101
  test_samples = generate_arithmetic_samples(n_samples)
102
  correct = 0
103
+
104
  model.eval()
105
  with torch.no_grad():
106
  for i, sample in enumerate(test_samples):
107
+ inputs = tokenizer(sample['prompt'], return_tensors='pt')
108
+
109
+ # Handle device placement
110
+ if hasattr(model, 'device') and model.device is not None:
111
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
112
+
113
  outputs = model.generate(
114
  **inputs,
115
  max_new_tokens=20,
116
  do_sample=False,
117
  temperature=1.0
118
  )
119
+
120
+ # Safely decode response
121
+ input_ids = inputs.get('input_ids')
122
+ if input_ids is not None and hasattr(input_ids, 'shape'):
123
+ response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
124
+ else:
125
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
126
 
127
  # Extract answer
128
  numbers = re.findall(r'-?\d+\.?\d*', response)
 
168
  model = AutoModelForCausalLM.from_pretrained(
169
  BASE_MODEL,
170
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 
171
  )
172
 
173
  # Test base model first
 
180
  print(f"✅ {len(train_dataset)} training samples\n")
181
 
182
  # GRPO Config
183
+ is_cpu = not torch.cuda.is_available()
184
  training_args = GRPOConfig(
185
  output_dir="./outputs",
186
  max_steps=MAX_STEPS,
 
190
  beta=0.0, # No KL penalty for this task
191
  bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
192
  fp16=False,
193
+ gradient_checkpointing=not is_cpu, # Disable on CPU
194
+ optim="adamw_torch" if is_cpu else "adamw_8bit", # Use standard optimizer on CPU
195
  logging_steps=1,
196
  save_steps=MAX_STEPS, # Save at end
197
  push_to_hub=False, # We'll push manually
198
  report_to="none",
199
+ use_cpu=is_cpu, # Explicitly tell trainer to use CPU
200
  )
201
 
202
  print("🚀 Starting GRPO Training...")