Spaces:
Runtime error
Runtime error
Fix NoneType.shape error: device handling, CPU optimizer, safe tensor ops
Browse files- train_arithmetic.py +19 -7
train_arithmetic.py
CHANGED
|
@@ -97,21 +97,32 @@ def test_base_model(model, tokenizer, n_samples=20):
|
|
| 97 |
print("\n" + "="*70)
|
| 98 |
print("π TESTING BASE MODEL PERFORMANCE")
|
| 99 |
print("="*70)
|
| 100 |
-
|
| 101 |
test_samples = generate_arithmetic_samples(n_samples)
|
| 102 |
correct = 0
|
| 103 |
-
|
| 104 |
model.eval()
|
| 105 |
with torch.no_grad():
|
| 106 |
for i, sample in enumerate(test_samples):
|
| 107 |
-
inputs = tokenizer(sample['prompt'], return_tensors='pt')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
outputs = model.generate(
|
| 109 |
**inputs,
|
| 110 |
max_new_tokens=20,
|
| 111 |
do_sample=False,
|
| 112 |
temperature=1.0
|
| 113 |
)
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
# Extract answer
|
| 117 |
numbers = re.findall(r'-?\d+\.?\d*', response)
|
|
@@ -157,7 +168,6 @@ def main():
|
|
| 157 |
model = AutoModelForCausalLM.from_pretrained(
|
| 158 |
BASE_MODEL,
|
| 159 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 160 |
-
device_map="auto" if torch.cuda.is_available() else None
|
| 161 |
)
|
| 162 |
|
| 163 |
# Test base model first
|
|
@@ -170,6 +180,7 @@ def main():
|
|
| 170 |
print(f"✅ {len(train_dataset)} training samples\n")
|
| 171 |
|
| 172 |
# GRPO Config
|
|
|
|
| 173 |
training_args = GRPOConfig(
|
| 174 |
output_dir="./outputs",
|
| 175 |
max_steps=MAX_STEPS,
|
|
@@ -179,12 +190,13 @@ def main():
|
|
| 179 |
beta=0.0, # No KL penalty for this task
|
| 180 |
bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
|
| 181 |
fp16=False,
|
| 182 |
-
gradient_checkpointing=
|
| 183 |
-
optim="adamw_8bit",
|
| 184 |
logging_steps=1,
|
| 185 |
save_steps=MAX_STEPS, # Save at end
|
| 186 |
push_to_hub=False, # We'll push manually
|
| 187 |
report_to="none",
|
|
|
|
| 188 |
)
|
| 189 |
|
| 190 |
print("π Starting GRPO Training...")
|
|
|
|
| 97 |
print("\n" + "="*70)
|
| 98 |
print("π TESTING BASE MODEL PERFORMANCE")
|
| 99 |
print("="*70)
|
| 100 |
+
|
| 101 |
test_samples = generate_arithmetic_samples(n_samples)
|
| 102 |
correct = 0
|
| 103 |
+
|
| 104 |
model.eval()
|
| 105 |
with torch.no_grad():
|
| 106 |
for i, sample in enumerate(test_samples):
|
| 107 |
+
inputs = tokenizer(sample['prompt'], return_tensors='pt')
|
| 108 |
+
|
| 109 |
+
# Handle device placement
|
| 110 |
+
if hasattr(model, 'device') and model.device is not None:
|
| 111 |
+
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
| 112 |
+
|
| 113 |
outputs = model.generate(
|
| 114 |
**inputs,
|
| 115 |
max_new_tokens=20,
|
| 116 |
do_sample=False,
|
| 117 |
temperature=1.0
|
| 118 |
)
|
| 119 |
+
|
| 120 |
+
# Safely decode response
|
| 121 |
+
input_ids = inputs.get('input_ids')
|
| 122 |
+
if input_ids is not None and hasattr(input_ids, 'shape'):
|
| 123 |
+
response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
|
| 124 |
+
else:
|
| 125 |
+
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 126 |
|
| 127 |
# Extract answer
|
| 128 |
numbers = re.findall(r'-?\d+\.?\d*', response)
|
|
|
|
| 168 |
model = AutoModelForCausalLM.from_pretrained(
|
| 169 |
BASE_MODEL,
|
| 170 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
|
|
|
| 171 |
)
|
| 172 |
|
| 173 |
# Test base model first
|
|
|
|
| 180 |
print(f"✅ {len(train_dataset)} training samples\n")
|
| 181 |
|
| 182 |
# GRPO Config
|
| 183 |
+
is_cpu = not torch.cuda.is_available()
|
| 184 |
training_args = GRPOConfig(
|
| 185 |
output_dir="./outputs",
|
| 186 |
max_steps=MAX_STEPS,
|
|
|
|
| 190 |
beta=0.0, # No KL penalty for this task
|
| 191 |
bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
|
| 192 |
fp16=False,
|
| 193 |
+
gradient_checkpointing=not is_cpu, # Disable on CPU
|
| 194 |
+
optim="adamw_torch" if is_cpu else "adamw_8bit", # Use standard optimizer on CPU
|
| 195 |
logging_steps=1,
|
| 196 |
save_steps=MAX_STEPS, # Save at end
|
| 197 |
push_to_hub=False, # We'll push manually
|
| 198 |
report_to="none",
|
| 199 |
+
use_cpu=is_cpu, # Explicitly tell trainer to use CPU
|
| 200 |
)
|
| 201 |
|
| 202 |
print("π Starting GRPO Training...")
|