Charlie81 committed on
Commit
2a594f6
·
1 Parent(s): 5c05368

modify batch and fix tensor issue

Browse files
Files changed (2) hide show
  1. myolmoe/modeling_myolmoe.py +1 -1
  2. scripts/train.py +1 -1
myolmoe/modeling_myolmoe.py CHANGED
@@ -1065,7 +1065,7 @@ class MyOlmoeForCausalLM(OlmoePreTrainedModel, GenerationMixin):
1065
  output = (aux_loss,) + output
1066
  return (loss,) + output if loss is not None else output
1067
  #
1068
- total_small_expert_loss = 0
1069
  for layer_output in outputs:
1070
  if len(layer_output) > 1 and isinstance(layer_output[1], torch.Tensor):
1071
  total_small_expert_loss += layer_output[1]
 
1065
  output = (aux_loss,) + output
1066
  return (loss,) + output if loss is not None else output
1067
  #
1068
+ total_small_expert_loss = torch.tensor(0.0, device=logits.device)
1069
  for layer_output in outputs:
1070
  if len(layer_output) > 1 and isinstance(layer_output[1], torch.Tensor):
1071
  total_small_expert_loss += layer_output[1]
scripts/train.py CHANGED
@@ -72,7 +72,7 @@ def main():
72
  # Training arguments
73
  training_args = TrainingArguments(
74
  output_dir="./output",
75
- per_device_train_batch_size=2,
76
  gradient_accumulation_steps=8,
77
  learning_rate=1e-5,
78
  num_train_epochs=1,
 
72
  # Training arguments
73
  training_args = TrainingArguments(
74
  output_dir="./output",
75
+ per_device_train_batch_size=16,
76
  gradient_accumulation_steps=8,
77
  learning_rate=1e-5,
78
  num_train_epochs=1,