nathanael-fijalkow committed on
Commit
d561f35
·
1 Parent(s): 88fbdea
Files changed (2) hide show
  1. src/model.py +2 -1
  2. src/train.py +0 -4
src/model.py CHANGED
@@ -359,7 +359,8 @@ class ChessForCausalLM(PreTrainedModel):
359
  shift_labels = labels[..., 1:].contiguous()
360
 
361
  # Flatten for cross-entropy
362
- loss_fct = nn.CrossEntropyLoss(ignore_index=self.config.pad_token_id)
 
363
  loss = loss_fct(
364
  shift_logits.view(-1, shift_logits.size(-1)),
365
  shift_labels.view(-1),
 
359
  shift_labels = labels[..., 1:].contiguous()
360
 
361
  # Flatten for cross-entropy
362
+ loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
363
+ # loss_fct = nn.CrossEntropyLoss(ignore_index=self.config.pad_token_id)
364
  loss = loss_fct(
365
  shift_logits.view(-1, shift_logits.size(-1)),
366
  shift_labels.view(-1),
src/train.py CHANGED
@@ -35,10 +35,6 @@ def parse_args():
35
  )
36
 
37
  # Model arguments
38
- parser.add_argument(
39
- "--vocab_size", type=int, default=1200,
40
- help="Vocabulary size"
41
- )
42
  parser.add_argument(
43
  "--n_embd", type=int, default=128,
44
  help="Embedding dimension"
 
35
  )
36
 
37
  # Model arguments
 
 
 
 
38
  parser.add_argument(
39
  "--n_embd", type=int, default=128,
40
  help="Embedding dimension"