Spaces:
Running
Running
Commit
·
d561f35
1
Parent(s):
88fbdea
bug fixes
Browse files
- src/model.py +2 -1
- src/train.py +0 -4
src/model.py
CHANGED
|
@@ -359,7 +359,8 @@ class ChessForCausalLM(PreTrainedModel):
|
|
| 359 |
shift_labels = labels[..., 1:].contiguous()
|
| 360 |
|
| 361 |
# Flatten for cross-entropy
|
| 362 |
-
loss_fct = nn.CrossEntropyLoss(ignore_index=self.config.pad_token_id)
|
|
|
|
| 363 |
loss = loss_fct(
|
| 364 |
shift_logits.view(-1, shift_logits.size(-1)),
|
| 365 |
shift_labels.view(-1),
|
|
|
|
| 359 |
shift_labels = labels[..., 1:].contiguous()
|
| 360 |
|
| 361 |
# Flatten for cross-entropy
|
| 362 |
+
loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
|
| 363 |
+
# loss_fct = nn.CrossEntropyLoss(ignore_index=self.config.pad_token_id)
|
| 364 |
loss = loss_fct(
|
| 365 |
shift_logits.view(-1, shift_logits.size(-1)),
|
| 366 |
shift_labels.view(-1),
|
src/train.py
CHANGED
|
@@ -35,10 +35,6 @@ def parse_args():
|
|
| 35 |
)
|
| 36 |
|
| 37 |
# Model arguments
|
| 38 |
-
parser.add_argument(
|
| 39 |
-
"--vocab_size", type=int, default=1200,
|
| 40 |
-
help="Vocabulary size"
|
| 41 |
-
)
|
| 42 |
parser.add_argument(
|
| 43 |
"--n_embd", type=int, default=128,
|
| 44 |
help="Embedding dimension"
|
|
|
|
| 35 |
)
|
| 36 |
|
| 37 |
# Model arguments
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
parser.add_argument(
|
| 39 |
"--n_embd", type=int, default=128,
|
| 40 |
help="Embedding dimension"
|