Chess Challenge submission by raphael-mathiot
Browse files
- config.json +4 -0
- model.py +0 -4
config.json
CHANGED
|
@@ -2,6 +2,10 @@
|
|
| 2 |
"architectures": [
|
| 3 |
"ChessForCausalLM"
|
| 4 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"bos_token_id": 1,
|
| 6 |
"dropout": 0.1,
|
| 7 |
"dtype": "float32",
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"ChessForCausalLM"
|
| 4 |
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "model.ChessConfig",
|
| 7 |
+
"AutoModelForCausalLM": "model.ChessForCausalLM"
|
| 8 |
+
},
|
| 9 |
"bos_token_id": 1,
|
| 10 |
"dropout": 0.1,
|
| 11 |
"dtype": "float32",
|
model.py
CHANGED
|
@@ -353,16 +353,12 @@ class ChessForCausalLM(PreTrainedModel):
|
|
| 353 |
|
| 354 |
# Compute loss if labels are provided
|
| 355 |
loss = None
|
| 356 |
-
|
| 357 |
if labels is not None:
|
| 358 |
# Shift logits and labels for next-token prediction
|
| 359 |
shift_logits = logits[..., :-1, :].contiguous()
|
| 360 |
shift_labels = labels[..., 1:].contiguous()
|
| 361 |
-
|
| 362 |
-
#print(shift_labels[0, 32].item(), torch.argmax(shift_logits[0, 33]).item())
|
| 363 |
|
| 364 |
# Flatten for cross-entropy
|
| 365 |
-
#loss_fct = nn.CrossEntropyLoss(ignore_index=self.config.pad_token_id)
|
| 366 |
loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
|
| 367 |
loss = loss_fct(
|
| 368 |
shift_logits.view(-1, shift_logits.size(-1)),
|
|
|
|
| 353 |
|
| 354 |
# Compute loss if labels are provided
|
| 355 |
loss = None
|
|
|
|
| 356 |
if labels is not None:
|
| 357 |
# Shift logits and labels for next-token prediction
|
| 358 |
shift_logits = logits[..., :-1, :].contiguous()
|
| 359 |
shift_labels = labels[..., 1:].contiguous()
|
|
|
|
|
|
|
| 360 |
|
| 361 |
# Flatten for cross-entropy
|
|
|
|
| 362 |
loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
|
| 363 |
loss = loss_fct(
|
| 364 |
shift_logits.view(-1, shift_logits.size(-1)),
|