Instructions to use moos124/code-reasoning-0.5b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use moos124/code-reasoning-0.5b with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("moos124/code-reasoning-0.5b", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Training in progress, step 600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70430032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61cb52070072e8afdec22ed12a2a6c0dcd897acc0f48fa5fb64af2ba786ab0de
|
| 3 |
size 70430032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 141058579
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c721600da4eb90c4f49f8d37dbb1da796e5fbdf9c7643a9d8f761d2451dcc8b
|
| 3 |
size 141058579
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9d6cafb5e04141843b1a40b8ff83074c07b432b2b5a50df1e9f6d279b1eed92
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e84e01eb9897f39a70561784520b1c3e4555338a3ed58b3ab5b08ab16be2ff2
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -608,6 +608,16 @@
|
|
| 608 |
"mean_token_accuracy": 0.7749405071139336,
|
| 609 |
"num_tokens": 2731814.0,
|
| 610 |
"step": 590
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
}
|
| 612 |
],
|
| 613 |
"logging_steps": 10,
|
|
@@ -627,7 +637,7 @@
|
|
| 627 |
"attributes": {}
|
| 628 |
}
|
| 629 |
},
|
| 630 |
-
"total_flos": 1.
|
| 631 |
"train_batch_size": 4,
|
| 632 |
"trial_name": null,
|
| 633 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.128,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 608 |
"mean_token_accuracy": 0.7749405071139336,
|
| 609 |
"num_tokens": 2731814.0,
|
| 610 |
"step": 590
|
| 611 |
+
},
|
| 612 |
+
{
|
| 613 |
+
"entropy": 1.0139609590172767,
|
| 614 |
+
"epoch": 0.128,
|
| 615 |
+
"grad_norm": 0.26556891202926636,
|
| 616 |
+
"learning_rate": 9.973244969092282e-05,
|
| 617 |
+
"loss": 1.1074792861938476,
|
| 618 |
+
"mean_token_accuracy": 0.7530097424983978,
|
| 619 |
+
"num_tokens": 2780314.0,
|
| 620 |
+
"step": 600
|
| 621 |
}
|
| 622 |
],
|
| 623 |
"logging_steps": 10,
|
|
|
|
| 637 |
"attributes": {}
|
| 638 |
}
|
| 639 |
},
|
| 640 |
+
"total_flos": 1.323906590997504e+16,
|
| 641 |
"train_batch_size": 4,
|
| 642 |
"trial_name": null,
|
| 643 |
"trial_params": null
|