pablocosta committed on
Commit
f5ba008
·
verified ·
1 Parent(s): a5853ec

Upload 15 files

Browse files
Files changed (7) hide show
  1. config.json +1 -1
  2. model.safetensors +1 -1
  3. optimizer.pt +2 -2
  4. rng_state.pth +2 -2
  5. scheduler.pt +2 -2
  6. trainer_state.json +19 -64
  7. training_args.bin +2 -2
config.json CHANGED
@@ -55,7 +55,7 @@
55
  "tie_word_embeddings": true,
56
  "transformers_version": "4.57.3",
57
  "unsloth_fixed": true,
58
- "unsloth_version": "2025.12.9",
59
  "use_cache": true,
60
  "use_sliding_window": false,
61
  "vocab_size": 151936
 
55
  "tie_word_embeddings": true,
56
  "transformers_version": "4.57.3",
57
  "unsloth_fixed": true,
58
+ "unsloth_version": "2026.1.3",
59
  "use_cache": true,
60
  "use_sliding_window": false,
61
  "vocab_size": 151936
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7da59e792ad516b8a2f9f5e215efb662008dd708fda2ec18696a45034fcd72f
3
  size 1192135096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d966d83e7218d61061d4dd032d5d3509e90257a66ad90bfbe6c9c61a8efe60d
3
  size 1192135096
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68dbc6b8518536cc5f3ede29c31c8553b5a0f7a80a366868075afc89068e50a4
3
- size 2384459962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9debb92fecbcd38bbdf87706bccbe4e8625be93df931d21f11c01f0003adb89d
3
+ size 2384460363
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07247e14bef85c4a3ea4639a866bfcfde6b11d94c7a29fcde66ef77b8da6768f
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:947683e18f75e99925fce99fd9ad8bd89c0ab043c3fe5a98850f3f75077a397b
3
+ size 14709
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff2a046dc5a4f5be7e5b3adfb83205f0a2e6b3fca9848e27c9a0e7c3757b7fff
3
- size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da22aa9d04ee073542d7ab226a2d0b649f7ddd41601fe6dd5c5ce8c95f63d3d2
3
+ size 1465
trainer_state.json CHANGED
@@ -1,88 +1,43 @@
1
  {
2
- "best_global_step": 3000,
3
- "best_metric": 2.1779088973999023,
4
- "best_model_checkpoint": "./results/checkpoint-3000",
5
- "epoch": 1.0,
6
  "eval_steps": 600,
7
- "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.2,
14
- "grad_norm": 4.78125,
15
  "learning_rate": 0.00018033388981636062,
16
- "loss": 0.5121,
17
  "step": 600
18
  },
19
  {
20
  "epoch": 0.2,
21
- "eval_loss": 2.3121278285980225,
22
- "eval_runtime": 172.1996,
23
- "eval_samples_per_second": 69.687,
24
- "eval_steps_per_second": 17.422,
25
  "step": 600
26
  },
27
  {
28
  "epoch": 0.4,
29
- "grad_norm": 0.5,
30
  "learning_rate": 0.00016030050083472454,
31
- "loss": 0.2471,
32
  "step": 1200
33
  },
34
  {
35
  "epoch": 0.4,
36
- "eval_loss": 2.2023062705993652,
37
- "eval_runtime": 171.6931,
38
- "eval_samples_per_second": 69.892,
39
- "eval_steps_per_second": 17.473,
40
  "step": 1200
41
- },
42
- {
43
- "epoch": 0.6,
44
- "grad_norm": 0.490234375,
45
- "learning_rate": 0.0001402671118530885,
46
- "loss": 0.1818,
47
- "step": 1800
48
- },
49
- {
50
- "epoch": 0.6,
51
- "eval_loss": 2.3628954887390137,
52
- "eval_runtime": 171.601,
53
- "eval_samples_per_second": 69.93,
54
- "eval_steps_per_second": 17.482,
55
- "step": 1800
56
- },
57
- {
58
- "epoch": 0.8,
59
- "grad_norm": 0.443359375,
60
- "learning_rate": 0.00012023372287145242,
61
- "loss": 0.1599,
62
- "step": 2400
63
- },
64
- {
65
- "epoch": 0.8,
66
- "eval_loss": 2.1811935901641846,
67
- "eval_runtime": 171.4037,
68
- "eval_samples_per_second": 70.01,
69
- "eval_steps_per_second": 17.503,
70
- "step": 2400
71
- },
72
- {
73
- "epoch": 1.0,
74
- "grad_norm": 0.4140625,
75
- "learning_rate": 0.00010020033388981636,
76
- "loss": 0.1483,
77
- "step": 3000
78
- },
79
- {
80
- "epoch": 1.0,
81
- "eval_loss": 2.1779088973999023,
82
- "eval_runtime": 171.5443,
83
- "eval_samples_per_second": 69.953,
84
- "eval_steps_per_second": 17.488,
85
- "step": 3000
86
  }
87
  ],
88
  "logging_steps": 600,
@@ -93,7 +48,7 @@
93
  "stateful_callbacks": {
94
  "EarlyStoppingCallback": {
95
  "args": {
96
- "early_stopping_patience": 3,
97
  "early_stopping_threshold": 0.0
98
  },
99
  "attributes": {
@@ -111,7 +66,7 @@
111
  "attributes": {}
112
  }
113
  },
114
- "total_flos": 5.009492668828877e+16,
115
  "train_batch_size": 8,
116
  "trial_name": null,
117
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1200,
3
+ "best_metric": 2.1439664363861084,
4
+ "best_model_checkpoint": "./results/checkpoint-1200",
5
+ "epoch": 0.4,
6
  "eval_steps": 600,
7
+ "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.2,
14
+ "grad_norm": 1.328125,
15
  "learning_rate": 0.00018033388981636062,
16
+ "loss": 0.5064,
17
  "step": 600
18
  },
19
  {
20
  "epoch": 0.2,
21
+ "eval_loss": 2.144320249557495,
22
+ "eval_runtime": 183.765,
23
+ "eval_samples_per_second": 65.301,
24
+ "eval_steps_per_second": 16.325,
25
  "step": 600
26
  },
27
  {
28
  "epoch": 0.4,
29
+ "grad_norm": 0.478515625,
30
  "learning_rate": 0.00016030050083472454,
31
+ "loss": 0.244,
32
  "step": 1200
33
  },
34
  {
35
  "epoch": 0.4,
36
+ "eval_loss": 2.1439664363861084,
37
+ "eval_runtime": 201.3005,
38
+ "eval_samples_per_second": 59.612,
39
+ "eval_steps_per_second": 14.903,
40
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "logging_steps": 600,
 
48
  "stateful_callbacks": {
49
  "EarlyStoppingCallback": {
50
  "args": {
51
+ "early_stopping_patience": 2,
52
  "early_stopping_threshold": 0.0
53
  },
54
  "attributes": {
 
66
  "attributes": {}
67
  }
68
  },
69
+ "total_flos": 2.004344603816755e+16,
70
  "train_batch_size": 8,
71
  "trial_name": null,
72
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d164cd25651fd3af62c9b44ebeea3505e3fc6dc73e7ee7a02fdc18786b3d4def
3
- size 5880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e2ce04f5e18277ed5449999fe959f5ccd3d3d76d5f4313948e741633eb0e88d
3
+ size 6353