abdo-Mansour commited on
Commit
07b5e76
·
verified ·
1 Parent(s): 6ae39b1

Training in progress, step 90, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a947abc26ea9f7064aa5882cfe41ce9f28da860e8e27e8b17c5eae05c89c94a
3
  size 40422168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddf3d264f4e7faa4df50572f9c5b017b949573e85dbde65a378e87fedc53222e
3
  size 40422168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4730bcf347c69534b23091801b6a2f47c149b495ae3f7690af2c3806c4f107fa
3
  size 81075835
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e54225cd90a08fa49938ed4ef9396f3e7c9f1bdecfb2c172e9ca5b20f739d114
3
  size 81075835
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e9f036fb045222351951c991afc6f4c7d35d431e3dde5ae19757c77e2e0c1c4
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409fe8f2237e77733b21fb6879cf1d7186d4a0d14d42651f3693147f5847f8f7
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29cef692cbb5d80311ec58beb990e25db05310b0b00f5f3bfc2d78daf8f934e7
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e450118c7faab59405fb9e6bc7be113ba36014f09eebd1d021e19f621ff31e
3
  size 14917
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cd0e9d505fbc3f97feb166d29026132bdf14eb3e5c7ff77beebc303ee666f96
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fcfce7e32b31bead77e9e963d8e602c77a653a75c54ecbe876542fbeeba3fdc
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e488bed40fb87eb02bbb6039673d72788a08a8c0ca3fb9f930ea5a1aed5a5225
3
  size 1529
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60fd4b39b4e65b659b019a8b86e68491e17b012bb6723be4b28b4cd7467b0c5
3
  size 1529
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 50,
3
  "best_metric": 0.3759682774543762,
4
  "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-50",
5
- "epoch": 0.5610098176718092,
6
  "eval_steps": 50,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -86,6 +86,62 @@
86
  "eval_samples_per_second": 4.08,
87
  "eval_steps_per_second": 1.021,
88
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ],
91
  "logging_steps": 5,
@@ -100,12 +156,12 @@
100
  "should_evaluate": false,
101
  "should_log": false,
102
  "should_save": true,
103
- "should_training_stop": false
104
  },
105
  "attributes": {}
106
  }
107
  },
108
- "total_flos": 7717249448148992.0,
109
  "train_batch_size": 2,
110
  "trial_name": null,
111
  "trial_params": null
 
2
  "best_global_step": 50,
3
  "best_metric": 0.3759682774543762,
4
  "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-50",
5
+ "epoch": 1.0,
6
  "eval_steps": 50,
7
+ "global_step": 90,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
86
  "eval_samples_per_second": 4.08,
87
  "eval_steps_per_second": 1.021,
88
  "step": 50
89
+ },
90
+ {
91
+ "epoch": 0.6171107994389902,
92
+ "grad_norm": 1.648816704750061,
93
+ "learning_rate": 1e-05,
94
+ "loss": 0.3668,
95
+ "step": 55
96
+ },
97
+ {
98
+ "epoch": 0.6732117812061711,
99
+ "grad_norm": 1.4755737781524658,
100
+ "learning_rate": 7.835603860618973e-06,
101
+ "loss": 0.3061,
102
+ "step": 60
103
+ },
104
+ {
105
+ "epoch": 0.729312762973352,
106
+ "grad_norm": 1.2600551843643188,
107
+ "learning_rate": 5.773817382593008e-06,
108
+ "loss": 0.3963,
109
+ "step": 65
110
+ },
111
+ {
112
+ "epoch": 0.7854137447405329,
113
+ "grad_norm": 1.612899661064148,
114
+ "learning_rate": 3.912385709912794e-06,
115
+ "loss": 0.3179,
116
+ "step": 70
117
+ },
118
+ {
119
+ "epoch": 0.8415147265077139,
120
+ "grad_norm": 1.2310423851013184,
121
+ "learning_rate": 2.339555568810221e-06,
122
+ "loss": 0.3536,
123
+ "step": 75
124
+ },
125
+ {
126
+ "epoch": 0.8976157082748948,
127
+ "grad_norm": 1.7635549306869507,
128
+ "learning_rate": 1.129891668217783e-06,
129
+ "loss": 0.3133,
130
+ "step": 80
131
+ },
132
+ {
133
+ "epoch": 0.9537166900420757,
134
+ "grad_norm": 1.8376754522323608,
135
+ "learning_rate": 3.4074173710931804e-07,
136
+ "loss": 0.3641,
137
+ "step": 85
138
+ },
139
+ {
140
+ "epoch": 1.0,
141
+ "grad_norm": 1.9177987575531006,
142
+ "learning_rate": 9.517784181422018e-09,
143
+ "loss": 0.2073,
144
+ "step": 90
145
  }
146
  ],
147
  "logging_steps": 5,
 
156
  "should_evaluate": false,
157
  "should_log": false,
158
  "should_save": true,
159
+ "should_training_stop": true
160
  },
161
  "attributes": {}
162
  }
163
  },
164
+ "total_flos": 1.3864992889634816e+16,
165
  "train_batch_size": 2,
166
  "trial_name": null,
167
  "trial_params": null