Upload gpt_ts_d1/meta_000034.json with huggingface_hub
Browse files
gpt_ts_d1/meta_000034.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"step": 34,
|
| 3 |
-
"val_bpb": 3.
|
| 4 |
"model_config": {
|
| 5 |
"run": "gpt_ts_d1",
|
| 6 |
"device_type": "",
|
|
@@ -36,7 +36,7 @@
|
|
| 36 |
"save_every": -1,
|
| 37 |
"push_checkpoints_to_hub": true,
|
| 38 |
"use_profiler": true,
|
| 39 |
-
"memory_history_max_entries":
|
| 40 |
"model_tag": "gpt_ts_d1",
|
| 41 |
"n_layer": 1,
|
| 42 |
"n_head": 1,
|
|
@@ -79,7 +79,7 @@
|
|
| 79 |
"save_every": -1,
|
| 80 |
"push_checkpoints_to_hub": true,
|
| 81 |
"use_profiler": true,
|
| 82 |
-
"memory_history_max_entries":
|
| 83 |
"model_tag": "gpt_ts_d1"
|
| 84 |
},
|
| 85 |
"device_batch_size": 32,
|
|
@@ -89,8 +89,8 @@
|
|
| 89 |
"rg_idx": 19
|
| 90 |
},
|
| 91 |
"loop_state": {
|
| 92 |
-
"min_val_bpb": 3.
|
| 93 |
-
"smooth_train_loss": 2.
|
| 94 |
-
"total_training_time":
|
| 95 |
}
|
| 96 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"step": 34,
|
| 3 |
+
"val_bpb": 3.2278985236288658,
|
| 4 |
"model_config": {
|
| 5 |
"run": "gpt_ts_d1",
|
| 6 |
"device_type": "",
|
|
|
|
| 36 |
"save_every": -1,
|
| 37 |
"push_checkpoints_to_hub": true,
|
| 38 |
"use_profiler": true,
|
| 39 |
+
"memory_history_max_entries": 10000,
|
| 40 |
"model_tag": "gpt_ts_d1",
|
| 41 |
"n_layer": 1,
|
| 42 |
"n_head": 1,
|
|
|
|
| 79 |
"save_every": -1,
|
| 80 |
"push_checkpoints_to_hub": true,
|
| 81 |
"use_profiler": true,
|
| 82 |
+
"memory_history_max_entries": 10000,
|
| 83 |
"model_tag": "gpt_ts_d1"
|
| 84 |
},
|
| 85 |
"device_batch_size": 32,
|
|
|
|
| 89 |
"rg_idx": 19
|
| 90 |
},
|
| 91 |
"loop_state": {
|
| 92 |
+
"min_val_bpb": 3.2278985236288658,
|
| 93 |
+
"smooth_train_loss": 2.2646813129219647,
|
| 94 |
+
"total_training_time": 4.694071292877197
|
| 95 |
}
|
| 96 |
}
|