Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/training_logs.json +625 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/training_logs.json +625 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/training_logs.json +625 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/training_logs.json +1273 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/training_logs.json +1273 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/training_logs.json +1273 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/training_logs.json +2659 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/training_logs.json +2659 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/training_logs.json +2659 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/training_logs.json +0 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/training_logs.json +0 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/eval_results.json +4 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/training_configuration.json +38 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/training_logs.json +0 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-logiqa-r2-a2/adapter_config.json +30 -0
- TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-logiqa-r2-a2/eval_results.json +4 -0
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 4,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 2,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "arc_c",
|
| 3 |
+
"results": 0.4513651877133106
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "ARC_C",
|
| 5 |
+
"dataset_id": "allenai/ai2_arc",
|
| 6 |
+
"preprocess_id": "arc_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 2,
|
| 11 |
+
"alpha": 4,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 1577576
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 4,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-arc_c-r2-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T16:35:26.588040"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r2-a2/training_logs.json
ADDED
|
@@ -0,0 +1,625 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.05714285714285714,
|
| 5 |
+
"cpu_mem": 1.48738048,
|
| 6 |
+
"gpu_mem": 4.423850496,
|
| 7 |
+
"loss": 4.4614,
|
| 8 |
+
"grad_norm": 329.5343017578125,
|
| 9 |
+
"learning_rate": 4.285714285714285e-05
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.11428571428571428,
|
| 14 |
+
"cpu_mem": 1.492885504,
|
| 15 |
+
"gpu_mem": 4.436614144,
|
| 16 |
+
"loss": 4.6994,
|
| 17 |
+
"grad_norm": 335.7124328613281,
|
| 18 |
+
"learning_rate": 8.57142857142857e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.17142857142857143,
|
| 23 |
+
"cpu_mem": 1.492885504,
|
| 24 |
+
"gpu_mem": 4.436644864,
|
| 25 |
+
"loss": 2.1292,
|
| 26 |
+
"grad_norm": 166.02584838867188,
|
| 27 |
+
"learning_rate": 0.00012857142857142855
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.22857142857142856,
|
| 32 |
+
"cpu_mem": 1.493082112,
|
| 33 |
+
"gpu_mem": 4.436611072,
|
| 34 |
+
"loss": 1.5628,
|
| 35 |
+
"grad_norm": 19.919021606445312,
|
| 36 |
+
"learning_rate": 0.0001714285714285714
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.2857142857142857,
|
| 41 |
+
"cpu_mem": 1.493082112,
|
| 42 |
+
"gpu_mem": 4.436598784,
|
| 43 |
+
"loss": 1.4114,
|
| 44 |
+
"grad_norm": 11.660603523254395,
|
| 45 |
+
"learning_rate": 0.00021428571428571427
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.34285714285714286,
|
| 50 |
+
"cpu_mem": 1.493082112,
|
| 51 |
+
"gpu_mem": 4.43666176,
|
| 52 |
+
"loss": 1.4434,
|
| 53 |
+
"grad_norm": 21.82590675354004,
|
| 54 |
+
"learning_rate": 0.0002571428571428571
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.4,
|
| 59 |
+
"cpu_mem": 1.493082112,
|
| 60 |
+
"gpu_mem": 4.436667904,
|
| 61 |
+
"loss": 1.5455,
|
| 62 |
+
"grad_norm": 18.506698608398438,
|
| 63 |
+
"learning_rate": 0.0003
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.45714285714285713,
|
| 68 |
+
"cpu_mem": 1.493082112,
|
| 69 |
+
"gpu_mem": 4.436626432,
|
| 70 |
+
"loss": 1.3713,
|
| 71 |
+
"grad_norm": 6.629955768585205,
|
| 72 |
+
"learning_rate": 0.00029980111348272456
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.5142857142857142,
|
| 77 |
+
"cpu_mem": 1.493082112,
|
| 78 |
+
"gpu_mem": 4.436621824,
|
| 79 |
+
"loss": 1.3708,
|
| 80 |
+
"grad_norm": 20.72789192199707,
|
| 81 |
+
"learning_rate": 0.00029920498134218835
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.5714285714285714,
|
| 86 |
+
"cpu_mem": 1.493082112,
|
| 87 |
+
"gpu_mem": 4.436611072,
|
| 88 |
+
"loss": 1.4826,
|
| 89 |
+
"grad_norm": 16.597583770751953,
|
| 90 |
+
"learning_rate": 0.0002982131844136615
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.6285714285714286,
|
| 95 |
+
"cpu_mem": 1.493082112,
|
| 96 |
+
"gpu_mem": 4.436621824,
|
| 97 |
+
"loss": 1.3793,
|
| 98 |
+
"grad_norm": 8.454121589660645,
|
| 99 |
+
"learning_rate": 0.0002968283527643036
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.6857142857142857,
|
| 104 |
+
"cpu_mem": 1.493082112,
|
| 105 |
+
"gpu_mem": 4.4366464,
|
| 106 |
+
"loss": 1.4662,
|
| 107 |
+
"grad_norm": 15.033178329467773,
|
| 108 |
+
"learning_rate": 0.000295054158718698
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.7428571428571429,
|
| 113 |
+
"cpu_mem": 1.493082112,
|
| 114 |
+
"gpu_mem": 4.4366464,
|
| 115 |
+
"loss": 1.3175,
|
| 116 |
+
"grad_norm": 9.481575965881348,
|
| 117 |
+
"learning_rate": 0.00029289530712050735
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.8,
|
| 122 |
+
"cpu_mem": 1.493082112,
|
| 123 |
+
"gpu_mem": 4.436594176,
|
| 124 |
+
"loss": 1.4611,
|
| 125 |
+
"grad_norm": 11.892037391662598,
|
| 126 |
+
"learning_rate": 0.000290357522856074
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.8571428571428571,
|
| 131 |
+
"cpu_mem": 1.493082112,
|
| 132 |
+
"gpu_mem": 4.43666944,
|
| 133 |
+
"loss": 1.3686,
|
| 134 |
+
"grad_norm": 5.933671474456787,
|
| 135 |
+
"learning_rate": 0.0002874475356730507
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.9142857142857143,
|
| 140 |
+
"cpu_mem": 1.493082112,
|
| 141 |
+
"gpu_mem": 4.436663296,
|
| 142 |
+
"loss": 1.6435,
|
| 143 |
+
"grad_norm": 23.32131576538086,
|
| 144 |
+
"learning_rate": 0.0002841730623343193
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.9714285714285714,
|
| 149 |
+
"cpu_mem": 1.493082112,
|
| 150 |
+
"gpu_mem": 4.436667904,
|
| 151 |
+
"loss": 1.6087,
|
| 152 |
+
"grad_norm": 17.27324676513672,
|
| 153 |
+
"learning_rate": 0.00028054278615452326
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 1.0285714285714285,
|
| 158 |
+
"cpu_mem": 1.493082112,
|
| 159 |
+
"gpu_mem": 4.443011584,
|
| 160 |
+
"loss": 2.1751,
|
| 161 |
+
"grad_norm": 17.91927719116211,
|
| 162 |
+
"learning_rate": 0.0002765663339734778
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 1.0857142857142856,
|
| 167 |
+
"cpu_mem": 1.493082112,
|
| 168 |
+
"gpu_mem": 4.443010048,
|
| 169 |
+
"loss": 1.4158,
|
| 170 |
+
"grad_norm": 5.83022928237915,
|
| 171 |
+
"learning_rate": 0.00027225425062752165
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 1.1428571428571428,
|
| 176 |
+
"cpu_mem": 1.493082112,
|
| 177 |
+
"gpu_mem": 4.442985472,
|
| 178 |
+
"loss": 1.3469,
|
| 179 |
+
"grad_norm": 6.347321510314941,
|
| 180 |
+
"learning_rate": 0.0002676179709865066
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 1.2,
|
| 185 |
+
"cpu_mem": 1.493082112,
|
| 186 |
+
"gpu_mem": 4.442993152,
|
| 187 |
+
"loss": 1.3926,
|
| 188 |
+
"grad_norm": 6.69634485244751,
|
| 189 |
+
"learning_rate": 0.0002626697896305779
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 1.2571428571428571,
|
| 194 |
+
"cpu_mem": 1.493082112,
|
| 195 |
+
"gpu_mem": 4.443022336,
|
| 196 |
+
"loss": 1.4088,
|
| 197 |
+
"grad_norm": 9.436979293823242,
|
| 198 |
+
"learning_rate": 0.000257422828247159
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 1.3142857142857143,
|
| 203 |
+
"cpu_mem": 1.493082112,
|
| 204 |
+
"gpu_mem": 4.44305152,
|
| 205 |
+
"loss": 1.3046,
|
| 206 |
+
"grad_norm": 3.0805585384368896,
|
| 207 |
+
"learning_rate": 0.00025189100083459397
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 1.3714285714285714,
|
| 212 |
+
"cpu_mem": 1.493082112,
|
| 213 |
+
"gpu_mem": 4.442994688,
|
| 214 |
+
"loss": 1.3668,
|
| 215 |
+
"grad_norm": 5.27305269241333,
|
| 216 |
+
"learning_rate": 0.0002460889768047263
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 1.4285714285714286,
|
| 221 |
+
"cpu_mem": 1.493082112,
|
| 222 |
+
"gpu_mem": 4.443063808,
|
| 223 |
+
"loss": 1.3333,
|
| 224 |
+
"grad_norm": 5.636720180511475,
|
| 225 |
+
"learning_rate": 0.00024003214208225522
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 1.4857142857142858,
|
| 230 |
+
"cpu_mem": 1.493082112,
|
| 231 |
+
"gpu_mem": 4.4430208,
|
| 232 |
+
"loss": 1.3928,
|
| 233 |
+
"grad_norm": 6.520215034484863,
|
| 234 |
+
"learning_rate": 0.00023373655830402968
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 1.5428571428571427,
|
| 239 |
+
"cpu_mem": 1.493082112,
|
| 240 |
+
"gpu_mem": 4.442979328,
|
| 241 |
+
"loss": 1.4101,
|
| 242 |
+
"grad_norm": 8.386139869689941,
|
| 243 |
+
"learning_rate": 0.00022721892022647462
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 1.6,
|
| 248 |
+
"cpu_mem": 1.493082112,
|
| 249 |
+
"gpu_mem": 4.443025408,
|
| 250 |
+
"loss": 1.5088,
|
| 251 |
+
"grad_norm": 14.999929428100586,
|
| 252 |
+
"learning_rate": 0.000220496511454098
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 1.657142857142857,
|
| 257 |
+
"cpu_mem": 1.493082112,
|
| 258 |
+
"gpu_mem": 4.4430208,
|
| 259 |
+
"loss": 1.3617,
|
| 260 |
+
"grad_norm": 5.525674343109131,
|
| 261 |
+
"learning_rate": 0.0002135871586064791
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 1.7142857142857144,
|
| 266 |
+
"cpu_mem": 1.493082112,
|
| 267 |
+
"gpu_mem": 4.443010048,
|
| 268 |
+
"loss": 1.3591,
|
| 269 |
+
"grad_norm": 5.05485200881958,
|
| 270 |
+
"learning_rate": 0.00020650918404527775
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 1.7714285714285714,
|
| 275 |
+
"cpu_mem": 1.493082112,
|
| 276 |
+
"gpu_mem": 4.443040768,
|
| 277 |
+
"loss": 1.3461,
|
| 278 |
+
"grad_norm": 4.573202133178711,
|
| 279 |
+
"learning_rate": 0.00019928135728662522
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 1.8285714285714287,
|
| 284 |
+
"cpu_mem": 1.493082112,
|
| 285 |
+
"gpu_mem": 4.443049984,
|
| 286 |
+
"loss": 1.3656,
|
| 287 |
+
"grad_norm": 5.207383632659912,
|
| 288 |
+
"learning_rate": 0.00019192284522774142
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 1.8857142857142857,
|
| 293 |
+
"cpu_mem": 1.493082112,
|
| 294 |
+
"gpu_mem": 4.443030016,
|
| 295 |
+
"loss": 1.3748,
|
| 296 |
+
"grad_norm": 5.258096694946289,
|
| 297 |
+
"learning_rate": 0.00018445316131976934
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 1.9428571428571428,
|
| 302 |
+
"cpu_mem": 1.493082112,
|
| 303 |
+
"gpu_mem": 4.443008512,
|
| 304 |
+
"loss": 1.3884,
|
| 305 |
+
"grad_norm": 5.586267471313477,
|
| 306 |
+
"learning_rate": 0.00017689211382161034
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 2.0,
|
| 311 |
+
"cpu_mem": 1.493082112,
|
| 312 |
+
"gpu_mem": 4.442896384,
|
| 313 |
+
"loss": 2.0668,
|
| 314 |
+
"grad_norm": 7.811915874481201,
|
| 315 |
+
"learning_rate": 0.00016925975327198266
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 2.057142857142857,
|
| 320 |
+
"cpu_mem": 1.493082112,
|
| 321 |
+
"gpu_mem": 4.436640256,
|
| 322 |
+
"loss": 1.3123,
|
| 323 |
+
"grad_norm": 3.8615543842315674,
|
| 324 |
+
"learning_rate": 0.00016157631931899697
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 2.1142857142857143,
|
| 329 |
+
"cpu_mem": 1.493082112,
|
| 330 |
+
"gpu_mem": 4.436649472,
|
| 331 |
+
"loss": 1.3892,
|
| 332 |
+
"grad_norm": 8.06524658203125,
|
| 333 |
+
"learning_rate": 0.0001538621870482483
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 2.1714285714285713,
|
| 338 |
+
"cpu_mem": 1.493082112,
|
| 339 |
+
"gpu_mem": 4.436620288,
|
| 340 |
+
"loss": 1.336,
|
| 341 |
+
"grad_norm": 4.97226095199585,
|
| 342 |
+
"learning_rate": 0.00014613781295175172
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 2.2285714285714286,
|
| 347 |
+
"cpu_mem": 1.493082112,
|
| 348 |
+
"gpu_mem": 4.43663872,
|
| 349 |
+
"loss": 1.3288,
|
| 350 |
+
"grad_norm": 4.268495559692383,
|
| 351 |
+
"learning_rate": 0.00013842368068100303
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 2.2857142857142856,
|
| 356 |
+
"cpu_mem": 1.493082112,
|
| 357 |
+
"gpu_mem": 4.43661568,
|
| 358 |
+
"loss": 1.3649,
|
| 359 |
+
"grad_norm": 5.184421062469482,
|
| 360 |
+
"learning_rate": 0.00013074024672801731
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 2.342857142857143,
|
| 365 |
+
"cpu_mem": 1.493082112,
|
| 366 |
+
"gpu_mem": 4.436617216,
|
| 367 |
+
"loss": 1.3795,
|
| 368 |
+
"grad_norm": 5.0870513916015625,
|
| 369 |
+
"learning_rate": 0.00012310788617838966
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 2.4,
|
| 374 |
+
"cpu_mem": 1.493082112,
|
| 375 |
+
"gpu_mem": 4.4366464,
|
| 376 |
+
"loss": 1.3246,
|
| 377 |
+
"grad_norm": 6.057374000549316,
|
| 378 |
+
"learning_rate": 0.00011554683868023067
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 2.4571428571428573,
|
| 383 |
+
"cpu_mem": 1.493082112,
|
| 384 |
+
"gpu_mem": 4.43666176,
|
| 385 |
+
"loss": 1.3699,
|
| 386 |
+
"grad_norm": 8.358153343200684,
|
| 387 |
+
"learning_rate": 0.00010807715477225858
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 2.5142857142857142,
|
| 392 |
+
"cpu_mem": 1.493082112,
|
| 393 |
+
"gpu_mem": 4.436680192,
|
| 394 |
+
"loss": 1.3074,
|
| 395 |
+
"grad_norm": 4.818901062011719,
|
| 396 |
+
"learning_rate": 0.00010071864271337478
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 2.571428571428571,
|
| 401 |
+
"cpu_mem": 1.493082112,
|
| 402 |
+
"gpu_mem": 4.436634112,
|
| 403 |
+
"loss": 1.2994,
|
| 404 |
+
"grad_norm": 5.064252853393555,
|
| 405 |
+
"learning_rate": 9.34908159547222e-05
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 2.6285714285714286,
|
| 410 |
+
"cpu_mem": 1.493082112,
|
| 411 |
+
"gpu_mem": 4.436627968,
|
| 412 |
+
"loss": 1.2786,
|
| 413 |
+
"grad_norm": 4.9510273933410645,
|
| 414 |
+
"learning_rate": 8.641284139352091e-05
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 2.685714285714286,
|
| 419 |
+
"cpu_mem": 1.493082112,
|
| 420 |
+
"gpu_mem": 4.436621824,
|
| 421 |
+
"loss": 1.2801,
|
| 422 |
+
"grad_norm": 4.4370856285095215,
|
| 423 |
+
"learning_rate": 7.950348854590204e-05
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 2.742857142857143,
|
| 428 |
+
"cpu_mem": 1.493082112,
|
| 429 |
+
"gpu_mem": 4.436626432,
|
| 430 |
+
"loss": 1.2029,
|
| 431 |
+
"grad_norm": 4.3672943115234375,
|
| 432 |
+
"learning_rate": 7.278107977352543e-05
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 2.8,
|
| 437 |
+
"cpu_mem": 1.493082112,
|
| 438 |
+
"gpu_mem": 4.436617216,
|
| 439 |
+
"loss": 1.2484,
|
| 440 |
+
"grad_norm": 3.859243154525757,
|
| 441 |
+
"learning_rate": 6.626344169597031e-05
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 2.857142857142857,
|
| 446 |
+
"cpu_mem": 1.493082112,
|
| 447 |
+
"gpu_mem": 4.436598784,
|
| 448 |
+
"loss": 1.3265,
|
| 449 |
+
"grad_norm": 7.188536643981934,
|
| 450 |
+
"learning_rate": 5.996785791774478e-05
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 2.914285714285714,
|
| 455 |
+
"cpu_mem": 1.493082112,
|
| 456 |
+
"gpu_mem": 4.43662336,
|
| 457 |
+
"loss": 1.2704,
|
| 458 |
+
"grad_norm": 5.372682571411133,
|
| 459 |
+
"learning_rate": 5.391102319527373e-05
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 2.9714285714285715,
|
| 464 |
+
"cpu_mem": 1.493082112,
|
| 465 |
+
"gpu_mem": 4.436651008,
|
| 466 |
+
"loss": 1.3735,
|
| 467 |
+
"grad_norm": 8.921072959899902,
|
| 468 |
+
"learning_rate": 4.8108999165406026e-05
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 3.0285714285714285,
|
| 473 |
+
"cpu_mem": 1.493082112,
|
| 474 |
+
"gpu_mem": 4.443006976,
|
| 475 |
+
"loss": 1.832,
|
| 476 |
+
"grad_norm": 8.732340812683105,
|
| 477 |
+
"learning_rate": 4.257717175284103e-05
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 3.085714285714286,
|
| 482 |
+
"cpu_mem": 1.493082112,
|
| 483 |
+
"gpu_mem": 4.442976256,
|
| 484 |
+
"loss": 1.3082,
|
| 485 |
+
"grad_norm": 7.223543167114258,
|
| 486 |
+
"learning_rate": 3.733021036942205e-05
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 3.142857142857143,
|
| 491 |
+
"cpu_mem": 1.493082112,
|
| 492 |
+
"gpu_mem": 4.443010048,
|
| 493 |
+
"loss": 1.3045,
|
| 494 |
+
"grad_norm": 8.533186912536621,
|
| 495 |
+
"learning_rate": 3.238202901349345e-05
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 3.2,
|
| 500 |
+
"cpu_mem": 1.493082112,
|
| 501 |
+
"gpu_mem": 4.443083776,
|
| 502 |
+
"loss": 1.2201,
|
| 503 |
+
"grad_norm": 5.6041579246521,
|
| 504 |
+
"learning_rate": 2.774574937247831e-05
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 3.257142857142857,
|
| 509 |
+
"cpu_mem": 1.493082112,
|
| 510 |
+
"gpu_mem": 4.443026944,
|
| 511 |
+
"loss": 1.2386,
|
| 512 |
+
"grad_norm": 5.491722583770752,
|
| 513 |
+
"learning_rate": 2.3433666026522153e-05
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 3.314285714285714,
|
| 518 |
+
"cpu_mem": 1.493082112,
|
| 519 |
+
"gpu_mem": 4.4430208,
|
| 520 |
+
"loss": 1.142,
|
| 521 |
+
"grad_norm": 5.923037052154541,
|
| 522 |
+
"learning_rate": 1.945721384547671e-05
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 3.3714285714285714,
|
| 527 |
+
"cpu_mem": 1.493082112,
|
| 528 |
+
"gpu_mem": 4.443071488,
|
| 529 |
+
"loss": 1.2387,
|
| 530 |
+
"grad_norm": 4.976379871368408,
|
| 531 |
+
"learning_rate": 1.5826937665680693e-05
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 3.4285714285714284,
|
| 536 |
+
"cpu_mem": 1.493082112,
|
| 537 |
+
"gpu_mem": 4.44299776,
|
| 538 |
+
"loss": 1.2988,
|
| 539 |
+
"grad_norm": 7.554241180419922,
|
| 540 |
+
"learning_rate": 1.2552464326949302e-05
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 3.4857142857142858,
|
| 545 |
+
"cpu_mem": 1.493082112,
|
| 546 |
+
"gpu_mem": 4.443011584,
|
| 547 |
+
"loss": 1.2697,
|
| 548 |
+
"grad_norm": 6.907799243927002,
|
| 549 |
+
"learning_rate": 9.64247714392597e-06
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 3.5428571428571427,
|
| 554 |
+
"cpu_mem": 1.493082112,
|
| 555 |
+
"gpu_mem": 4.44301312,
|
| 556 |
+
"loss": 1.2166,
|
| 557 |
+
"grad_norm": 5.639773845672607,
|
| 558 |
+
"learning_rate": 7.104692879492624e-06
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 3.6,
|
| 563 |
+
"cpu_mem": 1.493082112,
|
| 564 |
+
"gpu_mem": 4.443002368,
|
| 565 |
+
"loss": 1.2443,
|
| 566 |
+
"grad_norm": 6.010934829711914,
|
| 567 |
+
"learning_rate": 4.945841281301943e-06
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 3.657142857142857,
|
| 572 |
+
"cpu_mem": 1.493082112,
|
| 573 |
+
"gpu_mem": 4.443019264,
|
| 574 |
+
"loss": 1.2085,
|
| 575 |
+
"grad_norm": 6.853599548339844,
|
| 576 |
+
"learning_rate": 3.1716472356963286e-06
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 3.7142857142857144,
|
| 581 |
+
"cpu_mem": 1.493082112,
|
| 582 |
+
"gpu_mem": 4.443040768,
|
| 583 |
+
"loss": 1.2106,
|
| 584 |
+
"grad_norm": 6.083324909210205,
|
| 585 |
+
"learning_rate": 1.7868155863384415e-06
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 3.7714285714285714,
|
| 590 |
+
"cpu_mem": 1.493082112,
|
| 591 |
+
"gpu_mem": 4.443031552,
|
| 592 |
+
"loss": 1.1881,
|
| 593 |
+
"grad_norm": 6.191532611846924,
|
| 594 |
+
"learning_rate": 7.950186578116413e-07
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 3.8285714285714287,
|
| 599 |
+
"cpu_mem": 1.493082112,
|
| 600 |
+
"gpu_mem": 4.443057664,
|
| 601 |
+
"loss": 1.2379,
|
| 602 |
+
"grad_norm": 6.098973751068115,
|
| 603 |
+
"learning_rate": 1.988865172754206e-07
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 3.8857142857142857,
|
| 608 |
+
"cpu_mem": 1.493082112,
|
| 609 |
+
"gpu_mem": 4.443008512,
|
| 610 |
+
"loss": 1.2206,
|
| 611 |
+
"grad_norm": 6.373558044433594,
|
| 612 |
+
"learning_rate": 0.0
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 68,
|
| 616 |
+
"epoch": 3.8857142857142857,
|
| 617 |
+
"cpu_mem": 1.493082112,
|
| 618 |
+
"gpu_mem": 4.443008512,
|
| 619 |
+
"train_runtime": 374.6823,
|
| 620 |
+
"train_samples_per_second": 11.946,
|
| 621 |
+
"train_steps_per_second": 0.181,
|
| 622 |
+
"total_flos": 0.0,
|
| 623 |
+
"train_loss": 1.4825304623912363
|
| 624 |
+
}
|
| 625 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 64,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 32,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "arc_c",
|
| 3 |
+
"results": 0.628839590443686
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "ARC_C",
|
| 5 |
+
"dataset_id": "allenai/ai2_arc",
|
| 6 |
+
"preprocess_id": "arc_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 32,
|
| 11 |
+
"alpha": 64,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 25389056
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 4,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-arc_c-r32-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-31T06:31:01.002762"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r32-a2/training_logs.json
ADDED
|
@@ -0,0 +1,625 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.05714285714285714,
|
| 5 |
+
"cpu_mem": 1.496281088,
|
| 6 |
+
"gpu_mem": 4.519020032,
|
| 7 |
+
"loss": 4.4614,
|
| 8 |
+
"grad_norm": 280.62310791015625,
|
| 9 |
+
"learning_rate": 4.285714285714285e-05
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.11428571428571428,
|
| 14 |
+
"cpu_mem": 1.501589504,
|
| 15 |
+
"gpu_mem": 4.722122752,
|
| 16 |
+
"loss": 4.6994,
|
| 17 |
+
"grad_norm": 286.9012451171875,
|
| 18 |
+
"learning_rate": 8.57142857142857e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.17142857142857143,
|
| 23 |
+
"cpu_mem": 1.501786112,
|
| 24 |
+
"gpu_mem": 4.722153472,
|
| 25 |
+
"loss": 2.1324,
|
| 26 |
+
"grad_norm": 415.12750244140625,
|
| 27 |
+
"learning_rate": 0.00012857142857142855
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.22857142857142856,
|
| 32 |
+
"cpu_mem": 1.501786112,
|
| 33 |
+
"gpu_mem": 4.72211968,
|
| 34 |
+
"loss": 1.7543,
|
| 35 |
+
"grad_norm": 44.261512756347656,
|
| 36 |
+
"learning_rate": 0.0001714285714285714
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.2857142857142857,
|
| 41 |
+
"cpu_mem": 1.501786112,
|
| 42 |
+
"gpu_mem": 4.722107392,
|
| 43 |
+
"loss": 1.508,
|
| 44 |
+
"grad_norm": 22.360448837280273,
|
| 45 |
+
"learning_rate": 0.00021428571428571427
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.34285714285714286,
|
| 50 |
+
"cpu_mem": 1.501786112,
|
| 51 |
+
"gpu_mem": 4.722170368,
|
| 52 |
+
"loss": 1.4382,
|
| 53 |
+
"grad_norm": 9.388525009155273,
|
| 54 |
+
"learning_rate": 0.0002571428571428571
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.4,
|
| 59 |
+
"cpu_mem": 1.50198272,
|
| 60 |
+
"gpu_mem": 4.722176512,
|
| 61 |
+
"loss": 1.4429,
|
| 62 |
+
"grad_norm": 13.978992462158203,
|
| 63 |
+
"learning_rate": 0.0003
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.45714285714285713,
|
| 68 |
+
"cpu_mem": 1.50198272,
|
| 69 |
+
"gpu_mem": 4.72213504,
|
| 70 |
+
"loss": 1.564,
|
| 71 |
+
"grad_norm": 15.369060516357422,
|
| 72 |
+
"learning_rate": 0.00029980111348272456
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.5142857142857142,
|
| 77 |
+
"cpu_mem": 1.50198272,
|
| 78 |
+
"gpu_mem": 4.722130432,
|
| 79 |
+
"loss": 1.5394,
|
| 80 |
+
"grad_norm": 17.35812759399414,
|
| 81 |
+
"learning_rate": 0.00029920498134218835
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.5714285714285714,
|
| 86 |
+
"cpu_mem": 1.50198272,
|
| 87 |
+
"gpu_mem": 4.72211968,
|
| 88 |
+
"loss": 1.4216,
|
| 89 |
+
"grad_norm": 7.401285648345947,
|
| 90 |
+
"learning_rate": 0.0002982131844136615
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.6285714285714286,
|
| 95 |
+
"cpu_mem": 1.50198272,
|
| 96 |
+
"gpu_mem": 4.722130432,
|
| 97 |
+
"loss": 1.7083,
|
| 98 |
+
"grad_norm": 20.328474044799805,
|
| 99 |
+
"learning_rate": 0.0002968283527643036
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.6857142857142857,
|
| 104 |
+
"cpu_mem": 1.50198272,
|
| 105 |
+
"gpu_mem": 4.722155008,
|
| 106 |
+
"loss": 1.3558,
|
| 107 |
+
"grad_norm": 2.454993724822998,
|
| 108 |
+
"learning_rate": 0.000295054158718698
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.7428571428571429,
|
| 113 |
+
"cpu_mem": 1.50198272,
|
| 114 |
+
"gpu_mem": 4.722155008,
|
| 115 |
+
"loss": 1.5307,
|
| 116 |
+
"grad_norm": 12.548927307128906,
|
| 117 |
+
"learning_rate": 0.00029289530712050735
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.8,
|
| 122 |
+
"cpu_mem": 1.50198272,
|
| 123 |
+
"gpu_mem": 4.722102784,
|
| 124 |
+
"loss": 1.4728,
|
| 125 |
+
"grad_norm": 6.753164768218994,
|
| 126 |
+
"learning_rate": 0.000290357522856074
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.8571428571428571,
|
| 131 |
+
"cpu_mem": 1.50198272,
|
| 132 |
+
"gpu_mem": 4.722178048,
|
| 133 |
+
"loss": 1.4047,
|
| 134 |
+
"grad_norm": 5.13401460647583,
|
| 135 |
+
"learning_rate": 0.0002874475356730507
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.9142857142857143,
|
| 140 |
+
"cpu_mem": 1.50198272,
|
| 141 |
+
"gpu_mem": 4.722171904,
|
| 142 |
+
"loss": 1.7088,
|
| 143 |
+
"grad_norm": 15.40377140045166,
|
| 144 |
+
"learning_rate": 0.0002841730623343193
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.9714285714285714,
|
| 149 |
+
"cpu_mem": 1.50198272,
|
| 150 |
+
"gpu_mem": 4.722176512,
|
| 151 |
+
"loss": 1.6083,
|
| 152 |
+
"grad_norm": 22.233034133911133,
|
| 153 |
+
"learning_rate": 0.00028054278615452326
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 1.0285714285714285,
|
| 158 |
+
"cpu_mem": 1.50198272,
|
| 159 |
+
"gpu_mem": 4.823689728,
|
| 160 |
+
"loss": 2.0617,
|
| 161 |
+
"grad_norm": 4.666072845458984,
|
| 162 |
+
"learning_rate": 0.0002765663339734778
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 1.0857142857142856,
|
| 167 |
+
"cpu_mem": 1.50198272,
|
| 168 |
+
"gpu_mem": 4.823688192,
|
| 169 |
+
"loss": 1.3905,
|
| 170 |
+
"grad_norm": 4.700724124908447,
|
| 171 |
+
"learning_rate": 0.00027225425062752165
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 1.1428571428571428,
|
| 176 |
+
"cpu_mem": 1.50198272,
|
| 177 |
+
"gpu_mem": 4.823663616,
|
| 178 |
+
"loss": 1.3482,
|
| 179 |
+
"grad_norm": 5.293838024139404,
|
| 180 |
+
"learning_rate": 0.0002676179709865066
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 1.2,
|
| 185 |
+
"cpu_mem": 1.50198272,
|
| 186 |
+
"gpu_mem": 4.823671296,
|
| 187 |
+
"loss": 1.4783,
|
| 188 |
+
"grad_norm": 6.684973239898682,
|
| 189 |
+
"learning_rate": 0.0002626697896305779
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 1.2571428571428571,
|
| 194 |
+
"cpu_mem": 1.50198272,
|
| 195 |
+
"gpu_mem": 4.82370048,
|
| 196 |
+
"loss": 1.3886,
|
| 197 |
+
"grad_norm": 4.591466426849365,
|
| 198 |
+
"learning_rate": 0.000257422828247159
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 1.3142857142857143,
|
| 203 |
+
"cpu_mem": 1.50198272,
|
| 204 |
+
"gpu_mem": 4.823729664,
|
| 205 |
+
"loss": 1.3456,
|
| 206 |
+
"grad_norm": 3.4432966709136963,
|
| 207 |
+
"learning_rate": 0.00025189100083459397
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 1.3714285714285714,
|
| 212 |
+
"cpu_mem": 1.50198272,
|
| 213 |
+
"gpu_mem": 4.823672832,
|
| 214 |
+
"loss": 1.3971,
|
| 215 |
+
"grad_norm": 4.504514694213867,
|
| 216 |
+
"learning_rate": 0.0002460889768047263
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 1.4285714285714286,
|
| 221 |
+
"cpu_mem": 1.50198272,
|
| 222 |
+
"gpu_mem": 4.823741952,
|
| 223 |
+
"loss": 1.3702,
|
| 224 |
+
"grad_norm": 4.352265357971191,
|
| 225 |
+
"learning_rate": 0.00024003214208225522
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 1.4857142857142858,
|
| 230 |
+
"cpu_mem": 1.50198272,
|
| 231 |
+
"gpu_mem": 4.823698944,
|
| 232 |
+
"loss": 1.3863,
|
| 233 |
+
"grad_norm": 3.940533399581909,
|
| 234 |
+
"learning_rate": 0.00023373655830402968
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 1.5428571428571427,
|
| 239 |
+
"cpu_mem": 1.50198272,
|
| 240 |
+
"gpu_mem": 4.823657472,
|
| 241 |
+
"loss": 1.3904,
|
| 242 |
+
"grad_norm": 3.843360662460327,
|
| 243 |
+
"learning_rate": 0.00022721892022647462
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 1.6,
|
| 248 |
+
"cpu_mem": 1.50198272,
|
| 249 |
+
"gpu_mem": 4.823703552,
|
| 250 |
+
"loss": 1.6316,
|
| 251 |
+
"grad_norm": 9.981597900390625,
|
| 252 |
+
"learning_rate": 0.000220496511454098
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 1.657142857142857,
|
| 257 |
+
"cpu_mem": 1.50198272,
|
| 258 |
+
"gpu_mem": 4.823698944,
|
| 259 |
+
"loss": 1.5115,
|
| 260 |
+
"grad_norm": 6.392779350280762,
|
| 261 |
+
"learning_rate": 0.0002135871586064791
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 1.7142857142857144,
|
| 266 |
+
"cpu_mem": 1.50198272,
|
| 267 |
+
"gpu_mem": 4.823688192,
|
| 268 |
+
"loss": 1.4569,
|
| 269 |
+
"grad_norm": 6.152426242828369,
|
| 270 |
+
"learning_rate": 0.00020650918404527775
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 1.7714285714285714,
|
| 275 |
+
"cpu_mem": 1.50198272,
|
| 276 |
+
"gpu_mem": 4.823718912,
|
| 277 |
+
"loss": 1.3658,
|
| 278 |
+
"grad_norm": 3.233621835708618,
|
| 279 |
+
"learning_rate": 0.00019928135728662522
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 1.8285714285714287,
|
| 284 |
+
"cpu_mem": 1.50198272,
|
| 285 |
+
"gpu_mem": 4.823728128,
|
| 286 |
+
"loss": 1.3712,
|
| 287 |
+
"grad_norm": 2.499112129211426,
|
| 288 |
+
"learning_rate": 0.00019192284522774142
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 1.8857142857142857,
|
| 293 |
+
"cpu_mem": 1.50198272,
|
| 294 |
+
"gpu_mem": 4.82370816,
|
| 295 |
+
"loss": 1.4401,
|
| 296 |
+
"grad_norm": 4.652753829956055,
|
| 297 |
+
"learning_rate": 0.00018445316131976934
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 1.9428571428571428,
|
| 302 |
+
"cpu_mem": 1.50198272,
|
| 303 |
+
"gpu_mem": 4.823686656,
|
| 304 |
+
"loss": 1.4205,
|
| 305 |
+
"grad_norm": 3.1102354526519775,
|
| 306 |
+
"learning_rate": 0.00017689211382161034
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 2.0,
|
| 311 |
+
"cpu_mem": 1.50198272,
|
| 312 |
+
"gpu_mem": 4.823574528,
|
| 313 |
+
"loss": 2.0597,
|
| 314 |
+
"grad_norm": 1.4106764793395996,
|
| 315 |
+
"learning_rate": 0.00016925975327198266
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 2.057142857142857,
|
| 320 |
+
"cpu_mem": 1.50198272,
|
| 321 |
+
"gpu_mem": 4.722148864,
|
| 322 |
+
"loss": 1.3446,
|
| 323 |
+
"grad_norm": 2.1405179500579834,
|
| 324 |
+
"learning_rate": 0.00016157631931899697
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 2.1142857142857143,
|
| 329 |
+
"cpu_mem": 1.50198272,
|
| 330 |
+
"gpu_mem": 4.72215808,
|
| 331 |
+
"loss": 1.4145,
|
| 332 |
+
"grad_norm": 3.9450926780700684,
|
| 333 |
+
"learning_rate": 0.0001538621870482483
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 2.1714285714285713,
|
| 338 |
+
"cpu_mem": 1.50198272,
|
| 339 |
+
"gpu_mem": 4.722128896,
|
| 340 |
+
"loss": 1.3416,
|
| 341 |
+
"grad_norm": 2.131178617477417,
|
| 342 |
+
"learning_rate": 0.00014613781295175172
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 2.2285714285714286,
|
| 347 |
+
"cpu_mem": 1.50198272,
|
| 348 |
+
"gpu_mem": 4.722147328,
|
| 349 |
+
"loss": 1.3367,
|
| 350 |
+
"grad_norm": 2.133847713470459,
|
| 351 |
+
"learning_rate": 0.00013842368068100303
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 2.2857142857142856,
|
| 356 |
+
"cpu_mem": 1.50198272,
|
| 357 |
+
"gpu_mem": 4.722124288,
|
| 358 |
+
"loss": 1.3975,
|
| 359 |
+
"grad_norm": 3.288180112838745,
|
| 360 |
+
"learning_rate": 0.00013074024672801731
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 2.342857142857143,
|
| 365 |
+
"cpu_mem": 1.50198272,
|
| 366 |
+
"gpu_mem": 4.722125824,
|
| 367 |
+
"loss": 1.3958,
|
| 368 |
+
"grad_norm": 3.1248035430908203,
|
| 369 |
+
"learning_rate": 0.00012310788617838966
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 2.4,
|
| 374 |
+
"cpu_mem": 1.50198272,
|
| 375 |
+
"gpu_mem": 4.722155008,
|
| 376 |
+
"loss": 1.335,
|
| 377 |
+
"grad_norm": 3.862166404724121,
|
| 378 |
+
"learning_rate": 0.00011554683868023067
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 2.4571428571428573,
|
| 383 |
+
"cpu_mem": 1.50198272,
|
| 384 |
+
"gpu_mem": 4.722170368,
|
| 385 |
+
"loss": 1.3372,
|
| 386 |
+
"grad_norm": 3.2153024673461914,
|
| 387 |
+
"learning_rate": 0.00010807715477225858
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 2.5142857142857142,
|
| 392 |
+
"cpu_mem": 1.50198272,
|
| 393 |
+
"gpu_mem": 4.7221888,
|
| 394 |
+
"loss": 1.2856,
|
| 395 |
+
"grad_norm": 1.854359745979309,
|
| 396 |
+
"learning_rate": 0.00010071864271337478
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 2.571428571428571,
|
| 401 |
+
"cpu_mem": 1.50198272,
|
| 402 |
+
"gpu_mem": 4.72214272,
|
| 403 |
+
"loss": 1.322,
|
| 404 |
+
"grad_norm": 2.8490543365478516,
|
| 405 |
+
"learning_rate": 9.34908159547222e-05
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 2.6285714285714286,
|
| 410 |
+
"cpu_mem": 1.50198272,
|
| 411 |
+
"gpu_mem": 4.722136576,
|
| 412 |
+
"loss": 1.2721,
|
| 413 |
+
"grad_norm": 2.5255608558654785,
|
| 414 |
+
"learning_rate": 8.641284139352091e-05
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 2.685714285714286,
|
| 419 |
+
"cpu_mem": 1.50198272,
|
| 420 |
+
"gpu_mem": 4.722130432,
|
| 421 |
+
"loss": 1.2753,
|
| 422 |
+
"grad_norm": 2.5269887447357178,
|
| 423 |
+
"learning_rate": 7.950348854590204e-05
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 2.742857142857143,
|
| 428 |
+
"cpu_mem": 1.50198272,
|
| 429 |
+
"gpu_mem": 4.72213504,
|
| 430 |
+
"loss": 1.2814,
|
| 431 |
+
"grad_norm": 3.4005813598632812,
|
| 432 |
+
"learning_rate": 7.278107977352543e-05
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 2.8,
|
| 437 |
+
"cpu_mem": 1.50198272,
|
| 438 |
+
"gpu_mem": 4.722125824,
|
| 439 |
+
"loss": 1.2355,
|
| 440 |
+
"grad_norm": 2.77209734916687,
|
| 441 |
+
"learning_rate": 6.626344169597031e-05
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 2.857142857142857,
|
| 446 |
+
"cpu_mem": 1.50198272,
|
| 447 |
+
"gpu_mem": 4.722107392,
|
| 448 |
+
"loss": 1.2617,
|
| 449 |
+
"grad_norm": 2.584846258163452,
|
| 450 |
+
"learning_rate": 5.996785791774478e-05
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 2.914285714285714,
|
| 455 |
+
"cpu_mem": 1.50198272,
|
| 456 |
+
"gpu_mem": 4.722131968,
|
| 457 |
+
"loss": 1.2872,
|
| 458 |
+
"grad_norm": 3.403454542160034,
|
| 459 |
+
"learning_rate": 5.391102319527373e-05
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 2.9714285714285715,
|
| 464 |
+
"cpu_mem": 1.50198272,
|
| 465 |
+
"gpu_mem": 4.722159616,
|
| 466 |
+
"loss": 1.323,
|
| 467 |
+
"grad_norm": 4.359912872314453,
|
| 468 |
+
"learning_rate": 4.8108999165406026e-05
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 3.0285714285714285,
|
| 473 |
+
"cpu_mem": 1.50198272,
|
| 474 |
+
"gpu_mem": 4.82368512,
|
| 475 |
+
"loss": 1.8447,
|
| 476 |
+
"grad_norm": 5.682162761688232,
|
| 477 |
+
"learning_rate": 4.257717175284103e-05
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 3.085714285714286,
|
| 482 |
+
"cpu_mem": 1.50198272,
|
| 483 |
+
"gpu_mem": 4.8236544,
|
| 484 |
+
"loss": 1.1505,
|
| 485 |
+
"grad_norm": 3.0071282386779785,
|
| 486 |
+
"learning_rate": 3.733021036942205e-05
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 3.142857142857143,
|
| 491 |
+
"cpu_mem": 1.50198272,
|
| 492 |
+
"gpu_mem": 4.823688192,
|
| 493 |
+
"loss": 1.151,
|
| 494 |
+
"grad_norm": 3.7012956142425537,
|
| 495 |
+
"learning_rate": 3.238202901349345e-05
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 3.2,
|
| 500 |
+
"cpu_mem": 1.50198272,
|
| 501 |
+
"gpu_mem": 4.82376192,
|
| 502 |
+
"loss": 1.1417,
|
| 503 |
+
"grad_norm": 4.017345905303955,
|
| 504 |
+
"learning_rate": 2.774574937247831e-05
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 3.257142857142857,
|
| 509 |
+
"cpu_mem": 1.50198272,
|
| 510 |
+
"gpu_mem": 4.823705088,
|
| 511 |
+
"loss": 1.0821,
|
| 512 |
+
"grad_norm": 3.950089454650879,
|
| 513 |
+
"learning_rate": 2.3433666026522153e-05
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 3.314285714285714,
|
| 518 |
+
"cpu_mem": 1.50198272,
|
| 519 |
+
"gpu_mem": 4.823698944,
|
| 520 |
+
"loss": 1.0089,
|
| 521 |
+
"grad_norm": 4.133927345275879,
|
| 522 |
+
"learning_rate": 1.945721384547671e-05
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 3.3714285714285714,
|
| 527 |
+
"cpu_mem": 1.50198272,
|
| 528 |
+
"gpu_mem": 4.823749632,
|
| 529 |
+
"loss": 1.0377,
|
| 530 |
+
"grad_norm": 4.262353420257568,
|
| 531 |
+
"learning_rate": 1.5826937665680693e-05
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 3.4285714285714284,
|
| 536 |
+
"cpu_mem": 1.50198272,
|
| 537 |
+
"gpu_mem": 4.823675904,
|
| 538 |
+
"loss": 1.0539,
|
| 539 |
+
"grad_norm": 5.903791904449463,
|
| 540 |
+
"learning_rate": 1.2552464326949302e-05
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 3.4857142857142858,
|
| 545 |
+
"cpu_mem": 1.50198272,
|
| 546 |
+
"gpu_mem": 4.823689728,
|
| 547 |
+
"loss": 1.1056,
|
| 548 |
+
"grad_norm": 5.543725490570068,
|
| 549 |
+
"learning_rate": 9.64247714392597e-06
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 3.5428571428571427,
|
| 554 |
+
"cpu_mem": 1.50198272,
|
| 555 |
+
"gpu_mem": 4.823691264,
|
| 556 |
+
"loss": 1.0513,
|
| 557 |
+
"grad_norm": 5.472988128662109,
|
| 558 |
+
"learning_rate": 7.104692879492624e-06
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 3.6,
|
| 563 |
+
"cpu_mem": 1.50198272,
|
| 564 |
+
"gpu_mem": 4.823680512,
|
| 565 |
+
"loss": 1.0188,
|
| 566 |
+
"grad_norm": 5.62291955947876,
|
| 567 |
+
"learning_rate": 4.945841281301943e-06
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 3.657142857142857,
|
| 572 |
+
"cpu_mem": 1.50198272,
|
| 573 |
+
"gpu_mem": 4.823697408,
|
| 574 |
+
"loss": 1.0059,
|
| 575 |
+
"grad_norm": 5.603131294250488,
|
| 576 |
+
"learning_rate": 3.1716472356963286e-06
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 3.7142857142857144,
|
| 581 |
+
"cpu_mem": 1.50198272,
|
| 582 |
+
"gpu_mem": 4.823718912,
|
| 583 |
+
"loss": 1.0754,
|
| 584 |
+
"grad_norm": 6.040858268737793,
|
| 585 |
+
"learning_rate": 1.7868155863384415e-06
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 3.7714285714285714,
|
| 590 |
+
"cpu_mem": 1.50198272,
|
| 591 |
+
"gpu_mem": 4.823709696,
|
| 592 |
+
"loss": 0.9901,
|
| 593 |
+
"grad_norm": 5.5554585456848145,
|
| 594 |
+
"learning_rate": 7.950186578116413e-07
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 3.8285714285714287,
|
| 599 |
+
"cpu_mem": 1.50198272,
|
| 600 |
+
"gpu_mem": 4.823735808,
|
| 601 |
+
"loss": 1.0235,
|
| 602 |
+
"grad_norm": 6.136469841003418,
|
| 603 |
+
"learning_rate": 1.988865172754206e-07
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 3.8857142857142857,
|
| 608 |
+
"cpu_mem": 1.50198272,
|
| 609 |
+
"gpu_mem": 4.823686656,
|
| 610 |
+
"loss": 1.0859,
|
| 611 |
+
"grad_norm": 5.908904075622559,
|
| 612 |
+
"learning_rate": 0.0
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 68,
|
| 616 |
+
"epoch": 3.8857142857142857,
|
| 617 |
+
"cpu_mem": 1.50198272,
|
| 618 |
+
"gpu_mem": 4.823686656,
|
| 619 |
+
"train_runtime": 378.6352,
|
| 620 |
+
"train_samples_per_second": 11.821,
|
| 621 |
+
"train_steps_per_second": 0.18,
|
| 622 |
+
"total_flos": 0.0,
|
| 623 |
+
"train_loss": 1.4682180960388744
|
| 624 |
+
}
|
| 625 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 16,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 8,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "arc_c",
|
| 3 |
+
"results": 0.378839590443686
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "ARC_C",
|
| 5 |
+
"dataset_id": "allenai/ai2_arc",
|
| 6 |
+
"preprocess_id": "arc_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 8,
|
| 11 |
+
"alpha": 16,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 6317696
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 4,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-arc_c-r8-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T23:32:37.041918"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_c-r8-a2/training_logs.json
ADDED
|
@@ -0,0 +1,625 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.05714285714285714,
|
| 5 |
+
"cpu_mem": 1.489108992,
|
| 6 |
+
"gpu_mem": 4.442774016,
|
| 7 |
+
"loss": 4.4614,
|
| 8 |
+
"grad_norm": 272.1399230957031,
|
| 9 |
+
"learning_rate": 4.285714285714285e-05
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.11428571428571428,
|
| 14 |
+
"cpu_mem": 1.494614016,
|
| 15 |
+
"gpu_mem": 4.493384704,
|
| 16 |
+
"loss": 4.6994,
|
| 17 |
+
"grad_norm": 279.0349426269531,
|
| 18 |
+
"learning_rate": 8.57142857142857e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.17142857142857143,
|
| 23 |
+
"cpu_mem": 1.494614016,
|
| 24 |
+
"gpu_mem": 4.493415424,
|
| 25 |
+
"loss": 2.3086,
|
| 26 |
+
"grad_norm": 260.66900634765625,
|
| 27 |
+
"learning_rate": 0.00012857142857142855
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.22857142857142856,
|
| 32 |
+
"cpu_mem": 1.494810624,
|
| 33 |
+
"gpu_mem": 4.493381632,
|
| 34 |
+
"loss": 1.572,
|
| 35 |
+
"grad_norm": 18.902830123901367,
|
| 36 |
+
"learning_rate": 0.0001714285714285714
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.2857142857142857,
|
| 41 |
+
"cpu_mem": 1.495007232,
|
| 42 |
+
"gpu_mem": 4.493369344,
|
| 43 |
+
"loss": 1.5805,
|
| 44 |
+
"grad_norm": 30.244815826416016,
|
| 45 |
+
"learning_rate": 0.00021428571428571427
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.34285714285714286,
|
| 50 |
+
"cpu_mem": 1.495007232,
|
| 51 |
+
"gpu_mem": 4.49343232,
|
| 52 |
+
"loss": 1.3975,
|
| 53 |
+
"grad_norm": 10.924633026123047,
|
| 54 |
+
"learning_rate": 0.0002571428571428571
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.4,
|
| 59 |
+
"cpu_mem": 1.495007232,
|
| 60 |
+
"gpu_mem": 4.493438464,
|
| 61 |
+
"loss": 1.4746,
|
| 62 |
+
"grad_norm": 35.98440933227539,
|
| 63 |
+
"learning_rate": 0.0003
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.45714285714285713,
|
| 68 |
+
"cpu_mem": 1.495007232,
|
| 69 |
+
"gpu_mem": 4.493396992,
|
| 70 |
+
"loss": 1.5768,
|
| 71 |
+
"grad_norm": 21.909250259399414,
|
| 72 |
+
"learning_rate": 0.00029980111348272456
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.5142857142857142,
|
| 77 |
+
"cpu_mem": 1.495007232,
|
| 78 |
+
"gpu_mem": 4.493392384,
|
| 79 |
+
"loss": 1.3486,
|
| 80 |
+
"grad_norm": 8.877981185913086,
|
| 81 |
+
"learning_rate": 0.00029920498134218835
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.5714285714285714,
|
| 86 |
+
"cpu_mem": 1.495007232,
|
| 87 |
+
"gpu_mem": 4.493381632,
|
| 88 |
+
"loss": 1.6285,
|
| 89 |
+
"grad_norm": 21.38736915588379,
|
| 90 |
+
"learning_rate": 0.0002982131844136615
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.6285714285714286,
|
| 95 |
+
"cpu_mem": 1.495007232,
|
| 96 |
+
"gpu_mem": 4.493392384,
|
| 97 |
+
"loss": 1.4523,
|
| 98 |
+
"grad_norm": 7.3010358810424805,
|
| 99 |
+
"learning_rate": 0.0002968283527643036
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.6857142857142857,
|
| 104 |
+
"cpu_mem": 1.495007232,
|
| 105 |
+
"gpu_mem": 4.49341696,
|
| 106 |
+
"loss": 1.4579,
|
| 107 |
+
"grad_norm": 8.112820625305176,
|
| 108 |
+
"learning_rate": 0.000295054158718698
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.7428571428571429,
|
| 113 |
+
"cpu_mem": 1.495007232,
|
| 114 |
+
"gpu_mem": 4.49341696,
|
| 115 |
+
"loss": 1.3493,
|
| 116 |
+
"grad_norm": 11.578926086425781,
|
| 117 |
+
"learning_rate": 0.00029289530712050735
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.8,
|
| 122 |
+
"cpu_mem": 1.495007232,
|
| 123 |
+
"gpu_mem": 4.493364736,
|
| 124 |
+
"loss": 1.6974,
|
| 125 |
+
"grad_norm": 17.703752517700195,
|
| 126 |
+
"learning_rate": 0.000290357522856074
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.8571428571428571,
|
| 131 |
+
"cpu_mem": 1.495007232,
|
| 132 |
+
"gpu_mem": 4.49344,
|
| 133 |
+
"loss": 1.5182,
|
| 134 |
+
"grad_norm": 8.22641658782959,
|
| 135 |
+
"learning_rate": 0.0002874475356730507
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.9142857142857143,
|
| 140 |
+
"cpu_mem": 1.495007232,
|
| 141 |
+
"gpu_mem": 4.493433856,
|
| 142 |
+
"loss": 1.4541,
|
| 143 |
+
"grad_norm": 7.929551124572754,
|
| 144 |
+
"learning_rate": 0.0002841730623343193
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.9714285714285714,
|
| 149 |
+
"cpu_mem": 1.495007232,
|
| 150 |
+
"gpu_mem": 4.493438464,
|
| 151 |
+
"loss": 1.3941,
|
| 152 |
+
"grad_norm": 5.744842052459717,
|
| 153 |
+
"learning_rate": 0.00028054278615452326
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 1.0285714285714285,
|
| 158 |
+
"cpu_mem": 1.495007232,
|
| 159 |
+
"gpu_mem": 4.518705664,
|
| 160 |
+
"loss": 2.1397,
|
| 161 |
+
"grad_norm": 13.66163158416748,
|
| 162 |
+
"learning_rate": 0.0002765663339734778
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 1.0857142857142856,
|
| 167 |
+
"cpu_mem": 1.495007232,
|
| 168 |
+
"gpu_mem": 4.518704128,
|
| 169 |
+
"loss": 1.4262,
|
| 170 |
+
"grad_norm": 6.4307355880737305,
|
| 171 |
+
"learning_rate": 0.00027225425062752165
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 1.1428571428571428,
|
| 176 |
+
"cpu_mem": 1.495007232,
|
| 177 |
+
"gpu_mem": 4.518679552,
|
| 178 |
+
"loss": 1.3419,
|
| 179 |
+
"grad_norm": 18.498199462890625,
|
| 180 |
+
"learning_rate": 0.0002676179709865066
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 1.2,
|
| 185 |
+
"cpu_mem": 1.495007232,
|
| 186 |
+
"gpu_mem": 4.518687232,
|
| 187 |
+
"loss": 1.3816,
|
| 188 |
+
"grad_norm": 3.349029541015625,
|
| 189 |
+
"learning_rate": 0.0002626697896305779
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 1.2571428571428571,
|
| 194 |
+
"cpu_mem": 1.495007232,
|
| 195 |
+
"gpu_mem": 4.518716416,
|
| 196 |
+
"loss": 1.3216,
|
| 197 |
+
"grad_norm": 3.663336753845215,
|
| 198 |
+
"learning_rate": 0.000257422828247159
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 1.3142857142857143,
|
| 203 |
+
"cpu_mem": 1.495007232,
|
| 204 |
+
"gpu_mem": 4.5187456,
|
| 205 |
+
"loss": 1.3365,
|
| 206 |
+
"grad_norm": 6.960829734802246,
|
| 207 |
+
"learning_rate": 0.00025189100083459397
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 1.3714285714285714,
|
| 212 |
+
"cpu_mem": 1.495007232,
|
| 213 |
+
"gpu_mem": 4.518688768,
|
| 214 |
+
"loss": 1.3619,
|
| 215 |
+
"grad_norm": 5.14816427230835,
|
| 216 |
+
"learning_rate": 0.0002460889768047263
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 1.4285714285714286,
|
| 221 |
+
"cpu_mem": 1.495007232,
|
| 222 |
+
"gpu_mem": 4.518757888,
|
| 223 |
+
"loss": 1.3392,
|
| 224 |
+
"grad_norm": 4.903714656829834,
|
| 225 |
+
"learning_rate": 0.00024003214208225522
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 1.4857142857142858,
|
| 230 |
+
"cpu_mem": 1.495007232,
|
| 231 |
+
"gpu_mem": 4.51871488,
|
| 232 |
+
"loss": 1.3861,
|
| 233 |
+
"grad_norm": 6.120626926422119,
|
| 234 |
+
"learning_rate": 0.00023373655830402968
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 1.5428571428571427,
|
| 239 |
+
"cpu_mem": 1.495007232,
|
| 240 |
+
"gpu_mem": 4.518673408,
|
| 241 |
+
"loss": 1.3876,
|
| 242 |
+
"grad_norm": 6.803613662719727,
|
| 243 |
+
"learning_rate": 0.00022721892022647462
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 1.6,
|
| 248 |
+
"cpu_mem": 1.495007232,
|
| 249 |
+
"gpu_mem": 4.518719488,
|
| 250 |
+
"loss": 1.6675,
|
| 251 |
+
"grad_norm": 14.671072006225586,
|
| 252 |
+
"learning_rate": 0.000220496511454098
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 1.657142857142857,
|
| 257 |
+
"cpu_mem": 1.495007232,
|
| 258 |
+
"gpu_mem": 4.51871488,
|
| 259 |
+
"loss": 1.4052,
|
| 260 |
+
"grad_norm": 6.3027143478393555,
|
| 261 |
+
"learning_rate": 0.0002135871586064791
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 1.7142857142857144,
|
| 266 |
+
"cpu_mem": 1.495007232,
|
| 267 |
+
"gpu_mem": 4.518704128,
|
| 268 |
+
"loss": 1.3962,
|
| 269 |
+
"grad_norm": 5.853539943695068,
|
| 270 |
+
"learning_rate": 0.00020650918404527775
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 1.7714285714285714,
|
| 275 |
+
"cpu_mem": 1.495007232,
|
| 276 |
+
"gpu_mem": 4.518734848,
|
| 277 |
+
"loss": 1.3627,
|
| 278 |
+
"grad_norm": 4.129302024841309,
|
| 279 |
+
"learning_rate": 0.00019928135728662522
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 1.8285714285714287,
|
| 284 |
+
"cpu_mem": 1.495007232,
|
| 285 |
+
"gpu_mem": 4.518744064,
|
| 286 |
+
"loss": 1.347,
|
| 287 |
+
"grad_norm": 6.030110836029053,
|
| 288 |
+
"learning_rate": 0.00019192284522774142
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 1.8857142857142857,
|
| 293 |
+
"cpu_mem": 1.495007232,
|
| 294 |
+
"gpu_mem": 4.518724096,
|
| 295 |
+
"loss": 1.4082,
|
| 296 |
+
"grad_norm": 8.369693756103516,
|
| 297 |
+
"learning_rate": 0.00018445316131976934
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 1.9428571428571428,
|
| 302 |
+
"cpu_mem": 1.495007232,
|
| 303 |
+
"gpu_mem": 4.518702592,
|
| 304 |
+
"loss": 1.4094,
|
| 305 |
+
"grad_norm": 14.688669204711914,
|
| 306 |
+
"learning_rate": 0.00017689211382161034
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 2.0,
|
| 311 |
+
"cpu_mem": 1.495007232,
|
| 312 |
+
"gpu_mem": 4.518590464,
|
| 313 |
+
"loss": 2.1062,
|
| 314 |
+
"grad_norm": 14.797432899475098,
|
| 315 |
+
"learning_rate": 0.00016925975327198266
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 2.057142857142857,
|
| 320 |
+
"cpu_mem": 1.495007232,
|
| 321 |
+
"gpu_mem": 4.493410816,
|
| 322 |
+
"loss": 1.3897,
|
| 323 |
+
"grad_norm": 10.996891021728516,
|
| 324 |
+
"learning_rate": 0.00016157631931899697
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 2.1142857142857143,
|
| 329 |
+
"cpu_mem": 1.495007232,
|
| 330 |
+
"gpu_mem": 4.493420032,
|
| 331 |
+
"loss": 1.4705,
|
| 332 |
+
"grad_norm": 15.965860366821289,
|
| 333 |
+
"learning_rate": 0.0001538621870482483
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 2.1714285714285713,
|
| 338 |
+
"cpu_mem": 1.495007232,
|
| 339 |
+
"gpu_mem": 4.493390848,
|
| 340 |
+
"loss": 1.3793,
|
| 341 |
+
"grad_norm": 5.473352432250977,
|
| 342 |
+
"learning_rate": 0.00014613781295175172
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 2.2285714285714286,
|
| 347 |
+
"cpu_mem": 1.495007232,
|
| 348 |
+
"gpu_mem": 4.49340928,
|
| 349 |
+
"loss": 1.3478,
|
| 350 |
+
"grad_norm": 2.9749255180358887,
|
| 351 |
+
"learning_rate": 0.00013842368068100303
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 2.2857142857142856,
|
| 356 |
+
"cpu_mem": 1.495007232,
|
| 357 |
+
"gpu_mem": 4.49338624,
|
| 358 |
+
"loss": 1.4282,
|
| 359 |
+
"grad_norm": 7.062312126159668,
|
| 360 |
+
"learning_rate": 0.00013074024672801731
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 2.342857142857143,
|
| 365 |
+
"cpu_mem": 1.495007232,
|
| 366 |
+
"gpu_mem": 4.493387776,
|
| 367 |
+
"loss": 1.4169,
|
| 368 |
+
"grad_norm": 5.1576642990112305,
|
| 369 |
+
"learning_rate": 0.00012310788617838966
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 2.4,
|
| 374 |
+
"cpu_mem": 1.495007232,
|
| 375 |
+
"gpu_mem": 4.49341696,
|
| 376 |
+
"loss": 1.4134,
|
| 377 |
+
"grad_norm": 9.255854606628418,
|
| 378 |
+
"learning_rate": 0.00011554683868023067
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 2.4571428571428573,
|
| 383 |
+
"cpu_mem": 1.495007232,
|
| 384 |
+
"gpu_mem": 4.49343232,
|
| 385 |
+
"loss": 1.3524,
|
| 386 |
+
"grad_norm": 5.755366802215576,
|
| 387 |
+
"learning_rate": 0.00010807715477225858
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 2.5142857142857142,
|
| 392 |
+
"cpu_mem": 1.495007232,
|
| 393 |
+
"gpu_mem": 4.493450752,
|
| 394 |
+
"loss": 1.3488,
|
| 395 |
+
"grad_norm": 4.835580348968506,
|
| 396 |
+
"learning_rate": 0.00010071864271337478
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 2.571428571428571,
|
| 401 |
+
"cpu_mem": 1.495007232,
|
| 402 |
+
"gpu_mem": 4.493404672,
|
| 403 |
+
"loss": 1.3469,
|
| 404 |
+
"grad_norm": 6.548555374145508,
|
| 405 |
+
"learning_rate": 9.34908159547222e-05
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 2.6285714285714286,
|
| 410 |
+
"cpu_mem": 1.495007232,
|
| 411 |
+
"gpu_mem": 4.493398528,
|
| 412 |
+
"loss": 1.2932,
|
| 413 |
+
"grad_norm": 3.8608975410461426,
|
| 414 |
+
"learning_rate": 8.641284139352091e-05
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 2.685714285714286,
|
| 419 |
+
"cpu_mem": 1.495007232,
|
| 420 |
+
"gpu_mem": 4.493392384,
|
| 421 |
+
"loss": 1.3043,
|
| 422 |
+
"grad_norm": 4.572495460510254,
|
| 423 |
+
"learning_rate": 7.950348854590204e-05
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 2.742857142857143,
|
| 428 |
+
"cpu_mem": 1.495007232,
|
| 429 |
+
"gpu_mem": 4.493396992,
|
| 430 |
+
"loss": 1.2866,
|
| 431 |
+
"grad_norm": 3.4438271522521973,
|
| 432 |
+
"learning_rate": 7.278107977352543e-05
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 2.8,
|
| 437 |
+
"cpu_mem": 1.495007232,
|
| 438 |
+
"gpu_mem": 4.493387776,
|
| 439 |
+
"loss": 1.3026,
|
| 440 |
+
"grad_norm": 3.738175868988037,
|
| 441 |
+
"learning_rate": 6.626344169597031e-05
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 2.857142857142857,
|
| 446 |
+
"cpu_mem": 1.495007232,
|
| 447 |
+
"gpu_mem": 4.493369344,
|
| 448 |
+
"loss": 1.3494,
|
| 449 |
+
"grad_norm": 4.942461967468262,
|
| 450 |
+
"learning_rate": 5.996785791774478e-05
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 2.914285714285714,
|
| 455 |
+
"cpu_mem": 1.495007232,
|
| 456 |
+
"gpu_mem": 4.49339392,
|
| 457 |
+
"loss": 1.3157,
|
| 458 |
+
"grad_norm": 4.040122985839844,
|
| 459 |
+
"learning_rate": 5.391102319527373e-05
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 2.9714285714285715,
|
| 464 |
+
"cpu_mem": 1.495007232,
|
| 465 |
+
"gpu_mem": 4.493421568,
|
| 466 |
+
"loss": 1.4034,
|
| 467 |
+
"grad_norm": 4.740878105163574,
|
| 468 |
+
"learning_rate": 4.8108999165406026e-05
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 3.0285714285714285,
|
| 473 |
+
"cpu_mem": 1.495007232,
|
| 474 |
+
"gpu_mem": 4.518701056,
|
| 475 |
+
"loss": 1.9696,
|
| 476 |
+
"grad_norm": 6.076801300048828,
|
| 477 |
+
"learning_rate": 4.257717175284103e-05
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 3.085714285714286,
|
| 482 |
+
"cpu_mem": 1.495007232,
|
| 483 |
+
"gpu_mem": 4.518670336,
|
| 484 |
+
"loss": 1.3186,
|
| 485 |
+
"grad_norm": 6.321723937988281,
|
| 486 |
+
"learning_rate": 3.733021036942205e-05
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 3.142857142857143,
|
| 491 |
+
"cpu_mem": 1.495007232,
|
| 492 |
+
"gpu_mem": 4.518704128,
|
| 493 |
+
"loss": 1.3322,
|
| 494 |
+
"grad_norm": 4.518864154815674,
|
| 495 |
+
"learning_rate": 3.238202901349345e-05
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 3.2,
|
| 500 |
+
"cpu_mem": 1.495007232,
|
| 501 |
+
"gpu_mem": 4.518777856,
|
| 502 |
+
"loss": 1.3229,
|
| 503 |
+
"grad_norm": 3.9502453804016113,
|
| 504 |
+
"learning_rate": 2.774574937247831e-05
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 3.257142857142857,
|
| 509 |
+
"cpu_mem": 1.495007232,
|
| 510 |
+
"gpu_mem": 4.518721024,
|
| 511 |
+
"loss": 1.2922,
|
| 512 |
+
"grad_norm": 2.77620530128479,
|
| 513 |
+
"learning_rate": 2.3433666026522153e-05
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 3.314285714285714,
|
| 518 |
+
"cpu_mem": 1.495007232,
|
| 519 |
+
"gpu_mem": 4.51871488,
|
| 520 |
+
"loss": 1.221,
|
| 521 |
+
"grad_norm": 5.571518898010254,
|
| 522 |
+
"learning_rate": 1.945721384547671e-05
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 3.3714285714285714,
|
| 527 |
+
"cpu_mem": 1.495007232,
|
| 528 |
+
"gpu_mem": 4.518765568,
|
| 529 |
+
"loss": 1.2745,
|
| 530 |
+
"grad_norm": 4.341223239898682,
|
| 531 |
+
"learning_rate": 1.5826937665680693e-05
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 3.4285714285714284,
|
| 536 |
+
"cpu_mem": 1.495007232,
|
| 537 |
+
"gpu_mem": 4.51869184,
|
| 538 |
+
"loss": 1.3552,
|
| 539 |
+
"grad_norm": 5.9276251792907715,
|
| 540 |
+
"learning_rate": 1.2552464326949302e-05
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 3.4857142857142858,
|
| 545 |
+
"cpu_mem": 1.495007232,
|
| 546 |
+
"gpu_mem": 4.518705664,
|
| 547 |
+
"loss": 1.3201,
|
| 548 |
+
"grad_norm": 4.258768558502197,
|
| 549 |
+
"learning_rate": 9.64247714392597e-06
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 3.5428571428571427,
|
| 554 |
+
"cpu_mem": 1.495007232,
|
| 555 |
+
"gpu_mem": 4.5187072,
|
| 556 |
+
"loss": 1.3376,
|
| 557 |
+
"grad_norm": 5.822268009185791,
|
| 558 |
+
"learning_rate": 7.104692879492624e-06
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 3.6,
|
| 563 |
+
"cpu_mem": 1.495007232,
|
| 564 |
+
"gpu_mem": 4.518696448,
|
| 565 |
+
"loss": 1.3397,
|
| 566 |
+
"grad_norm": 3.860724925994873,
|
| 567 |
+
"learning_rate": 4.945841281301943e-06
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 3.657142857142857,
|
| 572 |
+
"cpu_mem": 1.495007232,
|
| 573 |
+
"gpu_mem": 4.518713344,
|
| 574 |
+
"loss": 1.2999,
|
| 575 |
+
"grad_norm": 4.712264060974121,
|
| 576 |
+
"learning_rate": 3.1716472356963286e-06
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 3.7142857142857144,
|
| 581 |
+
"cpu_mem": 1.495007232,
|
| 582 |
+
"gpu_mem": 4.518734848,
|
| 583 |
+
"loss": 1.3846,
|
| 584 |
+
"grad_norm": 7.827590465545654,
|
| 585 |
+
"learning_rate": 1.7868155863384415e-06
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 3.7714285714285714,
|
| 590 |
+
"cpu_mem": 1.495007232,
|
| 591 |
+
"gpu_mem": 4.518725632,
|
| 592 |
+
"loss": 1.255,
|
| 593 |
+
"grad_norm": 4.00440788269043,
|
| 594 |
+
"learning_rate": 7.950186578116413e-07
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 3.8285714285714287,
|
| 599 |
+
"cpu_mem": 1.495007232,
|
| 600 |
+
"gpu_mem": 4.518751744,
|
| 601 |
+
"loss": 1.2525,
|
| 602 |
+
"grad_norm": 4.9576640129089355,
|
| 603 |
+
"learning_rate": 1.988865172754206e-07
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 3.8857142857142857,
|
| 608 |
+
"cpu_mem": 1.495007232,
|
| 609 |
+
"gpu_mem": 4.518702592,
|
| 610 |
+
"loss": 1.3006,
|
| 611 |
+
"grad_norm": 4.395829677581787,
|
| 612 |
+
"learning_rate": 0.0
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 68,
|
| 616 |
+
"epoch": 3.8857142857142857,
|
| 617 |
+
"cpu_mem": 1.495007232,
|
| 618 |
+
"gpu_mem": 4.518702592,
|
| 619 |
+
"train_runtime": 376.1937,
|
| 620 |
+
"train_samples_per_second": 11.898,
|
| 621 |
+
"train_steps_per_second": 0.181,
|
| 622 |
+
"total_flos": 0.0,
|
| 623 |
+
"train_loss": 1.523840169696247
|
| 624 |
+
}
|
| 625 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 4,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 2,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "arc_e",
|
| 3 |
+
"results": 0.33375420875420875
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "ARC_E",
|
| 5 |
+
"dataset_id": "allenai/ai2_arc",
|
| 6 |
+
"preprocess_id": "arc_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 2,
|
| 11 |
+
"alpha": 4,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 1577576
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 4,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-arc_e-r2-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T15:57:00.430559"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r2-a2/training_logs.json
ADDED
|
@@ -0,0 +1,1273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.028169014084507043,
|
| 5 |
+
"cpu_mem": 1.48697088,
|
| 6 |
+
"gpu_mem": 4.4237952,
|
| 7 |
+
"loss": 4.6319,
|
| 8 |
+
"grad_norm": 334.8832702636719,
|
| 9 |
+
"learning_rate": 2.1428571428571425e-05
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.056338028169014086,
|
| 14 |
+
"cpu_mem": 1.492672512,
|
| 15 |
+
"gpu_mem": 4.436629504,
|
| 16 |
+
"loss": 4.4578,
|
| 17 |
+
"grad_norm": 338.71502685546875,
|
| 18 |
+
"learning_rate": 4.285714285714285e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.08450704225352113,
|
| 23 |
+
"cpu_mem": 1.49286912,
|
| 24 |
+
"gpu_mem": 4.436608,
|
| 25 |
+
"loss": 3.0613,
|
| 26 |
+
"grad_norm": 203.26577758789062,
|
| 27 |
+
"learning_rate": 6.428571428571427e-05
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.11267605633802817,
|
| 32 |
+
"cpu_mem": 1.493065728,
|
| 33 |
+
"gpu_mem": 4.436586496,
|
| 34 |
+
"loss": 2.1672,
|
| 35 |
+
"grad_norm": 93.64673614501953,
|
| 36 |
+
"learning_rate": 8.57142857142857e-05
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.14084507042253522,
|
| 41 |
+
"cpu_mem": 1.493065728,
|
| 42 |
+
"gpu_mem": 4.436627968,
|
| 43 |
+
"loss": 1.5508,
|
| 44 |
+
"grad_norm": 19.551036834716797,
|
| 45 |
+
"learning_rate": 0.00010714285714285714
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.16901408450704225,
|
| 50 |
+
"cpu_mem": 1.493262336,
|
| 51 |
+
"gpu_mem": 4.436603392,
|
| 52 |
+
"loss": 1.4936,
|
| 53 |
+
"grad_norm": 32.31931686401367,
|
| 54 |
+
"learning_rate": 0.00012857142857142855
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.19718309859154928,
|
| 59 |
+
"cpu_mem": 1.493262336,
|
| 60 |
+
"gpu_mem": 4.436626432,
|
| 61 |
+
"loss": 1.4445,
|
| 62 |
+
"grad_norm": 20.193700790405273,
|
| 63 |
+
"learning_rate": 0.00015
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.22535211267605634,
|
| 68 |
+
"cpu_mem": 1.493262336,
|
| 69 |
+
"gpu_mem": 4.43658496,
|
| 70 |
+
"loss": 1.3912,
|
| 71 |
+
"grad_norm": 15.018762588500977,
|
| 72 |
+
"learning_rate": 0.0001714285714285714
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.2535211267605634,
|
| 77 |
+
"cpu_mem": 1.493262336,
|
| 78 |
+
"gpu_mem": 4.436586496,
|
| 79 |
+
"loss": 1.3628,
|
| 80 |
+
"grad_norm": 12.806224822998047,
|
| 81 |
+
"learning_rate": 0.00019285714285714286
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.28169014084507044,
|
| 86 |
+
"cpu_mem": 1.493262336,
|
| 87 |
+
"gpu_mem": 4.436581888,
|
| 88 |
+
"loss": 1.6795,
|
| 89 |
+
"grad_norm": 60.71196746826172,
|
| 90 |
+
"learning_rate": 0.00021428571428571427
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.30985915492957744,
|
| 95 |
+
"cpu_mem": 1.493458944,
|
| 96 |
+
"gpu_mem": 4.436660224,
|
| 97 |
+
"loss": 1.3897,
|
| 98 |
+
"grad_norm": 14.609763145446777,
|
| 99 |
+
"learning_rate": 0.00023571428571428569
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.3380281690140845,
|
| 104 |
+
"cpu_mem": 1.493458944,
|
| 105 |
+
"gpu_mem": 4.436634112,
|
| 106 |
+
"loss": 1.3519,
|
| 107 |
+
"grad_norm": 10.01632308959961,
|
| 108 |
+
"learning_rate": 0.0002571428571428571
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.36619718309859156,
|
| 113 |
+
"cpu_mem": 1.493458944,
|
| 114 |
+
"gpu_mem": 4.43658496,
|
| 115 |
+
"loss": 1.3813,
|
| 116 |
+
"grad_norm": 9.067853927612305,
|
| 117 |
+
"learning_rate": 0.00027857142857142854
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.39436619718309857,
|
| 122 |
+
"cpu_mem": 1.493458944,
|
| 123 |
+
"gpu_mem": 4.436606464,
|
| 124 |
+
"loss": 1.4137,
|
| 125 |
+
"grad_norm": 11.54834270477295,
|
| 126 |
+
"learning_rate": 0.0003
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.4225352112676056,
|
| 131 |
+
"cpu_mem": 1.493458944,
|
| 132 |
+
"gpu_mem": 4.436583424,
|
| 133 |
+
"loss": 1.33,
|
| 134 |
+
"grad_norm": 4.984076499938965,
|
| 135 |
+
"learning_rate": 0.0002999533773001224
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.4507042253521127,
|
| 140 |
+
"cpu_mem": 1.493458944,
|
| 141 |
+
"gpu_mem": 4.436588032,
|
| 142 |
+
"loss": 1.3787,
|
| 143 |
+
"grad_norm": 4.959704875946045,
|
| 144 |
+
"learning_rate": 0.0002998135381828383
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.4788732394366197,
|
| 149 |
+
"cpu_mem": 1.493458944,
|
| 150 |
+
"gpu_mem": 4.436624896,
|
| 151 |
+
"loss": 1.3491,
|
| 152 |
+
"grad_norm": 6.165195465087891,
|
| 153 |
+
"learning_rate": 0.00029958056957717696
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 0.5070422535211268,
|
| 158 |
+
"cpu_mem": 1.493458944,
|
| 159 |
+
"gpu_mem": 4.436635648,
|
| 160 |
+
"loss": 1.3499,
|
| 161 |
+
"grad_norm": 6.145087242126465,
|
| 162 |
+
"learning_rate": 0.0002992546163048102
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 0.5352112676056338,
|
| 167 |
+
"cpu_mem": 1.493458944,
|
| 168 |
+
"gpu_mem": 4.436578816,
|
| 169 |
+
"loss": 1.3647,
|
| 170 |
+
"grad_norm": 14.54247760772705,
|
| 171 |
+
"learning_rate": 0.0002988358809900258
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 0.5633802816901409,
|
| 176 |
+
"cpu_mem": 1.493458944,
|
| 177 |
+
"gpu_mem": 4.436649472,
|
| 178 |
+
"loss": 1.3599,
|
| 179 |
+
"grad_norm": 6.127029895782471,
|
| 180 |
+
"learning_rate": 0.0002983246239337692
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 0.5915492957746479,
|
| 185 |
+
"cpu_mem": 1.493458944,
|
| 186 |
+
"gpu_mem": 4.436647936,
|
| 187 |
+
"loss": 1.3275,
|
| 188 |
+
"grad_norm": 5.688446521759033,
|
| 189 |
+
"learning_rate": 0.0002977211629518312
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 0.6197183098591549,
|
| 194 |
+
"cpu_mem": 1.493458944,
|
| 195 |
+
"gpu_mem": 4.436604928,
|
| 196 |
+
"loss": 1.3288,
|
| 197 |
+
"grad_norm": 7.160696029663086,
|
| 198 |
+
"learning_rate": 0.00029702587317728153
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 0.647887323943662,
|
| 203 |
+
"cpu_mem": 1.493458944,
|
| 204 |
+
"gpu_mem": 4.436621824,
|
| 205 |
+
"loss": 1.3048,
|
| 206 |
+
"grad_norm": 5.542471408843994,
|
| 207 |
+
"learning_rate": 0.0002962391868272735
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 0.676056338028169,
|
| 212 |
+
"cpu_mem": 1.493458944,
|
| 213 |
+
"gpu_mem": 4.436578816,
|
| 214 |
+
"loss": 1.3567,
|
| 215 |
+
"grad_norm": 9.522262573242188,
|
| 216 |
+
"learning_rate": 0.00029536159293436166
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 0.704225352112676,
|
| 221 |
+
"cpu_mem": 1.493458944,
|
| 222 |
+
"gpu_mem": 4.436608,
|
| 223 |
+
"loss": 1.3671,
|
| 224 |
+
"grad_norm": 9.449694633483887,
|
| 225 |
+
"learning_rate": 0.00029439363704250176
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 0.7323943661971831,
|
| 230 |
+
"cpu_mem": 1.493458944,
|
| 231 |
+
"gpu_mem": 4.436588032,
|
| 232 |
+
"loss": 1.4186,
|
| 233 |
+
"grad_norm": 7.2168426513671875,
|
| 234 |
+
"learning_rate": 0.00029333592086792107
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 0.7605633802816901,
|
| 239 |
+
"cpu_mem": 1.493458944,
|
| 240 |
+
"gpu_mem": 4.436614144,
|
| 241 |
+
"loss": 1.3558,
|
| 242 |
+
"grad_norm": 10.885024070739746,
|
| 243 |
+
"learning_rate": 0.0002921891019250697
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 0.7887323943661971,
|
| 248 |
+
"cpu_mem": 1.493458944,
|
| 249 |
+
"gpu_mem": 4.436614144,
|
| 250 |
+
"loss": 1.3774,
|
| 251 |
+
"grad_norm": 6.4214277267456055,
|
| 252 |
+
"learning_rate": 0.0002909538931178862
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 0.8169014084507042,
|
| 257 |
+
"cpu_mem": 1.493458944,
|
| 258 |
+
"gpu_mem": 4.43659264,
|
| 259 |
+
"loss": 1.2741,
|
| 260 |
+
"grad_norm": 6.542706489562988,
|
| 261 |
+
"learning_rate": 0.00028963106229663063
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 0.8450704225352113,
|
| 266 |
+
"cpu_mem": 1.493458944,
|
| 267 |
+
"gpu_mem": 4.436583424,
|
| 268 |
+
"loss": 1.3852,
|
| 269 |
+
"grad_norm": 9.458147048950195,
|
| 270 |
+
"learning_rate": 0.00028822143178056114
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 0.8732394366197183,
|
| 275 |
+
"cpu_mem": 1.493458944,
|
| 276 |
+
"gpu_mem": 4.436601856,
|
| 277 |
+
"loss": 1.3504,
|
| 278 |
+
"grad_norm": 16.29282569885254,
|
| 279 |
+
"learning_rate": 0.00028672587784675096
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 0.9014084507042254,
|
| 284 |
+
"cpu_mem": 1.493458944,
|
| 285 |
+
"gpu_mem": 4.436624896,
|
| 286 |
+
"loss": 1.3272,
|
| 287 |
+
"grad_norm": 7.9392499923706055,
|
| 288 |
+
"learning_rate": 0.0002851453301853628
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 0.9295774647887324,
|
| 293 |
+
"cpu_mem": 1.493458944,
|
| 294 |
+
"gpu_mem": 4.436621824,
|
| 295 |
+
"loss": 1.3411,
|
| 296 |
+
"grad_norm": 4.136773109436035,
|
| 297 |
+
"learning_rate": 0.00028348077132172027
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 0.9577464788732394,
|
| 302 |
+
"cpu_mem": 1.493458944,
|
| 303 |
+
"gpu_mem": 4.436624896,
|
| 304 |
+
"loss": 1.3751,
|
| 305 |
+
"grad_norm": 5.5571393966674805,
|
| 306 |
+
"learning_rate": 0.0002817332360055343
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 0.9859154929577465,
|
| 311 |
+
"cpu_mem": 1.493458944,
|
| 312 |
+
"gpu_mem": 4.436606464,
|
| 313 |
+
"loss": 1.2823,
|
| 314 |
+
"grad_norm": 3.0069351196289062,
|
| 315 |
+
"learning_rate": 0.0002799038105676658
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 1.0140845070422535,
|
| 320 |
+
"cpu_mem": 1.493458944,
|
| 321 |
+
"gpu_mem": 4.442983936,
|
| 322 |
+
"loss": 1.9088,
|
| 323 |
+
"grad_norm": 9.872271537780762,
|
| 324 |
+
"learning_rate": 0.0002779936322448233
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 1.0422535211267605,
|
| 329 |
+
"cpu_mem": 1.493458944,
|
| 330 |
+
"gpu_mem": 4.442988544,
|
| 331 |
+
"loss": 1.3898,
|
| 332 |
+
"grad_norm": 9.080979347229004,
|
| 333 |
+
"learning_rate": 0.0002760038884726157
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 1.0704225352112675,
|
| 338 |
+
"cpu_mem": 1.493458944,
|
| 339 |
+
"gpu_mem": 4.44296704,
|
| 340 |
+
"loss": 1.2297,
|
| 341 |
+
"grad_norm": 4.792629718780518,
|
| 342 |
+
"learning_rate": 0.00027393581614739923
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 1.0985915492957747,
|
| 347 |
+
"cpu_mem": 1.493458944,
|
| 348 |
+
"gpu_mem": 4.442956288,
|
| 349 |
+
"loss": 1.3502,
|
| 350 |
+
"grad_norm": 10.400541305541992,
|
| 351 |
+
"learning_rate": 0.0002717907008573785
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 1.1267605633802817,
|
| 356 |
+
"cpu_mem": 1.493458944,
|
| 357 |
+
"gpu_mem": 4.443019264,
|
| 358 |
+
"loss": 1.3253,
|
| 359 |
+
"grad_norm": 4.320893287658691,
|
| 360 |
+
"learning_rate": 0.0002695698760834384
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 1.1549295774647887,
|
| 365 |
+
"cpu_mem": 1.493458944,
|
| 366 |
+
"gpu_mem": 4.442979328,
|
| 367 |
+
"loss": 1.3928,
|
| 368 |
+
"grad_norm": 12.37241268157959,
|
| 369 |
+
"learning_rate": 0.00026727472237020447
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 1.1830985915492958,
|
| 374 |
+
"cpu_mem": 1.493458944,
|
| 375 |
+
"gpu_mem": 4.443022336,
|
| 376 |
+
"loss": 1.3985,
|
| 377 |
+
"grad_norm": 9.03736686706543,
|
| 378 |
+
"learning_rate": 0.00026490666646784665
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 1.2112676056338028,
|
| 383 |
+
"cpu_mem": 1.493458944,
|
| 384 |
+
"gpu_mem": 4.442971648,
|
| 385 |
+
"loss": 1.3813,
|
| 386 |
+
"grad_norm": 5.811436176300049,
|
| 387 |
+
"learning_rate": 0.0002624671804451601
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 1.2394366197183098,
|
| 392 |
+
"cpu_mem": 1.493458944,
|
| 393 |
+
"gpu_mem": 4.44303616,
|
| 394 |
+
"loss": 1.3624,
|
| 395 |
+
"grad_norm": 5.308187961578369,
|
| 396 |
+
"learning_rate": 0.0002599577807744739
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 1.267605633802817,
|
| 401 |
+
"cpu_mem": 1.493458944,
|
| 402 |
+
"gpu_mem": 4.443003904,
|
| 403 |
+
"loss": 1.3711,
|
| 404 |
+
"grad_norm": 4.097907066345215,
|
| 405 |
+
"learning_rate": 0.0002573800273889577
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 1.295774647887324,
|
| 410 |
+
"cpu_mem": 1.493458944,
|
| 411 |
+
"gpu_mem": 4.443008512,
|
| 412 |
+
"loss": 1.3712,
|
| 413 |
+
"grad_norm": 4.213882923126221,
|
| 414 |
+
"learning_rate": 0.0002547355227129109
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 1.323943661971831,
|
| 419 |
+
"cpu_mem": 1.493458944,
|
| 420 |
+
"gpu_mem": 4.442954752,
|
| 421 |
+
"loss": 1.3301,
|
| 422 |
+
"grad_norm": 7.293313503265381,
|
| 423 |
+
"learning_rate": 0.00025202591066563786
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 1.352112676056338,
|
| 428 |
+
"cpu_mem": 1.493458944,
|
| 429 |
+
"gpu_mem": 4.442968576,
|
| 430 |
+
"loss": 1.3347,
|
| 431 |
+
"grad_norm": 8.195301055908203,
|
| 432 |
+
"learning_rate": 0.0002492528756395289
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 1.380281690140845,
|
| 437 |
+
"cpu_mem": 1.493458944,
|
| 438 |
+
"gpu_mem": 4.442957824,
|
| 439 |
+
"loss": 1.3941,
|
| 440 |
+
"grad_norm": 25.371461868286133,
|
| 441 |
+
"learning_rate": 0.0002464181414529809
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 1.408450704225352,
|
| 446 |
+
"cpu_mem": 1.493458944,
|
| 447 |
+
"gpu_mem": 4.442971648,
|
| 448 |
+
"loss": 1.4377,
|
| 449 |
+
"grad_norm": 18.559114456176758,
|
| 450 |
+
"learning_rate": 0.00024352347027881003
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 1.436619718309859,
|
| 455 |
+
"cpu_mem": 1.493458944,
|
| 456 |
+
"gpu_mem": 4.443023872,
|
| 457 |
+
"loss": 1.364,
|
| 458 |
+
"grad_norm": 8.876504898071289,
|
| 459 |
+
"learning_rate": 0.0002405706615488216
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 1.4647887323943662,
|
| 464 |
+
"cpu_mem": 1.493458944,
|
| 465 |
+
"gpu_mem": 4.442971648,
|
| 466 |
+
"loss": 1.686,
|
| 467 |
+
"grad_norm": 56.97942352294922,
|
| 468 |
+
"learning_rate": 0.00023756155083521846
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 1.4929577464788732,
|
| 473 |
+
"cpu_mem": 1.493458944,
|
| 474 |
+
"gpu_mem": 4.443040768,
|
| 475 |
+
"loss": 1.4077,
|
| 476 |
+
"grad_norm": 22.452436447143555,
|
| 477 |
+
"learning_rate": 0.00023449800870954326
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 1.5211267605633803,
|
| 482 |
+
"cpu_mem": 1.493458944,
|
| 483 |
+
"gpu_mem": 4.443008512,
|
| 484 |
+
"loss": 1.3135,
|
| 485 |
+
"grad_norm": 6.5965576171875,
|
| 486 |
+
"learning_rate": 0.0002313819395798639
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 1.5492957746478875,
|
| 491 |
+
"cpu_mem": 1.493458944,
|
| 492 |
+
"gpu_mem": 4.443017728,
|
| 493 |
+
"loss": 1.3823,
|
| 494 |
+
"grad_norm": 6.214559078216553,
|
| 495 |
+
"learning_rate": 0.0002282152805069247
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 1.5774647887323945,
|
| 500 |
+
"cpu_mem": 1.493458944,
|
| 501 |
+
"gpu_mem": 4.442993152,
|
| 502 |
+
"loss": 1.3107,
|
| 503 |
+
"grad_norm": 2.944521903991699,
|
| 504 |
+
"learning_rate": 0.000225
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 1.6056338028169015,
|
| 509 |
+
"cpu_mem": 1.493458944,
|
| 510 |
+
"gpu_mem": 4.443026944,
|
| 511 |
+
"loss": 1.3232,
|
| 512 |
+
"grad_norm": 4.46520471572876,
|
| 513 |
+
"learning_rate": 0.00022173809679319772
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 1.6338028169014085,
|
| 518 |
+
"cpu_mem": 1.493458944,
|
| 519 |
+
"gpu_mem": 4.443008512,
|
| 520 |
+
"loss": 1.3273,
|
| 521 |
+
"grad_norm": 3.4397385120391846,
|
| 522 |
+
"learning_rate": 0.00021843159860297442
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 1.6619718309859155,
|
| 527 |
+
"cpu_mem": 1.493458944,
|
| 528 |
+
"gpu_mem": 4.442994688,
|
| 529 |
+
"loss": 1.3453,
|
| 530 |
+
"grad_norm": 3.8322765827178955,
|
| 531 |
+
"learning_rate": 0.00021508256086763368
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 1.6901408450704225,
|
| 536 |
+
"cpu_mem": 1.493458944,
|
| 537 |
+
"gpu_mem": 4.443033088,
|
| 538 |
+
"loss": 1.3242,
|
| 539 |
+
"grad_norm": 4.89071798324585,
|
| 540 |
+
"learning_rate": 0.00021169306546959174
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 1.7183098591549295,
|
| 545 |
+
"cpu_mem": 1.493458944,
|
| 546 |
+
"gpu_mem": 4.442965504,
|
| 547 |
+
"loss": 1.2936,
|
| 548 |
+
"grad_norm": 4.631297588348389,
|
| 549 |
+
"learning_rate": 0.0002082652194412042
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 1.7464788732394365,
|
| 554 |
+
"cpu_mem": 1.493458944,
|
| 555 |
+
"gpu_mem": 4.44301312,
|
| 556 |
+
"loss": 1.4406,
|
| 557 |
+
"grad_norm": 12.51733684539795,
|
| 558 |
+
"learning_rate": 0.00020480115365495926
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 1.7746478873239435,
|
| 563 |
+
"cpu_mem": 1.493458944,
|
| 564 |
+
"gpu_mem": 4.442962432,
|
| 565 |
+
"loss": 1.446,
|
| 566 |
+
"grad_norm": 15.611908912658691,
|
| 567 |
+
"learning_rate": 0.00020130302149885031
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 1.8028169014084507,
|
| 572 |
+
"cpu_mem": 1.493458944,
|
| 573 |
+
"gpu_mem": 4.443011584,
|
| 574 |
+
"loss": 1.4236,
|
| 575 |
+
"grad_norm": 12.190735816955566,
|
| 576 |
+
"learning_rate": 0.00019777299753775265
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 1.8309859154929577,
|
| 581 |
+
"cpu_mem": 1.493458944,
|
| 582 |
+
"gpu_mem": 4.443010048,
|
| 583 |
+
"loss": 1.3596,
|
| 584 |
+
"grad_norm": 5.446914196014404,
|
| 585 |
+
"learning_rate": 0.00019421327616163563
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 1.8591549295774648,
|
| 590 |
+
"cpu_mem": 1.493458944,
|
| 591 |
+
"gpu_mem": 4.44302848,
|
| 592 |
+
"loss": 1.2865,
|
| 593 |
+
"grad_norm": 4.165730953216553,
|
| 594 |
+
"learning_rate": 0.00019062607022145078
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 1.887323943661972,
|
| 599 |
+
"cpu_mem": 1.493458944,
|
| 600 |
+
"gpu_mem": 4.442970112,
|
| 601 |
+
"loss": 1.2999,
|
| 602 |
+
"grad_norm": 3.62854266166687,
|
| 603 |
+
"learning_rate": 0.00018701360965354402
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 1.915492957746479,
|
| 608 |
+
"cpu_mem": 1.493458944,
|
| 609 |
+
"gpu_mem": 4.4429824,
|
| 610 |
+
"loss": 1.3443,
|
| 611 |
+
"grad_norm": 3.0337541103363037,
|
| 612 |
+
"learning_rate": 0.00018337814009344714
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 69,
|
| 616 |
+
"epoch": 1.943661971830986,
|
| 617 |
+
"cpu_mem": 1.493458944,
|
| 618 |
+
"gpu_mem": 4.443006976,
|
| 619 |
+
"loss": 1.3273,
|
| 620 |
+
"grad_norm": 4.934184551239014,
|
| 621 |
+
"learning_rate": 0.0001797219214799096
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"step": 70,
|
| 625 |
+
"epoch": 1.971830985915493,
|
| 626 |
+
"cpu_mem": 1.493458944,
|
| 627 |
+
"gpu_mem": 4.442983936,
|
| 628 |
+
"loss": 1.2715,
|
| 629 |
+
"grad_norm": 3.110337495803833,
|
| 630 |
+
"learning_rate": 0.00017604722665003956
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"step": 71,
|
| 634 |
+
"epoch": 2.0,
|
| 635 |
+
"cpu_mem": 1.493458944,
|
| 636 |
+
"gpu_mem": 4.442819584,
|
| 637 |
+
"loss": 1.9529,
|
| 638 |
+
"grad_norm": 7.656370639801025,
|
| 639 |
+
"learning_rate": 0.00017235633992642615
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"step": 72,
|
| 643 |
+
"epoch": 2.028169014084507,
|
| 644 |
+
"cpu_mem": 1.493458944,
|
| 645 |
+
"gpu_mem": 4.436617216,
|
| 646 |
+
"loss": 1.3812,
|
| 647 |
+
"grad_norm": 5.92775297164917,
|
| 648 |
+
"learning_rate": 0.00016865155569712278
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"step": 73,
|
| 652 |
+
"epoch": 2.056338028169014,
|
| 653 |
+
"cpu_mem": 1.493458944,
|
| 654 |
+
"gpu_mem": 4.436580352,
|
| 655 |
+
"loss": 1.3011,
|
| 656 |
+
"grad_norm": 7.2833356857299805,
|
| 657 |
+
"learning_rate": 0.0001649351769893725
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"step": 74,
|
| 661 |
+
"epoch": 2.084507042253521,
|
| 662 |
+
"cpu_mem": 1.493458944,
|
| 663 |
+
"gpu_mem": 4.436640256,
|
| 664 |
+
"loss": 1.3427,
|
| 665 |
+
"grad_norm": 14.072025299072266,
|
| 666 |
+
"learning_rate": 0.00016120951403796364
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"step": 75,
|
| 670 |
+
"epoch": 2.112676056338028,
|
| 671 |
+
"cpu_mem": 1.493458944,
|
| 672 |
+
"gpu_mem": 4.436608,
|
| 673 |
+
"loss": 1.3463,
|
| 674 |
+
"grad_norm": 7.366079807281494,
|
| 675 |
+
"learning_rate": 0.00015747688284910457
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"step": 76,
|
| 679 |
+
"epoch": 2.140845070422535,
|
| 680 |
+
"cpu_mem": 1.493458944,
|
| 681 |
+
"gpu_mem": 4.436618752,
|
| 682 |
+
"loss": 1.3097,
|
| 683 |
+
"grad_norm": 11.609695434570312,
|
| 684 |
+
"learning_rate": 0.00015373960376071093
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"step": 77,
|
| 688 |
+
"epoch": 2.169014084507042,
|
| 689 |
+
"cpu_mem": 1.493458944,
|
| 690 |
+
"gpu_mem": 4.436655616,
|
| 691 |
+
"loss": 1.3418,
|
| 692 |
+
"grad_norm": 11.972086906433105,
|
| 693 |
+
"learning_rate": 0.00015
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"step": 78,
|
| 697 |
+
"epoch": 2.1971830985915495,
|
| 698 |
+
"cpu_mem": 1.493458944,
|
| 699 |
+
"gpu_mem": 4.436640256,
|
| 700 |
+
"loss": 1.3932,
|
| 701 |
+
"grad_norm": 11.582221984863281,
|
| 702 |
+
"learning_rate": 0.00014626039623928907
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"step": 79,
|
| 706 |
+
"epoch": 2.2253521126760565,
|
| 707 |
+
"cpu_mem": 1.493458944,
|
| 708 |
+
"gpu_mem": 4.436591104,
|
| 709 |
+
"loss": 1.2812,
|
| 710 |
+
"grad_norm": 7.3847832679748535,
|
| 711 |
+
"learning_rate": 0.0001425231171508954
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"step": 80,
|
| 715 |
+
"epoch": 2.2535211267605635,
|
| 716 |
+
"cpu_mem": 1.493458944,
|
| 717 |
+
"gpu_mem": 4.436635648,
|
| 718 |
+
"loss": 1.3296,
|
| 719 |
+
"grad_norm": 6.362971782684326,
|
| 720 |
+
"learning_rate": 0.00013879048596203636
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"step": 81,
|
| 724 |
+
"epoch": 2.2816901408450705,
|
| 725 |
+
"cpu_mem": 1.493458944,
|
| 726 |
+
"gpu_mem": 4.436621824,
|
| 727 |
+
"loss": 1.3876,
|
| 728 |
+
"grad_norm": 11.030096054077148,
|
| 729 |
+
"learning_rate": 0.0001350648230106275
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"step": 82,
|
| 733 |
+
"epoch": 2.3098591549295775,
|
| 734 |
+
"cpu_mem": 1.493458944,
|
| 735 |
+
"gpu_mem": 4.436589568,
|
| 736 |
+
"loss": 1.4036,
|
| 737 |
+
"grad_norm": 9.106473922729492,
|
| 738 |
+
"learning_rate": 0.00013134844430287725
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"step": 83,
|
| 742 |
+
"epoch": 2.3380281690140845,
|
| 743 |
+
"cpu_mem": 1.493458944,
|
| 744 |
+
"gpu_mem": 4.436640256,
|
| 745 |
+
"loss": 1.3261,
|
| 746 |
+
"grad_norm": 11.944342613220215,
|
| 747 |
+
"learning_rate": 0.0001276436600735738
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"step": 84,
|
| 751 |
+
"epoch": 2.3661971830985915,
|
| 752 |
+
"cpu_mem": 1.493458944,
|
| 753 |
+
"gpu_mem": 4.436578816,
|
| 754 |
+
"loss": 1.3887,
|
| 755 |
+
"grad_norm": 12.192349433898926,
|
| 756 |
+
"learning_rate": 0.00012395277334996044
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"step": 85,
|
| 760 |
+
"epoch": 2.3943661971830985,
|
| 761 |
+
"cpu_mem": 1.493458944,
|
| 762 |
+
"gpu_mem": 4.436624896,
|
| 763 |
+
"loss": 1.3469,
|
| 764 |
+
"grad_norm": 7.058681488037109,
|
| 765 |
+
"learning_rate": 0.00012027807852009038
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"step": 86,
|
| 769 |
+
"epoch": 2.4225352112676055,
|
| 770 |
+
"cpu_mem": 1.493458944,
|
| 771 |
+
"gpu_mem": 4.436578816,
|
| 772 |
+
"loss": 1.3328,
|
| 773 |
+
"grad_norm": 5.5536932945251465,
|
| 774 |
+
"learning_rate": 0.00011662185990655284
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"step": 87,
|
| 778 |
+
"epoch": 2.4507042253521125,
|
| 779 |
+
"cpu_mem": 1.493458944,
|
| 780 |
+
"gpu_mem": 4.436609536,
|
| 781 |
+
"loss": 1.3939,
|
| 782 |
+
"grad_norm": 7.393185138702393,
|
| 783 |
+
"learning_rate": 0.00011298639034645593
|
| 784 |
+
},
|
| 785 |
+
{
|
| 786 |
+
"step": 88,
|
| 787 |
+
"epoch": 2.4788732394366195,
|
| 788 |
+
"cpu_mem": 1.493458944,
|
| 789 |
+
"gpu_mem": 4.43658496,
|
| 790 |
+
"loss": 1.3269,
|
| 791 |
+
"grad_norm": 7.776304721832275,
|
| 792 |
+
"learning_rate": 0.00010937392977854923
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"step": 89,
|
| 796 |
+
"epoch": 2.507042253521127,
|
| 797 |
+
"cpu_mem": 1.493458944,
|
| 798 |
+
"gpu_mem": 4.43663872,
|
| 799 |
+
"loss": 1.3153,
|
| 800 |
+
"grad_norm": 5.972353935241699,
|
| 801 |
+
"learning_rate": 0.00010578672383836435
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"step": 90,
|
| 805 |
+
"epoch": 2.535211267605634,
|
| 806 |
+
"cpu_mem": 1.493458944,
|
| 807 |
+
"gpu_mem": 4.436620288,
|
| 808 |
+
"loss": 1.3751,
|
| 809 |
+
"grad_norm": 6.654686450958252,
|
| 810 |
+
"learning_rate": 0.00010222700246224735
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"step": 91,
|
| 814 |
+
"epoch": 2.563380281690141,
|
| 815 |
+
"cpu_mem": 1.493458944,
|
| 816 |
+
"gpu_mem": 4.4365696,
|
| 817 |
+
"loss": 1.3689,
|
| 818 |
+
"grad_norm": 8.81556224822998,
|
| 819 |
+
"learning_rate": 9.869697850114969e-05
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"step": 92,
|
| 823 |
+
"epoch": 2.591549295774648,
|
| 824 |
+
"cpu_mem": 1.493458944,
|
| 825 |
+
"gpu_mem": 4.436594176,
|
| 826 |
+
"loss": 1.3643,
|
| 827 |
+
"grad_norm": 6.586201190948486,
|
| 828 |
+
"learning_rate": 9.519884634504074e-05
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"step": 93,
|
| 832 |
+
"epoch": 2.619718309859155,
|
| 833 |
+
"cpu_mem": 1.493458944,
|
| 834 |
+
"gpu_mem": 4.436597248,
|
| 835 |
+
"loss": 1.2994,
|
| 836 |
+
"grad_norm": 4.92837381362915,
|
| 837 |
+
"learning_rate": 9.17347805587958e-05
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"step": 94,
|
| 841 |
+
"epoch": 2.647887323943662,
|
| 842 |
+
"cpu_mem": 1.493458944,
|
| 843 |
+
"gpu_mem": 4.436589568,
|
| 844 |
+
"loss": 1.2857,
|
| 845 |
+
"grad_norm": 4.152866840362549,
|
| 846 |
+
"learning_rate": 8.830693453040829e-05
|
| 847 |
+
},
|
| 848 |
+
{
|
| 849 |
+
"step": 95,
|
| 850 |
+
"epoch": 2.676056338028169,
|
| 851 |
+
"cpu_mem": 1.493458944,
|
| 852 |
+
"gpu_mem": 4.436627968,
|
| 853 |
+
"loss": 1.3404,
|
| 854 |
+
"grad_norm": 4.53918981552124,
|
| 855 |
+
"learning_rate": 8.491743913236628e-05
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"step": 96,
|
| 859 |
+
"epoch": 2.704225352112676,
|
| 860 |
+
"cpu_mem": 1.493458944,
|
| 861 |
+
"gpu_mem": 4.436637184,
|
| 862 |
+
"loss": 1.2617,
|
| 863 |
+
"grad_norm": 6.160858631134033,
|
| 864 |
+
"learning_rate": 8.156840139702554e-05
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"step": 97,
|
| 868 |
+
"epoch": 2.732394366197183,
|
| 869 |
+
"cpu_mem": 1.493458944,
|
| 870 |
+
"gpu_mem": 4.436580352,
|
| 871 |
+
"loss": 1.3543,
|
| 872 |
+
"grad_norm": 4.770242691040039,
|
| 873 |
+
"learning_rate": 7.82619032068023e-05
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"step": 98,
|
| 877 |
+
"epoch": 2.76056338028169,
|
| 878 |
+
"cpu_mem": 1.493458944,
|
| 879 |
+
"gpu_mem": 4.436580352,
|
| 880 |
+
"loss": 1.327,
|
| 881 |
+
"grad_norm": 2.966948986053467,
|
| 882 |
+
"learning_rate": 7.500000000000002e-05
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"step": 99,
|
| 886 |
+
"epoch": 2.788732394366197,
|
| 887 |
+
"cpu_mem": 1.493458944,
|
| 888 |
+
"gpu_mem": 4.43657728,
|
| 889 |
+
"loss": 1.3015,
|
| 890 |
+
"grad_norm": 3.1356072425842285,
|
| 891 |
+
"learning_rate": 7.17847194930753e-05
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"step": 100,
|
| 895 |
+
"epoch": 2.816901408450704,
|
| 896 |
+
"cpu_mem": 1.493458944,
|
| 897 |
+
"gpu_mem": 4.436575744,
|
| 898 |
+
"loss": 1.272,
|
| 899 |
+
"grad_norm": 5.338186264038086,
|
| 900 |
+
"learning_rate": 6.86180604201361e-05
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"step": 101,
|
| 904 |
+
"epoch": 2.845070422535211,
|
| 905 |
+
"cpu_mem": 1.493458944,
|
| 906 |
+
"gpu_mem": 4.436618752,
|
| 907 |
+
"loss": 1.2616,
|
| 908 |
+
"grad_norm": 4.881860733032227,
|
| 909 |
+
"learning_rate": 6.550199129045668e-05
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"step": 102,
|
| 913 |
+
"epoch": 2.873239436619718,
|
| 914 |
+
"cpu_mem": 1.493458944,
|
| 915 |
+
"gpu_mem": 4.436557312,
|
| 916 |
+
"loss": 1.3467,
|
| 917 |
+
"grad_norm": 5.19181489944458,
|
| 918 |
+
"learning_rate": 6.243844916478155e-05
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"step": 103,
|
| 922 |
+
"epoch": 2.9014084507042255,
|
| 923 |
+
"cpu_mem": 1.493458944,
|
| 924 |
+
"gpu_mem": 4.436606464,
|
| 925 |
+
"loss": 1.309,
|
| 926 |
+
"grad_norm": 3.279639959335327,
|
| 927 |
+
"learning_rate": 5.9429338451178355e-05
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"step": 104,
|
| 931 |
+
"epoch": 2.9295774647887325,
|
| 932 |
+
"cpu_mem": 1.493458944,
|
| 933 |
+
"gpu_mem": 4.43666944,
|
| 934 |
+
"loss": 1.3868,
|
| 935 |
+
"grad_norm": 6.660994052886963,
|
| 936 |
+
"learning_rate": 5.6476529721189974e-05
|
| 937 |
+
},
|
| 938 |
+
{
|
| 939 |
+
"step": 105,
|
| 940 |
+
"epoch": 2.9577464788732395,
|
| 941 |
+
"cpu_mem": 1.493458944,
|
| 942 |
+
"gpu_mem": 4.436621824,
|
| 943 |
+
"loss": 1.2845,
|
| 944 |
+
"grad_norm": 3.30313777923584,
|
| 945 |
+
"learning_rate": 5.358185854701909e-05
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"step": 106,
|
| 949 |
+
"epoch": 2.9859154929577465,
|
| 950 |
+
"cpu_mem": 1.493458944,
|
| 951 |
+
"gpu_mem": 4.436603392,
|
| 952 |
+
"loss": 1.3232,
|
| 953 |
+
"grad_norm": 3.6408579349517822,
|
| 954 |
+
"learning_rate": 5.074712436047112e-05
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"step": 107,
|
| 958 |
+
"epoch": 3.0140845070422535,
|
| 959 |
+
"cpu_mem": 1.493458944,
|
| 960 |
+
"gpu_mem": 4.443006976,
|
| 961 |
+
"loss": 1.8794,
|
| 962 |
+
"grad_norm": 7.724911212921143,
|
| 963 |
+
"learning_rate": 4.7974089334362057e-05
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"step": 108,
|
| 967 |
+
"epoch": 3.0422535211267605,
|
| 968 |
+
"cpu_mem": 1.493458944,
|
| 969 |
+
"gpu_mem": 4.442988544,
|
| 970 |
+
"loss": 1.3102,
|
| 971 |
+
"grad_norm": 3.513817310333252,
|
| 972 |
+
"learning_rate": 4.526447728708908e-05
|
| 973 |
+
},
|
| 974 |
+
{
|
| 975 |
+
"step": 109,
|
| 976 |
+
"epoch": 3.0704225352112675,
|
| 977 |
+
"cpu_mem": 1.493458944,
|
| 978 |
+
"gpu_mem": 4.442977792,
|
| 979 |
+
"loss": 1.3073,
|
| 980 |
+
"grad_norm": 2.583456516265869,
|
| 981 |
+
"learning_rate": 4.261997261104223e-05
|
| 982 |
+
},
|
| 983 |
+
{
|
| 984 |
+
"step": 110,
|
| 985 |
+
"epoch": 3.0985915492957745,
|
| 986 |
+
"cpu_mem": 1.493458944,
|
| 987 |
+
"gpu_mem": 4.443031552,
|
| 988 |
+
"loss": 1.327,
|
| 989 |
+
"grad_norm": 5.944295406341553,
|
| 990 |
+
"learning_rate": 4.004221922552608e-05
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"step": 111,
|
| 994 |
+
"epoch": 3.1267605633802815,
|
| 995 |
+
"cpu_mem": 1.493458944,
|
| 996 |
+
"gpu_mem": 4.442991616,
|
| 997 |
+
"loss": 1.3193,
|
| 998 |
+
"grad_norm": 3.914555072784424,
|
| 999 |
+
"learning_rate": 3.753281955483985e-05
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"step": 112,
|
| 1003 |
+
"epoch": 3.1549295774647885,
|
| 1004 |
+
"cpu_mem": 1.493458944,
|
| 1005 |
+
"gpu_mem": 4.443010048,
|
| 1006 |
+
"loss": 1.2872,
|
| 1007 |
+
"grad_norm": 2.4625840187072754,
|
| 1008 |
+
"learning_rate": 3.509333353215331e-05
|
| 1009 |
+
},
|
| 1010 |
+
{
|
| 1011 |
+
"step": 113,
|
| 1012 |
+
"epoch": 3.183098591549296,
|
| 1013 |
+
"cpu_mem": 1.493458944,
|
| 1014 |
+
"gpu_mem": 4.443073024,
|
| 1015 |
+
"loss": 1.3187,
|
| 1016 |
+
"grad_norm": 3.991567611694336,
|
| 1017 |
+
"learning_rate": 3.2725277629795526e-05
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"step": 114,
|
| 1021 |
+
"epoch": 3.211267605633803,
|
| 1022 |
+
"cpu_mem": 1.493458944,
|
| 1023 |
+
"gpu_mem": 4.443000832,
|
| 1024 |
+
"loss": 1.3259,
|
| 1025 |
+
"grad_norm": 3.1732451915740967,
|
| 1026 |
+
"learning_rate": 3.0430123916561672e-05
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"step": 115,
|
| 1030 |
+
"epoch": 3.23943661971831,
|
| 1031 |
+
"cpu_mem": 1.493458944,
|
| 1032 |
+
"gpu_mem": 4.442994688,
|
| 1033 |
+
"loss": 1.3577,
|
| 1034 |
+
"grad_norm": 4.750394821166992,
|
| 1035 |
+
"learning_rate": 2.8209299142621522e-05
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"step": 116,
|
| 1039 |
+
"epoch": 3.267605633802817,
|
| 1040 |
+
"cpu_mem": 1.493458944,
|
| 1041 |
+
"gpu_mem": 4.443010048,
|
| 1042 |
+
"loss": 1.3211,
|
| 1043 |
+
"grad_norm": 3.380751132965088,
|
| 1044 |
+
"learning_rate": 2.6064183852600797e-05
|
| 1045 |
+
},
|
| 1046 |
+
{
|
| 1047 |
+
"step": 117,
|
| 1048 |
+
"epoch": 3.295774647887324,
|
| 1049 |
+
"cpu_mem": 1.493458944,
|
| 1050 |
+
"gpu_mem": 4.443025408,
|
| 1051 |
+
"loss": 1.29,
|
| 1052 |
+
"grad_norm": 4.058185577392578,
|
| 1053 |
+
"learning_rate": 2.3996111527384288e-05
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"step": 118,
|
| 1057 |
+
"epoch": 3.323943661971831,
|
| 1058 |
+
"cpu_mem": 1.493458944,
|
| 1059 |
+
"gpu_mem": 4.443016192,
|
| 1060 |
+
"loss": 1.2777,
|
| 1061 |
+
"grad_norm": 5.971620082855225,
|
| 1062 |
+
"learning_rate": 2.2006367755176655e-05
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"step": 119,
|
| 1066 |
+
"epoch": 3.352112676056338,
|
| 1067 |
+
"cpu_mem": 1.493458944,
|
| 1068 |
+
"gpu_mem": 4.443006976,
|
| 1069 |
+
"loss": 1.2883,
|
| 1070 |
+
"grad_norm": 3.4085781574249268,
|
| 1071 |
+
"learning_rate": 2.009618943233419e-05
|
| 1072 |
+
},
|
| 1073 |
+
{
|
| 1074 |
+
"step": 120,
|
| 1075 |
+
"epoch": 3.380281690140845,
|
| 1076 |
+
"cpu_mem": 1.493458944,
|
| 1077 |
+
"gpu_mem": 4.443025408,
|
| 1078 |
+
"loss": 1.2777,
|
| 1079 |
+
"grad_norm": 2.7961478233337402,
|
| 1080 |
+
"learning_rate": 1.82667639944657e-05
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"step": 121,
|
| 1084 |
+
"epoch": 3.408450704225352,
|
| 1085 |
+
"cpu_mem": 1.493458944,
|
| 1086 |
+
"gpu_mem": 4.443023872,
|
| 1087 |
+
"loss": 1.2971,
|
| 1088 |
+
"grad_norm": 4.205790042877197,
|
| 1089 |
+
"learning_rate": 1.6519228678279718e-05
|
| 1090 |
+
},
|
| 1091 |
+
{
|
| 1092 |
+
"step": 122,
|
| 1093 |
+
"epoch": 3.436619718309859,
|
| 1094 |
+
"cpu_mem": 1.493458944,
|
| 1095 |
+
"gpu_mem": 4.442980864,
|
| 1096 |
+
"loss": 1.2862,
|
| 1097 |
+
"grad_norm": 4.291749000549316,
|
| 1098 |
+
"learning_rate": 1.4854669814637143e-05
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"step": 123,
|
| 1102 |
+
"epoch": 3.464788732394366,
|
| 1103 |
+
"cpu_mem": 1.493458944,
|
| 1104 |
+
"gpu_mem": 4.44301312,
|
| 1105 |
+
"loss": 1.2822,
|
| 1106 |
+
"grad_norm": 4.5682053565979,
|
| 1107 |
+
"learning_rate": 1.3274122153249028e-05
|
| 1108 |
+
},
|
| 1109 |
+
{
|
| 1110 |
+
"step": 124,
|
| 1111 |
+
"epoch": 3.492957746478873,
|
| 1112 |
+
"cpu_mem": 1.493458944,
|
| 1113 |
+
"gpu_mem": 4.44296704,
|
| 1114 |
+
"loss": 1.3005,
|
| 1115 |
+
"grad_norm": 2.844740867614746,
|
| 1116 |
+
"learning_rate": 1.1778568219438839e-05
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"step": 125,
|
| 1120 |
+
"epoch": 3.52112676056338,
|
| 1121 |
+
"cpu_mem": 1.493458944,
|
| 1122 |
+
"gpu_mem": 4.443011584,
|
| 1123 |
+
"loss": 1.2295,
|
| 1124 |
+
"grad_norm": 5.023967266082764,
|
| 1125 |
+
"learning_rate": 1.036893770336938e-05
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"step": 126,
|
| 1129 |
+
"epoch": 3.5492957746478875,
|
| 1130 |
+
"cpu_mem": 1.493458944,
|
| 1131 |
+
"gpu_mem": 4.442962432,
|
| 1132 |
+
"loss": 1.2662,
|
| 1133 |
+
"grad_norm": 4.076255798339844,
|
| 1134 |
+
"learning_rate": 9.046106882113751e-06
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"step": 127,
|
| 1138 |
+
"epoch": 3.5774647887323945,
|
| 1139 |
+
"cpu_mem": 1.493458944,
|
| 1140 |
+
"gpu_mem": 4.44297472,
|
| 1141 |
+
"loss": 1.2682,
|
| 1142 |
+
"grad_norm": 3.1658072471618652,
|
| 1143 |
+
"learning_rate": 7.810898074930243e-06
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"step": 128,
|
| 1147 |
+
"epoch": 3.6056338028169015,
|
| 1148 |
+
"cpu_mem": 1.493458944,
|
| 1149 |
+
"gpu_mem": 4.442999296,
|
| 1150 |
+
"loss": 1.2845,
|
| 1151 |
+
"grad_norm": 3.069446325302124,
|
| 1152 |
+
"learning_rate": 6.664079132078881e-06
|
| 1153 |
+
},
|
| 1154 |
+
{
|
| 1155 |
+
"step": 129,
|
| 1156 |
+
"epoch": 3.6338028169014085,
|
| 1157 |
+
"cpu_mem": 1.493458944,
|
| 1158 |
+
"gpu_mem": 4.442960896,
|
| 1159 |
+
"loss": 1.3036,
|
| 1160 |
+
"grad_norm": 3.197641372680664,
|
| 1161 |
+
"learning_rate": 5.606362957498195e-06
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"step": 130,
|
| 1165 |
+
"epoch": 3.6619718309859155,
|
| 1166 |
+
"cpu_mem": 1.493458944,
|
| 1167 |
+
"gpu_mem": 4.442963968,
|
| 1168 |
+
"loss": 1.3179,
|
| 1169 |
+
"grad_norm": 3.2924671173095703,
|
| 1170 |
+
"learning_rate": 4.638407065638322e-06
|
| 1171 |
+
},
|
| 1172 |
+
{
|
| 1173 |
+
"step": 131,
|
| 1174 |
+
"epoch": 3.6901408450704225,
|
| 1175 |
+
"cpu_mem": 1.493458944,
|
| 1176 |
+
"gpu_mem": 4.442976256,
|
| 1177 |
+
"loss": 1.2769,
|
| 1178 |
+
"grad_norm": 4.022733211517334,
|
| 1179 |
+
"learning_rate": 3.760813172726457e-06
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"step": 132,
|
| 1183 |
+
"epoch": 3.7183098591549295,
|
| 1184 |
+
"cpu_mem": 1.493458944,
|
| 1185 |
+
"gpu_mem": 4.442940928,
|
| 1186 |
+
"loss": 1.2735,
|
| 1187 |
+
"grad_norm": 3.0946011543273926,
|
| 1188 |
+
"learning_rate": 2.9741268227184255e-06
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"step": 133,
|
| 1192 |
+
"epoch": 3.7464788732394365,
|
| 1193 |
+
"cpu_mem": 1.493458944,
|
| 1194 |
+
"gpu_mem": 4.4429824,
|
| 1195 |
+
"loss": 1.2261,
|
| 1196 |
+
"grad_norm": 3.561920166015625,
|
| 1197 |
+
"learning_rate": 2.2788370481687965e-06
|
| 1198 |
+
},
|
| 1199 |
+
{
|
| 1200 |
+
"step": 134,
|
| 1201 |
+
"epoch": 3.7746478873239435,
|
| 1202 |
+
"cpu_mem": 1.493458944,
|
| 1203 |
+
"gpu_mem": 4.44299776,
|
| 1204 |
+
"loss": 1.248,
|
| 1205 |
+
"grad_norm": 3.3285350799560547,
|
| 1206 |
+
"learning_rate": 1.6753760662307215e-06
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"step": 135,
|
| 1210 |
+
"epoch": 3.802816901408451,
|
| 1211 |
+
"cpu_mem": 1.493458944,
|
| 1212 |
+
"gpu_mem": 4.442962432,
|
| 1213 |
+
"loss": 1.3244,
|
| 1214 |
+
"grad_norm": 4.943061828613281,
|
| 1215 |
+
"learning_rate": 1.1641190099741904e-06
|
| 1216 |
+
},
|
| 1217 |
+
{
|
| 1218 |
+
"step": 136,
|
| 1219 |
+
"epoch": 3.830985915492958,
|
| 1220 |
+
"cpu_mem": 1.493458944,
|
| 1221 |
+
"gpu_mem": 4.442970112,
|
| 1222 |
+
"loss": 1.3367,
|
| 1223 |
+
"grad_norm": 4.610203266143799,
|
| 1224 |
+
"learning_rate": 7.453836951897885e-07
|
| 1225 |
+
},
|
| 1226 |
+
{
|
| 1227 |
+
"step": 137,
|
| 1228 |
+
"epoch": 3.859154929577465,
|
| 1229 |
+
"cpu_mem": 1.493458944,
|
| 1230 |
+
"gpu_mem": 4.442991616,
|
| 1231 |
+
"loss": 1.3017,
|
| 1232 |
+
"grad_norm": 4.349905967712402,
|
| 1233 |
+
"learning_rate": 4.194304228229806e-07
|
| 1234 |
+
},
|
| 1235 |
+
{
|
| 1236 |
+
"step": 138,
|
| 1237 |
+
"epoch": 3.887323943661972,
|
| 1238 |
+
"cpu_mem": 1.493458944,
|
| 1239 |
+
"gpu_mem": 4.443002368,
|
| 1240 |
+
"loss": 1.3214,
|
| 1241 |
+
"grad_norm": 3.8690969944000244,
|
| 1242 |
+
"learning_rate": 1.8646181716164831e-07
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"step": 139,
|
| 1246 |
+
"epoch": 3.915492957746479,
|
| 1247 |
+
"cpu_mem": 1.493458944,
|
| 1248 |
+
"gpu_mem": 4.442994688,
|
| 1249 |
+
"loss": 1.3432,
|
| 1250 |
+
"grad_norm": 3.9236385822296143,
|
| 1251 |
+
"learning_rate": 4.662269987756317e-08
|
| 1252 |
+
},
|
| 1253 |
+
{
|
| 1254 |
+
"step": 140,
|
| 1255 |
+
"epoch": 3.943661971830986,
|
| 1256 |
+
"cpu_mem": 1.493458944,
|
| 1257 |
+
"gpu_mem": 4.44302848,
|
| 1258 |
+
"loss": 1.3178,
|
| 1259 |
+
"grad_norm": 3.8790531158447266,
|
| 1260 |
+
"learning_rate": 0.0
|
| 1261 |
+
},
|
| 1262 |
+
{
|
| 1263 |
+
"step": 140,
|
| 1264 |
+
"epoch": 3.943661971830986,
|
| 1265 |
+
"cpu_mem": 1.493458944,
|
| 1266 |
+
"gpu_mem": 4.44302848,
|
| 1267 |
+
"train_runtime": 672.9448,
|
| 1268 |
+
"train_samples_per_second": 13.38,
|
| 1269 |
+
"train_steps_per_second": 0.208,
|
| 1270 |
+
"total_flos": 0.0,
|
| 1271 |
+
"train_loss": 1.4192385068961553
|
| 1272 |
+
}
|
| 1273 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 64,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 32,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "arc_e",
|
| 3 |
+
"results": 0.37247474747474746
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "ARC_E",
|
| 5 |
+
"dataset_id": "allenai/ai2_arc",
|
| 6 |
+
"preprocess_id": "arc_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 32,
|
| 11 |
+
"alpha": 64,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 25389056
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 4,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-arc_e-r32-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-31T05:51:47.380869"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r32-a2/training_logs.json
ADDED
|
@@ -0,0 +1,1273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.028169014084507043,
|
| 5 |
+
"cpu_mem": 1.496137728,
|
| 6 |
+
"gpu_mem": 4.518964736,
|
| 7 |
+
"loss": 4.6319,
|
| 8 |
+
"grad_norm": 285.1859436035156,
|
| 9 |
+
"learning_rate": 2.1428571428571425e-05
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.056338028169014086,
|
| 14 |
+
"cpu_mem": 1.501642752,
|
| 15 |
+
"gpu_mem": 4.722138112,
|
| 16 |
+
"loss": 4.4578,
|
| 17 |
+
"grad_norm": 290.5561218261719,
|
| 18 |
+
"learning_rate": 4.285714285714285e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.08450704225352113,
|
| 23 |
+
"cpu_mem": 1.50183936,
|
| 24 |
+
"gpu_mem": 4.722116608,
|
| 25 |
+
"loss": 2.8478,
|
| 26 |
+
"grad_norm": 381.0905456542969,
|
| 27 |
+
"learning_rate": 6.428571428571427e-05
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.11267605633802817,
|
| 32 |
+
"cpu_mem": 1.502035968,
|
| 33 |
+
"gpu_mem": 4.722095104,
|
| 34 |
+
"loss": 1.6686,
|
| 35 |
+
"grad_norm": 25.219541549682617,
|
| 36 |
+
"learning_rate": 8.57142857142857e-05
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.14084507042253522,
|
| 41 |
+
"cpu_mem": 1.502035968,
|
| 42 |
+
"gpu_mem": 4.722136576,
|
| 43 |
+
"loss": 1.5693,
|
| 44 |
+
"grad_norm": 22.9230899810791,
|
| 45 |
+
"learning_rate": 0.00010714285714285714
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.16901408450704225,
|
| 50 |
+
"cpu_mem": 1.502035968,
|
| 51 |
+
"gpu_mem": 4.722112,
|
| 52 |
+
"loss": 1.4612,
|
| 53 |
+
"grad_norm": 21.357065200805664,
|
| 54 |
+
"learning_rate": 0.00012857142857142855
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.19718309859154928,
|
| 59 |
+
"cpu_mem": 1.502232576,
|
| 60 |
+
"gpu_mem": 4.72213504,
|
| 61 |
+
"loss": 1.6244,
|
| 62 |
+
"grad_norm": 26.6319637298584,
|
| 63 |
+
"learning_rate": 0.00015
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.22535211267605634,
|
| 68 |
+
"cpu_mem": 1.502232576,
|
| 69 |
+
"gpu_mem": 4.722093568,
|
| 70 |
+
"loss": 1.3759,
|
| 71 |
+
"grad_norm": 7.972470760345459,
|
| 72 |
+
"learning_rate": 0.0001714285714285714
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.2535211267605634,
|
| 77 |
+
"cpu_mem": 1.502429184,
|
| 78 |
+
"gpu_mem": 4.722095104,
|
| 79 |
+
"loss": 1.398,
|
| 80 |
+
"grad_norm": 12.75944995880127,
|
| 81 |
+
"learning_rate": 0.00019285714285714286
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.28169014084507044,
|
| 86 |
+
"cpu_mem": 1.502429184,
|
| 87 |
+
"gpu_mem": 4.722090496,
|
| 88 |
+
"loss": 1.7315,
|
| 89 |
+
"grad_norm": 20.16497802734375,
|
| 90 |
+
"learning_rate": 0.00021428571428571427
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.30985915492957744,
|
| 95 |
+
"cpu_mem": 1.502429184,
|
| 96 |
+
"gpu_mem": 4.722168832,
|
| 97 |
+
"loss": 1.5034,
|
| 98 |
+
"grad_norm": 12.408662796020508,
|
| 99 |
+
"learning_rate": 0.00023571428571428569
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.3380281690140845,
|
| 104 |
+
"cpu_mem": 1.502429184,
|
| 105 |
+
"gpu_mem": 4.72214272,
|
| 106 |
+
"loss": 1.3497,
|
| 107 |
+
"grad_norm": 6.996767044067383,
|
| 108 |
+
"learning_rate": 0.0002571428571428571
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.36619718309859156,
|
| 113 |
+
"cpu_mem": 1.502429184,
|
| 114 |
+
"gpu_mem": 4.722093568,
|
| 115 |
+
"loss": 2.105,
|
| 116 |
+
"grad_norm": 52.36497116088867,
|
| 117 |
+
"learning_rate": 0.00027857142857142854
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.39436619718309857,
|
| 122 |
+
"cpu_mem": 1.502429184,
|
| 123 |
+
"gpu_mem": 4.722115072,
|
| 124 |
+
"loss": 1.4537,
|
| 125 |
+
"grad_norm": 8.213821411132812,
|
| 126 |
+
"learning_rate": 0.0003
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.4225352112676056,
|
| 131 |
+
"cpu_mem": 1.502429184,
|
| 132 |
+
"gpu_mem": 4.722092032,
|
| 133 |
+
"loss": 1.4046,
|
| 134 |
+
"grad_norm": 8.018381118774414,
|
| 135 |
+
"learning_rate": 0.0002999533773001224
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.4507042253521127,
|
| 140 |
+
"cpu_mem": 1.502429184,
|
| 141 |
+
"gpu_mem": 4.72209664,
|
| 142 |
+
"loss": 1.4606,
|
| 143 |
+
"grad_norm": 8.81755256652832,
|
| 144 |
+
"learning_rate": 0.0002998135381828383
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.4788732394366197,
|
| 149 |
+
"cpu_mem": 1.502429184,
|
| 150 |
+
"gpu_mem": 4.722133504,
|
| 151 |
+
"loss": 1.535,
|
| 152 |
+
"grad_norm": 16.856678009033203,
|
| 153 |
+
"learning_rate": 0.00029958056957717696
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 0.5070422535211268,
|
| 158 |
+
"cpu_mem": 1.502429184,
|
| 159 |
+
"gpu_mem": 4.722144256,
|
| 160 |
+
"loss": 1.4464,
|
| 161 |
+
"grad_norm": 6.95269775390625,
|
| 162 |
+
"learning_rate": 0.0002992546163048102
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 0.5352112676056338,
|
| 167 |
+
"cpu_mem": 1.502429184,
|
| 168 |
+
"gpu_mem": 4.722087424,
|
| 169 |
+
"loss": 1.2935,
|
| 170 |
+
"grad_norm": 4.264415264129639,
|
| 171 |
+
"learning_rate": 0.0002988358809900258
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 0.5633802816901409,
|
| 176 |
+
"cpu_mem": 1.502429184,
|
| 177 |
+
"gpu_mem": 4.72215808,
|
| 178 |
+
"loss": 1.6446,
|
| 179 |
+
"grad_norm": 16.577444076538086,
|
| 180 |
+
"learning_rate": 0.0002983246239337692
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 0.5915492957746479,
|
| 185 |
+
"cpu_mem": 1.502429184,
|
| 186 |
+
"gpu_mem": 4.722156544,
|
| 187 |
+
"loss": 1.4246,
|
| 188 |
+
"grad_norm": 9.108543395996094,
|
| 189 |
+
"learning_rate": 0.0002977211629518312
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 0.6197183098591549,
|
| 194 |
+
"cpu_mem": 1.502429184,
|
| 195 |
+
"gpu_mem": 4.722113536,
|
| 196 |
+
"loss": 1.4405,
|
| 197 |
+
"grad_norm": 8.338932991027832,
|
| 198 |
+
"learning_rate": 0.00029702587317728153
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 0.647887323943662,
|
| 203 |
+
"cpu_mem": 1.502429184,
|
| 204 |
+
"gpu_mem": 4.722130432,
|
| 205 |
+
"loss": 1.3325,
|
| 206 |
+
"grad_norm": 4.5872039794921875,
|
| 207 |
+
"learning_rate": 0.0002962391868272735
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 0.676056338028169,
|
| 212 |
+
"cpu_mem": 1.502429184,
|
| 213 |
+
"gpu_mem": 4.722087424,
|
| 214 |
+
"loss": 1.346,
|
| 215 |
+
"grad_norm": 3.732668161392212,
|
| 216 |
+
"learning_rate": 0.00029536159293436166
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 0.704225352112676,
|
| 221 |
+
"cpu_mem": 1.502429184,
|
| 222 |
+
"gpu_mem": 4.722116608,
|
| 223 |
+
"loss": 1.4111,
|
| 224 |
+
"grad_norm": 3.657146692276001,
|
| 225 |
+
"learning_rate": 0.00029439363704250176
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 0.7323943661971831,
|
| 230 |
+
"cpu_mem": 1.502429184,
|
| 231 |
+
"gpu_mem": 4.72209664,
|
| 232 |
+
"loss": 1.5039,
|
| 233 |
+
"grad_norm": 3.2886135578155518,
|
| 234 |
+
"learning_rate": 0.00029333592086792107
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 0.7605633802816901,
|
| 239 |
+
"cpu_mem": 1.502429184,
|
| 240 |
+
"gpu_mem": 4.722122752,
|
| 241 |
+
"loss": 1.358,
|
| 242 |
+
"grad_norm": 2.674607276916504,
|
| 243 |
+
"learning_rate": 0.0002921891019250697
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 0.7887323943661971,
|
| 248 |
+
"cpu_mem": 1.502429184,
|
| 249 |
+
"gpu_mem": 4.722122752,
|
| 250 |
+
"loss": 1.4135,
|
| 251 |
+
"grad_norm": 3.7271816730499268,
|
| 252 |
+
"learning_rate": 0.0002909538931178862
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 0.8169014084507042,
|
| 257 |
+
"cpu_mem": 1.502625792,
|
| 258 |
+
"gpu_mem": 4.722101248,
|
| 259 |
+
"loss": 1.2997,
|
| 260 |
+
"grad_norm": 3.6826894283294678,
|
| 261 |
+
"learning_rate": 0.00028963106229663063
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 0.8450704225352113,
|
| 266 |
+
"cpu_mem": 1.502625792,
|
| 267 |
+
"gpu_mem": 4.722092032,
|
| 268 |
+
"loss": 1.3454,
|
| 269 |
+
"grad_norm": 2.921555757522583,
|
| 270 |
+
"learning_rate": 0.00028822143178056114
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 0.8732394366197183,
|
| 275 |
+
"cpu_mem": 1.502625792,
|
| 276 |
+
"gpu_mem": 4.722110464,
|
| 277 |
+
"loss": 1.4119,
|
| 278 |
+
"grad_norm": 6.010624885559082,
|
| 279 |
+
"learning_rate": 0.00028672587784675096
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 0.9014084507042254,
|
| 284 |
+
"cpu_mem": 1.502625792,
|
| 285 |
+
"gpu_mem": 4.722133504,
|
| 286 |
+
"loss": 1.3421,
|
| 287 |
+
"grad_norm": 3.9047536849975586,
|
| 288 |
+
"learning_rate": 0.0002851453301853628
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 0.9295774647887324,
|
| 293 |
+
"cpu_mem": 1.502625792,
|
| 294 |
+
"gpu_mem": 4.722130432,
|
| 295 |
+
"loss": 1.38,
|
| 296 |
+
"grad_norm": 3.3717494010925293,
|
| 297 |
+
"learning_rate": 0.00028348077132172027
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 0.9577464788732394,
|
| 302 |
+
"cpu_mem": 1.502625792,
|
| 303 |
+
"gpu_mem": 4.722133504,
|
| 304 |
+
"loss": 1.3892,
|
| 305 |
+
"grad_norm": 2.1161556243896484,
|
| 306 |
+
"learning_rate": 0.0002817332360055343
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 0.9859154929577465,
|
| 311 |
+
"cpu_mem": 1.502625792,
|
| 312 |
+
"gpu_mem": 4.722115072,
|
| 313 |
+
"loss": 1.3076,
|
| 314 |
+
"grad_norm": 1.9025923013687134,
|
| 315 |
+
"learning_rate": 0.0002799038105676658
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 1.0140845070422535,
|
| 320 |
+
"cpu_mem": 1.502625792,
|
| 321 |
+
"gpu_mem": 4.82366208,
|
| 322 |
+
"loss": 1.9438,
|
| 323 |
+
"grad_norm": 4.72139835357666,
|
| 324 |
+
"learning_rate": 0.0002779936322448233
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 1.0422535211267605,
|
| 329 |
+
"cpu_mem": 1.502625792,
|
| 330 |
+
"gpu_mem": 4.823666688,
|
| 331 |
+
"loss": 1.3326,
|
| 332 |
+
"grad_norm": 4.325397968292236,
|
| 333 |
+
"learning_rate": 0.0002760038884726157
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 1.0704225352112675,
|
| 338 |
+
"cpu_mem": 1.502625792,
|
| 339 |
+
"gpu_mem": 4.823645184,
|
| 340 |
+
"loss": 1.2542,
|
| 341 |
+
"grad_norm": 3.8574061393737793,
|
| 342 |
+
"learning_rate": 0.00027393581614739923
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 1.0985915492957747,
|
| 347 |
+
"cpu_mem": 1.502625792,
|
| 348 |
+
"gpu_mem": 4.823634432,
|
| 349 |
+
"loss": 1.3937,
|
| 350 |
+
"grad_norm": 8.146137237548828,
|
| 351 |
+
"learning_rate": 0.0002717907008573785
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 1.1267605633802817,
|
| 356 |
+
"cpu_mem": 1.502625792,
|
| 357 |
+
"gpu_mem": 4.823697408,
|
| 358 |
+
"loss": 1.398,
|
| 359 |
+
"grad_norm": 4.23073148727417,
|
| 360 |
+
"learning_rate": 0.0002695698760834384
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 1.1549295774647887,
|
| 365 |
+
"cpu_mem": 1.502625792,
|
| 366 |
+
"gpu_mem": 4.823657472,
|
| 367 |
+
"loss": 1.5805,
|
| 368 |
+
"grad_norm": 9.995660781860352,
|
| 369 |
+
"learning_rate": 0.00026727472237020447
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 1.1830985915492958,
|
| 374 |
+
"cpu_mem": 1.502625792,
|
| 375 |
+
"gpu_mem": 4.82370048,
|
| 376 |
+
"loss": 1.3628,
|
| 377 |
+
"grad_norm": 3.161339521408081,
|
| 378 |
+
"learning_rate": 0.00026490666646784665
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 1.2112676056338028,
|
| 383 |
+
"cpu_mem": 1.502625792,
|
| 384 |
+
"gpu_mem": 4.823649792,
|
| 385 |
+
"loss": 1.4042,
|
| 386 |
+
"grad_norm": 4.245759010314941,
|
| 387 |
+
"learning_rate": 0.0002624671804451601
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 1.2394366197183098,
|
| 392 |
+
"cpu_mem": 1.502625792,
|
| 393 |
+
"gpu_mem": 4.823714304,
|
| 394 |
+
"loss": 1.3295,
|
| 395 |
+
"grad_norm": 3.277341365814209,
|
| 396 |
+
"learning_rate": 0.0002599577807744739
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 1.267605633802817,
|
| 401 |
+
"cpu_mem": 1.502625792,
|
| 402 |
+
"gpu_mem": 4.823682048,
|
| 403 |
+
"loss": 1.4885,
|
| 404 |
+
"grad_norm": 4.2645487785339355,
|
| 405 |
+
"learning_rate": 0.0002573800273889577
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 1.295774647887324,
|
| 410 |
+
"cpu_mem": 1.502625792,
|
| 411 |
+
"gpu_mem": 4.823686656,
|
| 412 |
+
"loss": 1.4298,
|
| 413 |
+
"grad_norm": 5.765285015106201,
|
| 414 |
+
"learning_rate": 0.0002547355227129109
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 1.323943661971831,
|
| 419 |
+
"cpu_mem": 1.502625792,
|
| 420 |
+
"gpu_mem": 4.823632896,
|
| 421 |
+
"loss": 1.3327,
|
| 422 |
+
"grad_norm": 3.2660598754882812,
|
| 423 |
+
"learning_rate": 0.00025202591066563786
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 1.352112676056338,
|
| 428 |
+
"cpu_mem": 1.502625792,
|
| 429 |
+
"gpu_mem": 4.82364672,
|
| 430 |
+
"loss": 1.3981,
|
| 431 |
+
"grad_norm": 4.678296089172363,
|
| 432 |
+
"learning_rate": 0.0002492528756395289
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 1.380281690140845,
|
| 437 |
+
"cpu_mem": 1.502625792,
|
| 438 |
+
"gpu_mem": 4.823635968,
|
| 439 |
+
"loss": 2.184,
|
| 440 |
+
"grad_norm": 293.4337463378906,
|
| 441 |
+
"learning_rate": 0.0002464181414529809
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 1.408450704225352,
|
| 446 |
+
"cpu_mem": 1.502625792,
|
| 447 |
+
"gpu_mem": 4.823649792,
|
| 448 |
+
"loss": 1.4052,
|
| 449 |
+
"grad_norm": 5.673724174499512,
|
| 450 |
+
"learning_rate": 0.00024352347027881003
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 1.436619718309859,
|
| 455 |
+
"cpu_mem": 1.502625792,
|
| 456 |
+
"gpu_mem": 4.823702016,
|
| 457 |
+
"loss": 1.4252,
|
| 458 |
+
"grad_norm": 6.799444198608398,
|
| 459 |
+
"learning_rate": 0.0002405706615488216
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 1.4647887323943662,
|
| 464 |
+
"cpu_mem": 1.502625792,
|
| 465 |
+
"gpu_mem": 4.823649792,
|
| 466 |
+
"loss": 1.3539,
|
| 467 |
+
"grad_norm": 2.679464340209961,
|
| 468 |
+
"learning_rate": 0.00023756155083521846
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 1.4929577464788732,
|
| 473 |
+
"cpu_mem": 1.502625792,
|
| 474 |
+
"gpu_mem": 4.823718912,
|
| 475 |
+
"loss": 1.4057,
|
| 476 |
+
"grad_norm": 4.1786208152771,
|
| 477 |
+
"learning_rate": 0.00023449800870954326
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 1.5211267605633803,
|
| 482 |
+
"cpu_mem": 1.502625792,
|
| 483 |
+
"gpu_mem": 4.823686656,
|
| 484 |
+
"loss": 1.4722,
|
| 485 |
+
"grad_norm": 5.400444507598877,
|
| 486 |
+
"learning_rate": 0.0002313819395798639
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 1.5492957746478875,
|
| 491 |
+
"cpu_mem": 1.502625792,
|
| 492 |
+
"gpu_mem": 4.823695872,
|
| 493 |
+
"loss": 1.3521,
|
| 494 |
+
"grad_norm": 1.586466908454895,
|
| 495 |
+
"learning_rate": 0.0002282152805069247
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 1.5774647887323945,
|
| 500 |
+
"cpu_mem": 1.502625792,
|
| 501 |
+
"gpu_mem": 4.823671296,
|
| 502 |
+
"loss": 1.3238,
|
| 503 |
+
"grad_norm": 1.901183843612671,
|
| 504 |
+
"learning_rate": 0.000225
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 1.6056338028169015,
|
| 509 |
+
"cpu_mem": 1.502625792,
|
| 510 |
+
"gpu_mem": 4.823705088,
|
| 511 |
+
"loss": 1.3406,
|
| 512 |
+
"grad_norm": 2.266773223876953,
|
| 513 |
+
"learning_rate": 0.00022173809679319772
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 1.6338028169014085,
|
| 518 |
+
"cpu_mem": 1.502625792,
|
| 519 |
+
"gpu_mem": 4.823686656,
|
| 520 |
+
"loss": 1.3164,
|
| 521 |
+
"grad_norm": 0.935617983341217,
|
| 522 |
+
"learning_rate": 0.00021843159860297442
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 1.6619718309859155,
|
| 527 |
+
"cpu_mem": 1.502625792,
|
| 528 |
+
"gpu_mem": 4.823672832,
|
| 529 |
+
"loss": 1.3413,
|
| 530 |
+
"grad_norm": 1.1456469297409058,
|
| 531 |
+
"learning_rate": 0.00021508256086763368
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 1.6901408450704225,
|
| 536 |
+
"cpu_mem": 1.502625792,
|
| 537 |
+
"gpu_mem": 4.823711232,
|
| 538 |
+
"loss": 1.2904,
|
| 539 |
+
"grad_norm": 1.728401780128479,
|
| 540 |
+
"learning_rate": 0.00021169306546959174
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 1.7183098591549295,
|
| 545 |
+
"cpu_mem": 1.502625792,
|
| 546 |
+
"gpu_mem": 4.823643648,
|
| 547 |
+
"loss": 1.3529,
|
| 548 |
+
"grad_norm": 3.0350029468536377,
|
| 549 |
+
"learning_rate": 0.0002082652194412042
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 1.7464788732394365,
|
| 554 |
+
"cpu_mem": 1.502625792,
|
| 555 |
+
"gpu_mem": 4.823691264,
|
| 556 |
+
"loss": 1.4544,
|
| 557 |
+
"grad_norm": 3.896820068359375,
|
| 558 |
+
"learning_rate": 0.00020480115365495926
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 1.7746478873239435,
|
| 563 |
+
"cpu_mem": 1.502625792,
|
| 564 |
+
"gpu_mem": 4.823640576,
|
| 565 |
+
"loss": 1.4155,
|
| 566 |
+
"grad_norm": 3.300171136856079,
|
| 567 |
+
"learning_rate": 0.00020130302149885031
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 1.8028169014084507,
|
| 572 |
+
"cpu_mem": 1.502625792,
|
| 573 |
+
"gpu_mem": 4.823689728,
|
| 574 |
+
"loss": 1.3494,
|
| 575 |
+
"grad_norm": 2.4495275020599365,
|
| 576 |
+
"learning_rate": 0.00019777299753775265
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 1.8309859154929577,
|
| 581 |
+
"cpu_mem": 1.502625792,
|
| 582 |
+
"gpu_mem": 4.823688192,
|
| 583 |
+
"loss": 1.4032,
|
| 584 |
+
"grad_norm": 3.058351993560791,
|
| 585 |
+
"learning_rate": 0.00019421327616163563
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 1.8591549295774648,
|
| 590 |
+
"cpu_mem": 1.502625792,
|
| 591 |
+
"gpu_mem": 4.823706624,
|
| 592 |
+
"loss": 1.4233,
|
| 593 |
+
"grad_norm": 4.7667107582092285,
|
| 594 |
+
"learning_rate": 0.00019062607022145078
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 1.887323943661972,
|
| 599 |
+
"cpu_mem": 1.502625792,
|
| 600 |
+
"gpu_mem": 4.823648256,
|
| 601 |
+
"loss": 1.3687,
|
| 602 |
+
"grad_norm": 5.0993218421936035,
|
| 603 |
+
"learning_rate": 0.00018701360965354402
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 1.915492957746479,
|
| 608 |
+
"cpu_mem": 1.502625792,
|
| 609 |
+
"gpu_mem": 4.823660544,
|
| 610 |
+
"loss": 20.6412,
|
| 611 |
+
"grad_norm": 727.7623901367188,
|
| 612 |
+
"learning_rate": 0.00018337814009344714
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 69,
|
| 616 |
+
"epoch": 1.943661971830986,
|
| 617 |
+
"cpu_mem": 1.502625792,
|
| 618 |
+
"gpu_mem": 4.82368512,
|
| 619 |
+
"loss": 5.2158,
|
| 620 |
+
"grad_norm": 216.47073364257812,
|
| 621 |
+
"learning_rate": 0.0001797219214799096
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"step": 70,
|
| 625 |
+
"epoch": 1.971830985915493,
|
| 626 |
+
"cpu_mem": 1.502625792,
|
| 627 |
+
"gpu_mem": 4.82366208,
|
| 628 |
+
"loss": 1.9586,
|
| 629 |
+
"grad_norm": 153.92445373535156,
|
| 630 |
+
"learning_rate": 0.00017604722665003956
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"step": 71,
|
| 634 |
+
"epoch": 2.0,
|
| 635 |
+
"cpu_mem": 1.502625792,
|
| 636 |
+
"gpu_mem": 4.823497728,
|
| 637 |
+
"loss": 2.0123,
|
| 638 |
+
"grad_norm": 5.384662628173828,
|
| 639 |
+
"learning_rate": 0.00017235633992642615
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"step": 72,
|
| 643 |
+
"epoch": 2.028169014084507,
|
| 644 |
+
"cpu_mem": 1.502625792,
|
| 645 |
+
"gpu_mem": 4.722125824,
|
| 646 |
+
"loss": 1.3851,
|
| 647 |
+
"grad_norm": 2.2103421688079834,
|
| 648 |
+
"learning_rate": 0.00016865155569712278
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"step": 73,
|
| 652 |
+
"epoch": 2.056338028169014,
|
| 653 |
+
"cpu_mem": 1.502625792,
|
| 654 |
+
"gpu_mem": 4.72208896,
|
| 655 |
+
"loss": 1.3777,
|
| 656 |
+
"grad_norm": 1.1181278228759766,
|
| 657 |
+
"learning_rate": 0.0001649351769893725
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"step": 74,
|
| 661 |
+
"epoch": 2.084507042253521,
|
| 662 |
+
"cpu_mem": 1.502625792,
|
| 663 |
+
"gpu_mem": 4.722148864,
|
| 664 |
+
"loss": 1.3305,
|
| 665 |
+
"grad_norm": 0.8732612133026123,
|
| 666 |
+
"learning_rate": 0.00016120951403796364
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"step": 75,
|
| 670 |
+
"epoch": 2.112676056338028,
|
| 671 |
+
"cpu_mem": 1.502625792,
|
| 672 |
+
"gpu_mem": 4.722116608,
|
| 673 |
+
"loss": 1.344,
|
| 674 |
+
"grad_norm": 1.5410144329071045,
|
| 675 |
+
"learning_rate": 0.00015747688284910457
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"step": 76,
|
| 679 |
+
"epoch": 2.140845070422535,
|
| 680 |
+
"cpu_mem": 1.502625792,
|
| 681 |
+
"gpu_mem": 4.72212736,
|
| 682 |
+
"loss": 1.2997,
|
| 683 |
+
"grad_norm": 0.754600465297699,
|
| 684 |
+
"learning_rate": 0.00015373960376071093
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"step": 77,
|
| 688 |
+
"epoch": 2.169014084507042,
|
| 689 |
+
"cpu_mem": 1.502625792,
|
| 690 |
+
"gpu_mem": 4.722164224,
|
| 691 |
+
"loss": 1.3713,
|
| 692 |
+
"grad_norm": 1.0481202602386475,
|
| 693 |
+
"learning_rate": 0.00015
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"step": 78,
|
| 697 |
+
"epoch": 2.1971830985915495,
|
| 698 |
+
"cpu_mem": 1.502625792,
|
| 699 |
+
"gpu_mem": 4.722148864,
|
| 700 |
+
"loss": 1.4209,
|
| 701 |
+
"grad_norm": 2.4341037273406982,
|
| 702 |
+
"learning_rate": 0.00014626039623928907
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"step": 79,
|
| 706 |
+
"epoch": 2.2253521126760565,
|
| 707 |
+
"cpu_mem": 1.502625792,
|
| 708 |
+
"gpu_mem": 4.722099712,
|
| 709 |
+
"loss": 1.291,
|
| 710 |
+
"grad_norm": 1.7542685270309448,
|
| 711 |
+
"learning_rate": 0.0001425231171508954
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"step": 80,
|
| 715 |
+
"epoch": 2.2535211267605635,
|
| 716 |
+
"cpu_mem": 1.502625792,
|
| 717 |
+
"gpu_mem": 4.722144256,
|
| 718 |
+
"loss": 1.346,
|
| 719 |
+
"grad_norm": 1.6453899145126343,
|
| 720 |
+
"learning_rate": 0.00013879048596203636
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"step": 81,
|
| 724 |
+
"epoch": 2.2816901408450705,
|
| 725 |
+
"cpu_mem": 1.502625792,
|
| 726 |
+
"gpu_mem": 4.722130432,
|
| 727 |
+
"loss": 1.4232,
|
| 728 |
+
"grad_norm": 2.9081246852874756,
|
| 729 |
+
"learning_rate": 0.0001350648230106275
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"step": 82,
|
| 733 |
+
"epoch": 2.3098591549295775,
|
| 734 |
+
"cpu_mem": 1.502625792,
|
| 735 |
+
"gpu_mem": 4.722098176,
|
| 736 |
+
"loss": 1.4118,
|
| 737 |
+
"grad_norm": 3.444525718688965,
|
| 738 |
+
"learning_rate": 0.00013134844430287725
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"step": 83,
|
| 742 |
+
"epoch": 2.3380281690140845,
|
| 743 |
+
"cpu_mem": 1.502625792,
|
| 744 |
+
"gpu_mem": 4.722148864,
|
| 745 |
+
"loss": 1.3006,
|
| 746 |
+
"grad_norm": 2.5711867809295654,
|
| 747 |
+
"learning_rate": 0.0001276436600735738
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"step": 84,
|
| 751 |
+
"epoch": 2.3661971830985915,
|
| 752 |
+
"cpu_mem": 1.502625792,
|
| 753 |
+
"gpu_mem": 4.722087424,
|
| 754 |
+
"loss": 1.4037,
|
| 755 |
+
"grad_norm": 2.308828115463257,
|
| 756 |
+
"learning_rate": 0.00012395277334996044
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"step": 85,
|
| 760 |
+
"epoch": 2.3943661971830985,
|
| 761 |
+
"cpu_mem": 1.502625792,
|
| 762 |
+
"gpu_mem": 4.722133504,
|
| 763 |
+
"loss": 1.3524,
|
| 764 |
+
"grad_norm": 2.286288261413574,
|
| 765 |
+
"learning_rate": 0.00012027807852009038
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"step": 86,
|
| 769 |
+
"epoch": 2.4225352112676055,
|
| 770 |
+
"cpu_mem": 1.502625792,
|
| 771 |
+
"gpu_mem": 4.722087424,
|
| 772 |
+
"loss": 1.3171,
|
| 773 |
+
"grad_norm": 0.8145406246185303,
|
| 774 |
+
"learning_rate": 0.00011662185990655284
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"step": 87,
|
| 778 |
+
"epoch": 2.4507042253521125,
|
| 779 |
+
"cpu_mem": 1.502625792,
|
| 780 |
+
"gpu_mem": 4.722118144,
|
| 781 |
+
"loss": 1.4024,
|
| 782 |
+
"grad_norm": 2.189497232437134,
|
| 783 |
+
"learning_rate": 0.00011298639034645593
|
| 784 |
+
},
|
| 785 |
+
{
|
| 786 |
+
"step": 88,
|
| 787 |
+
"epoch": 2.4788732394366195,
|
| 788 |
+
"cpu_mem": 1.502625792,
|
| 789 |
+
"gpu_mem": 4.722093568,
|
| 790 |
+
"loss": 1.3276,
|
| 791 |
+
"grad_norm": 2.225213050842285,
|
| 792 |
+
"learning_rate": 0.00010937392977854923
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"step": 89,
|
| 796 |
+
"epoch": 2.507042253521127,
|
| 797 |
+
"cpu_mem": 1.502625792,
|
| 798 |
+
"gpu_mem": 4.722147328,
|
| 799 |
+
"loss": 1.3019,
|
| 800 |
+
"grad_norm": 1.4794793128967285,
|
| 801 |
+
"learning_rate": 0.00010578672383836435
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"step": 90,
|
| 805 |
+
"epoch": 2.535211267605634,
|
| 806 |
+
"cpu_mem": 1.502625792,
|
| 807 |
+
"gpu_mem": 4.722128896,
|
| 808 |
+
"loss": 1.3902,
|
| 809 |
+
"grad_norm": 2.832213878631592,
|
| 810 |
+
"learning_rate": 0.00010222700246224735
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"step": 91,
|
| 814 |
+
"epoch": 2.563380281690141,
|
| 815 |
+
"cpu_mem": 1.502625792,
|
| 816 |
+
"gpu_mem": 4.722078208,
|
| 817 |
+
"loss": 1.3622,
|
| 818 |
+
"grad_norm": 2.659364700317383,
|
| 819 |
+
"learning_rate": 9.869697850114969e-05
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"step": 92,
|
| 823 |
+
"epoch": 2.591549295774648,
|
| 824 |
+
"cpu_mem": 1.502625792,
|
| 825 |
+
"gpu_mem": 4.722102784,
|
| 826 |
+
"loss": 1.3862,
|
| 827 |
+
"grad_norm": 2.9223179817199707,
|
| 828 |
+
"learning_rate": 9.519884634504074e-05
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"step": 93,
|
| 832 |
+
"epoch": 2.619718309859155,
|
| 833 |
+
"cpu_mem": 1.502625792,
|
| 834 |
+
"gpu_mem": 4.722105856,
|
| 835 |
+
"loss": 1.3254,
|
| 836 |
+
"grad_norm": 2.7396914958953857,
|
| 837 |
+
"learning_rate": 9.17347805587958e-05
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"step": 94,
|
| 841 |
+
"epoch": 2.647887323943662,
|
| 842 |
+
"cpu_mem": 1.502625792,
|
| 843 |
+
"gpu_mem": 4.722098176,
|
| 844 |
+
"loss": 1.3504,
|
| 845 |
+
"grad_norm": 2.2619333267211914,
|
| 846 |
+
"learning_rate": 8.830693453040829e-05
|
| 847 |
+
},
|
| 848 |
+
{
|
| 849 |
+
"step": 95,
|
| 850 |
+
"epoch": 2.676056338028169,
|
| 851 |
+
"cpu_mem": 1.502625792,
|
| 852 |
+
"gpu_mem": 4.722136576,
|
| 853 |
+
"loss": 1.375,
|
| 854 |
+
"grad_norm": 2.095527172088623,
|
| 855 |
+
"learning_rate": 8.491743913236628e-05
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"step": 96,
|
| 859 |
+
"epoch": 2.704225352112676,
|
| 860 |
+
"cpu_mem": 1.502625792,
|
| 861 |
+
"gpu_mem": 4.722145792,
|
| 862 |
+
"loss": 1.2863,
|
| 863 |
+
"grad_norm": 2.3975515365600586,
|
| 864 |
+
"learning_rate": 8.156840139702554e-05
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"step": 97,
|
| 868 |
+
"epoch": 2.732394366197183,
|
| 869 |
+
"cpu_mem": 1.502625792,
|
| 870 |
+
"gpu_mem": 4.72208896,
|
| 871 |
+
"loss": 1.3655,
|
| 872 |
+
"grad_norm": 2.111492395401001,
|
| 873 |
+
"learning_rate": 7.82619032068023e-05
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"step": 98,
|
| 877 |
+
"epoch": 2.76056338028169,
|
| 878 |
+
"cpu_mem": 1.502625792,
|
| 879 |
+
"gpu_mem": 4.72208896,
|
| 880 |
+
"loss": 1.3394,
|
| 881 |
+
"grad_norm": 0.9519637823104858,
|
| 882 |
+
"learning_rate": 7.500000000000002e-05
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"step": 99,
|
| 886 |
+
"epoch": 2.788732394366197,
|
| 887 |
+
"cpu_mem": 1.502625792,
|
| 888 |
+
"gpu_mem": 4.722085888,
|
| 889 |
+
"loss": 1.2922,
|
| 890 |
+
"grad_norm": 1.1138625144958496,
|
| 891 |
+
"learning_rate": 7.17847194930753e-05
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"step": 100,
|
| 895 |
+
"epoch": 2.816901408450704,
|
| 896 |
+
"cpu_mem": 1.502625792,
|
| 897 |
+
"gpu_mem": 4.722084352,
|
| 898 |
+
"loss": 1.2824,
|
| 899 |
+
"grad_norm": 1.63324773311615,
|
| 900 |
+
"learning_rate": 6.86180604201361e-05
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"step": 101,
|
| 904 |
+
"epoch": 2.845070422535211,
|
| 905 |
+
"cpu_mem": 1.502625792,
|
| 906 |
+
"gpu_mem": 4.72212736,
|
| 907 |
+
"loss": 1.2767,
|
| 908 |
+
"grad_norm": 1.6835689544677734,
|
| 909 |
+
"learning_rate": 6.550199129045668e-05
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"step": 102,
|
| 913 |
+
"epoch": 2.873239436619718,
|
| 914 |
+
"cpu_mem": 1.502625792,
|
| 915 |
+
"gpu_mem": 4.72206592,
|
| 916 |
+
"loss": 1.3731,
|
| 917 |
+
"grad_norm": 2.8863682746887207,
|
| 918 |
+
"learning_rate": 6.243844916478155e-05
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"step": 103,
|
| 922 |
+
"epoch": 2.9014084507042255,
|
| 923 |
+
"cpu_mem": 1.502625792,
|
| 924 |
+
"gpu_mem": 4.722115072,
|
| 925 |
+
"loss": 1.3284,
|
| 926 |
+
"grad_norm": 1.9598286151885986,
|
| 927 |
+
"learning_rate": 5.9429338451178355e-05
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"step": 104,
|
| 931 |
+
"epoch": 2.9295774647887325,
|
| 932 |
+
"cpu_mem": 1.502625792,
|
| 933 |
+
"gpu_mem": 4.722178048,
|
| 934 |
+
"loss": 1.3901,
|
| 935 |
+
"grad_norm": 3.4294772148132324,
|
| 936 |
+
"learning_rate": 5.6476529721189974e-05
|
| 937 |
+
},
|
| 938 |
+
{
|
| 939 |
+
"step": 105,
|
| 940 |
+
"epoch": 2.9577464788732395,
|
| 941 |
+
"cpu_mem": 1.502625792,
|
| 942 |
+
"gpu_mem": 4.722130432,
|
| 943 |
+
"loss": 1.3027,
|
| 944 |
+
"grad_norm": 1.786590814590454,
|
| 945 |
+
"learning_rate": 5.358185854701909e-05
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"step": 106,
|
| 949 |
+
"epoch": 2.9859154929577465,
|
| 950 |
+
"cpu_mem": 1.502625792,
|
| 951 |
+
"gpu_mem": 4.722112,
|
| 952 |
+
"loss": 1.3242,
|
| 953 |
+
"grad_norm": 1.6533207893371582,
|
| 954 |
+
"learning_rate": 5.074712436047112e-05
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"step": 107,
|
| 958 |
+
"epoch": 3.0140845070422535,
|
| 959 |
+
"cpu_mem": 1.502625792,
|
| 960 |
+
"gpu_mem": 4.82368512,
|
| 961 |
+
"loss": 1.8733,
|
| 962 |
+
"grad_norm": 3.4330568313598633,
|
| 963 |
+
"learning_rate": 4.7974089334362057e-05
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"step": 108,
|
| 967 |
+
"epoch": 3.0422535211267605,
|
| 968 |
+
"cpu_mem": 1.502625792,
|
| 969 |
+
"gpu_mem": 4.823666688,
|
| 970 |
+
"loss": 1.2738,
|
| 971 |
+
"grad_norm": 1.685111403465271,
|
| 972 |
+
"learning_rate": 4.526447728708908e-05
|
| 973 |
+
},
|
| 974 |
+
{
|
| 975 |
+
"step": 109,
|
| 976 |
+
"epoch": 3.0704225352112675,
|
| 977 |
+
"cpu_mem": 1.502625792,
|
| 978 |
+
"gpu_mem": 4.823655936,
|
| 979 |
+
"loss": 1.3244,
|
| 980 |
+
"grad_norm": 1.720697045326233,
|
| 981 |
+
"learning_rate": 4.261997261104223e-05
|
| 982 |
+
},
|
| 983 |
+
{
|
| 984 |
+
"step": 110,
|
| 985 |
+
"epoch": 3.0985915492957745,
|
| 986 |
+
"cpu_mem": 1.502625792,
|
| 987 |
+
"gpu_mem": 4.823709696,
|
| 988 |
+
"loss": 1.2854,
|
| 989 |
+
"grad_norm": 2.801140308380127,
|
| 990 |
+
"learning_rate": 4.004221922552608e-05
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"step": 111,
|
| 994 |
+
"epoch": 3.1267605633802815,
|
| 995 |
+
"cpu_mem": 1.502625792,
|
| 996 |
+
"gpu_mem": 4.82366976,
|
| 997 |
+
"loss": 1.3167,
|
| 998 |
+
"grad_norm": 1.9198555946350098,
|
| 999 |
+
"learning_rate": 3.753281955483985e-05
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"step": 112,
|
| 1003 |
+
"epoch": 3.1549295774647885,
|
| 1004 |
+
"cpu_mem": 1.502625792,
|
| 1005 |
+
"gpu_mem": 4.823688192,
|
| 1006 |
+
"loss": 1.2922,
|
| 1007 |
+
"grad_norm": 1.4882051944732666,
|
| 1008 |
+
"learning_rate": 3.509333353215331e-05
|
| 1009 |
+
},
|
| 1010 |
+
{
|
| 1011 |
+
"step": 113,
|
| 1012 |
+
"epoch": 3.183098591549296,
|
| 1013 |
+
"cpu_mem": 1.502625792,
|
| 1014 |
+
"gpu_mem": 4.823751168,
|
| 1015 |
+
"loss": 1.2862,
|
| 1016 |
+
"grad_norm": 1.7628090381622314,
|
| 1017 |
+
"learning_rate": 3.2725277629795526e-05
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"step": 114,
|
| 1021 |
+
"epoch": 3.211267605633803,
|
| 1022 |
+
"cpu_mem": 1.502625792,
|
| 1023 |
+
"gpu_mem": 4.823678976,
|
| 1024 |
+
"loss": 1.3162,
|
| 1025 |
+
"grad_norm": 1.6963146924972534,
|
| 1026 |
+
"learning_rate": 3.0430123916561672e-05
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"step": 115,
|
| 1030 |
+
"epoch": 3.23943661971831,
|
| 1031 |
+
"cpu_mem": 1.502625792,
|
| 1032 |
+
"gpu_mem": 4.823672832,
|
| 1033 |
+
"loss": 1.3671,
|
| 1034 |
+
"grad_norm": 2.351639747619629,
|
| 1035 |
+
"learning_rate": 2.8209299142621522e-05
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"step": 116,
|
| 1039 |
+
"epoch": 3.267605633802817,
|
| 1040 |
+
"cpu_mem": 1.502625792,
|
| 1041 |
+
"gpu_mem": 4.823688192,
|
| 1042 |
+
"loss": 1.3129,
|
| 1043 |
+
"grad_norm": 1.3395206928253174,
|
| 1044 |
+
"learning_rate": 2.6064183852600797e-05
|
| 1045 |
+
},
|
| 1046 |
+
{
|
| 1047 |
+
"step": 117,
|
| 1048 |
+
"epoch": 3.295774647887324,
|
| 1049 |
+
"cpu_mem": 1.502625792,
|
| 1050 |
+
"gpu_mem": 4.823703552,
|
| 1051 |
+
"loss": 1.276,
|
| 1052 |
+
"grad_norm": 1.8355594873428345,
|
| 1053 |
+
"learning_rate": 2.3996111527384288e-05
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"step": 118,
|
| 1057 |
+
"epoch": 3.323943661971831,
|
| 1058 |
+
"cpu_mem": 1.502625792,
|
| 1059 |
+
"gpu_mem": 4.823694336,
|
| 1060 |
+
"loss": 1.2855,
|
| 1061 |
+
"grad_norm": 1.5066239833831787,
|
| 1062 |
+
"learning_rate": 2.2006367755176655e-05
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"step": 119,
|
| 1066 |
+
"epoch": 3.352112676056338,
|
| 1067 |
+
"cpu_mem": 1.502625792,
|
| 1068 |
+
"gpu_mem": 4.82368512,
|
| 1069 |
+
"loss": 1.2956,
|
| 1070 |
+
"grad_norm": 1.6072317361831665,
|
| 1071 |
+
"learning_rate": 2.009618943233419e-05
|
| 1072 |
+
},
|
| 1073 |
+
{
|
| 1074 |
+
"step": 120,
|
| 1075 |
+
"epoch": 3.380281690140845,
|
| 1076 |
+
"cpu_mem": 1.502625792,
|
| 1077 |
+
"gpu_mem": 4.823703552,
|
| 1078 |
+
"loss": 1.2925,
|
| 1079 |
+
"grad_norm": 1.6089941263198853,
|
| 1080 |
+
"learning_rate": 1.82667639944657e-05
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"step": 121,
|
| 1084 |
+
"epoch": 3.408450704225352,
|
| 1085 |
+
"cpu_mem": 1.502625792,
|
| 1086 |
+
"gpu_mem": 4.823702016,
|
| 1087 |
+
"loss": 1.2467,
|
| 1088 |
+
"grad_norm": 1.8599209785461426,
|
| 1089 |
+
"learning_rate": 1.6519228678279718e-05
|
| 1090 |
+
},
|
| 1091 |
+
{
|
| 1092 |
+
"step": 122,
|
| 1093 |
+
"epoch": 3.436619718309859,
|
| 1094 |
+
"cpu_mem": 1.502625792,
|
| 1095 |
+
"gpu_mem": 4.823659008,
|
| 1096 |
+
"loss": 1.2741,
|
| 1097 |
+
"grad_norm": 1.5860140323638916,
|
| 1098 |
+
"learning_rate": 1.4854669814637143e-05
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"step": 123,
|
| 1102 |
+
"epoch": 3.464788732394366,
|
| 1103 |
+
"cpu_mem": 1.502625792,
|
| 1104 |
+
"gpu_mem": 4.823691264,
|
| 1105 |
+
"loss": 1.2627,
|
| 1106 |
+
"grad_norm": 1.814361810684204,
|
| 1107 |
+
"learning_rate": 1.3274122153249028e-05
|
| 1108 |
+
},
|
| 1109 |
+
{
|
| 1110 |
+
"step": 124,
|
| 1111 |
+
"epoch": 3.492957746478873,
|
| 1112 |
+
"cpu_mem": 1.502625792,
|
| 1113 |
+
"gpu_mem": 4.823645184,
|
| 1114 |
+
"loss": 1.2697,
|
| 1115 |
+
"grad_norm": 1.6075295209884644,
|
| 1116 |
+
"learning_rate": 1.1778568219438839e-05
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"step": 125,
|
| 1120 |
+
"epoch": 3.52112676056338,
|
| 1121 |
+
"cpu_mem": 1.502625792,
|
| 1122 |
+
"gpu_mem": 4.823689728,
|
| 1123 |
+
"loss": 1.2082,
|
| 1124 |
+
"grad_norm": 2.1656956672668457,
|
| 1125 |
+
"learning_rate": 1.036893770336938e-05
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"step": 126,
|
| 1129 |
+
"epoch": 3.5492957746478875,
|
| 1130 |
+
"cpu_mem": 1.502625792,
|
| 1131 |
+
"gpu_mem": 4.823640576,
|
| 1132 |
+
"loss": 1.2388,
|
| 1133 |
+
"grad_norm": 1.7753299474716187,
|
| 1134 |
+
"learning_rate": 9.046106882113751e-06
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"step": 127,
|
| 1138 |
+
"epoch": 3.5774647887323945,
|
| 1139 |
+
"cpu_mem": 1.502625792,
|
| 1140 |
+
"gpu_mem": 4.823652864,
|
| 1141 |
+
"loss": 1.2669,
|
| 1142 |
+
"grad_norm": 1.6709108352661133,
|
| 1143 |
+
"learning_rate": 7.810898074930243e-06
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"step": 128,
|
| 1147 |
+
"epoch": 3.6056338028169015,
|
| 1148 |
+
"cpu_mem": 1.502625792,
|
| 1149 |
+
"gpu_mem": 4.82367744,
|
| 1150 |
+
"loss": 1.28,
|
| 1151 |
+
"grad_norm": 2.1730709075927734,
|
| 1152 |
+
"learning_rate": 6.664079132078881e-06
|
| 1153 |
+
},
|
| 1154 |
+
{
|
| 1155 |
+
"step": 129,
|
| 1156 |
+
"epoch": 3.6338028169014085,
|
| 1157 |
+
"cpu_mem": 1.502625792,
|
| 1158 |
+
"gpu_mem": 4.82363904,
|
| 1159 |
+
"loss": 1.2608,
|
| 1160 |
+
"grad_norm": 1.86732017993927,
|
| 1161 |
+
"learning_rate": 5.606362957498195e-06
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"step": 130,
|
| 1165 |
+
"epoch": 3.6619718309859155,
|
| 1166 |
+
"cpu_mem": 1.502625792,
|
| 1167 |
+
"gpu_mem": 4.823642112,
|
| 1168 |
+
"loss": 1.3212,
|
| 1169 |
+
"grad_norm": 2.1604793071746826,
|
| 1170 |
+
"learning_rate": 4.638407065638322e-06
|
| 1171 |
+
},
|
| 1172 |
+
{
|
| 1173 |
+
"step": 131,
|
| 1174 |
+
"epoch": 3.6901408450704225,
|
| 1175 |
+
"cpu_mem": 1.502625792,
|
| 1176 |
+
"gpu_mem": 4.8236544,
|
| 1177 |
+
"loss": 1.2794,
|
| 1178 |
+
"grad_norm": 2.1699371337890625,
|
| 1179 |
+
"learning_rate": 3.760813172726457e-06
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"step": 132,
|
| 1183 |
+
"epoch": 3.7183098591549295,
|
| 1184 |
+
"cpu_mem": 1.502625792,
|
| 1185 |
+
"gpu_mem": 4.823619072,
|
| 1186 |
+
"loss": 1.2438,
|
| 1187 |
+
"grad_norm": 1.8384634256362915,
|
| 1188 |
+
"learning_rate": 2.9741268227184255e-06
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"step": 133,
|
| 1192 |
+
"epoch": 3.7464788732394365,
|
| 1193 |
+
"cpu_mem": 1.502625792,
|
| 1194 |
+
"gpu_mem": 4.823660544,
|
| 1195 |
+
"loss": 1.206,
|
| 1196 |
+
"grad_norm": 1.811904788017273,
|
| 1197 |
+
"learning_rate": 2.2788370481687965e-06
|
| 1198 |
+
},
|
| 1199 |
+
{
|
| 1200 |
+
"step": 134,
|
| 1201 |
+
"epoch": 3.7746478873239435,
|
| 1202 |
+
"cpu_mem": 1.502625792,
|
| 1203 |
+
"gpu_mem": 4.823675904,
|
| 1204 |
+
"loss": 1.2126,
|
| 1205 |
+
"grad_norm": 1.9175496101379395,
|
| 1206 |
+
"learning_rate": 1.6753760662307215e-06
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"step": 135,
|
| 1210 |
+
"epoch": 3.802816901408451,
|
| 1211 |
+
"cpu_mem": 1.502625792,
|
| 1212 |
+
"gpu_mem": 4.823640576,
|
| 1213 |
+
"loss": 1.3218,
|
| 1214 |
+
"grad_norm": 2.67806077003479,
|
| 1215 |
+
"learning_rate": 1.1641190099741904e-06
|
| 1216 |
+
},
|
| 1217 |
+
{
|
| 1218 |
+
"step": 136,
|
| 1219 |
+
"epoch": 3.830985915492958,
|
| 1220 |
+
"cpu_mem": 1.502625792,
|
| 1221 |
+
"gpu_mem": 4.823648256,
|
| 1222 |
+
"loss": 1.2947,
|
| 1223 |
+
"grad_norm": 2.2404983043670654,
|
| 1224 |
+
"learning_rate": 7.453836951897885e-07
|
| 1225 |
+
},
|
| 1226 |
+
{
|
| 1227 |
+
"step": 137,
|
| 1228 |
+
"epoch": 3.859154929577465,
|
| 1229 |
+
"cpu_mem": 1.502625792,
|
| 1230 |
+
"gpu_mem": 4.82366976,
|
| 1231 |
+
"loss": 1.2894,
|
| 1232 |
+
"grad_norm": 2.716714382171631,
|
| 1233 |
+
"learning_rate": 4.194304228229806e-07
|
| 1234 |
+
},
|
| 1235 |
+
{
|
| 1236 |
+
"step": 138,
|
| 1237 |
+
"epoch": 3.887323943661972,
|
| 1238 |
+
"cpu_mem": 1.502625792,
|
| 1239 |
+
"gpu_mem": 4.823680512,
|
| 1240 |
+
"loss": 1.2933,
|
| 1241 |
+
"grad_norm": 1.7514668703079224,
|
| 1242 |
+
"learning_rate": 1.8646181716164831e-07
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"step": 139,
|
| 1246 |
+
"epoch": 3.915492957746479,
|
| 1247 |
+
"cpu_mem": 1.502625792,
|
| 1248 |
+
"gpu_mem": 4.823672832,
|
| 1249 |
+
"loss": 1.3346,
|
| 1250 |
+
"grad_norm": 1.9952417612075806,
|
| 1251 |
+
"learning_rate": 4.662269987756317e-08
|
| 1252 |
+
},
|
| 1253 |
+
{
|
| 1254 |
+
"step": 140,
|
| 1255 |
+
"epoch": 3.943661971830986,
|
| 1256 |
+
"cpu_mem": 1.502625792,
|
| 1257 |
+
"gpu_mem": 4.823706624,
|
| 1258 |
+
"loss": 1.3088,
|
| 1259 |
+
"grad_norm": 2.4654135704040527,
|
| 1260 |
+
"learning_rate": 0.0
|
| 1261 |
+
},
|
| 1262 |
+
{
|
| 1263 |
+
"step": 140,
|
| 1264 |
+
"epoch": 3.943661971830986,
|
| 1265 |
+
"cpu_mem": 1.502625792,
|
| 1266 |
+
"gpu_mem": 4.823706624,
|
| 1267 |
+
"train_runtime": 678.9966,
|
| 1268 |
+
"train_samples_per_second": 13.261,
|
| 1269 |
+
"train_steps_per_second": 0.206,
|
| 1270 |
+
"total_flos": 0.0,
|
| 1271 |
+
"train_loss": 1.6106574450220381
|
| 1272 |
+
}
|
| 1273 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 16,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 8,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "arc_e",
|
| 3 |
+
"results": 0.6611952861952862
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "ARC_E",
|
| 5 |
+
"dataset_id": "allenai/ai2_arc",
|
| 6 |
+
"preprocess_id": "arc_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 8,
|
| 11 |
+
"alpha": 16,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 6317696
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 4,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-arc_e-r8-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T22:53:40.430500"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-arc_e-r8-a2/training_logs.json
ADDED
|
@@ -0,0 +1,1273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.028169014084507043,
|
| 5 |
+
"cpu_mem": 1.488003072,
|
| 6 |
+
"gpu_mem": 4.44271872,
|
| 7 |
+
"loss": 4.6319,
|
| 8 |
+
"grad_norm": 276.5605773925781,
|
| 9 |
+
"learning_rate": 2.1428571428571425e-05
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.056338028169014086,
|
| 14 |
+
"cpu_mem": 1.493704704,
|
| 15 |
+
"gpu_mem": 4.493400064,
|
| 16 |
+
"loss": 4.4578,
|
| 17 |
+
"grad_norm": 282.3363952636719,
|
| 18 |
+
"learning_rate": 4.285714285714285e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.08450704225352113,
|
| 23 |
+
"cpu_mem": 1.493901312,
|
| 24 |
+
"gpu_mem": 4.49337856,
|
| 25 |
+
"loss": 2.9702,
|
| 26 |
+
"grad_norm": 613.773193359375,
|
| 27 |
+
"learning_rate": 6.428571428571427e-05
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.11267605633802817,
|
| 32 |
+
"cpu_mem": 1.493901312,
|
| 33 |
+
"gpu_mem": 4.493357056,
|
| 34 |
+
"loss": 1.9283,
|
| 35 |
+
"grad_norm": 45.13872146606445,
|
| 36 |
+
"learning_rate": 8.57142857142857e-05
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.14084507042253522,
|
| 41 |
+
"cpu_mem": 1.49409792,
|
| 42 |
+
"gpu_mem": 4.493398528,
|
| 43 |
+
"loss": 1.493,
|
| 44 |
+
"grad_norm": 16.81772232055664,
|
| 45 |
+
"learning_rate": 0.00010714285714285714
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.16901408450704225,
|
| 50 |
+
"cpu_mem": 1.494294528,
|
| 51 |
+
"gpu_mem": 4.493373952,
|
| 52 |
+
"loss": 1.3415,
|
| 53 |
+
"grad_norm": 12.790365219116211,
|
| 54 |
+
"learning_rate": 0.00012857142857142855
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.19718309859154928,
|
| 59 |
+
"cpu_mem": 1.494294528,
|
| 60 |
+
"gpu_mem": 4.493396992,
|
| 61 |
+
"loss": 1.5003,
|
| 62 |
+
"grad_norm": 25.754161834716797,
|
| 63 |
+
"learning_rate": 0.00015
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.22535211267605634,
|
| 68 |
+
"cpu_mem": 1.494294528,
|
| 69 |
+
"gpu_mem": 4.49335552,
|
| 70 |
+
"loss": 1.3258,
|
| 71 |
+
"grad_norm": 6.1254353523254395,
|
| 72 |
+
"learning_rate": 0.0001714285714285714
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.2535211267605634,
|
| 77 |
+
"cpu_mem": 1.494294528,
|
| 78 |
+
"gpu_mem": 4.493357056,
|
| 79 |
+
"loss": 1.4644,
|
| 80 |
+
"grad_norm": 34.32180404663086,
|
| 81 |
+
"learning_rate": 0.00019285714285714286
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.28169014084507044,
|
| 86 |
+
"cpu_mem": 1.494294528,
|
| 87 |
+
"gpu_mem": 4.493352448,
|
| 88 |
+
"loss": 1.4288,
|
| 89 |
+
"grad_norm": 23.392850875854492,
|
| 90 |
+
"learning_rate": 0.00021428571428571427
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.30985915492957744,
|
| 95 |
+
"cpu_mem": 1.494294528,
|
| 96 |
+
"gpu_mem": 4.493430784,
|
| 97 |
+
"loss": 1.5429,
|
| 98 |
+
"grad_norm": 20.585525512695312,
|
| 99 |
+
"learning_rate": 0.00023571428571428569
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.3380281690140845,
|
| 104 |
+
"cpu_mem": 1.494294528,
|
| 105 |
+
"gpu_mem": 4.493404672,
|
| 106 |
+
"loss": 1.305,
|
| 107 |
+
"grad_norm": 5.136800765991211,
|
| 108 |
+
"learning_rate": 0.0002571428571428571
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.36619718309859156,
|
| 113 |
+
"cpu_mem": 1.494294528,
|
| 114 |
+
"gpu_mem": 4.49335552,
|
| 115 |
+
"loss": 1.3883,
|
| 116 |
+
"grad_norm": 26.17745590209961,
|
| 117 |
+
"learning_rate": 0.00027857142857142854
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.39436619718309857,
|
| 122 |
+
"cpu_mem": 1.494294528,
|
| 123 |
+
"gpu_mem": 4.493377024,
|
| 124 |
+
"loss": 1.4056,
|
| 125 |
+
"grad_norm": 12.29943561553955,
|
| 126 |
+
"learning_rate": 0.0003
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.4225352112676056,
|
| 131 |
+
"cpu_mem": 1.494294528,
|
| 132 |
+
"gpu_mem": 4.493353984,
|
| 133 |
+
"loss": 1.3972,
|
| 134 |
+
"grad_norm": 5.9298176765441895,
|
| 135 |
+
"learning_rate": 0.0002999533773001224
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.4507042253521127,
|
| 140 |
+
"cpu_mem": 1.494294528,
|
| 141 |
+
"gpu_mem": 4.493358592,
|
| 142 |
+
"loss": 1.3433,
|
| 143 |
+
"grad_norm": 2.671994924545288,
|
| 144 |
+
"learning_rate": 0.0002998135381828383
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.4788732394366197,
|
| 149 |
+
"cpu_mem": 1.494294528,
|
| 150 |
+
"gpu_mem": 4.493395456,
|
| 151 |
+
"loss": 1.3402,
|
| 152 |
+
"grad_norm": 5.09276008605957,
|
| 153 |
+
"learning_rate": 0.00029958056957717696
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 0.5070422535211268,
|
| 158 |
+
"cpu_mem": 1.494491136,
|
| 159 |
+
"gpu_mem": 4.493406208,
|
| 160 |
+
"loss": 1.3272,
|
| 161 |
+
"grad_norm": 4.426205635070801,
|
| 162 |
+
"learning_rate": 0.0002992546163048102
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 0.5352112676056338,
|
| 167 |
+
"cpu_mem": 1.494491136,
|
| 168 |
+
"gpu_mem": 4.493349376,
|
| 169 |
+
"loss": 1.4243,
|
| 170 |
+
"grad_norm": 10.765657424926758,
|
| 171 |
+
"learning_rate": 0.0002988358809900258
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 0.5633802816901409,
|
| 176 |
+
"cpu_mem": 1.494491136,
|
| 177 |
+
"gpu_mem": 4.493420032,
|
| 178 |
+
"loss": 1.4339,
|
| 179 |
+
"grad_norm": 7.541872024536133,
|
| 180 |
+
"learning_rate": 0.0002983246239337692
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 0.5915492957746479,
|
| 185 |
+
"cpu_mem": 1.494491136,
|
| 186 |
+
"gpu_mem": 4.493418496,
|
| 187 |
+
"loss": 1.349,
|
| 188 |
+
"grad_norm": 5.185708522796631,
|
| 189 |
+
"learning_rate": 0.0002977211629518312
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 0.6197183098591549,
|
| 194 |
+
"cpu_mem": 1.494491136,
|
| 195 |
+
"gpu_mem": 4.493375488,
|
| 196 |
+
"loss": 1.3539,
|
| 197 |
+
"grad_norm": 6.7187676429748535,
|
| 198 |
+
"learning_rate": 0.00029702587317728153
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 0.647887323943662,
|
| 203 |
+
"cpu_mem": 1.494491136,
|
| 204 |
+
"gpu_mem": 4.493392384,
|
| 205 |
+
"loss": 1.3318,
|
| 206 |
+
"grad_norm": 6.468657493591309,
|
| 207 |
+
"learning_rate": 0.0002962391868272735
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 0.676056338028169,
|
| 212 |
+
"cpu_mem": 1.494491136,
|
| 213 |
+
"gpu_mem": 4.493349376,
|
| 214 |
+
"loss": 1.306,
|
| 215 |
+
"grad_norm": 3.4460861682891846,
|
| 216 |
+
"learning_rate": 0.00029536159293436166
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 0.704225352112676,
|
| 221 |
+
"cpu_mem": 1.494491136,
|
| 222 |
+
"gpu_mem": 4.49337856,
|
| 223 |
+
"loss": 1.5276,
|
| 224 |
+
"grad_norm": 12.144696235656738,
|
| 225 |
+
"learning_rate": 0.00029439363704250176
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 0.7323943661971831,
|
| 230 |
+
"cpu_mem": 1.494491136,
|
| 231 |
+
"gpu_mem": 4.493358592,
|
| 232 |
+
"loss": 1.4864,
|
| 233 |
+
"grad_norm": 5.351874351501465,
|
| 234 |
+
"learning_rate": 0.00029333592086792107
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 0.7605633802816901,
|
| 239 |
+
"cpu_mem": 1.494491136,
|
| 240 |
+
"gpu_mem": 4.493384704,
|
| 241 |
+
"loss": 1.3675,
|
| 242 |
+
"grad_norm": 5.478825092315674,
|
| 243 |
+
"learning_rate": 0.0002921891019250697
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 0.7887323943661971,
|
| 248 |
+
"cpu_mem": 1.494491136,
|
| 249 |
+
"gpu_mem": 4.493384704,
|
| 250 |
+
"loss": 1.4199,
|
| 251 |
+
"grad_norm": 5.453819274902344,
|
| 252 |
+
"learning_rate": 0.0002909538931178862
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 0.8169014084507042,
|
| 257 |
+
"cpu_mem": 1.494491136,
|
| 258 |
+
"gpu_mem": 4.4933632,
|
| 259 |
+
"loss": 1.2599,
|
| 260 |
+
"grad_norm": 2.9942517280578613,
|
| 261 |
+
"learning_rate": 0.00028963106229663063
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 0.8450704225352113,
|
| 266 |
+
"cpu_mem": 1.494491136,
|
| 267 |
+
"gpu_mem": 4.493353984,
|
| 268 |
+
"loss": 1.3457,
|
| 269 |
+
"grad_norm": 3.5960090160369873,
|
| 270 |
+
"learning_rate": 0.00028822143178056114
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 0.8732394366197183,
|
| 275 |
+
"cpu_mem": 1.494491136,
|
| 276 |
+
"gpu_mem": 4.493372416,
|
| 277 |
+
"loss": 1.3961,
|
| 278 |
+
"grad_norm": 5.403853416442871,
|
| 279 |
+
"learning_rate": 0.00028672587784675096
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 0.9014084507042254,
|
| 284 |
+
"cpu_mem": 1.494491136,
|
| 285 |
+
"gpu_mem": 4.493395456,
|
| 286 |
+
"loss": 1.3711,
|
| 287 |
+
"grad_norm": 6.018552303314209,
|
| 288 |
+
"learning_rate": 0.0002851453301853628
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 0.9295774647887324,
|
| 293 |
+
"cpu_mem": 1.494491136,
|
| 294 |
+
"gpu_mem": 4.493392384,
|
| 295 |
+
"loss": 1.3374,
|
| 296 |
+
"grad_norm": 2.8503122329711914,
|
| 297 |
+
"learning_rate": 0.00028348077132172027
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 0.9577464788732394,
|
| 302 |
+
"cpu_mem": 1.494491136,
|
| 303 |
+
"gpu_mem": 4.493395456,
|
| 304 |
+
"loss": 1.386,
|
| 305 |
+
"grad_norm": 4.291422367095947,
|
| 306 |
+
"learning_rate": 0.0002817332360055343
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 0.9859154929577465,
|
| 311 |
+
"cpu_mem": 1.494491136,
|
| 312 |
+
"gpu_mem": 4.493377024,
|
| 313 |
+
"loss": 1.3118,
|
| 314 |
+
"grad_norm": 3.1133341789245605,
|
| 315 |
+
"learning_rate": 0.0002799038105676658
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 1.0140845070422535,
|
| 320 |
+
"cpu_mem": 1.494491136,
|
| 321 |
+
"gpu_mem": 4.518678016,
|
| 322 |
+
"loss": 1.9673,
|
| 323 |
+
"grad_norm": 5.740923881530762,
|
| 324 |
+
"learning_rate": 0.0002779936322448233
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 1.0422535211267605,
|
| 329 |
+
"cpu_mem": 1.494491136,
|
| 330 |
+
"gpu_mem": 4.518682624,
|
| 331 |
+
"loss": 1.339,
|
| 332 |
+
"grad_norm": 3.4668984413146973,
|
| 333 |
+
"learning_rate": 0.0002760038884726157
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 1.0704225352112675,
|
| 338 |
+
"cpu_mem": 1.494491136,
|
| 339 |
+
"gpu_mem": 4.51866112,
|
| 340 |
+
"loss": 1.2514,
|
| 341 |
+
"grad_norm": 3.128293514251709,
|
| 342 |
+
"learning_rate": 0.00027393581614739923
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 1.0985915492957747,
|
| 347 |
+
"cpu_mem": 1.494491136,
|
| 348 |
+
"gpu_mem": 4.518650368,
|
| 349 |
+
"loss": 1.3329,
|
| 350 |
+
"grad_norm": 4.251654148101807,
|
| 351 |
+
"learning_rate": 0.0002717907008573785
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 1.1267605633802817,
|
| 356 |
+
"cpu_mem": 1.494491136,
|
| 357 |
+
"gpu_mem": 4.518713344,
|
| 358 |
+
"loss": 1.3431,
|
| 359 |
+
"grad_norm": 3.2362022399902344,
|
| 360 |
+
"learning_rate": 0.0002695698760834384
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 1.1549295774647887,
|
| 365 |
+
"cpu_mem": 1.494491136,
|
| 366 |
+
"gpu_mem": 4.518673408,
|
| 367 |
+
"loss": 1.3388,
|
| 368 |
+
"grad_norm": 3.327332019805908,
|
| 369 |
+
"learning_rate": 0.00026727472237020447
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 1.1830985915492958,
|
| 374 |
+
"cpu_mem": 1.494491136,
|
| 375 |
+
"gpu_mem": 4.518716416,
|
| 376 |
+
"loss": 1.3812,
|
| 377 |
+
"grad_norm": 3.0941553115844727,
|
| 378 |
+
"learning_rate": 0.00026490666646784665
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 1.2112676056338028,
|
| 383 |
+
"cpu_mem": 1.494491136,
|
| 384 |
+
"gpu_mem": 4.518665728,
|
| 385 |
+
"loss": 1.3735,
|
| 386 |
+
"grad_norm": 2.356278419494629,
|
| 387 |
+
"learning_rate": 0.0002624671804451601
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 1.2394366197183098,
|
| 392 |
+
"cpu_mem": 1.494491136,
|
| 393 |
+
"gpu_mem": 4.51873024,
|
| 394 |
+
"loss": 1.3766,
|
| 395 |
+
"grad_norm": 3.27545428276062,
|
| 396 |
+
"learning_rate": 0.0002599577807744739
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 1.267605633802817,
|
| 401 |
+
"cpu_mem": 1.494491136,
|
| 402 |
+
"gpu_mem": 4.518697984,
|
| 403 |
+
"loss": 1.3891,
|
| 404 |
+
"grad_norm": 2.333723306655884,
|
| 405 |
+
"learning_rate": 0.0002573800273889577
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 1.295774647887324,
|
| 410 |
+
"cpu_mem": 1.494491136,
|
| 411 |
+
"gpu_mem": 4.518702592,
|
| 412 |
+
"loss": 1.3937,
|
| 413 |
+
"grad_norm": 3.481844425201416,
|
| 414 |
+
"learning_rate": 0.0002547355227129109
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 1.323943661971831,
|
| 419 |
+
"cpu_mem": 1.494491136,
|
| 420 |
+
"gpu_mem": 4.518648832,
|
| 421 |
+
"loss": 1.3532,
|
| 422 |
+
"grad_norm": 4.956977844238281,
|
| 423 |
+
"learning_rate": 0.00025202591066563786
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 1.352112676056338,
|
| 428 |
+
"cpu_mem": 1.494491136,
|
| 429 |
+
"gpu_mem": 4.518662656,
|
| 430 |
+
"loss": 1.3068,
|
| 431 |
+
"grad_norm": 3.1343212127685547,
|
| 432 |
+
"learning_rate": 0.0002492528756395289
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 1.380281690140845,
|
| 437 |
+
"cpu_mem": 1.494491136,
|
| 438 |
+
"gpu_mem": 4.518651904,
|
| 439 |
+
"loss": 1.3936,
|
| 440 |
+
"grad_norm": 8.055434226989746,
|
| 441 |
+
"learning_rate": 0.0002464181414529809
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 1.408450704225352,
|
| 446 |
+
"cpu_mem": 1.494491136,
|
| 447 |
+
"gpu_mem": 4.518665728,
|
| 448 |
+
"loss": 1.4007,
|
| 449 |
+
"grad_norm": 16.459016799926758,
|
| 450 |
+
"learning_rate": 0.00024352347027881003
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 1.436619718309859,
|
| 455 |
+
"cpu_mem": 1.494491136,
|
| 456 |
+
"gpu_mem": 4.518717952,
|
| 457 |
+
"loss": 1.4211,
|
| 458 |
+
"grad_norm": 16.374134063720703,
|
| 459 |
+
"learning_rate": 0.0002405706615488216
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 1.4647887323943662,
|
| 464 |
+
"cpu_mem": 1.494491136,
|
| 465 |
+
"gpu_mem": 4.518665728,
|
| 466 |
+
"loss": 1.3547,
|
| 467 |
+
"grad_norm": 4.7861762046813965,
|
| 468 |
+
"learning_rate": 0.00023756155083521846
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 1.4929577464788732,
|
| 473 |
+
"cpu_mem": 1.494491136,
|
| 474 |
+
"gpu_mem": 4.518734848,
|
| 475 |
+
"loss": 1.3086,
|
| 476 |
+
"grad_norm": 5.092458724975586,
|
| 477 |
+
"learning_rate": 0.00023449800870954326
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 1.5211267605633803,
|
| 482 |
+
"cpu_mem": 1.494491136,
|
| 483 |
+
"gpu_mem": 4.518702592,
|
| 484 |
+
"loss": 1.3138,
|
| 485 |
+
"grad_norm": 3.623900890350342,
|
| 486 |
+
"learning_rate": 0.0002313819395798639
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 1.5492957746478875,
|
| 491 |
+
"cpu_mem": 1.494491136,
|
| 492 |
+
"gpu_mem": 4.518711808,
|
| 493 |
+
"loss": 1.3956,
|
| 494 |
+
"grad_norm": 4.312674522399902,
|
| 495 |
+
"learning_rate": 0.0002282152805069247
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 1.5774647887323945,
|
| 500 |
+
"cpu_mem": 1.494491136,
|
| 501 |
+
"gpu_mem": 4.518687232,
|
| 502 |
+
"loss": 1.341,
|
| 503 |
+
"grad_norm": 3.958331346511841,
|
| 504 |
+
"learning_rate": 0.000225
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 1.6056338028169015,
|
| 509 |
+
"cpu_mem": 1.494491136,
|
| 510 |
+
"gpu_mem": 4.518721024,
|
| 511 |
+
"loss": 1.3175,
|
| 512 |
+
"grad_norm": 3.097858190536499,
|
| 513 |
+
"learning_rate": 0.00022173809679319772
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 1.6338028169014085,
|
| 518 |
+
"cpu_mem": 1.494491136,
|
| 519 |
+
"gpu_mem": 4.518702592,
|
| 520 |
+
"loss": 1.3063,
|
| 521 |
+
"grad_norm": 3.0172431468963623,
|
| 522 |
+
"learning_rate": 0.00021843159860297442
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 1.6619718309859155,
|
| 527 |
+
"cpu_mem": 1.494491136,
|
| 528 |
+
"gpu_mem": 4.518688768,
|
| 529 |
+
"loss": 1.3144,
|
| 530 |
+
"grad_norm": 2.152859687805176,
|
| 531 |
+
"learning_rate": 0.00021508256086763368
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 1.6901408450704225,
|
| 536 |
+
"cpu_mem": 1.494491136,
|
| 537 |
+
"gpu_mem": 4.518727168,
|
| 538 |
+
"loss": 1.3108,
|
| 539 |
+
"grad_norm": 3.8040716648101807,
|
| 540 |
+
"learning_rate": 0.00021169306546959174
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 1.7183098591549295,
|
| 545 |
+
"cpu_mem": 1.494491136,
|
| 546 |
+
"gpu_mem": 4.518659584,
|
| 547 |
+
"loss": 1.3203,
|
| 548 |
+
"grad_norm": 3.8633904457092285,
|
| 549 |
+
"learning_rate": 0.0002082652194412042
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 1.7464788732394365,
|
| 554 |
+
"cpu_mem": 1.494491136,
|
| 555 |
+
"gpu_mem": 4.5187072,
|
| 556 |
+
"loss": 1.3983,
|
| 557 |
+
"grad_norm": 4.749051094055176,
|
| 558 |
+
"learning_rate": 0.00020480115365495926
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 1.7746478873239435,
|
| 563 |
+
"cpu_mem": 1.494491136,
|
| 564 |
+
"gpu_mem": 4.518656512,
|
| 565 |
+
"loss": 1.3682,
|
| 566 |
+
"grad_norm": 5.358827114105225,
|
| 567 |
+
"learning_rate": 0.00020130302149885031
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 1.8028169014084507,
|
| 572 |
+
"cpu_mem": 1.494491136,
|
| 573 |
+
"gpu_mem": 4.518705664,
|
| 574 |
+
"loss": 1.4047,
|
| 575 |
+
"grad_norm": 5.390880584716797,
|
| 576 |
+
"learning_rate": 0.00019777299753775265
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 1.8309859154929577,
|
| 581 |
+
"cpu_mem": 1.494491136,
|
| 582 |
+
"gpu_mem": 4.518704128,
|
| 583 |
+
"loss": 1.3214,
|
| 584 |
+
"grad_norm": 4.8978447914123535,
|
| 585 |
+
"learning_rate": 0.00019421327616163563
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 1.8591549295774648,
|
| 590 |
+
"cpu_mem": 1.494491136,
|
| 591 |
+
"gpu_mem": 4.51872256,
|
| 592 |
+
"loss": 1.3365,
|
| 593 |
+
"grad_norm": 4.024534225463867,
|
| 594 |
+
"learning_rate": 0.00019062607022145078
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 1.887323943661972,
|
| 599 |
+
"cpu_mem": 1.494491136,
|
| 600 |
+
"gpu_mem": 4.518664192,
|
| 601 |
+
"loss": 1.3593,
|
| 602 |
+
"grad_norm": 10.947258949279785,
|
| 603 |
+
"learning_rate": 0.00018701360965354402
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 1.915492957746479,
|
| 608 |
+
"cpu_mem": 1.494491136,
|
| 609 |
+
"gpu_mem": 4.51867648,
|
| 610 |
+
"loss": 1.3801,
|
| 611 |
+
"grad_norm": 7.698343276977539,
|
| 612 |
+
"learning_rate": 0.00018337814009344714
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 69,
|
| 616 |
+
"epoch": 1.943661971830986,
|
| 617 |
+
"cpu_mem": 1.494491136,
|
| 618 |
+
"gpu_mem": 4.518701056,
|
| 619 |
+
"loss": 1.365,
|
| 620 |
+
"grad_norm": 6.346189022064209,
|
| 621 |
+
"learning_rate": 0.0001797219214799096
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"step": 70,
|
| 625 |
+
"epoch": 1.971830985915493,
|
| 626 |
+
"cpu_mem": 1.494491136,
|
| 627 |
+
"gpu_mem": 4.518678016,
|
| 628 |
+
"loss": 1.2689,
|
| 629 |
+
"grad_norm": 4.332313060760498,
|
| 630 |
+
"learning_rate": 0.00017604722665003956
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"step": 71,
|
| 634 |
+
"epoch": 2.0,
|
| 635 |
+
"cpu_mem": 1.494491136,
|
| 636 |
+
"gpu_mem": 4.518513664,
|
| 637 |
+
"loss": 1.9494,
|
| 638 |
+
"grad_norm": 61.389652252197266,
|
| 639 |
+
"learning_rate": 0.00017235633992642615
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"step": 72,
|
| 643 |
+
"epoch": 2.028169014084507,
|
| 644 |
+
"cpu_mem": 1.494491136,
|
| 645 |
+
"gpu_mem": 4.493387776,
|
| 646 |
+
"loss": 1.369,
|
| 647 |
+
"grad_norm": 5.659824848175049,
|
| 648 |
+
"learning_rate": 0.00016865155569712278
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"step": 73,
|
| 652 |
+
"epoch": 2.056338028169014,
|
| 653 |
+
"cpu_mem": 1.494491136,
|
| 654 |
+
"gpu_mem": 4.493350912,
|
| 655 |
+
"loss": 1.3145,
|
| 656 |
+
"grad_norm": 4.936837673187256,
|
| 657 |
+
"learning_rate": 0.0001649351769893725
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"step": 74,
|
| 661 |
+
"epoch": 2.084507042253521,
|
| 662 |
+
"cpu_mem": 1.494491136,
|
| 663 |
+
"gpu_mem": 4.493410816,
|
| 664 |
+
"loss": 1.2625,
|
| 665 |
+
"grad_norm": 3.075138568878174,
|
| 666 |
+
"learning_rate": 0.00016120951403796364
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"step": 75,
|
| 670 |
+
"epoch": 2.112676056338028,
|
| 671 |
+
"cpu_mem": 1.494491136,
|
| 672 |
+
"gpu_mem": 4.49337856,
|
| 673 |
+
"loss": 1.34,
|
| 674 |
+
"grad_norm": 2.8183681964874268,
|
| 675 |
+
"learning_rate": 0.00015747688284910457
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"step": 76,
|
| 679 |
+
"epoch": 2.140845070422535,
|
| 680 |
+
"cpu_mem": 1.494491136,
|
| 681 |
+
"gpu_mem": 4.493389312,
|
| 682 |
+
"loss": 1.2405,
|
| 683 |
+
"grad_norm": 2.041452169418335,
|
| 684 |
+
"learning_rate": 0.00015373960376071093
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"step": 77,
|
| 688 |
+
"epoch": 2.169014084507042,
|
| 689 |
+
"cpu_mem": 1.494491136,
|
| 690 |
+
"gpu_mem": 4.493426176,
|
| 691 |
+
"loss": 1.3003,
|
| 692 |
+
"grad_norm": 3.310304880142212,
|
| 693 |
+
"learning_rate": 0.00015
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"step": 78,
|
| 697 |
+
"epoch": 2.1971830985915495,
|
| 698 |
+
"cpu_mem": 1.494491136,
|
| 699 |
+
"gpu_mem": 4.493410816,
|
| 700 |
+
"loss": 1.3301,
|
| 701 |
+
"grad_norm": 4.006730079650879,
|
| 702 |
+
"learning_rate": 0.00014626039623928907
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"step": 79,
|
| 706 |
+
"epoch": 2.2253521126760565,
|
| 707 |
+
"cpu_mem": 1.494491136,
|
| 708 |
+
"gpu_mem": 4.493361664,
|
| 709 |
+
"loss": 1.2198,
|
| 710 |
+
"grad_norm": 3.7885594367980957,
|
| 711 |
+
"learning_rate": 0.0001425231171508954
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"step": 80,
|
| 715 |
+
"epoch": 2.2535211267605635,
|
| 716 |
+
"cpu_mem": 1.494491136,
|
| 717 |
+
"gpu_mem": 4.493406208,
|
| 718 |
+
"loss": 1.3053,
|
| 719 |
+
"grad_norm": 3.224207639694214,
|
| 720 |
+
"learning_rate": 0.00013879048596203636
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"step": 81,
|
| 724 |
+
"epoch": 2.2816901408450705,
|
| 725 |
+
"cpu_mem": 1.494491136,
|
| 726 |
+
"gpu_mem": 4.493392384,
|
| 727 |
+
"loss": 1.3576,
|
| 728 |
+
"grad_norm": 5.85601282119751,
|
| 729 |
+
"learning_rate": 0.0001350648230106275
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"step": 82,
|
| 733 |
+
"epoch": 2.3098591549295775,
|
| 734 |
+
"cpu_mem": 1.494491136,
|
| 735 |
+
"gpu_mem": 4.493360128,
|
| 736 |
+
"loss": 1.3497,
|
| 737 |
+
"grad_norm": 6.43959903717041,
|
| 738 |
+
"learning_rate": 0.00013134844430287725
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"step": 83,
|
| 742 |
+
"epoch": 2.3380281690140845,
|
| 743 |
+
"cpu_mem": 1.494491136,
|
| 744 |
+
"gpu_mem": 4.493410816,
|
| 745 |
+
"loss": 1.2215,
|
| 746 |
+
"grad_norm": 5.418457508087158,
|
| 747 |
+
"learning_rate": 0.0001276436600735738
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"step": 84,
|
| 751 |
+
"epoch": 2.3661971830985915,
|
| 752 |
+
"cpu_mem": 1.494491136,
|
| 753 |
+
"gpu_mem": 4.493349376,
|
| 754 |
+
"loss": 1.3708,
|
| 755 |
+
"grad_norm": 6.882091045379639,
|
| 756 |
+
"learning_rate": 0.00012395277334996044
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"step": 85,
|
| 760 |
+
"epoch": 2.3943661971830985,
|
| 761 |
+
"cpu_mem": 1.494491136,
|
| 762 |
+
"gpu_mem": 4.493395456,
|
| 763 |
+
"loss": 1.3319,
|
| 764 |
+
"grad_norm": 6.375625133514404,
|
| 765 |
+
"learning_rate": 0.00012027807852009038
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"step": 86,
|
| 769 |
+
"epoch": 2.4225352112676055,
|
| 770 |
+
"cpu_mem": 1.494491136,
|
| 771 |
+
"gpu_mem": 4.493349376,
|
| 772 |
+
"loss": 1.29,
|
| 773 |
+
"grad_norm": 4.537503719329834,
|
| 774 |
+
"learning_rate": 0.00011662185990655284
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"step": 87,
|
| 778 |
+
"epoch": 2.4507042253521125,
|
| 779 |
+
"cpu_mem": 1.494491136,
|
| 780 |
+
"gpu_mem": 4.493380096,
|
| 781 |
+
"loss": 1.2986,
|
| 782 |
+
"grad_norm": 5.286020278930664,
|
| 783 |
+
"learning_rate": 0.00011298639034645593
|
| 784 |
+
},
|
| 785 |
+
{
|
| 786 |
+
"step": 88,
|
| 787 |
+
"epoch": 2.4788732394366195,
|
| 788 |
+
"cpu_mem": 1.494491136,
|
| 789 |
+
"gpu_mem": 4.49335552,
|
| 790 |
+
"loss": 1.2059,
|
| 791 |
+
"grad_norm": 4.801527500152588,
|
| 792 |
+
"learning_rate": 0.00010937392977854923
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"step": 89,
|
| 796 |
+
"epoch": 2.507042253521127,
|
| 797 |
+
"cpu_mem": 1.494491136,
|
| 798 |
+
"gpu_mem": 4.49340928,
|
| 799 |
+
"loss": 1.2417,
|
| 800 |
+
"grad_norm": 4.618600845336914,
|
| 801 |
+
"learning_rate": 0.00010578672383836435
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"step": 90,
|
| 805 |
+
"epoch": 2.535211267605634,
|
| 806 |
+
"cpu_mem": 1.494491136,
|
| 807 |
+
"gpu_mem": 4.493390848,
|
| 808 |
+
"loss": 1.3213,
|
| 809 |
+
"grad_norm": 5.864246845245361,
|
| 810 |
+
"learning_rate": 0.00010222700246224735
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"step": 91,
|
| 814 |
+
"epoch": 2.563380281690141,
|
| 815 |
+
"cpu_mem": 1.494491136,
|
| 816 |
+
"gpu_mem": 4.49334016,
|
| 817 |
+
"loss": 1.3045,
|
| 818 |
+
"grad_norm": 5.989525318145752,
|
| 819 |
+
"learning_rate": 9.869697850114969e-05
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"step": 92,
|
| 823 |
+
"epoch": 2.591549295774648,
|
| 824 |
+
"cpu_mem": 1.494491136,
|
| 825 |
+
"gpu_mem": 4.493364736,
|
| 826 |
+
"loss": 1.3411,
|
| 827 |
+
"grad_norm": 6.808497905731201,
|
| 828 |
+
"learning_rate": 9.519884634504074e-05
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"step": 93,
|
| 832 |
+
"epoch": 2.619718309859155,
|
| 833 |
+
"cpu_mem": 1.494491136,
|
| 834 |
+
"gpu_mem": 4.493367808,
|
| 835 |
+
"loss": 1.2999,
|
| 836 |
+
"grad_norm": 7.139451026916504,
|
| 837 |
+
"learning_rate": 9.17347805587958e-05
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"step": 94,
|
| 841 |
+
"epoch": 2.647887323943662,
|
| 842 |
+
"cpu_mem": 1.494491136,
|
| 843 |
+
"gpu_mem": 4.493360128,
|
| 844 |
+
"loss": 1.2241,
|
| 845 |
+
"grad_norm": 5.613903522491455,
|
| 846 |
+
"learning_rate": 8.830693453040829e-05
|
| 847 |
+
},
|
| 848 |
+
{
|
| 849 |
+
"step": 95,
|
| 850 |
+
"epoch": 2.676056338028169,
|
| 851 |
+
"cpu_mem": 1.494491136,
|
| 852 |
+
"gpu_mem": 4.493398528,
|
| 853 |
+
"loss": 1.2218,
|
| 854 |
+
"grad_norm": 4.103054523468018,
|
| 855 |
+
"learning_rate": 8.491743913236628e-05
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"step": 96,
|
| 859 |
+
"epoch": 2.704225352112676,
|
| 860 |
+
"cpu_mem": 1.494491136,
|
| 861 |
+
"gpu_mem": 4.493407744,
|
| 862 |
+
"loss": 1.1553,
|
| 863 |
+
"grad_norm": 4.9666829109191895,
|
| 864 |
+
"learning_rate": 8.156840139702554e-05
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"step": 97,
|
| 868 |
+
"epoch": 2.732394366197183,
|
| 869 |
+
"cpu_mem": 1.494491136,
|
| 870 |
+
"gpu_mem": 4.493350912,
|
| 871 |
+
"loss": 1.2102,
|
| 872 |
+
"grad_norm": 4.077977180480957,
|
| 873 |
+
"learning_rate": 7.82619032068023e-05
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"step": 98,
|
| 877 |
+
"epoch": 2.76056338028169,
|
| 878 |
+
"cpu_mem": 1.494491136,
|
| 879 |
+
"gpu_mem": 4.493350912,
|
| 880 |
+
"loss": 1.276,
|
| 881 |
+
"grad_norm": 4.605331897735596,
|
| 882 |
+
"learning_rate": 7.500000000000002e-05
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"step": 99,
|
| 886 |
+
"epoch": 2.788732394366197,
|
| 887 |
+
"cpu_mem": 1.494491136,
|
| 888 |
+
"gpu_mem": 4.49334784,
|
| 889 |
+
"loss": 1.2602,
|
| 890 |
+
"grad_norm": 7.203280448913574,
|
| 891 |
+
"learning_rate": 7.17847194930753e-05
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"step": 100,
|
| 895 |
+
"epoch": 2.816901408450704,
|
| 896 |
+
"cpu_mem": 1.494491136,
|
| 897 |
+
"gpu_mem": 4.493346304,
|
| 898 |
+
"loss": 1.2185,
|
| 899 |
+
"grad_norm": 4.943838119506836,
|
| 900 |
+
"learning_rate": 6.86180604201361e-05
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"step": 101,
|
| 904 |
+
"epoch": 2.845070422535211,
|
| 905 |
+
"cpu_mem": 1.494491136,
|
| 906 |
+
"gpu_mem": 4.493389312,
|
| 907 |
+
"loss": 1.1767,
|
| 908 |
+
"grad_norm": 4.598587512969971,
|
| 909 |
+
"learning_rate": 6.550199129045668e-05
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"step": 102,
|
| 913 |
+
"epoch": 2.873239436619718,
|
| 914 |
+
"cpu_mem": 1.494491136,
|
| 915 |
+
"gpu_mem": 4.493327872,
|
| 916 |
+
"loss": 1.2838,
|
| 917 |
+
"grad_norm": 4.879561901092529,
|
| 918 |
+
"learning_rate": 6.243844916478155e-05
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"step": 103,
|
| 922 |
+
"epoch": 2.9014084507042255,
|
| 923 |
+
"cpu_mem": 1.494491136,
|
| 924 |
+
"gpu_mem": 4.493377024,
|
| 925 |
+
"loss": 1.2128,
|
| 926 |
+
"grad_norm": 4.067531108856201,
|
| 927 |
+
"learning_rate": 5.9429338451178355e-05
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"step": 104,
|
| 931 |
+
"epoch": 2.9295774647887325,
|
| 932 |
+
"cpu_mem": 1.494491136,
|
| 933 |
+
"gpu_mem": 4.49344,
|
| 934 |
+
"loss": 1.2998,
|
| 935 |
+
"grad_norm": 5.858630180358887,
|
| 936 |
+
"learning_rate": 5.6476529721189974e-05
|
| 937 |
+
},
|
| 938 |
+
{
|
| 939 |
+
"step": 105,
|
| 940 |
+
"epoch": 2.9577464788732395,
|
| 941 |
+
"cpu_mem": 1.494491136,
|
| 942 |
+
"gpu_mem": 4.493392384,
|
| 943 |
+
"loss": 1.1456,
|
| 944 |
+
"grad_norm": 4.226877689361572,
|
| 945 |
+
"learning_rate": 5.358185854701909e-05
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"step": 106,
|
| 949 |
+
"epoch": 2.9859154929577465,
|
| 950 |
+
"cpu_mem": 1.494491136,
|
| 951 |
+
"gpu_mem": 4.493373952,
|
| 952 |
+
"loss": 1.2223,
|
| 953 |
+
"grad_norm": 4.734609127044678,
|
| 954 |
+
"learning_rate": 5.074712436047112e-05
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"step": 107,
|
| 958 |
+
"epoch": 3.0140845070422535,
|
| 959 |
+
"cpu_mem": 1.494491136,
|
| 960 |
+
"gpu_mem": 4.518701056,
|
| 961 |
+
"loss": 1.625,
|
| 962 |
+
"grad_norm": 12.016558647155762,
|
| 963 |
+
"learning_rate": 4.7974089334362057e-05
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"step": 108,
|
| 967 |
+
"epoch": 3.0422535211267605,
|
| 968 |
+
"cpu_mem": 1.494491136,
|
| 969 |
+
"gpu_mem": 4.518682624,
|
| 970 |
+
"loss": 1.025,
|
| 971 |
+
"grad_norm": 5.136899948120117,
|
| 972 |
+
"learning_rate": 4.526447728708908e-05
|
| 973 |
+
},
|
| 974 |
+
{
|
| 975 |
+
"step": 109,
|
| 976 |
+
"epoch": 3.0704225352112675,
|
| 977 |
+
"cpu_mem": 1.494491136,
|
| 978 |
+
"gpu_mem": 4.518671872,
|
| 979 |
+
"loss": 1.0919,
|
| 980 |
+
"grad_norm": 6.250692844390869,
|
| 981 |
+
"learning_rate": 4.261997261104223e-05
|
| 982 |
+
},
|
| 983 |
+
{
|
| 984 |
+
"step": 110,
|
| 985 |
+
"epoch": 3.0985915492957745,
|
| 986 |
+
"cpu_mem": 1.494491136,
|
| 987 |
+
"gpu_mem": 4.518725632,
|
| 988 |
+
"loss": 0.9629,
|
| 989 |
+
"grad_norm": 6.343587875366211,
|
| 990 |
+
"learning_rate": 4.004221922552608e-05
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"step": 111,
|
| 994 |
+
"epoch": 3.1267605633802815,
|
| 995 |
+
"cpu_mem": 1.494491136,
|
| 996 |
+
"gpu_mem": 4.518685696,
|
| 997 |
+
"loss": 1.0155,
|
| 998 |
+
"grad_norm": 6.334781169891357,
|
| 999 |
+
"learning_rate": 3.753281955483985e-05
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"step": 112,
|
| 1003 |
+
"epoch": 3.1549295774647885,
|
| 1004 |
+
"cpu_mem": 1.494491136,
|
| 1005 |
+
"gpu_mem": 4.518704128,
|
| 1006 |
+
"loss": 1.039,
|
| 1007 |
+
"grad_norm": 8.391195297241211,
|
| 1008 |
+
"learning_rate": 3.509333353215331e-05
|
| 1009 |
+
},
|
| 1010 |
+
{
|
| 1011 |
+
"step": 113,
|
| 1012 |
+
"epoch": 3.183098591549296,
|
| 1013 |
+
"cpu_mem": 1.494491136,
|
| 1014 |
+
"gpu_mem": 4.518767104,
|
| 1015 |
+
"loss": 0.9537,
|
| 1016 |
+
"grad_norm": 7.252392768859863,
|
| 1017 |
+
"learning_rate": 3.2725277629795526e-05
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"step": 114,
|
| 1021 |
+
"epoch": 3.211267605633803,
|
| 1022 |
+
"cpu_mem": 1.494491136,
|
| 1023 |
+
"gpu_mem": 4.518694912,
|
| 1024 |
+
"loss": 1.0189,
|
| 1025 |
+
"grad_norm": 8.310603141784668,
|
| 1026 |
+
"learning_rate": 3.0430123916561672e-05
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"step": 115,
|
| 1030 |
+
"epoch": 3.23943661971831,
|
| 1031 |
+
"cpu_mem": 1.494491136,
|
| 1032 |
+
"gpu_mem": 4.518688768,
|
| 1033 |
+
"loss": 1.0358,
|
| 1034 |
+
"grad_norm": 7.496110916137695,
|
| 1035 |
+
"learning_rate": 2.8209299142621522e-05
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"step": 116,
|
| 1039 |
+
"epoch": 3.267605633802817,
|
| 1040 |
+
"cpu_mem": 1.494491136,
|
| 1041 |
+
"gpu_mem": 4.518704128,
|
| 1042 |
+
"loss": 0.962,
|
| 1043 |
+
"grad_norm": 8.337830543518066,
|
| 1044 |
+
"learning_rate": 2.6064183852600797e-05
|
| 1045 |
+
},
|
| 1046 |
+
{
|
| 1047 |
+
"step": 117,
|
| 1048 |
+
"epoch": 3.295774647887324,
|
| 1049 |
+
"cpu_mem": 1.494491136,
|
| 1050 |
+
"gpu_mem": 4.518719488,
|
| 1051 |
+
"loss": 0.9699,
|
| 1052 |
+
"grad_norm": 7.644598007202148,
|
| 1053 |
+
"learning_rate": 2.3996111527384288e-05
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"step": 118,
|
| 1057 |
+
"epoch": 3.323943661971831,
|
| 1058 |
+
"cpu_mem": 1.494491136,
|
| 1059 |
+
"gpu_mem": 4.518710272,
|
| 1060 |
+
"loss": 0.89,
|
| 1061 |
+
"grad_norm": 6.858006000518799,
|
| 1062 |
+
"learning_rate": 2.2006367755176655e-05
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"step": 119,
|
| 1066 |
+
"epoch": 3.352112676056338,
|
| 1067 |
+
"cpu_mem": 1.494491136,
|
| 1068 |
+
"gpu_mem": 4.518701056,
|
| 1069 |
+
"loss": 0.8971,
|
| 1070 |
+
"grad_norm": 8.39448356628418,
|
| 1071 |
+
"learning_rate": 2.009618943233419e-05
|
| 1072 |
+
},
|
| 1073 |
+
{
|
| 1074 |
+
"step": 120,
|
| 1075 |
+
"epoch": 3.380281690140845,
|
| 1076 |
+
"cpu_mem": 1.494491136,
|
| 1077 |
+
"gpu_mem": 4.518719488,
|
| 1078 |
+
"loss": 0.9552,
|
| 1079 |
+
"grad_norm": 8.790838241577148,
|
| 1080 |
+
"learning_rate": 1.82667639944657e-05
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"step": 121,
|
| 1084 |
+
"epoch": 3.408450704225352,
|
| 1085 |
+
"cpu_mem": 1.494491136,
|
| 1086 |
+
"gpu_mem": 4.518717952,
|
| 1087 |
+
"loss": 0.7973,
|
| 1088 |
+
"grad_norm": 9.548598289489746,
|
| 1089 |
+
"learning_rate": 1.6519228678279718e-05
|
| 1090 |
+
},
|
| 1091 |
+
{
|
| 1092 |
+
"step": 122,
|
| 1093 |
+
"epoch": 3.436619718309859,
|
| 1094 |
+
"cpu_mem": 1.494491136,
|
| 1095 |
+
"gpu_mem": 4.518674944,
|
| 1096 |
+
"loss": 0.8216,
|
| 1097 |
+
"grad_norm": 9.652661323547363,
|
| 1098 |
+
"learning_rate": 1.4854669814637143e-05
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"step": 123,
|
| 1102 |
+
"epoch": 3.464788732394366,
|
| 1103 |
+
"cpu_mem": 1.494491136,
|
| 1104 |
+
"gpu_mem": 4.5187072,
|
| 1105 |
+
"loss": 0.804,
|
| 1106 |
+
"grad_norm": 8.80784797668457,
|
| 1107 |
+
"learning_rate": 1.3274122153249028e-05
|
| 1108 |
+
},
|
| 1109 |
+
{
|
| 1110 |
+
"step": 124,
|
| 1111 |
+
"epoch": 3.492957746478873,
|
| 1112 |
+
"cpu_mem": 1.494491136,
|
| 1113 |
+
"gpu_mem": 4.51866112,
|
| 1114 |
+
"loss": 0.8326,
|
| 1115 |
+
"grad_norm": 9.57839584350586,
|
| 1116 |
+
"learning_rate": 1.1778568219438839e-05
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"step": 125,
|
| 1120 |
+
"epoch": 3.52112676056338,
|
| 1121 |
+
"cpu_mem": 1.494491136,
|
| 1122 |
+
"gpu_mem": 4.518705664,
|
| 1123 |
+
"loss": 0.8423,
|
| 1124 |
+
"grad_norm": 11.19555377960205,
|
| 1125 |
+
"learning_rate": 1.036893770336938e-05
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"step": 126,
|
| 1129 |
+
"epoch": 3.5492957746478875,
|
| 1130 |
+
"cpu_mem": 1.494491136,
|
| 1131 |
+
"gpu_mem": 4.518656512,
|
| 1132 |
+
"loss": 0.7886,
|
| 1133 |
+
"grad_norm": 10.461869239807129,
|
| 1134 |
+
"learning_rate": 9.046106882113751e-06
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"step": 127,
|
| 1138 |
+
"epoch": 3.5774647887323945,
|
| 1139 |
+
"cpu_mem": 1.494491136,
|
| 1140 |
+
"gpu_mem": 4.5186688,
|
| 1141 |
+
"loss": 0.8942,
|
| 1142 |
+
"grad_norm": 10.252334594726562,
|
| 1143 |
+
"learning_rate": 7.810898074930243e-06
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"step": 128,
|
| 1147 |
+
"epoch": 3.6056338028169015,
|
| 1148 |
+
"cpu_mem": 1.494491136,
|
| 1149 |
+
"gpu_mem": 4.518693376,
|
| 1150 |
+
"loss": 0.9278,
|
| 1151 |
+
"grad_norm": 11.462838172912598,
|
| 1152 |
+
"learning_rate": 6.664079132078881e-06
|
| 1153 |
+
},
|
| 1154 |
+
{
|
| 1155 |
+
"step": 129,
|
| 1156 |
+
"epoch": 3.6338028169014085,
|
| 1157 |
+
"cpu_mem": 1.494491136,
|
| 1158 |
+
"gpu_mem": 4.518654976,
|
| 1159 |
+
"loss": 0.7816,
|
| 1160 |
+
"grad_norm": 10.681913375854492,
|
| 1161 |
+
"learning_rate": 5.606362957498195e-06
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"step": 130,
|
| 1165 |
+
"epoch": 3.6619718309859155,
|
| 1166 |
+
"cpu_mem": 1.494491136,
|
| 1167 |
+
"gpu_mem": 4.518658048,
|
| 1168 |
+
"loss": 0.9345,
|
| 1169 |
+
"grad_norm": 15.080108642578125,
|
| 1170 |
+
"learning_rate": 4.638407065638322e-06
|
| 1171 |
+
},
|
| 1172 |
+
{
|
| 1173 |
+
"step": 131,
|
| 1174 |
+
"epoch": 3.6901408450704225,
|
| 1175 |
+
"cpu_mem": 1.494491136,
|
| 1176 |
+
"gpu_mem": 4.518670336,
|
| 1177 |
+
"loss": 0.8457,
|
| 1178 |
+
"grad_norm": 11.008662223815918,
|
| 1179 |
+
"learning_rate": 3.760813172726457e-06
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"step": 132,
|
| 1183 |
+
"epoch": 3.7183098591549295,
|
| 1184 |
+
"cpu_mem": 1.494491136,
|
| 1185 |
+
"gpu_mem": 4.518635008,
|
| 1186 |
+
"loss": 0.8203,
|
| 1187 |
+
"grad_norm": 9.407642364501953,
|
| 1188 |
+
"learning_rate": 2.9741268227184255e-06
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"step": 133,
|
| 1192 |
+
"epoch": 3.7464788732394365,
|
| 1193 |
+
"cpu_mem": 1.494491136,
|
| 1194 |
+
"gpu_mem": 4.51867648,
|
| 1195 |
+
"loss": 0.8339,
|
| 1196 |
+
"grad_norm": 9.52961254119873,
|
| 1197 |
+
"learning_rate": 2.2788370481687965e-06
|
| 1198 |
+
},
|
| 1199 |
+
{
|
| 1200 |
+
"step": 134,
|
| 1201 |
+
"epoch": 3.7746478873239435,
|
| 1202 |
+
"cpu_mem": 1.494491136,
|
| 1203 |
+
"gpu_mem": 4.51869184,
|
| 1204 |
+
"loss": 0.8828,
|
| 1205 |
+
"grad_norm": 9.80649185180664,
|
| 1206 |
+
"learning_rate": 1.6753760662307215e-06
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"step": 135,
|
| 1210 |
+
"epoch": 3.802816901408451,
|
| 1211 |
+
"cpu_mem": 1.494491136,
|
| 1212 |
+
"gpu_mem": 4.518656512,
|
| 1213 |
+
"loss": 0.9178,
|
| 1214 |
+
"grad_norm": 12.667458534240723,
|
| 1215 |
+
"learning_rate": 1.1641190099741904e-06
|
| 1216 |
+
},
|
| 1217 |
+
{
|
| 1218 |
+
"step": 136,
|
| 1219 |
+
"epoch": 3.830985915492958,
|
| 1220 |
+
"cpu_mem": 1.494491136,
|
| 1221 |
+
"gpu_mem": 4.518664192,
|
| 1222 |
+
"loss": 0.7627,
|
| 1223 |
+
"grad_norm": 11.412312507629395,
|
| 1224 |
+
"learning_rate": 7.453836951897885e-07
|
| 1225 |
+
},
|
| 1226 |
+
{
|
| 1227 |
+
"step": 137,
|
| 1228 |
+
"epoch": 3.859154929577465,
|
| 1229 |
+
"cpu_mem": 1.494491136,
|
| 1230 |
+
"gpu_mem": 4.518685696,
|
| 1231 |
+
"loss": 0.7683,
|
| 1232 |
+
"grad_norm": 11.488083839416504,
|
| 1233 |
+
"learning_rate": 4.194304228229806e-07
|
| 1234 |
+
},
|
| 1235 |
+
{
|
| 1236 |
+
"step": 138,
|
| 1237 |
+
"epoch": 3.887323943661972,
|
| 1238 |
+
"cpu_mem": 1.494491136,
|
| 1239 |
+
"gpu_mem": 4.518696448,
|
| 1240 |
+
"loss": 0.816,
|
| 1241 |
+
"grad_norm": 10.21458911895752,
|
| 1242 |
+
"learning_rate": 1.8646181716164831e-07
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"step": 139,
|
| 1246 |
+
"epoch": 3.915492957746479,
|
| 1247 |
+
"cpu_mem": 1.494491136,
|
| 1248 |
+
"gpu_mem": 4.518688768,
|
| 1249 |
+
"loss": 0.9006,
|
| 1250 |
+
"grad_norm": 12.040121078491211,
|
| 1251 |
+
"learning_rate": 4.662269987756317e-08
|
| 1252 |
+
},
|
| 1253 |
+
{
|
| 1254 |
+
"step": 140,
|
| 1255 |
+
"epoch": 3.943661971830986,
|
| 1256 |
+
"cpu_mem": 1.494491136,
|
| 1257 |
+
"gpu_mem": 4.51872256,
|
| 1258 |
+
"loss": 0.8363,
|
| 1259 |
+
"grad_norm": 12.032706260681152,
|
| 1260 |
+
"learning_rate": 0.0
|
| 1261 |
+
},
|
| 1262 |
+
{
|
| 1263 |
+
"step": 140,
|
| 1264 |
+
"epoch": 3.943661971830986,
|
| 1265 |
+
"cpu_mem": 1.494491136,
|
| 1266 |
+
"gpu_mem": 4.51872256,
|
| 1267 |
+
"train_runtime": 674.8012,
|
| 1268 |
+
"train_samples_per_second": 13.343,
|
| 1269 |
+
"train_steps_per_second": 0.207,
|
| 1270 |
+
"total_flos": 0.0,
|
| 1271 |
+
"train_loss": 1.3040791460445949
|
| 1272 |
+
}
|
| 1273 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 4,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 2,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "boolq",
|
| 3 |
+
"results": 0.7926605504587156
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "BOOLQ",
|
| 5 |
+
"dataset_id": "google/boolq",
|
| 6 |
+
"preprocess_id": "boolq_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 2,
|
| 11 |
+
"alpha": 4,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 1577576
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 2,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-boolq-r2-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T12:13:11.031630"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r2-a2/training_logs.json
ADDED
|
@@ -0,0 +1,2659 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.006779661016949152,
|
| 5 |
+
"cpu_mem": 1.4856192,
|
| 6 |
+
"gpu_mem": 4.424159232,
|
| 7 |
+
"loss": 8.869,
|
| 8 |
+
"grad_norm": 265.7165832519531,
|
| 9 |
+
"learning_rate": 9.999999999999999e-06
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.013559322033898305,
|
| 14 |
+
"cpu_mem": 1.491910656,
|
| 15 |
+
"gpu_mem": 4.437070336,
|
| 16 |
+
"loss": 8.9376,
|
| 17 |
+
"grad_norm": 272.0975036621094,
|
| 18 |
+
"learning_rate": 1.9999999999999998e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.020338983050847456,
|
| 23 |
+
"cpu_mem": 1.492697088,
|
| 24 |
+
"gpu_mem": 4.436988928,
|
| 25 |
+
"loss": 8.2439,
|
| 26 |
+
"grad_norm": 279.63671875,
|
| 27 |
+
"learning_rate": 2.9999999999999997e-05
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.02711864406779661,
|
| 32 |
+
"cpu_mem": 1.49348352,
|
| 33 |
+
"gpu_mem": 4.436988928,
|
| 34 |
+
"loss": 6.7912,
|
| 35 |
+
"grad_norm": 283.7794494628906,
|
| 36 |
+
"learning_rate": 3.9999999999999996e-05
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.03389830508474576,
|
| 41 |
+
"cpu_mem": 1.493876736,
|
| 42 |
+
"gpu_mem": 4.436924416,
|
| 43 |
+
"loss": 4.9316,
|
| 44 |
+
"grad_norm": 254.95008850097656,
|
| 45 |
+
"learning_rate": 4.9999999999999996e-05
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.04067796610169491,
|
| 50 |
+
"cpu_mem": 1.49446656,
|
| 51 |
+
"gpu_mem": 4.436944384,
|
| 52 |
+
"loss": 3.114,
|
| 53 |
+
"grad_norm": 200.49691772460938,
|
| 54 |
+
"learning_rate": 5.9999999999999995e-05
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.04745762711864407,
|
| 59 |
+
"cpu_mem": 1.495056384,
|
| 60 |
+
"gpu_mem": 4.436996608,
|
| 61 |
+
"loss": 1.6758,
|
| 62 |
+
"grad_norm": 93.27091217041016,
|
| 63 |
+
"learning_rate": 7e-05
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.05423728813559322,
|
| 68 |
+
"cpu_mem": 1.495646208,
|
| 69 |
+
"gpu_mem": 4.437082624,
|
| 70 |
+
"loss": 1.039,
|
| 71 |
+
"grad_norm": 57.26808547973633,
|
| 72 |
+
"learning_rate": 7.999999999999999e-05
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.061016949152542375,
|
| 77 |
+
"cpu_mem": 1.496039424,
|
| 78 |
+
"gpu_mem": 4.436990464,
|
| 79 |
+
"loss": 1.0551,
|
| 80 |
+
"grad_norm": 59.7805061340332,
|
| 81 |
+
"learning_rate": 8.999999999999999e-05
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.06779661016949153,
|
| 86 |
+
"cpu_mem": 1.49643264,
|
| 87 |
+
"gpu_mem": 4.436890624,
|
| 88 |
+
"loss": 0.7487,
|
| 89 |
+
"grad_norm": 19.48933982849121,
|
| 90 |
+
"learning_rate": 9.999999999999999e-05
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.07457627118644068,
|
| 95 |
+
"cpu_mem": 1.496825856,
|
| 96 |
+
"gpu_mem": 4.436995072,
|
| 97 |
+
"loss": 1.1287,
|
| 98 |
+
"grad_norm": 91.75373840332031,
|
| 99 |
+
"learning_rate": 0.00010999999999999998
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.08135593220338982,
|
| 104 |
+
"cpu_mem": 1.49741568,
|
| 105 |
+
"gpu_mem": 4.437366784,
|
| 106 |
+
"loss": 1.0336,
|
| 107 |
+
"grad_norm": 73.30252838134766,
|
| 108 |
+
"learning_rate": 0.00011999999999999999
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.08813559322033898,
|
| 113 |
+
"cpu_mem": 1.497808896,
|
| 114 |
+
"gpu_mem": 4.436970496,
|
| 115 |
+
"loss": 0.6842,
|
| 116 |
+
"grad_norm": 6.66005802154541,
|
| 117 |
+
"learning_rate": 0.00013
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.09491525423728814,
|
| 122 |
+
"cpu_mem": 1.498202112,
|
| 123 |
+
"gpu_mem": 4.436947456,
|
| 124 |
+
"loss": 0.745,
|
| 125 |
+
"grad_norm": 33.216796875,
|
| 126 |
+
"learning_rate": 0.00014
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.1016949152542373,
|
| 131 |
+
"cpu_mem": 1.49839872,
|
| 132 |
+
"gpu_mem": 4.436886016,
|
| 133 |
+
"loss": 0.8204,
|
| 134 |
+
"grad_norm": 36.137210845947266,
|
| 135 |
+
"learning_rate": 0.00015
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.10847457627118644,
|
| 140 |
+
"cpu_mem": 1.498791936,
|
| 141 |
+
"gpu_mem": 4.436970496,
|
| 142 |
+
"loss": 0.7792,
|
| 143 |
+
"grad_norm": 31.646080017089844,
|
| 144 |
+
"learning_rate": 0.00015999999999999999
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.1152542372881356,
|
| 149 |
+
"cpu_mem": 1.499185152,
|
| 150 |
+
"gpu_mem": 4.437010432,
|
| 151 |
+
"loss": 0.7334,
|
| 152 |
+
"grad_norm": 24.66205596923828,
|
| 153 |
+
"learning_rate": 0.00016999999999999999
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 0.12203389830508475,
|
| 158 |
+
"cpu_mem": 1.499578368,
|
| 159 |
+
"gpu_mem": 4.437073408,
|
| 160 |
+
"loss": 0.6744,
|
| 161 |
+
"grad_norm": 3.4665486812591553,
|
| 162 |
+
"learning_rate": 0.00017999999999999998
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 0.1288135593220339,
|
| 167 |
+
"cpu_mem": 1.499971584,
|
| 168 |
+
"gpu_mem": 4.436910592,
|
| 169 |
+
"loss": 0.8234,
|
| 170 |
+
"grad_norm": 28.424806594848633,
|
| 171 |
+
"learning_rate": 0.00018999999999999998
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 0.13559322033898305,
|
| 176 |
+
"cpu_mem": 1.500168192,
|
| 177 |
+
"gpu_mem": 4.43702272,
|
| 178 |
+
"loss": 0.6248,
|
| 179 |
+
"grad_norm": 3.359628677368164,
|
| 180 |
+
"learning_rate": 0.00019999999999999998
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 0.1423728813559322,
|
| 185 |
+
"cpu_mem": 1.500561408,
|
| 186 |
+
"gpu_mem": 4.437180928,
|
| 187 |
+
"loss": 0.6311,
|
| 188 |
+
"grad_norm": 10.9364595413208,
|
| 189 |
+
"learning_rate": 0.00020999999999999998
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 0.14915254237288136,
|
| 194 |
+
"cpu_mem": 1.500954624,
|
| 195 |
+
"gpu_mem": 4.437073408,
|
| 196 |
+
"loss": 0.7505,
|
| 197 |
+
"grad_norm": 14.361480712890625,
|
| 198 |
+
"learning_rate": 0.00021999999999999995
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 0.15593220338983052,
|
| 203 |
+
"cpu_mem": 1.501151232,
|
| 204 |
+
"gpu_mem": 4.43704576,
|
| 205 |
+
"loss": 0.6607,
|
| 206 |
+
"grad_norm": 11.354830741882324,
|
| 207 |
+
"learning_rate": 0.00023
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 0.16271186440677965,
|
| 212 |
+
"cpu_mem": 1.50134784,
|
| 213 |
+
"gpu_mem": 4.437102592,
|
| 214 |
+
"loss": 0.608,
|
| 215 |
+
"grad_norm": 5.800236225128174,
|
| 216 |
+
"learning_rate": 0.00023999999999999998
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 0.1694915254237288,
|
| 221 |
+
"cpu_mem": 1.501741056,
|
| 222 |
+
"gpu_mem": 4.436887552,
|
| 223 |
+
"loss": 0.8785,
|
| 224 |
+
"grad_norm": 32.040443420410156,
|
| 225 |
+
"learning_rate": 0.00025
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 0.17627118644067796,
|
| 230 |
+
"cpu_mem": 1.501937664,
|
| 231 |
+
"gpu_mem": 4.436942848,
|
| 232 |
+
"loss": 1.0202,
|
| 233 |
+
"grad_norm": 42.82334899902344,
|
| 234 |
+
"learning_rate": 0.00026
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 0.18305084745762712,
|
| 239 |
+
"cpu_mem": 1.50233088,
|
| 240 |
+
"gpu_mem": 4.437234688,
|
| 241 |
+
"loss": 0.6503,
|
| 242 |
+
"grad_norm": 10.639617919921875,
|
| 243 |
+
"learning_rate": 0.00027
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 0.18983050847457628,
|
| 248 |
+
"cpu_mem": 1.502527488,
|
| 249 |
+
"gpu_mem": 4.436913664,
|
| 250 |
+
"loss": 0.7747,
|
| 251 |
+
"grad_norm": 29.44213104248047,
|
| 252 |
+
"learning_rate": 0.00028
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 0.19661016949152543,
|
| 257 |
+
"cpu_mem": 1.502920704,
|
| 258 |
+
"gpu_mem": 4.436978176,
|
| 259 |
+
"loss": 1.0303,
|
| 260 |
+
"grad_norm": 55.7458381652832,
|
| 261 |
+
"learning_rate": 0.00029
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 0.2033898305084746,
|
| 266 |
+
"cpu_mem": 1.503117312,
|
| 267 |
+
"gpu_mem": 4.437056512,
|
| 268 |
+
"loss": 0.7492,
|
| 269 |
+
"grad_norm": 22.058603286743164,
|
| 270 |
+
"learning_rate": 0.0003
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 0.21016949152542372,
|
| 275 |
+
"cpu_mem": 1.50331392,
|
| 276 |
+
"gpu_mem": 4.436859904,
|
| 277 |
+
"loss": 0.5481,
|
| 278 |
+
"grad_norm": 3.686203956604004,
|
| 279 |
+
"learning_rate": 0.0002999893794250036
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 0.21694915254237288,
|
| 284 |
+
"cpu_mem": 1.503510528,
|
| 285 |
+
"gpu_mem": 4.436973568,
|
| 286 |
+
"loss": 0.834,
|
| 287 |
+
"grad_norm": 23.762094497680664,
|
| 288 |
+
"learning_rate": 0.00029995751920396937
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 0.22372881355932203,
|
| 293 |
+
"cpu_mem": 1.503707136,
|
| 294 |
+
"gpu_mem": 4.437211648,
|
| 295 |
+
"loss": 0.8735,
|
| 296 |
+
"grad_norm": 20.501628875732422,
|
| 297 |
+
"learning_rate": 0.00029990442384854874
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 0.2305084745762712,
|
| 302 |
+
"cpu_mem": 1.503903744,
|
| 303 |
+
"gpu_mem": 4.436913664,
|
| 304 |
+
"loss": 0.5833,
|
| 305 |
+
"grad_norm": 5.7299933433532715,
|
| 306 |
+
"learning_rate": 0.0002998301008774512
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 0.23728813559322035,
|
| 311 |
+
"cpu_mem": 1.504100352,
|
| 312 |
+
"gpu_mem": 4.437124096,
|
| 313 |
+
"loss": 0.6658,
|
| 314 |
+
"grad_norm": 7.160278797149658,
|
| 315 |
+
"learning_rate": 0.0002997345608153792
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 0.2440677966101695,
|
| 320 |
+
"cpu_mem": 1.50429696,
|
| 321 |
+
"gpu_mem": 4.437074944,
|
| 322 |
+
"loss": 0.711,
|
| 323 |
+
"grad_norm": 18.156116485595703,
|
| 324 |
+
"learning_rate": 0.000299617817191538
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 0.25084745762711863,
|
| 329 |
+
"cpu_mem": 1.504493568,
|
| 330 |
+
"gpu_mem": 4.436886016,
|
| 331 |
+
"loss": 0.5941,
|
| 332 |
+
"grad_norm": 4.312148094177246,
|
| 333 |
+
"learning_rate": 0.0002994798865377198
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 0.2576271186440678,
|
| 338 |
+
"cpu_mem": 1.504690176,
|
| 339 |
+
"gpu_mem": 4.437133312,
|
| 340 |
+
"loss": 0.8683,
|
| 341 |
+
"grad_norm": 32.92335891723633,
|
| 342 |
+
"learning_rate": 0.0002993207883859627
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 0.26440677966101694,
|
| 347 |
+
"cpu_mem": 1.504886784,
|
| 348 |
+
"gpu_mem": 4.437512704,
|
| 349 |
+
"loss": 0.7286,
|
| 350 |
+
"grad_norm": 17.68904685974121,
|
| 351 |
+
"learning_rate": 0.0002991405452657846
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 0.2711864406779661,
|
| 356 |
+
"cpu_mem": 1.505083392,
|
| 357 |
+
"gpu_mem": 4.437082624,
|
| 358 |
+
"loss": 0.5857,
|
| 359 |
+
"grad_norm": 4.258547782897949,
|
| 360 |
+
"learning_rate": 0.00029893918270099324
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 0.27796610169491526,
|
| 365 |
+
"cpu_mem": 1.50528,
|
| 366 |
+
"gpu_mem": 4.437309952,
|
| 367 |
+
"loss": 0.6943,
|
| 368 |
+
"grad_norm": 15.192357063293457,
|
| 369 |
+
"learning_rate": 0.00029871672920607153
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 0.2847457627118644,
|
| 374 |
+
"cpu_mem": 1.505476608,
|
| 375 |
+
"gpu_mem": 4.43720704,
|
| 376 |
+
"loss": 0.5717,
|
| 377 |
+
"grad_norm": 10.226648330688477,
|
| 378 |
+
"learning_rate": 0.0002984732162821399
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 0.29152542372881357,
|
| 383 |
+
"cpu_mem": 1.505673216,
|
| 384 |
+
"gpu_mem": 4.437028864,
|
| 385 |
+
"loss": 0.6568,
|
| 386 |
+
"grad_norm": 8.291679382324219,
|
| 387 |
+
"learning_rate": 0.0002982086784124952
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 0.2983050847457627,
|
| 392 |
+
"cpu_mem": 1.505869824,
|
| 393 |
+
"gpu_mem": 4.437171712,
|
| 394 |
+
"loss": 0.6187,
|
| 395 |
+
"grad_norm": 9.696277618408203,
|
| 396 |
+
"learning_rate": 0.00029792315305772796
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 0.3050847457627119,
|
| 401 |
+
"cpu_mem": 1.505869824,
|
| 402 |
+
"gpu_mem": 4.436952064,
|
| 403 |
+
"loss": 0.8114,
|
| 404 |
+
"grad_norm": 17.778474807739258,
|
| 405 |
+
"learning_rate": 0.0002976166806504174
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 0.31186440677966104,
|
| 410 |
+
"cpu_mem": 1.506066432,
|
| 411 |
+
"gpu_mem": 4.437194752,
|
| 412 |
+
"loss": 0.7589,
|
| 413 |
+
"grad_norm": 25.64463996887207,
|
| 414 |
+
"learning_rate": 0.00029728930458940595
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 0.31864406779661014,
|
| 419 |
+
"cpu_mem": 1.50626304,
|
| 420 |
+
"gpu_mem": 4.436918272,
|
| 421 |
+
"loss": 0.8276,
|
| 422 |
+
"grad_norm": 28.21428108215332,
|
| 423 |
+
"learning_rate": 0.00029694107123365385
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 0.3254237288135593,
|
| 428 |
+
"cpu_mem": 1.50626304,
|
| 429 |
+
"gpu_mem": 4.436995072,
|
| 430 |
+
"loss": 0.6789,
|
| 431 |
+
"grad_norm": 26.740333557128906,
|
| 432 |
+
"learning_rate": 0.00029657202989567393
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 0.33220338983050846,
|
| 437 |
+
"cpu_mem": 1.506459648,
|
| 438 |
+
"gpu_mem": 4.437011968,
|
| 439 |
+
"loss": 0.7783,
|
| 440 |
+
"grad_norm": 16.810571670532227,
|
| 441 |
+
"learning_rate": 0.00029618223283454893
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 0.3389830508474576,
|
| 446 |
+
"cpu_mem": 1.506459648,
|
| 447 |
+
"gpu_mem": 4.436950528,
|
| 448 |
+
"loss": 0.6132,
|
| 449 |
+
"grad_norm": 5.90757942199707,
|
| 450 |
+
"learning_rate": 0.00029577173524853123
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 0.34576271186440677,
|
| 455 |
+
"cpu_mem": 1.506656256,
|
| 456 |
+
"gpu_mem": 4.436955136,
|
| 457 |
+
"loss": 0.5682,
|
| 458 |
+
"grad_norm": 6.385296821594238,
|
| 459 |
+
"learning_rate": 0.0002953405952672261
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 0.3525423728813559,
|
| 464 |
+
"cpu_mem": 1.506656256,
|
| 465 |
+
"gpu_mem": 4.437035008,
|
| 466 |
+
"loss": 0.5911,
|
| 467 |
+
"grad_norm": 4.772059917449951,
|
| 468 |
+
"learning_rate": 0.0002948888739433602
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 0.3593220338983051,
|
| 473 |
+
"cpu_mem": 1.506852864,
|
| 474 |
+
"gpu_mem": 4.437058048,
|
| 475 |
+
"loss": 0.5848,
|
| 476 |
+
"grad_norm": 9.717635154724121,
|
| 477 |
+
"learning_rate": 0.0002944166352441363
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 0.36610169491525424,
|
| 482 |
+
"cpu_mem": 1.507049472,
|
| 483 |
+
"gpu_mem": 4.436985856,
|
| 484 |
+
"loss": 0.6352,
|
| 485 |
+
"grad_norm": 10.524502754211426,
|
| 486 |
+
"learning_rate": 0.0002939239460421746
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 0.3728813559322034,
|
| 491 |
+
"cpu_mem": 1.507049472,
|
| 492 |
+
"gpu_mem": 4.437256192,
|
| 493 |
+
"loss": 0.6172,
|
| 494 |
+
"grad_norm": 6.137028694152832,
|
| 495 |
+
"learning_rate": 0.00029341087610604337
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 0.37966101694915255,
|
| 500 |
+
"cpu_mem": 1.50724608,
|
| 501 |
+
"gpu_mem": 4.437042688,
|
| 502 |
+
"loss": 0.7273,
|
| 503 |
+
"grad_norm": 11.40707015991211,
|
| 504 |
+
"learning_rate": 0.00029287749809037904
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 0.3864406779661017,
|
| 509 |
+
"cpu_mem": 1.50724608,
|
| 510 |
+
"gpu_mem": 4.437036544,
|
| 511 |
+
"loss": 0.5833,
|
| 512 |
+
"grad_norm": 8.116436958312988,
|
| 513 |
+
"learning_rate": 0.0002923238875255979
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 0.39322033898305087,
|
| 518 |
+
"cpu_mem": 1.507442688,
|
| 519 |
+
"gpu_mem": 4.436932096,
|
| 520 |
+
"loss": 0.5561,
|
| 521 |
+
"grad_norm": 4.918622016906738,
|
| 522 |
+
"learning_rate": 0.00029175012280720024
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 0.4,
|
| 527 |
+
"cpu_mem": 1.507442688,
|
| 528 |
+
"gpu_mem": 4.436948992,
|
| 529 |
+
"loss": 0.6541,
|
| 530 |
+
"grad_norm": 8.79553508758545,
|
| 531 |
+
"learning_rate": 0.000291156285184669
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 0.4067796610169492,
|
| 536 |
+
"cpu_mem": 1.507639296,
|
| 537 |
+
"gpu_mem": 4.437042688,
|
| 538 |
+
"loss": 0.5448,
|
| 539 |
+
"grad_norm": 4.7404704093933105,
|
| 540 |
+
"learning_rate": 0.00029054245874996426
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 0.4135593220338983,
|
| 545 |
+
"cpu_mem": 1.507639296,
|
| 546 |
+
"gpu_mem": 4.43705344,
|
| 547 |
+
"loss": 0.5885,
|
| 548 |
+
"grad_norm": 4.77510929107666,
|
| 549 |
+
"learning_rate": 0.0002899087304256151
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 0.42033898305084744,
|
| 554 |
+
"cpu_mem": 1.507835904,
|
| 555 |
+
"gpu_mem": 4.437041152,
|
| 556 |
+
"loss": 0.7054,
|
| 557 |
+
"grad_norm": 11.460348129272461,
|
| 558 |
+
"learning_rate": 0.0002892551899524109
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 0.4271186440677966,
|
| 563 |
+
"cpu_mem": 1.507835904,
|
| 564 |
+
"gpu_mem": 4.437033472,
|
| 565 |
+
"loss": 0.5369,
|
| 566 |
+
"grad_norm": 31.070810317993164,
|
| 567 |
+
"learning_rate": 0.000288581929876693
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 0.43389830508474575,
|
| 572 |
+
"cpu_mem": 1.507835904,
|
| 573 |
+
"gpu_mem": 4.436962816,
|
| 574 |
+
"loss": 0.609,
|
| 575 |
+
"grad_norm": 15.022584915161133,
|
| 576 |
+
"learning_rate": 0.0002878890455372498
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 0.4406779661016949,
|
| 581 |
+
"cpu_mem": 1.507835904,
|
| 582 |
+
"gpu_mem": 4.43700736,
|
| 583 |
+
"loss": 0.5679,
|
| 584 |
+
"grad_norm": 5.917741775512695,
|
| 585 |
+
"learning_rate": 0.0002871766350518159
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 0.44745762711864406,
|
| 590 |
+
"cpu_mem": 1.507835904,
|
| 591 |
+
"gpu_mem": 4.437200896,
|
| 592 |
+
"loss": 0.5544,
|
| 593 |
+
"grad_norm": 6.815539836883545,
|
| 594 |
+
"learning_rate": 0.00028644479930317775
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 0.4542372881355932,
|
| 599 |
+
"cpu_mem": 1.508032512,
|
| 600 |
+
"gpu_mem": 4.436910592,
|
| 601 |
+
"loss": 0.6308,
|
| 602 |
+
"grad_norm": 13.951557159423828,
|
| 603 |
+
"learning_rate": 0.00028569364192488803
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 0.4610169491525424,
|
| 608 |
+
"cpu_mem": 1.508032512,
|
| 609 |
+
"gpu_mem": 4.436878336,
|
| 610 |
+
"loss": 0.7345,
|
| 611 |
+
"grad_norm": 16.037248611450195,
|
| 612 |
+
"learning_rate": 0.00028492326928659045
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 69,
|
| 616 |
+
"epoch": 0.46779661016949153,
|
| 617 |
+
"cpu_mem": 1.50822912,
|
| 618 |
+
"gpu_mem": 4.436944384,
|
| 619 |
+
"loss": 0.6223,
|
| 620 |
+
"grad_norm": 8.664552688598633,
|
| 621 |
+
"learning_rate": 0.00028413379047895665
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"step": 70,
|
| 625 |
+
"epoch": 0.4745762711864407,
|
| 626 |
+
"cpu_mem": 1.50822912,
|
| 627 |
+
"gpu_mem": 4.43693824,
|
| 628 |
+
"loss": 0.5409,
|
| 629 |
+
"grad_norm": 9.303351402282715,
|
| 630 |
+
"learning_rate": 0.0002833253172982385
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"step": 71,
|
| 634 |
+
"epoch": 0.48135593220338985,
|
| 635 |
+
"cpu_mem": 1.50822912,
|
| 636 |
+
"gpu_mem": 4.437167104,
|
| 637 |
+
"loss": 0.5304,
|
| 638 |
+
"grad_norm": 7.759465217590332,
|
| 639 |
+
"learning_rate": 0.0002824979642304366
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"step": 72,
|
| 643 |
+
"epoch": 0.488135593220339,
|
| 644 |
+
"cpu_mem": 1.50822912,
|
| 645 |
+
"gpu_mem": 4.437159424,
|
| 646 |
+
"loss": 0.5726,
|
| 647 |
+
"grad_norm": 8.323700904846191,
|
| 648 |
+
"learning_rate": 0.0002816518484350883
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"step": 73,
|
| 652 |
+
"epoch": 0.49491525423728816,
|
| 653 |
+
"cpu_mem": 1.508425728,
|
| 654 |
+
"gpu_mem": 4.437125632,
|
| 655 |
+
"loss": 0.8418,
|
| 656 |
+
"grad_norm": 20.5197696685791,
|
| 657 |
+
"learning_rate": 0.0002807870897286772
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"step": 74,
|
| 661 |
+
"epoch": 0.5016949152542373,
|
| 662 |
+
"cpu_mem": 1.508622336,
|
| 663 |
+
"gpu_mem": 4.436985856,
|
| 664 |
+
"loss": 0.506,
|
| 665 |
+
"grad_norm": 8.913507461547852,
|
| 666 |
+
"learning_rate": 0.0002799038105676658
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"step": 75,
|
| 670 |
+
"epoch": 0.5084745762711864,
|
| 671 |
+
"cpu_mem": 1.508622336,
|
| 672 |
+
"gpu_mem": 4.436910592,
|
| 673 |
+
"loss": 0.5309,
|
| 674 |
+
"grad_norm": 10.124161720275879,
|
| 675 |
+
"learning_rate": 0.000279002136031155
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"step": 76,
|
| 679 |
+
"epoch": 0.5152542372881356,
|
| 680 |
+
"cpu_mem": 1.508622336,
|
| 681 |
+
"gpu_mem": 4.436850688,
|
| 682 |
+
"loss": 0.6009,
|
| 683 |
+
"grad_norm": 12.703954696655273,
|
| 684 |
+
"learning_rate": 0.00027808219380317216
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"step": 77,
|
| 688 |
+
"epoch": 0.5220338983050847,
|
| 689 |
+
"cpu_mem": 1.508622336,
|
| 690 |
+
"gpu_mem": 4.436924416,
|
| 691 |
+
"loss": 0.5558,
|
| 692 |
+
"grad_norm": 10.466872215270996,
|
| 693 |
+
"learning_rate": 0.0002771441141545895
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"step": 78,
|
| 697 |
+
"epoch": 0.5288135593220339,
|
| 698 |
+
"cpu_mem": 1.508622336,
|
| 699 |
+
"gpu_mem": 4.43697664,
|
| 700 |
+
"loss": 0.7043,
|
| 701 |
+
"grad_norm": 20.635786056518555,
|
| 702 |
+
"learning_rate": 0.0002761880299246772
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"step": 79,
|
| 706 |
+
"epoch": 0.535593220338983,
|
| 707 |
+
"cpu_mem": 1.508622336,
|
| 708 |
+
"gpu_mem": 4.437108736,
|
| 709 |
+
"loss": 0.6018,
|
| 710 |
+
"grad_norm": 11.645002365112305,
|
| 711 |
+
"learning_rate": 0.000275214076502292
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"step": 80,
|
| 715 |
+
"epoch": 0.5423728813559322,
|
| 716 |
+
"cpu_mem": 1.508622336,
|
| 717 |
+
"gpu_mem": 4.43699968,
|
| 718 |
+
"loss": 0.5279,
|
| 719 |
+
"grad_norm": 6.659698963165283,
|
| 720 |
+
"learning_rate": 0.0002742223918067056
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"step": 81,
|
| 724 |
+
"epoch": 0.5491525423728814,
|
| 725 |
+
"cpu_mem": 1.508622336,
|
| 726 |
+
"gpu_mem": 4.436879872,
|
| 727 |
+
"loss": 0.5791,
|
| 728 |
+
"grad_norm": 6.4264235496521,
|
| 729 |
+
"learning_rate": 0.00027321311626807374
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"step": 82,
|
| 733 |
+
"epoch": 0.5559322033898305,
|
| 734 |
+
"cpu_mem": 1.508622336,
|
| 735 |
+
"gpu_mem": 4.436948992,
|
| 736 |
+
"loss": 0.6501,
|
| 737 |
+
"grad_norm": 9.969060897827148,
|
| 738 |
+
"learning_rate": 0.0002721863928075503
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"step": 83,
|
| 742 |
+
"epoch": 0.5627118644067797,
|
| 743 |
+
"cpu_mem": 1.508818944,
|
| 744 |
+
"gpu_mem": 4.437048832,
|
| 745 |
+
"loss": 0.6573,
|
| 746 |
+
"grad_norm": 8.324270248413086,
|
| 747 |
+
"learning_rate": 0.000271142366817049
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"step": 84,
|
| 751 |
+
"epoch": 0.5694915254237288,
|
| 752 |
+
"cpu_mem": 1.508818944,
|
| 753 |
+
"gpu_mem": 4.437011968,
|
| 754 |
+
"loss": 0.5463,
|
| 755 |
+
"grad_norm": 8.097661972045898,
|
| 756 |
+
"learning_rate": 0.00027008118613865406
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"step": 85,
|
| 760 |
+
"epoch": 0.576271186440678,
|
| 761 |
+
"cpu_mem": 1.508818944,
|
| 762 |
+
"gpu_mem": 4.437044224,
|
| 763 |
+
"loss": 0.5767,
|
| 764 |
+
"grad_norm": 6.6934494972229,
|
| 765 |
+
"learning_rate": 0.00026900300104368524
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"step": 86,
|
| 769 |
+
"epoch": 0.5830508474576271,
|
| 770 |
+
"cpu_mem": 1.509015552,
|
| 771 |
+
"gpu_mem": 4.436995072,
|
| 772 |
+
"loss": 0.6116,
|
| 773 |
+
"grad_norm": 9.769342422485352,
|
| 774 |
+
"learning_rate": 0.00026790796421141813
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"step": 87,
|
| 778 |
+
"epoch": 0.5898305084745763,
|
| 779 |
+
"cpu_mem": 1.509015552,
|
| 780 |
+
"gpu_mem": 4.437002752,
|
| 781 |
+
"loss": 0.564,
|
| 782 |
+
"grad_norm": 9.166972160339355,
|
| 783 |
+
"learning_rate": 0.00026679623070746325
|
| 784 |
+
},
|
| 785 |
+
{
|
| 786 |
+
"step": 88,
|
| 787 |
+
"epoch": 0.5966101694915255,
|
| 788 |
+
"cpu_mem": 1.509015552,
|
| 789 |
+
"gpu_mem": 4.437147136,
|
| 790 |
+
"loss": 0.5411,
|
| 791 |
+
"grad_norm": 5.252224445343018,
|
| 792 |
+
"learning_rate": 0.0002656679579618081
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"step": 89,
|
| 796 |
+
"epoch": 0.6033898305084746,
|
| 797 |
+
"cpu_mem": 1.509015552,
|
| 798 |
+
"gpu_mem": 4.436929024,
|
| 799 |
+
"loss": 0.6234,
|
| 800 |
+
"grad_norm": 6.647994518280029,
|
| 801 |
+
"learning_rate": 0.0002645233057465235
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"step": 90,
|
| 805 |
+
"epoch": 0.6101694915254238,
|
| 806 |
+
"cpu_mem": 1.509015552,
|
| 807 |
+
"gpu_mem": 4.436982784,
|
| 808 |
+
"loss": 0.5256,
|
| 809 |
+
"grad_norm": 7.916544437408447,
|
| 810 |
+
"learning_rate": 0.00026336243615313873
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"step": 91,
|
| 814 |
+
"epoch": 0.6169491525423729,
|
| 815 |
+
"cpu_mem": 1.509015552,
|
| 816 |
+
"gpu_mem": 4.436950528,
|
| 817 |
+
"loss": 0.4528,
|
| 818 |
+
"grad_norm": 6.592220306396484,
|
| 819 |
+
"learning_rate": 0.00026218551356968814
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"step": 92,
|
| 823 |
+
"epoch": 0.6237288135593221,
|
| 824 |
+
"cpu_mem": 1.509015552,
|
| 825 |
+
"gpu_mem": 4.437031936,
|
| 826 |
+
"loss": 0.6788,
|
| 827 |
+
"grad_norm": 12.278592109680176,
|
| 828 |
+
"learning_rate": 0.00026099270465743254
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"step": 93,
|
| 832 |
+
"epoch": 0.6305084745762712,
|
| 833 |
+
"cpu_mem": 1.509015552,
|
| 834 |
+
"gpu_mem": 4.436835328,
|
| 835 |
+
"loss": 0.8175,
|
| 836 |
+
"grad_norm": 13.33436393737793,
|
| 837 |
+
"learning_rate": 0.0002597841783272588
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"step": 94,
|
| 841 |
+
"epoch": 0.6372881355932203,
|
| 842 |
+
"cpu_mem": 1.509015552,
|
| 843 |
+
"gpu_mem": 4.436948992,
|
| 844 |
+
"loss": 0.5207,
|
| 845 |
+
"grad_norm": 6.806389331817627,
|
| 846 |
+
"learning_rate": 0.0002585601057157605
|
| 847 |
+
},
|
| 848 |
+
{
|
| 849 |
+
"step": 95,
|
| 850 |
+
"epoch": 0.6440677966101694,
|
| 851 |
+
"cpu_mem": 1.509015552,
|
| 852 |
+
"gpu_mem": 4.43696896,
|
| 853 |
+
"loss": 0.5228,
|
| 854 |
+
"grad_norm": 6.125219821929932,
|
| 855 |
+
"learning_rate": 0.00025732066016100394
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"step": 96,
|
| 859 |
+
"epoch": 0.6508474576271186,
|
| 860 |
+
"cpu_mem": 1.50921216,
|
| 861 |
+
"gpu_mem": 4.43700736,
|
| 862 |
+
"loss": 0.4348,
|
| 863 |
+
"grad_norm": 6.964541912078857,
|
| 864 |
+
"learning_rate": 0.00025606601717798207
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"step": 97,
|
| 868 |
+
"epoch": 0.6576271186440678,
|
| 869 |
+
"cpu_mem": 1.50921216,
|
| 870 |
+
"gpu_mem": 4.436992,
|
| 871 |
+
"loss": 0.6261,
|
| 872 |
+
"grad_norm": 8.247956275939941,
|
| 873 |
+
"learning_rate": 0.0002547963544337602
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"step": 98,
|
| 877 |
+
"epoch": 0.6644067796610169,
|
| 878 |
+
"cpu_mem": 1.50921216,
|
| 879 |
+
"gpu_mem": 4.436904448,
|
| 880 |
+
"loss": 0.4827,
|
| 881 |
+
"grad_norm": 8.204977035522461,
|
| 882 |
+
"learning_rate": 0.0002535118517223168
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"step": 99,
|
| 886 |
+
"epoch": 0.6711864406779661,
|
| 887 |
+
"cpu_mem": 1.50921216,
|
| 888 |
+
"gpu_mem": 4.43685376,
|
| 889 |
+
"loss": 0.5029,
|
| 890 |
+
"grad_norm": 11.964973449707031,
|
| 891 |
+
"learning_rate": 0.00025221269093908365
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"step": 100,
|
| 895 |
+
"epoch": 0.6779661016949152,
|
| 896 |
+
"cpu_mem": 1.50921216,
|
| 897 |
+
"gpu_mem": 4.436970496,
|
| 898 |
+
"loss": 0.51,
|
| 899 |
+
"grad_norm": 11.552522659301758,
|
| 900 |
+
"learning_rate": 0.0002508990560551879
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"step": 101,
|
| 904 |
+
"epoch": 0.6847457627118644,
|
| 905 |
+
"cpu_mem": 1.50921216,
|
| 906 |
+
"gpu_mem": 4.437002752,
|
| 907 |
+
"loss": 0.5467,
|
| 908 |
+
"grad_norm": 13.213567733764648,
|
| 909 |
+
"learning_rate": 0.0002495711330914001
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"step": 102,
|
| 913 |
+
"epoch": 0.6915254237288135,
|
| 914 |
+
"cpu_mem": 1.50921216,
|
| 915 |
+
"gpu_mem": 4.437036544,
|
| 916 |
+
"loss": 0.5372,
|
| 917 |
+
"grad_norm": 8.773509979248047,
|
| 918 |
+
"learning_rate": 0.00024822911009179276
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"step": 103,
|
| 922 |
+
"epoch": 0.6983050847457627,
|
| 923 |
+
"cpu_mem": 1.50921216,
|
| 924 |
+
"gpu_mem": 4.437087232,
|
| 925 |
+
"loss": 0.6076,
|
| 926 |
+
"grad_norm": 12.28891372680664,
|
| 927 |
+
"learning_rate": 0.0002468731770971113
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"step": 104,
|
| 931 |
+
"epoch": 0.7050847457627119,
|
| 932 |
+
"cpu_mem": 1.50921216,
|
| 933 |
+
"gpu_mem": 4.436992,
|
| 934 |
+
"loss": 0.5038,
|
| 935 |
+
"grad_norm": 11.784523010253906,
|
| 936 |
+
"learning_rate": 0.0002455035261178632
|
| 937 |
+
},
|
| 938 |
+
{
|
| 939 |
+
"step": 105,
|
| 940 |
+
"epoch": 0.711864406779661,
|
| 941 |
+
"cpu_mem": 1.50921216,
|
| 942 |
+
"gpu_mem": 4.437093376,
|
| 943 |
+
"loss": 0.4978,
|
| 944 |
+
"grad_norm": 8.252092361450195,
|
| 945 |
+
"learning_rate": 0.0002441203511071278
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"step": 106,
|
| 949 |
+
"epoch": 0.7186440677966102,
|
| 950 |
+
"cpu_mem": 1.50921216,
|
| 951 |
+
"gpu_mem": 4.437044224,
|
| 952 |
+
"loss": 0.5585,
|
| 953 |
+
"grad_norm": 9.146671295166016,
|
| 954 |
+
"learning_rate": 0.00024272384793309077
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"step": 107,
|
| 958 |
+
"epoch": 0.7254237288135593,
|
| 959 |
+
"cpu_mem": 1.509408768,
|
| 960 |
+
"gpu_mem": 4.436932096,
|
| 961 |
+
"loss": 0.512,
|
| 962 |
+
"grad_norm": 7.901447772979736,
|
| 963 |
+
"learning_rate": 0.00024131421435130807
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"step": 108,
|
| 967 |
+
"epoch": 0.7322033898305085,
|
| 968 |
+
"cpu_mem": 1.509408768,
|
| 969 |
+
"gpu_mem": 4.437116416,
|
| 970 |
+
"loss": 0.5794,
|
| 971 |
+
"grad_norm": 7.828333377838135,
|
| 972 |
+
"learning_rate": 0.00023989164997670202
|
| 973 |
+
},
|
| 974 |
+
{
|
| 975 |
+
"step": 109,
|
| 976 |
+
"epoch": 0.7389830508474576,
|
| 977 |
+
"cpu_mem": 1.509408768,
|
| 978 |
+
"gpu_mem": 4.436970496,
|
| 979 |
+
"loss": 0.5699,
|
| 980 |
+
"grad_norm": 5.465837478637695,
|
| 981 |
+
"learning_rate": 0.0002384563562552943
|
| 982 |
+
},
|
| 983 |
+
{
|
| 984 |
+
"step": 110,
|
| 985 |
+
"epoch": 0.7457627118644068,
|
| 986 |
+
"cpu_mem": 1.509408768,
|
| 987 |
+
"gpu_mem": 4.436973568,
|
| 988 |
+
"loss": 0.5263,
|
| 989 |
+
"grad_norm": 5.203726768493652,
|
| 990 |
+
"learning_rate": 0.0002370085364356797
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"step": 111,
|
| 994 |
+
"epoch": 0.752542372881356,
|
| 995 |
+
"cpu_mem": 1.509408768,
|
| 996 |
+
"gpu_mem": 4.436942848,
|
| 997 |
+
"loss": 0.5348,
|
| 998 |
+
"grad_norm": 6.427229881286621,
|
| 999 |
+
"learning_rate": 0.0002355483955402446
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"step": 112,
|
| 1003 |
+
"epoch": 0.7593220338983051,
|
| 1004 |
+
"cpu_mem": 1.509408768,
|
| 1005 |
+
"gpu_mem": 4.436988928,
|
| 1006 |
+
"loss": 0.5472,
|
| 1007 |
+
"grad_norm": 6.567240238189697,
|
| 1008 |
+
"learning_rate": 0.00023407614033613407
|
| 1009 |
+
},
|
| 1010 |
+
{
|
| 1011 |
+
"step": 113,
|
| 1012 |
+
"epoch": 0.7661016949152543,
|
| 1013 |
+
"cpu_mem": 1.509408768,
|
| 1014 |
+
"gpu_mem": 4.436979712,
|
| 1015 |
+
"loss": 0.5146,
|
| 1016 |
+
"grad_norm": 5.874990463256836,
|
| 1017 |
+
"learning_rate": 0.0002325919793059723
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"step": 114,
|
| 1021 |
+
"epoch": 0.7728813559322034,
|
| 1022 |
+
"cpu_mem": 1.509408768,
|
| 1023 |
+
"gpu_mem": 4.43696128,
|
| 1024 |
+
"loss": 0.4087,
|
| 1025 |
+
"grad_norm": 6.523895740509033,
|
| 1026 |
+
"learning_rate": 0.00023109612261833963
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"step": 115,
|
| 1030 |
+
"epoch": 0.7796610169491526,
|
| 1031 |
+
"cpu_mem": 1.509408768,
|
| 1032 |
+
"gpu_mem": 4.437036544,
|
| 1033 |
+
"loss": 0.4837,
|
| 1034 |
+
"grad_norm": 6.8895134925842285,
|
| 1035 |
+
"learning_rate": 0.0002295887820980112
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"step": 116,
|
| 1039 |
+
"epoch": 0.7864406779661017,
|
| 1040 |
+
"cpu_mem": 1.509408768,
|
| 1041 |
+
"gpu_mem": 4.436956672,
|
| 1042 |
+
"loss": 0.53,
|
| 1043 |
+
"grad_norm": 12.884737968444824,
|
| 1044 |
+
"learning_rate": 0.0002280701711959608
|
| 1045 |
+
},
|
| 1046 |
+
{
|
| 1047 |
+
"step": 117,
|
| 1048 |
+
"epoch": 0.7932203389830509,
|
| 1049 |
+
"cpu_mem": 1.509408768,
|
| 1050 |
+
"gpu_mem": 4.436847616,
|
| 1051 |
+
"loss": 0.418,
|
| 1052 |
+
"grad_norm": 8.543777465820312,
|
| 1053 |
+
"learning_rate": 0.00022654050495913495
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"step": 118,
|
| 1057 |
+
"epoch": 0.8,
|
| 1058 |
+
"cpu_mem": 1.509408768,
|
| 1059 |
+
"gpu_mem": 4.437085696,
|
| 1060 |
+
"loss": 0.6238,
|
| 1061 |
+
"grad_norm": 18.599990844726562,
|
| 1062 |
+
"learning_rate": 0.000225
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"step": 119,
|
| 1066 |
+
"epoch": 0.8067796610169492,
|
| 1067 |
+
"cpu_mem": 1.509408768,
|
| 1068 |
+
"gpu_mem": 4.437256192,
|
| 1069 |
+
"loss": 0.4314,
|
| 1070 |
+
"grad_norm": 8.732511520385742,
|
| 1071 |
+
"learning_rate": 0.00022344887446586865
|
| 1072 |
+
},
|
| 1073 |
+
{
|
| 1074 |
+
"step": 120,
|
| 1075 |
+
"epoch": 0.8135593220338984,
|
| 1076 |
+
"cpu_mem": 1.509408768,
|
| 1077 |
+
"gpu_mem": 4.436988928,
|
| 1078 |
+
"loss": 0.4056,
|
| 1079 |
+
"grad_norm": 9.370827674865723,
|
| 1080 |
+
"learning_rate": 0.00022188734800800852
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"step": 121,
|
| 1084 |
+
"epoch": 0.8203389830508474,
|
| 1085 |
+
"cpu_mem": 1.509408768,
|
| 1086 |
+
"gpu_mem": 4.437016576,
|
| 1087 |
+
"loss": 0.6384,
|
| 1088 |
+
"grad_norm": 15.165224075317383,
|
| 1089 |
+
"learning_rate": 0.00022031564175053754
|
| 1090 |
+
},
|
| 1091 |
+
{
|
| 1092 |
+
"step": 122,
|
| 1093 |
+
"epoch": 0.8271186440677966,
|
| 1094 |
+
"cpu_mem": 1.509408768,
|
| 1095 |
+
"gpu_mem": 4.437067264,
|
| 1096 |
+
"loss": 0.5406,
|
| 1097 |
+
"grad_norm": 14.913131713867188,
|
| 1098 |
+
"learning_rate": 0.00021873397825911153
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"step": 123,
|
| 1102 |
+
"epoch": 0.8338983050847457,
|
| 1103 |
+
"cpu_mem": 1.509408768,
|
| 1104 |
+
"gpu_mem": 4.4368768,
|
| 1105 |
+
"loss": 0.429,
|
| 1106 |
+
"grad_norm": 10.716595649719238,
|
| 1107 |
+
"learning_rate": 0.00021714258150940685
|
| 1108 |
+
},
|
| 1109 |
+
{
|
| 1110 |
+
"step": 124,
|
| 1111 |
+
"epoch": 0.8406779661016949,
|
| 1112 |
+
"cpu_mem": 1.509408768,
|
| 1113 |
+
"gpu_mem": 4.437319168,
|
| 1114 |
+
"loss": 0.3996,
|
| 1115 |
+
"grad_norm": 7.652353286743164,
|
| 1116 |
+
"learning_rate": 0.0002155416768554039
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"step": 125,
|
| 1120 |
+
"epoch": 0.847457627118644,
|
| 1121 |
+
"cpu_mem": 1.509408768,
|
| 1122 |
+
"gpu_mem": 4.43704576,
|
| 1123 |
+
"loss": 0.5075,
|
| 1124 |
+
"grad_norm": 13.265007972717285,
|
| 1125 |
+
"learning_rate": 0.00021393149099747523
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"step": 126,
|
| 1129 |
+
"epoch": 0.8542372881355932,
|
| 1130 |
+
"cpu_mem": 1.509408768,
|
| 1131 |
+
"gpu_mem": 4.436929024,
|
| 1132 |
+
"loss": 0.4667,
|
| 1133 |
+
"grad_norm": 9.696459770202637,
|
| 1134 |
+
"learning_rate": 0.00021231225195028297
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"step": 127,
|
| 1138 |
+
"epoch": 0.8610169491525423,
|
| 1139 |
+
"cpu_mem": 1.509408768,
|
| 1140 |
+
"gpu_mem": 4.43736832,
|
| 1141 |
+
"loss": 0.5595,
|
| 1142 |
+
"grad_norm": 15.351218223571777,
|
| 1143 |
+
"learning_rate": 0.00021068418901049025
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"step": 128,
|
| 1147 |
+
"epoch": 0.8677966101694915,
|
| 1148 |
+
"cpu_mem": 1.509408768,
|
| 1149 |
+
"gpu_mem": 4.437144064,
|
| 1150 |
+
"loss": 0.5056,
|
| 1151 |
+
"grad_norm": 10.441043853759766,
|
| 1152 |
+
"learning_rate": 0.0002090475327242912
|
| 1153 |
+
},
|
| 1154 |
+
{
|
| 1155 |
+
"step": 129,
|
| 1156 |
+
"epoch": 0.8745762711864407,
|
| 1157 |
+
"cpu_mem": 1.509408768,
|
| 1158 |
+
"gpu_mem": 4.437184,
|
| 1159 |
+
"loss": 0.6595,
|
| 1160 |
+
"grad_norm": 13.417473793029785,
|
| 1161 |
+
"learning_rate": 0.00020740251485476345
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"step": 130,
|
| 1165 |
+
"epoch": 0.8813559322033898,
|
| 1166 |
+
"cpu_mem": 1.509408768,
|
| 1167 |
+
"gpu_mem": 4.436965888,
|
| 1168 |
+
"loss": 0.6462,
|
| 1169 |
+
"grad_norm": 7.923618316650391,
|
| 1170 |
+
"learning_rate": 0.0002057493683490491
|
| 1171 |
+
},
|
| 1172 |
+
{
|
| 1173 |
+
"step": 131,
|
| 1174 |
+
"epoch": 0.888135593220339,
|
| 1175 |
+
"cpu_mem": 1.509408768,
|
| 1176 |
+
"gpu_mem": 4.437094912,
|
| 1177 |
+
"loss": 0.5114,
|
| 1178 |
+
"grad_norm": 6.110599994659424,
|
| 1179 |
+
"learning_rate": 0.00020408832730536746
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"step": 132,
|
| 1183 |
+
"epoch": 0.8949152542372881,
|
| 1184 |
+
"cpu_mem": 1.509408768,
|
| 1185 |
+
"gpu_mem": 4.43717632,
|
| 1186 |
+
"loss": 0.4793,
|
| 1187 |
+
"grad_norm": 12.383698463439941,
|
| 1188 |
+
"learning_rate": 0.00020241962693986476
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"step": 133,
|
| 1192 |
+
"epoch": 0.9016949152542373,
|
| 1193 |
+
"cpu_mem": 1.509408768,
|
| 1194 |
+
"gpu_mem": 4.436959744,
|
| 1195 |
+
"loss": 0.5583,
|
| 1196 |
+
"grad_norm": 12.624692916870117,
|
| 1197 |
+
"learning_rate": 0.0002007435035533061
|
| 1198 |
+
},
|
| 1199 |
+
{
|
| 1200 |
+
"step": 134,
|
| 1201 |
+
"epoch": 0.9084745762711864,
|
| 1202 |
+
"cpu_mem": 1.509408768,
|
| 1203 |
+
"gpu_mem": 4.437093376,
|
| 1204 |
+
"loss": 0.4981,
|
| 1205 |
+
"grad_norm": 12.406174659729004,
|
| 1206 |
+
"learning_rate": 0.00019906019449761325
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"step": 135,
|
| 1210 |
+
"epoch": 0.9152542372881356,
|
| 1211 |
+
"cpu_mem": 1.509605376,
|
| 1212 |
+
"gpu_mem": 4.437116416,
|
| 1213 |
+
"loss": 0.4536,
|
| 1214 |
+
"grad_norm": 7.415020942687988,
|
| 1215 |
+
"learning_rate": 0.00019736993814225374
|
| 1216 |
+
},
|
| 1217 |
+
{
|
| 1218 |
+
"step": 136,
|
| 1219 |
+
"epoch": 0.9220338983050848,
|
| 1220 |
+
"cpu_mem": 1.509605376,
|
| 1221 |
+
"gpu_mem": 4.4369536,
|
| 1222 |
+
"loss": 0.5096,
|
| 1223 |
+
"grad_norm": 9.677729606628418,
|
| 1224 |
+
"learning_rate": 0.00019567297384048604
|
| 1225 |
+
},
|
| 1226 |
+
{
|
| 1227 |
+
"step": 137,
|
| 1228 |
+
"epoch": 0.9288135593220339,
|
| 1229 |
+
"cpu_mem": 1.509605376,
|
| 1230 |
+
"gpu_mem": 4.436833792,
|
| 1231 |
+
"loss": 0.5439,
|
| 1232 |
+
"grad_norm": 11.361948013305664,
|
| 1233 |
+
"learning_rate": 0.0001939695418954653
|
| 1234 |
+
},
|
| 1235 |
+
{
|
| 1236 |
+
"step": 138,
|
| 1237 |
+
"epoch": 0.9355932203389831,
|
| 1238 |
+
"cpu_mem": 1.509605376,
|
| 1239 |
+
"gpu_mem": 4.43701504,
|
| 1240 |
+
"loss": 0.5307,
|
| 1241 |
+
"grad_norm": 11.554671287536621,
|
| 1242 |
+
"learning_rate": 0.00019225988352621445
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"step": 139,
|
| 1246 |
+
"epoch": 0.9423728813559322,
|
| 1247 |
+
"cpu_mem": 1.509605376,
|
| 1248 |
+
"gpu_mem": 4.436913664,
|
| 1249 |
+
"loss": 0.4409,
|
| 1250 |
+
"grad_norm": 7.895120620727539,
|
| 1251 |
+
"learning_rate": 0.00019054424083346592
|
| 1252 |
+
},
|
| 1253 |
+
{
|
| 1254 |
+
"step": 140,
|
| 1255 |
+
"epoch": 0.9491525423728814,
|
| 1256 |
+
"cpu_mem": 1.509605376,
|
| 1257 |
+
"gpu_mem": 4.436965888,
|
| 1258 |
+
"loss": 0.4858,
|
| 1259 |
+
"grad_norm": 10.334193229675293,
|
| 1260 |
+
"learning_rate": 0.0001888228567653781
|
| 1261 |
+
},
|
| 1262 |
+
{
|
| 1263 |
+
"step": 141,
|
| 1264 |
+
"epoch": 0.9559322033898305,
|
| 1265 |
+
"cpu_mem": 1.509605376,
|
| 1266 |
+
"gpu_mem": 4.436998144,
|
| 1267 |
+
"loss": 0.6513,
|
| 1268 |
+
"grad_norm": 19.94317626953125,
|
| 1269 |
+
"learning_rate": 0.0001870959750831323
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"step": 142,
|
| 1273 |
+
"epoch": 0.9627118644067797,
|
| 1274 |
+
"cpu_mem": 1.509605376,
|
| 1275 |
+
"gpu_mem": 4.43713792,
|
| 1276 |
+
"loss": 0.5677,
|
| 1277 |
+
"grad_norm": 16.330734252929688,
|
| 1278 |
+
"learning_rate": 0.0001853638403264141
|
| 1279 |
+
},
|
| 1280 |
+
{
|
| 1281 |
+
"step": 143,
|
| 1282 |
+
"epoch": 0.9694915254237289,
|
| 1283 |
+
"cpu_mem": 1.509605376,
|
| 1284 |
+
"gpu_mem": 4.437121024,
|
| 1285 |
+
"loss": 0.5919,
|
| 1286 |
+
"grad_norm": 11.14167308807373,
|
| 1287 |
+
"learning_rate": 0.00018362669777878453
|
| 1288 |
+
},
|
| 1289 |
+
{
|
| 1290 |
+
"step": 144,
|
| 1291 |
+
"epoch": 0.976271186440678,
|
| 1292 |
+
"cpu_mem": 1.509605376,
|
| 1293 |
+
"gpu_mem": 4.437313024,
|
| 1294 |
+
"loss": 0.5742,
|
| 1295 |
+
"grad_norm": 7.925104141235352,
|
| 1296 |
+
"learning_rate": 0.00018188479343294648
|
| 1297 |
+
},
|
| 1298 |
+
{
|
| 1299 |
+
"step": 145,
|
| 1300 |
+
"epoch": 0.9830508474576272,
|
| 1301 |
+
"cpu_mem": 1.509605376,
|
| 1302 |
+
"gpu_mem": 4.437024256,
|
| 1303 |
+
"loss": 0.4559,
|
| 1304 |
+
"grad_norm": 6.353579044342041,
|
| 1305 |
+
"learning_rate": 0.0001801383739559098
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"step": 146,
|
| 1309 |
+
"epoch": 0.9898305084745763,
|
| 1310 |
+
"cpu_mem": 1.509605376,
|
| 1311 |
+
"gpu_mem": 4.437059584,
|
| 1312 |
+
"loss": 0.579,
|
| 1313 |
+
"grad_norm": 9.637496948242188,
|
| 1314 |
+
"learning_rate": 0.0001783876866540615
|
| 1315 |
+
},
|
| 1316 |
+
{
|
| 1317 |
+
"step": 147,
|
| 1318 |
+
"epoch": 0.9966101694915255,
|
| 1319 |
+
"cpu_mem": 1.509605376,
|
| 1320 |
+
"gpu_mem": 4.436958208,
|
| 1321 |
+
"loss": 0.5382,
|
| 1322 |
+
"grad_norm": 7.5189595222473145,
|
| 1323 |
+
"learning_rate": 0.00017663297943814552
|
| 1324 |
+
},
|
| 1325 |
+
{
|
| 1326 |
+
"step": 148,
|
| 1327 |
+
"epoch": 1.0033898305084745,
|
| 1328 |
+
"cpu_mem": 1.509605376,
|
| 1329 |
+
"gpu_mem": 4.443509248,
|
| 1330 |
+
"loss": 0.6157,
|
| 1331 |
+
"grad_norm": 11.473037719726562,
|
| 1332 |
+
"learning_rate": 0.0001748745007881561
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"step": 149,
|
| 1336 |
+
"epoch": 1.0101694915254238,
|
| 1337 |
+
"cpu_mem": 1.509605376,
|
| 1338 |
+
"gpu_mem": 4.443444736,
|
| 1339 |
+
"loss": 0.3944,
|
| 1340 |
+
"grad_norm": 5.674694538116455,
|
| 1341 |
+
"learning_rate": 0.00017311249971815185
|
| 1342 |
+
},
|
| 1343 |
+
{
|
| 1344 |
+
"step": 150,
|
| 1345 |
+
"epoch": 1.0169491525423728,
|
| 1346 |
+
"cpu_mem": 1.509605376,
|
| 1347 |
+
"gpu_mem": 4.44328192,
|
| 1348 |
+
"loss": 0.3795,
|
| 1349 |
+
"grad_norm": 6.071469783782959,
|
| 1350 |
+
"learning_rate": 0.00017134722574099276
|
| 1351 |
+
},
|
| 1352 |
+
{
|
| 1353 |
+
"step": 151,
|
| 1354 |
+
"epoch": 1.023728813559322,
|
| 1355 |
+
"cpu_mem": 1.509605376,
|
| 1356 |
+
"gpu_mem": 4.443354112,
|
| 1357 |
+
"loss": 0.3988,
|
| 1358 |
+
"grad_norm": 5.714435577392578,
|
| 1359 |
+
"learning_rate": 0.00016957892883300775
|
| 1360 |
+
},
|
| 1361 |
+
{
|
| 1362 |
+
"step": 152,
|
| 1363 |
+
"epoch": 1.0305084745762711,
|
| 1364 |
+
"cpu_mem": 1.509605376,
|
| 1365 |
+
"gpu_mem": 4.44338944,
|
| 1366 |
+
"loss": 0.3796,
|
| 1367 |
+
"grad_norm": 6.586073875427246,
|
| 1368 |
+
"learning_rate": 0.00016780785939859576
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"step": 153,
|
| 1372 |
+
"epoch": 1.0372881355932204,
|
| 1373 |
+
"cpu_mem": 1.509605376,
|
| 1374 |
+
"gpu_mem": 4.443414016,
|
| 1375 |
+
"loss": 0.452,
|
| 1376 |
+
"grad_norm": 6.130358695983887,
|
| 1377 |
+
"learning_rate": 0.00016603426823476693
|
| 1378 |
+
},
|
| 1379 |
+
{
|
| 1380 |
+
"step": 154,
|
| 1381 |
+
"epoch": 1.0440677966101695,
|
| 1382 |
+
"cpu_mem": 1.509605376,
|
| 1383 |
+
"gpu_mem": 4.443375616,
|
| 1384 |
+
"loss": 0.3782,
|
| 1385 |
+
"grad_norm": 6.469320774078369,
|
| 1386 |
+
"learning_rate": 0.00016425840649562736
|
| 1387 |
+
},
|
| 1388 |
+
{
|
| 1389 |
+
"step": 155,
|
| 1390 |
+
"epoch": 1.0508474576271187,
|
| 1391 |
+
"cpu_mem": 1.509605376,
|
| 1392 |
+
"gpu_mem": 4.4435968,
|
| 1393 |
+
"loss": 0.4447,
|
| 1394 |
+
"grad_norm": 9.796608924865723,
|
| 1395 |
+
"learning_rate": 0.00016248052565681436
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"step": 156,
|
| 1399 |
+
"epoch": 1.0576271186440678,
|
| 1400 |
+
"cpu_mem": 1.509605376,
|
| 1401 |
+
"gpu_mem": 4.44350464,
|
| 1402 |
+
"loss": 0.3952,
|
| 1403 |
+
"grad_norm": 10.653168678283691,
|
| 1404 |
+
"learning_rate": 0.00016070087747988482
|
| 1405 |
+
},
|
| 1406 |
+
{
|
| 1407 |
+
"step": 157,
|
| 1408 |
+
"epoch": 1.064406779661017,
|
| 1409 |
+
"cpu_mem": 1.509605376,
|
| 1410 |
+
"gpu_mem": 4.443410944,
|
| 1411 |
+
"loss": 0.4352,
|
| 1412 |
+
"grad_norm": 10.047937393188477,
|
| 1413 |
+
"learning_rate": 0.00015891971397666464
|
| 1414 |
+
},
|
| 1415 |
+
{
|
| 1416 |
+
"step": 158,
|
| 1417 |
+
"epoch": 1.071186440677966,
|
| 1418 |
+
"cpu_mem": 1.509605376,
|
| 1419 |
+
"gpu_mem": 4.443337216,
|
| 1420 |
+
"loss": 0.3561,
|
| 1421 |
+
"grad_norm": 7.982423782348633,
|
| 1422 |
+
"learning_rate": 0.00015713728737356137
|
| 1423 |
+
},
|
| 1424 |
+
{
|
| 1425 |
+
"step": 159,
|
| 1426 |
+
"epoch": 1.0779661016949154,
|
| 1427 |
+
"cpu_mem": 1.509605376,
|
| 1428 |
+
"gpu_mem": 4.443685888,
|
| 1429 |
+
"loss": 0.3552,
|
| 1430 |
+
"grad_norm": 8.629858016967773,
|
| 1431 |
+
"learning_rate": 0.00015535385007584706
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"step": 160,
|
| 1435 |
+
"epoch": 1.0847457627118644,
|
| 1436 |
+
"cpu_mem": 1.509605376,
|
| 1437 |
+
"gpu_mem": 4.443280384,
|
| 1438 |
+
"loss": 0.3805,
|
| 1439 |
+
"grad_norm": 10.6151123046875,
|
| 1440 |
+
"learning_rate": 0.0001535696546319161
|
| 1441 |
+
},
|
| 1442 |
+
{
|
| 1443 |
+
"step": 161,
|
| 1444 |
+
"epoch": 1.0915254237288137,
|
| 1445 |
+
"cpu_mem": 1.509605376,
|
| 1446 |
+
"gpu_mem": 4.443226624,
|
| 1447 |
+
"loss": 0.38,
|
| 1448 |
+
"grad_norm": 8.782222747802734,
|
| 1449 |
+
"learning_rate": 0.00015178495369752213
|
| 1450 |
+
},
|
| 1451 |
+
{
|
| 1452 |
+
"step": 162,
|
| 1453 |
+
"epoch": 1.0983050847457627,
|
| 1454 |
+
"cpu_mem": 1.509605376,
|
| 1455 |
+
"gpu_mem": 4.444002304,
|
| 1456 |
+
"loss": 0.26,
|
| 1457 |
+
"grad_norm": 7.612800598144531,
|
| 1458 |
+
"learning_rate": 0.00015
|
| 1459 |
+
},
|
| 1460 |
+
{
|
| 1461 |
+
"step": 163,
|
| 1462 |
+
"epoch": 1.1050847457627118,
|
| 1463 |
+
"cpu_mem": 1.509605376,
|
| 1464 |
+
"gpu_mem": 4.443478528,
|
| 1465 |
+
"loss": 0.3393,
|
| 1466 |
+
"grad_norm": 7.5802998542785645,
|
| 1467 |
+
"learning_rate": 0.00014821504630247785
|
| 1468 |
+
},
|
| 1469 |
+
{
|
| 1470 |
+
"step": 164,
|
| 1471 |
+
"epoch": 1.111864406779661,
|
| 1472 |
+
"cpu_mem": 1.509605376,
|
| 1473 |
+
"gpu_mem": 4.443390976,
|
| 1474 |
+
"loss": 0.429,
|
| 1475 |
+
"grad_norm": 9.032646179199219,
|
| 1476 |
+
"learning_rate": 0.00014643034536808387
|
| 1477 |
+
},
|
| 1478 |
+
{
|
| 1479 |
+
"step": 165,
|
| 1480 |
+
"epoch": 1.11864406779661,
|
| 1481 |
+
"cpu_mem": 1.509605376,
|
| 1482 |
+
"gpu_mem": 4.443340288,
|
| 1483 |
+
"loss": 0.3151,
|
| 1484 |
+
"grad_norm": 8.619991302490234,
|
| 1485 |
+
"learning_rate": 0.00014464614992415294
|
| 1486 |
+
},
|
| 1487 |
+
{
|
| 1488 |
+
"step": 166,
|
| 1489 |
+
"epoch": 1.1254237288135593,
|
| 1490 |
+
"cpu_mem": 1.509605376,
|
| 1491 |
+
"gpu_mem": 4.44343552,
|
| 1492 |
+
"loss": 0.3742,
|
| 1493 |
+
"grad_norm": 9.681302070617676,
|
| 1494 |
+
"learning_rate": 0.00014286271262643866
|
| 1495 |
+
},
|
| 1496 |
+
{
|
| 1497 |
+
"step": 167,
|
| 1498 |
+
"epoch": 1.1322033898305084,
|
| 1499 |
+
"cpu_mem": 1.509605376,
|
| 1500 |
+
"gpu_mem": 4.443352576,
|
| 1501 |
+
"loss": 0.4062,
|
| 1502 |
+
"grad_norm": 14.850049018859863,
|
| 1503 |
+
"learning_rate": 0.00014108028602333536
|
| 1504 |
+
},
|
| 1505 |
+
{
|
| 1506 |
+
"step": 168,
|
| 1507 |
+
"epoch": 1.1389830508474577,
|
| 1508 |
+
"cpu_mem": 1.509605376,
|
| 1509 |
+
"gpu_mem": 4.443371008,
|
| 1510 |
+
"loss": 0.4535,
|
| 1511 |
+
"grad_norm": 10.279086112976074,
|
| 1512 |
+
"learning_rate": 0.00013929912252011516
|
| 1513 |
+
},
|
| 1514 |
+
{
|
| 1515 |
+
"step": 169,
|
| 1516 |
+
"epoch": 1.1457627118644067,
|
| 1517 |
+
"cpu_mem": 1.509605376,
|
| 1518 |
+
"gpu_mem": 4.44345856,
|
| 1519 |
+
"loss": 0.3463,
|
| 1520 |
+
"grad_norm": 9.80820369720459,
|
| 1521 |
+
"learning_rate": 0.00013751947434318564
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"step": 170,
|
| 1525 |
+
"epoch": 1.152542372881356,
|
| 1526 |
+
"cpu_mem": 1.509605376,
|
| 1527 |
+
"gpu_mem": 4.44334336,
|
| 1528 |
+
"loss": 0.3783,
|
| 1529 |
+
"grad_norm": 10.852489471435547,
|
| 1530 |
+
"learning_rate": 0.00013574159350437261
|
| 1531 |
+
},
|
| 1532 |
+
{
|
| 1533 |
+
"step": 171,
|
| 1534 |
+
"epoch": 1.159322033898305,
|
| 1535 |
+
"cpu_mem": 1.509605376,
|
| 1536 |
+
"gpu_mem": 4.443406336,
|
| 1537 |
+
"loss": 0.4453,
|
| 1538 |
+
"grad_norm": 11.156643867492676,
|
| 1539 |
+
"learning_rate": 0.0001339657317652331
|
| 1540 |
+
},
|
| 1541 |
+
{
|
| 1542 |
+
"step": 172,
|
| 1543 |
+
"epoch": 1.1661016949152543,
|
| 1544 |
+
"cpu_mem": 1.509605376,
|
| 1545 |
+
"gpu_mem": 4.443314176,
|
| 1546 |
+
"loss": 0.3344,
|
| 1547 |
+
"grad_norm": 8.870190620422363,
|
| 1548 |
+
"learning_rate": 0.00013219214060140424
|
| 1549 |
+
},
|
| 1550 |
+
{
|
| 1551 |
+
"step": 173,
|
| 1552 |
+
"epoch": 1.1728813559322033,
|
| 1553 |
+
"cpu_mem": 1.509605376,
|
| 1554 |
+
"gpu_mem": 4.443613696,
|
| 1555 |
+
"loss": 0.4352,
|
| 1556 |
+
"grad_norm": 9.64120101928711,
|
| 1557 |
+
"learning_rate": 0.00013042107116699228
|
| 1558 |
+
},
|
| 1559 |
+
{
|
| 1560 |
+
"step": 174,
|
| 1561 |
+
"epoch": 1.1796610169491526,
|
| 1562 |
+
"cpu_mem": 1.509605376,
|
| 1563 |
+
"gpu_mem": 4.443337216,
|
| 1564 |
+
"loss": 0.333,
|
| 1565 |
+
"grad_norm": 8.450601577758789,
|
| 1566 |
+
"learning_rate": 0.00012865277425900724
|
| 1567 |
+
},
|
| 1568 |
+
{
|
| 1569 |
+
"step": 175,
|
| 1570 |
+
"epoch": 1.1864406779661016,
|
| 1571 |
+
"cpu_mem": 1.509605376,
|
| 1572 |
+
"gpu_mem": 4.443303424,
|
| 1573 |
+
"loss": 0.3363,
|
| 1574 |
+
"grad_norm": 7.288397312164307,
|
| 1575 |
+
"learning_rate": 0.00012688750028184818
|
| 1576 |
+
},
|
| 1577 |
+
{
|
| 1578 |
+
"step": 176,
|
| 1579 |
+
"epoch": 1.193220338983051,
|
| 1580 |
+
"cpu_mem": 1.509605376,
|
| 1581 |
+
"gpu_mem": 4.443441664,
|
| 1582 |
+
"loss": 0.3218,
|
| 1583 |
+
"grad_norm": 10.227561950683594,
|
| 1584 |
+
"learning_rate": 0.0001251254992118439
|
| 1585 |
+
},
|
| 1586 |
+
{
|
| 1587 |
+
"step": 177,
|
| 1588 |
+
"epoch": 1.2,
|
| 1589 |
+
"cpu_mem": 1.509605376,
|
| 1590 |
+
"gpu_mem": 4.443539968,
|
| 1591 |
+
"loss": 0.351,
|
| 1592 |
+
"grad_norm": 7.246642112731934,
|
| 1593 |
+
"learning_rate": 0.00012336702056185453
|
| 1594 |
+
},
|
| 1595 |
+
{
|
| 1596 |
+
"step": 178,
|
| 1597 |
+
"epoch": 1.2067796610169492,
|
| 1598 |
+
"cpu_mem": 1.509605376,
|
| 1599 |
+
"gpu_mem": 4.443286528,
|
| 1600 |
+
"loss": 0.5062,
|
| 1601 |
+
"grad_norm": 15.948833465576172,
|
| 1602 |
+
"learning_rate": 0.00012161231334593851
|
| 1603 |
+
},
|
| 1604 |
+
{
|
| 1605 |
+
"step": 179,
|
| 1606 |
+
"epoch": 1.2135593220338983,
|
| 1607 |
+
"cpu_mem": 1.509605376,
|
| 1608 |
+
"gpu_mem": 4.443386368,
|
| 1609 |
+
"loss": 0.413,
|
| 1610 |
+
"grad_norm": 9.143070220947266,
|
| 1611 |
+
"learning_rate": 0.00011986162604409015
|
| 1612 |
+
},
|
| 1613 |
+
{
|
| 1614 |
+
"step": 180,
|
| 1615 |
+
"epoch": 1.2203389830508475,
|
| 1616 |
+
"cpu_mem": 1.509605376,
|
| 1617 |
+
"gpu_mem": 4.44335872,
|
| 1618 |
+
"loss": 0.314,
|
| 1619 |
+
"grad_norm": 9.495607376098633,
|
| 1620 |
+
"learning_rate": 0.00011811520656705348
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"step": 181,
|
| 1624 |
+
"epoch": 1.2271186440677966,
|
| 1625 |
+
"cpu_mem": 1.509605376,
|
| 1626 |
+
"gpu_mem": 4.443295744,
|
| 1627 |
+
"loss": 0.3162,
|
| 1628 |
+
"grad_norm": 9.015178680419922,
|
| 1629 |
+
"learning_rate": 0.00011637330222121543
|
| 1630 |
+
},
|
| 1631 |
+
{
|
| 1632 |
+
"step": 182,
|
| 1633 |
+
"epoch": 1.2338983050847459,
|
| 1634 |
+
"cpu_mem": 1.509605376,
|
| 1635 |
+
"gpu_mem": 4.443513856,
|
| 1636 |
+
"loss": 0.4318,
|
| 1637 |
+
"grad_norm": 10.653160095214844,
|
| 1638 |
+
"learning_rate": 0.00011463615967358588
|
| 1639 |
+
},
|
| 1640 |
+
{
|
| 1641 |
+
"step": 183,
|
| 1642 |
+
"epoch": 1.240677966101695,
|
| 1643 |
+
"cpu_mem": 1.509605376,
|
| 1644 |
+
"gpu_mem": 4.443410944,
|
| 1645 |
+
"loss": 0.3551,
|
| 1646 |
+
"grad_norm": 11.33315372467041,
|
| 1647 |
+
"learning_rate": 0.00011290402491686766
|
| 1648 |
+
},
|
| 1649 |
+
{
|
| 1650 |
+
"step": 184,
|
| 1651 |
+
"epoch": 1.2474576271186442,
|
| 1652 |
+
"cpu_mem": 1.509605376,
|
| 1653 |
+
"gpu_mem": 4.44335872,
|
| 1654 |
+
"loss": 0.3593,
|
| 1655 |
+
"grad_norm": 9.929872512817383,
|
| 1656 |
+
"learning_rate": 0.00011117714323462186
|
| 1657 |
+
},
|
| 1658 |
+
{
|
| 1659 |
+
"step": 185,
|
| 1660 |
+
"epoch": 1.2542372881355932,
|
| 1661 |
+
"cpu_mem": 1.509605376,
|
| 1662 |
+
"gpu_mem": 4.443337216,
|
| 1663 |
+
"loss": 0.3663,
|
| 1664 |
+
"grad_norm": 9.877488136291504,
|
| 1665 |
+
"learning_rate": 0.00010945575916653407
|
| 1666 |
+
},
|
| 1667 |
+
{
|
| 1668 |
+
"step": 186,
|
| 1669 |
+
"epoch": 1.2610169491525425,
|
| 1670 |
+
"cpu_mem": 1.509605376,
|
| 1671 |
+
"gpu_mem": 4.443346432,
|
| 1672 |
+
"loss": 0.2439,
|
| 1673 |
+
"grad_norm": 6.803407192230225,
|
| 1674 |
+
"learning_rate": 0.00010774011647378553
|
| 1675 |
+
},
|
| 1676 |
+
{
|
| 1677 |
+
"step": 187,
|
| 1678 |
+
"epoch": 1.2677966101694915,
|
| 1679 |
+
"cpu_mem": 1.509605376,
|
| 1680 |
+
"gpu_mem": 4.443278848,
|
| 1681 |
+
"loss": 0.4327,
|
| 1682 |
+
"grad_norm": 13.585451126098633,
|
| 1683 |
+
"learning_rate": 0.00010603045810453468
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"step": 188,
|
| 1687 |
+
"epoch": 1.2745762711864406,
|
| 1688 |
+
"cpu_mem": 1.509605376,
|
| 1689 |
+
"gpu_mem": 4.443441664,
|
| 1690 |
+
"loss": 0.245,
|
| 1691 |
+
"grad_norm": 10.62351131439209,
|
| 1692 |
+
"learning_rate": 0.00010432702615951396
|
| 1693 |
+
},
|
| 1694 |
+
{
|
| 1695 |
+
"step": 189,
|
| 1696 |
+
"epoch": 1.2813559322033898,
|
| 1697 |
+
"cpu_mem": 1.509605376,
|
| 1698 |
+
"gpu_mem": 4.443311104,
|
| 1699 |
+
"loss": 0.4337,
|
| 1700 |
+
"grad_norm": 9.73697280883789,
|
| 1701 |
+
"learning_rate": 0.00010263006185774627
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"step": 190,
|
| 1705 |
+
"epoch": 1.288135593220339,
|
| 1706 |
+
"cpu_mem": 1.509605376,
|
| 1707 |
+
"gpu_mem": 4.443430912,
|
| 1708 |
+
"loss": 0.3561,
|
| 1709 |
+
"grad_norm": 11.984561920166016,
|
| 1710 |
+
"learning_rate": 0.00010093980550238675
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"step": 191,
|
| 1714 |
+
"epoch": 1.2949152542372881,
|
| 1715 |
+
"cpu_mem": 1.509605376,
|
| 1716 |
+
"gpu_mem": 4.443249664,
|
| 1717 |
+
"loss": 0.3225,
|
| 1718 |
+
"grad_norm": 12.810022354125977,
|
| 1719 |
+
"learning_rate": 9.925649644669391e-05
|
| 1720 |
+
},
|
| 1721 |
+
{
|
| 1722 |
+
"step": 192,
|
| 1723 |
+
"epoch": 1.3016949152542372,
|
| 1724 |
+
"cpu_mem": 1.509605376,
|
| 1725 |
+
"gpu_mem": 4.44338176,
|
| 1726 |
+
"loss": 0.2664,
|
| 1727 |
+
"grad_norm": 10.05370807647705,
|
| 1728 |
+
"learning_rate": 9.758037306013526e-05
|
| 1729 |
+
},
|
| 1730 |
+
{
|
| 1731 |
+
"step": 193,
|
| 1732 |
+
"epoch": 1.3084745762711865,
|
| 1733 |
+
"cpu_mem": 1.509605376,
|
| 1734 |
+
"gpu_mem": 4.443355648,
|
| 1735 |
+
"loss": 0.4455,
|
| 1736 |
+
"grad_norm": 14.030529975891113,
|
| 1737 |
+
"learning_rate": 9.591167269463255e-05
|
| 1738 |
+
},
|
| 1739 |
+
{
|
| 1740 |
+
"step": 194,
|
| 1741 |
+
"epoch": 1.3152542372881357,
|
| 1742 |
+
"cpu_mem": 1.509605376,
|
| 1743 |
+
"gpu_mem": 4.443321856,
|
| 1744 |
+
"loss": 0.3602,
|
| 1745 |
+
"grad_norm": 12.235883712768555,
|
| 1746 |
+
"learning_rate": 9.425063165095088e-05
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"step": 195,
|
| 1750 |
+
"epoch": 1.3220338983050848,
|
| 1751 |
+
"cpu_mem": 1.509605376,
|
| 1752 |
+
"gpu_mem": 4.443426304,
|
| 1753 |
+
"loss": 0.2762,
|
| 1754 |
+
"grad_norm": 9.612601280212402,
|
| 1755 |
+
"learning_rate": 9.259748514523653e-05
|
| 1756 |
+
},
|
| 1757 |
+
{
|
| 1758 |
+
"step": 196,
|
| 1759 |
+
"epoch": 1.3288135593220338,
|
| 1760 |
+
"cpu_mem": 1.509605376,
|
| 1761 |
+
"gpu_mem": 4.443421696,
|
| 1762 |
+
"loss": 0.3528,
|
| 1763 |
+
"grad_norm": 8.862492561340332,
|
| 1764 |
+
"learning_rate": 9.095246727570879e-05
|
| 1765 |
+
},
|
| 1766 |
+
{
|
| 1767 |
+
"step": 197,
|
| 1768 |
+
"epoch": 1.335593220338983,
|
| 1769 |
+
"cpu_mem": 1.509605376,
|
| 1770 |
+
"gpu_mem": 4.443280384,
|
| 1771 |
+
"loss": 0.2842,
|
| 1772 |
+
"grad_norm": 11.436196327209473,
|
| 1773 |
+
"learning_rate": 8.931581098950973e-05
|
| 1774 |
+
},
|
| 1775 |
+
{
|
| 1776 |
+
"step": 198,
|
| 1777 |
+
"epoch": 1.3423728813559321,
|
| 1778 |
+
"cpu_mem": 1.509605376,
|
| 1779 |
+
"gpu_mem": 4.443472384,
|
| 1780 |
+
"loss": 0.3027,
|
| 1781 |
+
"grad_norm": 10.520874977111816,
|
| 1782 |
+
"learning_rate": 8.768774804971705e-05
|
| 1783 |
+
},
|
| 1784 |
+
{
|
| 1785 |
+
"step": 199,
|
| 1786 |
+
"epoch": 1.3491525423728814,
|
| 1787 |
+
"cpu_mem": 1.509605376,
|
| 1788 |
+
"gpu_mem": 4.443323392,
|
| 1789 |
+
"loss": 0.3717,
|
| 1790 |
+
"grad_norm": 14.724334716796875,
|
| 1791 |
+
"learning_rate": 8.606850900252478e-05
|
| 1792 |
+
},
|
| 1793 |
+
{
|
| 1794 |
+
"step": 200,
|
| 1795 |
+
"epoch": 1.3559322033898304,
|
| 1796 |
+
"cpu_mem": 1.509605376,
|
| 1797 |
+
"gpu_mem": 4.443426304,
|
| 1798 |
+
"loss": 0.3074,
|
| 1799 |
+
"grad_norm": 12.475892066955566,
|
| 1800 |
+
"learning_rate": 8.445832314459608e-05
|
| 1801 |
+
},
|
| 1802 |
+
{
|
| 1803 |
+
"step": 201,
|
| 1804 |
+
"epoch": 1.3627118644067797,
|
| 1805 |
+
"cpu_mem": 1.509605376,
|
| 1806 |
+
"gpu_mem": 4.443629056,
|
| 1807 |
+
"loss": 0.2782,
|
| 1808 |
+
"grad_norm": 8.169771194458008,
|
| 1809 |
+
"learning_rate": 8.285741849059311e-05
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"step": 202,
|
| 1813 |
+
"epoch": 1.3694915254237288,
|
| 1814 |
+
"cpu_mem": 1.509605376,
|
| 1815 |
+
"gpu_mem": 4.443430912,
|
| 1816 |
+
"loss": 0.2923,
|
| 1817 |
+
"grad_norm": 9.363910675048828,
|
| 1818 |
+
"learning_rate": 8.126602174088843e-05
|
| 1819 |
+
},
|
| 1820 |
+
{
|
| 1821 |
+
"step": 203,
|
| 1822 |
+
"epoch": 1.376271186440678,
|
| 1823 |
+
"cpu_mem": 1.509605376,
|
| 1824 |
+
"gpu_mem": 4.443317248,
|
| 1825 |
+
"loss": 0.2826,
|
| 1826 |
+
"grad_norm": 7.223607063293457,
|
| 1827 |
+
"learning_rate": 7.968435824946242e-05
|
| 1828 |
+
},
|
| 1829 |
+
{
|
| 1830 |
+
"step": 204,
|
| 1831 |
+
"epoch": 1.383050847457627,
|
| 1832 |
+
"cpu_mem": 1.509605376,
|
| 1833 |
+
"gpu_mem": 4.443331072,
|
| 1834 |
+
"loss": 0.262,
|
| 1835 |
+
"grad_norm": 8.399836540222168,
|
| 1836 |
+
"learning_rate": 7.811265199199152e-05
|
| 1837 |
+
},
|
| 1838 |
+
{
|
| 1839 |
+
"step": 205,
|
| 1840 |
+
"epoch": 1.3898305084745763,
|
| 1841 |
+
"cpu_mem": 1.509605376,
|
| 1842 |
+
"gpu_mem": 4.443375616,
|
| 1843 |
+
"loss": 0.2814,
|
| 1844 |
+
"grad_norm": 8.11514663696289,
|
| 1845 |
+
"learning_rate": 7.655112553413135e-05
|
| 1846 |
+
},
|
| 1847 |
+
{
|
| 1848 |
+
"step": 206,
|
| 1849 |
+
"epoch": 1.3966101694915254,
|
| 1850 |
+
"cpu_mem": 1.509605376,
|
| 1851 |
+
"gpu_mem": 4.443317248,
|
| 1852 |
+
"loss": 0.2771,
|
| 1853 |
+
"grad_norm": 9.344612121582031,
|
| 1854 |
+
"learning_rate": 7.500000000000002e-05
|
| 1855 |
+
},
|
| 1856 |
+
{
|
| 1857 |
+
"step": 207,
|
| 1858 |
+
"epoch": 1.4033898305084747,
|
| 1859 |
+
"cpu_mem": 1.509605376,
|
| 1860 |
+
"gpu_mem": 4.44355072,
|
| 1861 |
+
"loss": 0.3327,
|
| 1862 |
+
"grad_norm": 10.335131645202637,
|
| 1863 |
+
"learning_rate": 7.345949504086507e-05
|
| 1864 |
+
},
|
| 1865 |
+
{
|
| 1866 |
+
"step": 208,
|
| 1867 |
+
"epoch": 1.4101694915254237,
|
| 1868 |
+
"cpu_mem": 1.509605376,
|
| 1869 |
+
"gpu_mem": 4.44358144,
|
| 1870 |
+
"loss": 0.2319,
|
| 1871 |
+
"grad_norm": 12.18466567993164,
|
| 1872 |
+
"learning_rate": 7.192982880403917e-05
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"step": 209,
|
| 1876 |
+
"epoch": 1.4169491525423727,
|
| 1877 |
+
"cpu_mem": 1.509605376,
|
| 1878 |
+
"gpu_mem": 4.443507712,
|
| 1879 |
+
"loss": 0.3699,
|
| 1880 |
+
"grad_norm": 11.099276542663574,
|
| 1881 |
+
"learning_rate": 7.041121790198881e-05
|
| 1882 |
+
},
|
| 1883 |
+
{
|
| 1884 |
+
"step": 210,
|
| 1885 |
+
"epoch": 1.423728813559322,
|
| 1886 |
+
"cpu_mem": 1.509605376,
|
| 1887 |
+
"gpu_mem": 4.443395584,
|
| 1888 |
+
"loss": 0.4507,
|
| 1889 |
+
"grad_norm": 12.702630996704102,
|
| 1890 |
+
"learning_rate": 6.890387738166041e-05
|
| 1891 |
+
},
|
| 1892 |
+
{
|
| 1893 |
+
"step": 211,
|
| 1894 |
+
"epoch": 1.4305084745762713,
|
| 1895 |
+
"cpu_mem": 1.509605376,
|
| 1896 |
+
"gpu_mem": 4.443344896,
|
| 1897 |
+
"loss": 0.4079,
|
| 1898 |
+
"grad_norm": 10.903487205505371,
|
| 1899 |
+
"learning_rate": 6.740802069402771e-05
|
| 1900 |
+
},
|
| 1901 |
+
{
|
| 1902 |
+
"step": 212,
|
| 1903 |
+
"epoch": 1.4372881355932203,
|
| 1904 |
+
"cpu_mem": 1.509605376,
|
| 1905 |
+
"gpu_mem": 4.443314176,
|
| 1906 |
+
"loss": 0.3,
|
| 1907 |
+
"grad_norm": 10.615644454956055,
|
| 1908 |
+
"learning_rate": 6.592385966386588e-05
|
| 1909 |
+
},
|
| 1910 |
+
{
|
| 1911 |
+
"step": 213,
|
| 1912 |
+
"epoch": 1.4440677966101694,
|
| 1913 |
+
"cpu_mem": 1.509605376,
|
| 1914 |
+
"gpu_mem": 4.443337216,
|
| 1915 |
+
"loss": 0.3518,
|
| 1916 |
+
"grad_norm": 10.656424522399902,
|
| 1917 |
+
"learning_rate": 6.445160445975536e-05
|
| 1918 |
+
},
|
| 1919 |
+
{
|
| 1920 |
+
"step": 214,
|
| 1921 |
+
"epoch": 1.4508474576271186,
|
| 1922 |
+
"cpu_mem": 1.509605376,
|
| 1923 |
+
"gpu_mem": 4.44342016,
|
| 1924 |
+
"loss": 0.4096,
|
| 1925 |
+
"grad_norm": 16.58029556274414,
|
| 1926 |
+
"learning_rate": 6.299146356432029e-05
|
| 1927 |
+
},
|
| 1928 |
+
{
|
| 1929 |
+
"step": 215,
|
| 1930 |
+
"epoch": 1.457627118644068,
|
| 1931 |
+
"cpu_mem": 1.509605376,
|
| 1932 |
+
"gpu_mem": 4.443347968,
|
| 1933 |
+
"loss": 0.4284,
|
| 1934 |
+
"grad_norm": 16.37483024597168,
|
| 1935 |
+
"learning_rate": 6.154364374470568e-05
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"step": 216,
|
| 1939 |
+
"epoch": 1.464406779661017,
|
| 1940 |
+
"cpu_mem": 1.509605376,
|
| 1941 |
+
"gpu_mem": 4.443513856,
|
| 1942 |
+
"loss": 0.2537,
|
| 1943 |
+
"grad_norm": 8.520498275756836,
|
| 1944 |
+
"learning_rate": 6.010835002329795e-05
|
| 1945 |
+
},
|
| 1946 |
+
{
|
| 1947 |
+
"step": 217,
|
| 1948 |
+
"epoch": 1.471186440677966,
|
| 1949 |
+
"cpu_mem": 1.509605376,
|
| 1950 |
+
"gpu_mem": 4.443355648,
|
| 1951 |
+
"loss": 0.3216,
|
| 1952 |
+
"grad_norm": 15.882022857666016,
|
| 1953 |
+
"learning_rate": 5.8685785648691894e-05
|
| 1954 |
+
},
|
| 1955 |
+
{
|
| 1956 |
+
"step": 218,
|
| 1957 |
+
"epoch": 1.4779661016949153,
|
| 1958 |
+
"cpu_mem": 1.509801984,
|
| 1959 |
+
"gpu_mem": 4.443332608,
|
| 1960 |
+
"loss": 0.3527,
|
| 1961 |
+
"grad_norm": 11.748701095581055,
|
| 1962 |
+
"learning_rate": 5.72761520669092e-05
|
| 1963 |
+
},
|
| 1964 |
+
{
|
| 1965 |
+
"step": 219,
|
| 1966 |
+
"epoch": 1.4847457627118645,
|
| 1967 |
+
"cpu_mem": 1.509801984,
|
| 1968 |
+
"gpu_mem": 4.44345856,
|
| 1969 |
+
"loss": 0.4225,
|
| 1970 |
+
"grad_norm": 10.945619583129883,
|
| 1971 |
+
"learning_rate": 5.587964889287218e-05
|
| 1972 |
+
},
|
| 1973 |
+
{
|
| 1974 |
+
"step": 220,
|
| 1975 |
+
"epoch": 1.4915254237288136,
|
| 1976 |
+
"cpu_mem": 1.509801984,
|
| 1977 |
+
"gpu_mem": 4.443492352,
|
| 1978 |
+
"loss": 0.3201,
|
| 1979 |
+
"grad_norm": 10.223454475402832,
|
| 1980 |
+
"learning_rate": 5.449647388213678e-05
|
| 1981 |
+
},
|
| 1982 |
+
{
|
| 1983 |
+
"step": 221,
|
| 1984 |
+
"epoch": 1.4983050847457626,
|
| 1985 |
+
"cpu_mem": 1.509801984,
|
| 1986 |
+
"gpu_mem": 4.443360256,
|
| 1987 |
+
"loss": 0.2756,
|
| 1988 |
+
"grad_norm": 8.79985237121582,
|
| 1989 |
+
"learning_rate": 5.312682290288869e-05
|
| 1990 |
+
},
|
| 1991 |
+
{
|
| 1992 |
+
"step": 222,
|
| 1993 |
+
"epoch": 1.505084745762712,
|
| 1994 |
+
"cpu_mem": 1.509801984,
|
| 1995 |
+
"gpu_mem": 4.44349696,
|
| 1996 |
+
"loss": 0.3005,
|
| 1997 |
+
"grad_norm": 9.963611602783203,
|
| 1998 |
+
"learning_rate": 5.1770889908207245e-05
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"step": 223,
|
| 2002 |
+
"epoch": 1.5118644067796612,
|
| 2003 |
+
"cpu_mem": 1.509801984,
|
| 2004 |
+
"gpu_mem": 4.443410944,
|
| 2005 |
+
"loss": 0.2828,
|
| 2006 |
+
"grad_norm": 13.369943618774414,
|
| 2007 |
+
"learning_rate": 5.0428866908599864e-05
|
| 2008 |
+
},
|
| 2009 |
+
{
|
| 2010 |
+
"step": 224,
|
| 2011 |
+
"epoch": 1.5186440677966102,
|
| 2012 |
+
"cpu_mem": 1.509801984,
|
| 2013 |
+
"gpu_mem": 4.443375616,
|
| 2014 |
+
"loss": 0.2504,
|
| 2015 |
+
"grad_norm": 8.19646167755127,
|
| 2016 |
+
"learning_rate": 4.9100943944812114e-05
|
| 2017 |
+
},
|
| 2018 |
+
{
|
| 2019 |
+
"step": 225,
|
| 2020 |
+
"epoch": 1.5254237288135593,
|
| 2021 |
+
"cpu_mem": 1.509801984,
|
| 2022 |
+
"gpu_mem": 4.443340288,
|
| 2023 |
+
"loss": 0.2712,
|
| 2024 |
+
"grad_norm": 10.986623764038086,
|
| 2025 |
+
"learning_rate": 4.778730906091632e-05
|
| 2026 |
+
},
|
| 2027 |
+
{
|
| 2028 |
+
"step": 226,
|
| 2029 |
+
"epoch": 1.5322033898305085,
|
| 2030 |
+
"cpu_mem": 1.509801984,
|
| 2031 |
+
"gpu_mem": 4.44348928,
|
| 2032 |
+
"loss": 0.3418,
|
| 2033 |
+
"grad_norm": 9.064874649047852,
|
| 2034 |
+
"learning_rate": 4.648814827768322e-05
|
| 2035 |
+
},
|
| 2036 |
+
{
|
| 2037 |
+
"step": 227,
|
| 2038 |
+
"epoch": 1.5389830508474578,
|
| 2039 |
+
"cpu_mem": 1.509801984,
|
| 2040 |
+
"gpu_mem": 4.443378688,
|
| 2041 |
+
"loss": 0.3776,
|
| 2042 |
+
"grad_norm": 13.318199157714844,
|
| 2043 |
+
"learning_rate": 4.5203645566239816e-05
|
| 2044 |
+
},
|
| 2045 |
+
{
|
| 2046 |
+
"step": 228,
|
| 2047 |
+
"epoch": 1.5457627118644068,
|
| 2048 |
+
"cpu_mem": 1.509801984,
|
| 2049 |
+
"gpu_mem": 4.443323392,
|
| 2050 |
+
"loss": 0.4031,
|
| 2051 |
+
"grad_norm": 11.604644775390625,
|
| 2052 |
+
"learning_rate": 4.3933982822017876e-05
|
| 2053 |
+
},
|
| 2054 |
+
{
|
| 2055 |
+
"step": 229,
|
| 2056 |
+
"epoch": 1.5525423728813559,
|
| 2057 |
+
"cpu_mem": 1.509801984,
|
| 2058 |
+
"gpu_mem": 4.443265024,
|
| 2059 |
+
"loss": 0.2382,
|
| 2060 |
+
"grad_norm": 7.073112487792969,
|
| 2061 |
+
"learning_rate": 4.267933983899601e-05
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"step": 230,
|
| 2065 |
+
"epoch": 1.559322033898305,
|
| 2066 |
+
"cpu_mem": 1.509801984,
|
| 2067 |
+
"gpu_mem": 4.443321856,
|
| 2068 |
+
"loss": 0.2682,
|
| 2069 |
+
"grad_norm": 8.373688697814941,
|
| 2070 |
+
"learning_rate": 4.143989428423947e-05
|
| 2071 |
+
},
|
| 2072 |
+
{
|
| 2073 |
+
"step": 231,
|
| 2074 |
+
"epoch": 1.5661016949152542,
|
| 2075 |
+
"cpu_mem": 1.509801984,
|
| 2076 |
+
"gpu_mem": 4.443599872,
|
| 2077 |
+
"loss": 0.4171,
|
| 2078 |
+
"grad_norm": 13.975024223327637,
|
| 2079 |
+
"learning_rate": 4.0215821672741213e-05
|
| 2080 |
+
},
|
| 2081 |
+
{
|
| 2082 |
+
"step": 232,
|
| 2083 |
+
"epoch": 1.5728813559322035,
|
| 2084 |
+
"cpu_mem": 1.509801984,
|
| 2085 |
+
"gpu_mem": 4.443323392,
|
| 2086 |
+
"loss": 0.3271,
|
| 2087 |
+
"grad_norm": 9.13338851928711,
|
| 2088 |
+
"learning_rate": 3.900729534256745e-05
|
| 2089 |
+
},
|
| 2090 |
+
{
|
| 2091 |
+
"step": 233,
|
| 2092 |
+
"epoch": 1.5796610169491525,
|
| 2093 |
+
"cpu_mem": 1.509801984,
|
| 2094 |
+
"gpu_mem": 4.443636736,
|
| 2095 |
+
"loss": 0.3622,
|
| 2096 |
+
"grad_norm": 12.784040451049805,
|
| 2097 |
+
"learning_rate": 3.781448643031187e-05
|
| 2098 |
+
},
|
| 2099 |
+
{
|
| 2100 |
+
"step": 234,
|
| 2101 |
+
"epoch": 1.5864406779661016,
|
| 2102 |
+
"cpu_mem": 1.509801984,
|
| 2103 |
+
"gpu_mem": 4.44351232,
|
| 2104 |
+
"loss": 0.2907,
|
| 2105 |
+
"grad_norm": 9.858100891113281,
|
| 2106 |
+
"learning_rate": 3.663756384686127e-05
|
| 2107 |
+
},
|
| 2108 |
+
{
|
| 2109 |
+
"step": 235,
|
| 2110 |
+
"epoch": 1.5932203389830508,
|
| 2111 |
+
"cpu_mem": 1.509801984,
|
| 2112 |
+
"gpu_mem": 4.443268096,
|
| 2113 |
+
"loss": 0.2375,
|
| 2114 |
+
"grad_norm": 7.437261581420898,
|
| 2115 |
+
"learning_rate": 3.547669425347647e-05
|
| 2116 |
+
},
|
| 2117 |
+
{
|
| 2118 |
+
"step": 236,
|
| 2119 |
+
"epoch": 1.6,
|
| 2120 |
+
"cpu_mem": 1.509801984,
|
| 2121 |
+
"gpu_mem": 4.443328,
|
| 2122 |
+
"loss": 0.3846,
|
| 2123 |
+
"grad_norm": 15.018045425415039,
|
| 2124 |
+
"learning_rate": 3.433204203819185e-05
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"step": 237,
|
| 2128 |
+
"epoch": 1.6067796610169491,
|
| 2129 |
+
"cpu_mem": 1.509801984,
|
| 2130 |
+
"gpu_mem": 4.44338944,
|
| 2131 |
+
"loss": 0.4068,
|
| 2132 |
+
"grad_norm": 13.178804397583008,
|
| 2133 |
+
"learning_rate": 3.3203769292536764e-05
|
| 2134 |
+
},
|
| 2135 |
+
{
|
| 2136 |
+
"step": 238,
|
| 2137 |
+
"epoch": 1.6135593220338982,
|
| 2138 |
+
"cpu_mem": 1.509801984,
|
| 2139 |
+
"gpu_mem": 4.443390976,
|
| 2140 |
+
"loss": 0.3033,
|
| 2141 |
+
"grad_norm": 11.403144836425781,
|
| 2142 |
+
"learning_rate": 3.209203578858191e-05
|
| 2143 |
+
},
|
| 2144 |
+
{
|
| 2145 |
+
"step": 239,
|
| 2146 |
+
"epoch": 1.6203389830508474,
|
| 2147 |
+
"cpu_mem": 1.509801984,
|
| 2148 |
+
"gpu_mem": 4.443644416,
|
| 2149 |
+
"loss": 0.3641,
|
| 2150 |
+
"grad_norm": 11.864053726196289,
|
| 2151 |
+
"learning_rate": 3.099699895631474e-05
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"step": 240,
|
| 2155 |
+
"epoch": 1.6271186440677967,
|
| 2156 |
+
"cpu_mem": 1.509801984,
|
| 2157 |
+
"gpu_mem": 4.443294208,
|
| 2158 |
+
"loss": 0.5409,
|
| 2159 |
+
"grad_norm": 16.487295150756836,
|
| 2160 |
+
"learning_rate": 2.9918813861345952e-05
|
| 2161 |
+
},
|
| 2162 |
+
{
|
| 2163 |
+
"step": 241,
|
| 2164 |
+
"epoch": 1.6338983050847458,
|
| 2165 |
+
"cpu_mem": 1.509801984,
|
| 2166 |
+
"gpu_mem": 4.443590656,
|
| 2167 |
+
"loss": 0.2953,
|
| 2168 |
+
"grad_norm": 10.208303451538086,
|
| 2169 |
+
"learning_rate": 2.885763318295102e-05
|
| 2170 |
+
},
|
| 2171 |
+
{
|
| 2172 |
+
"step": 242,
|
| 2173 |
+
"epoch": 1.6406779661016948,
|
| 2174 |
+
"cpu_mem": 1.509801984,
|
| 2175 |
+
"gpu_mem": 4.443452416,
|
| 2176 |
+
"loss": 0.3304,
|
| 2177 |
+
"grad_norm": 12.945493698120117,
|
| 2178 |
+
"learning_rate": 2.781360719244964e-05
|
| 2179 |
+
},
|
| 2180 |
+
{
|
| 2181 |
+
"step": 243,
|
| 2182 |
+
"epoch": 1.647457627118644,
|
| 2183 |
+
"cpu_mem": 1.509801984,
|
| 2184 |
+
"gpu_mem": 4.44330496,
|
| 2185 |
+
"loss": 0.4223,
|
| 2186 |
+
"grad_norm": 10.953478813171387,
|
| 2187 |
+
"learning_rate": 2.6786883731926306e-05
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"step": 244,
|
| 2191 |
+
"epoch": 1.6542372881355933,
|
| 2192 |
+
"cpu_mem": 1.509801984,
|
| 2193 |
+
"gpu_mem": 4.443444736,
|
| 2194 |
+
"loss": 0.2251,
|
| 2195 |
+
"grad_norm": 8.908162117004395,
|
| 2196 |
+
"learning_rate": 2.5777608193294396e-05
|
| 2197 |
+
},
|
| 2198 |
+
{
|
| 2199 |
+
"step": 245,
|
| 2200 |
+
"epoch": 1.6610169491525424,
|
| 2201 |
+
"cpu_mem": 1.509801984,
|
| 2202 |
+
"gpu_mem": 4.443323392,
|
| 2203 |
+
"loss": 0.3956,
|
| 2204 |
+
"grad_norm": 11.671682357788086,
|
| 2205 |
+
"learning_rate": 2.4785923497707956e-05
|
| 2206 |
+
},
|
| 2207 |
+
{
|
| 2208 |
+
"step": 246,
|
| 2209 |
+
"epoch": 1.6677966101694914,
|
| 2210 |
+
"cpu_mem": 1.509801984,
|
| 2211 |
+
"gpu_mem": 4.443417088,
|
| 2212 |
+
"loss": 0.4087,
|
| 2213 |
+
"grad_norm": 10.720558166503906,
|
| 2214 |
+
"learning_rate": 2.38119700753228e-05
|
| 2215 |
+
},
|
| 2216 |
+
{
|
| 2217 |
+
"step": 247,
|
| 2218 |
+
"epoch": 1.6745762711864407,
|
| 2219 |
+
"cpu_mem": 1.509801984,
|
| 2220 |
+
"gpu_mem": 4.44343552,
|
| 2221 |
+
"loss": 0.2392,
|
| 2222 |
+
"grad_norm": 7.951722621917725,
|
| 2223 |
+
"learning_rate": 2.285588584541047e-05
|
| 2224 |
+
},
|
| 2225 |
+
{
|
| 2226 |
+
"step": 248,
|
| 2227 |
+
"epoch": 1.68135593220339,
|
| 2228 |
+
"cpu_mem": 1.509801984,
|
| 2229 |
+
"gpu_mem": 4.443387904,
|
| 2230 |
+
"loss": 0.286,
|
| 2231 |
+
"grad_norm": 8.448762893676758,
|
| 2232 |
+
"learning_rate": 2.1917806196827792e-05
|
| 2233 |
+
},
|
| 2234 |
+
{
|
| 2235 |
+
"step": 249,
|
| 2236 |
+
"epoch": 1.688135593220339,
|
| 2237 |
+
"cpu_mem": 1.509801984,
|
| 2238 |
+
"gpu_mem": 4.443294208,
|
| 2239 |
+
"loss": 0.2903,
|
| 2240 |
+
"grad_norm": 7.876997947692871,
|
| 2241 |
+
"learning_rate": 2.0997863968844914e-05
|
| 2242 |
+
},
|
| 2243 |
+
{
|
| 2244 |
+
"step": 250,
|
| 2245 |
+
"epoch": 1.694915254237288,
|
| 2246 |
+
"cpu_mem": 1.510785024,
|
| 2247 |
+
"gpu_mem": 4.443386368,
|
| 2248 |
+
"loss": 0.3307,
|
| 2249 |
+
"grad_norm": 11.870707511901855,
|
| 2250 |
+
"learning_rate": 2.009618943233419e-05
|
| 2251 |
+
},
|
| 2252 |
+
{
|
| 2253 |
+
"step": 251,
|
| 2254 |
+
"epoch": 1.7016949152542373,
|
| 2255 |
+
"cpu_mem": 1.510785024,
|
| 2256 |
+
"gpu_mem": 4.443298816,
|
| 2257 |
+
"loss": 0.2187,
|
| 2258 |
+
"grad_norm": 9.706609725952148,
|
| 2259 |
+
"learning_rate": 1.921291027132278e-05
|
| 2260 |
+
},
|
| 2261 |
+
{
|
| 2262 |
+
"step": 252,
|
| 2263 |
+
"epoch": 1.7084745762711866,
|
| 2264 |
+
"cpu_mem": 1.510785024,
|
| 2265 |
+
"gpu_mem": 4.443341824,
|
| 2266 |
+
"loss": 0.3021,
|
| 2267 |
+
"grad_norm": 8.621807098388672,
|
| 2268 |
+
"learning_rate": 1.834815156491165e-05
|
| 2269 |
+
},
|
| 2270 |
+
{
|
| 2271 |
+
"step": 253,
|
| 2272 |
+
"epoch": 1.7152542372881356,
|
| 2273 |
+
"cpu_mem": 1.510785024,
|
| 2274 |
+
"gpu_mem": 4.44353536,
|
| 2275 |
+
"loss": 0.3015,
|
| 2276 |
+
"grad_norm": 10.553934097290039,
|
| 2277 |
+
"learning_rate": 1.750203576956341e-05
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"step": 254,
|
| 2281 |
+
"epoch": 1.7220338983050847,
|
| 2282 |
+
"cpu_mem": 1.510785024,
|
| 2283 |
+
"gpu_mem": 4.443331072,
|
| 2284 |
+
"loss": 0.3854,
|
| 2285 |
+
"grad_norm": 11.682550430297852,
|
| 2286 |
+
"learning_rate": 1.6674682701761493e-05
|
| 2287 |
+
},
|
| 2288 |
+
{
|
| 2289 |
+
"step": 255,
|
| 2290 |
+
"epoch": 1.7288135593220337,
|
| 2291 |
+
"cpu_mem": 1.510785024,
|
| 2292 |
+
"gpu_mem": 4.443487744,
|
| 2293 |
+
"loss": 0.3721,
|
| 2294 |
+
"grad_norm": 12.575425148010254,
|
| 2295 |
+
"learning_rate": 1.5866209521043304e-05
|
| 2296 |
+
},
|
| 2297 |
+
{
|
| 2298 |
+
"step": 256,
|
| 2299 |
+
"epoch": 1.735593220338983,
|
| 2300 |
+
"cpu_mem": 1.510785024,
|
| 2301 |
+
"gpu_mem": 4.443314176,
|
| 2302 |
+
"loss": 0.3563,
|
| 2303 |
+
"grad_norm": 13.305479049682617,
|
| 2304 |
+
"learning_rate": 1.5076730713409523e-05
|
| 2305 |
+
},
|
| 2306 |
+
{
|
| 2307 |
+
"step": 257,
|
| 2308 |
+
"epoch": 1.7423728813559323,
|
| 2309 |
+
"cpu_mem": 1.510785024,
|
| 2310 |
+
"gpu_mem": 4.44372736,
|
| 2311 |
+
"loss": 0.372,
|
| 2312 |
+
"grad_norm": 8.018106460571289,
|
| 2313 |
+
"learning_rate": 1.4306358075111923e-05
|
| 2314 |
+
},
|
| 2315 |
+
{
|
| 2316 |
+
"step": 258,
|
| 2317 |
+
"epoch": 1.7491525423728813,
|
| 2318 |
+
"cpu_mem": 1.510785024,
|
| 2319 |
+
"gpu_mem": 4.443386368,
|
| 2320 |
+
"loss": 0.3647,
|
| 2321 |
+
"grad_norm": 11.9230375289917,
|
| 2322 |
+
"learning_rate": 1.3555200696822232e-05
|
| 2323 |
+
},
|
| 2324 |
+
{
|
| 2325 |
+
"step": 259,
|
| 2326 |
+
"epoch": 1.7559322033898304,
|
| 2327 |
+
"cpu_mem": 1.510785024,
|
| 2328 |
+
"gpu_mem": 4.443303424,
|
| 2329 |
+
"loss": 0.3188,
|
| 2330 |
+
"grad_norm": 9.143546104431152,
|
| 2331 |
+
"learning_rate": 1.2823364948184095e-05
|
| 2332 |
+
},
|
| 2333 |
+
{
|
| 2334 |
+
"step": 260,
|
| 2335 |
+
"epoch": 1.7627118644067796,
|
| 2336 |
+
"cpu_mem": 1.510785024,
|
| 2337 |
+
"gpu_mem": 4.44342016,
|
| 2338 |
+
"loss": 0.2007,
|
| 2339 |
+
"grad_norm": 7.097804069519043,
|
| 2340 |
+
"learning_rate": 1.2110954462750166e-05
|
| 2341 |
+
},
|
| 2342 |
+
{
|
| 2343 |
+
"step": 261,
|
| 2344 |
+
"epoch": 1.769491525423729,
|
| 2345 |
+
"cpu_mem": 1.510785024,
|
| 2346 |
+
"gpu_mem": 4.443375616,
|
| 2347 |
+
"loss": 0.1669,
|
| 2348 |
+
"grad_norm": 6.277010440826416,
|
| 2349 |
+
"learning_rate": 1.1418070123306989e-05
|
| 2350 |
+
},
|
| 2351 |
+
{
|
| 2352 |
+
"step": 262,
|
| 2353 |
+
"epoch": 1.776271186440678,
|
| 2354 |
+
"cpu_mem": 1.510785024,
|
| 2355 |
+
"gpu_mem": 4.443332608,
|
| 2356 |
+
"loss": 0.2355,
|
| 2357 |
+
"grad_norm": 8.6841459274292,
|
| 2358 |
+
"learning_rate": 1.0744810047589115e-05
|
| 2359 |
+
},
|
| 2360 |
+
{
|
| 2361 |
+
"step": 263,
|
| 2362 |
+
"epoch": 1.783050847457627,
|
| 2363 |
+
"cpu_mem": 1.510785024,
|
| 2364 |
+
"gpu_mem": 4.443369472,
|
| 2365 |
+
"loss": 0.273,
|
| 2366 |
+
"grad_norm": 7.298412322998047,
|
| 2367 |
+
"learning_rate": 1.0091269574384874e-05
|
| 2368 |
+
},
|
| 2369 |
+
{
|
| 2370 |
+
"step": 264,
|
| 2371 |
+
"epoch": 1.7898305084745763,
|
| 2372 |
+
"cpu_mem": 1.510785024,
|
| 2373 |
+
"gpu_mem": 4.443457024,
|
| 2374 |
+
"loss": 0.2702,
|
| 2375 |
+
"grad_norm": 10.429343223571777,
|
| 2376 |
+
"learning_rate": 9.45754125003576e-06
|
| 2377 |
+
},
|
| 2378 |
+
{
|
| 2379 |
+
"step": 265,
|
| 2380 |
+
"epoch": 1.7966101694915255,
|
| 2381 |
+
"cpu_mem": 1.510785024,
|
| 2382 |
+
"gpu_mem": 4.443375616,
|
| 2383 |
+
"loss": 0.3714,
|
| 2384 |
+
"grad_norm": 10.106574058532715,
|
| 2385 |
+
"learning_rate": 8.843714815330987e-06
|
| 2386 |
+
},
|
| 2387 |
+
{
|
| 2388 |
+
"step": 266,
|
| 2389 |
+
"epoch": 1.8033898305084746,
|
| 2390 |
+
"cpu_mem": 1.510785024,
|
| 2391 |
+
"gpu_mem": 4.443590656,
|
| 2392 |
+
"loss": 0.3821,
|
| 2393 |
+
"grad_norm": 12.320114135742188,
|
| 2394 |
+
"learning_rate": 8.249877192799731e-06
|
| 2395 |
+
},
|
| 2396 |
+
{
|
| 2397 |
+
"step": 267,
|
| 2398 |
+
"epoch": 1.8101694915254236,
|
| 2399 |
+
"cpu_mem": 1.510785024,
|
| 2400 |
+
"gpu_mem": 4.443383296,
|
| 2401 |
+
"loss": 0.2547,
|
| 2402 |
+
"grad_norm": 9.98828125,
|
| 2403 |
+
"learning_rate": 7.676112474402068e-06
|
| 2404 |
+
},
|
| 2405 |
+
{
|
| 2406 |
+
"step": 268,
|
| 2407 |
+
"epoch": 1.8169491525423729,
|
| 2408 |
+
"cpu_mem": 1.510785024,
|
| 2409 |
+
"gpu_mem": 4.443387904,
|
| 2410 |
+
"loss": 0.2823,
|
| 2411 |
+
"grad_norm": 10.371786117553711,
|
| 2412 |
+
"learning_rate": 7.122501909620926e-06
|
| 2413 |
+
},
|
| 2414 |
+
{
|
| 2415 |
+
"step": 269,
|
| 2416 |
+
"epoch": 1.8237288135593221,
|
| 2417 |
+
"cpu_mem": 1.510785024,
|
| 2418 |
+
"gpu_mem": 4.443398656,
|
| 2419 |
+
"loss": 0.3287,
|
| 2420 |
+
"grad_norm": 9.897567749023438,
|
| 2421 |
+
"learning_rate": 6.5891238939566275e-06
|
| 2422 |
+
},
|
| 2423 |
+
{
|
| 2424 |
+
"step": 270,
|
| 2425 |
+
"epoch": 1.8305084745762712,
|
| 2426 |
+
"cpu_mem": 1.510785024,
|
| 2427 |
+
"gpu_mem": 4.443437056,
|
| 2428 |
+
"loss": 0.2544,
|
| 2429 |
+
"grad_norm": 10.063066482543945,
|
| 2430 |
+
"learning_rate": 6.076053957825411e-06
|
| 2431 |
+
},
|
| 2432 |
+
{
|
| 2433 |
+
"step": 271,
|
| 2434 |
+
"epoch": 1.8372881355932202,
|
| 2435 |
+
"cpu_mem": 1.510785024,
|
| 2436 |
+
"gpu_mem": 4.44348928,
|
| 2437 |
+
"loss": 0.3142,
|
| 2438 |
+
"grad_norm": 10.186683654785156,
|
| 2439 |
+
"learning_rate": 5.583364755863701e-06
|
| 2440 |
+
},
|
| 2441 |
+
{
|
| 2442 |
+
"step": 272,
|
| 2443 |
+
"epoch": 1.8440677966101695,
|
| 2444 |
+
"cpu_mem": 1.510785024,
|
| 2445 |
+
"gpu_mem": 4.443347968,
|
| 2446 |
+
"loss": 0.3402,
|
| 2447 |
+
"grad_norm": 8.863958358764648,
|
| 2448 |
+
"learning_rate": 5.11112605663977e-06
|
| 2449 |
+
},
|
| 2450 |
+
{
|
| 2451 |
+
"step": 273,
|
| 2452 |
+
"epoch": 1.8508474576271188,
|
| 2453 |
+
"cpu_mem": 1.510785024,
|
| 2454 |
+
"gpu_mem": 4.44322816,
|
| 2455 |
+
"loss": 0.2973,
|
| 2456 |
+
"grad_norm": 9.513245582580566,
|
| 2457 |
+
"learning_rate": 4.659404732773908e-06
|
| 2458 |
+
},
|
| 2459 |
+
{
|
| 2460 |
+
"step": 274,
|
| 2461 |
+
"epoch": 1.8576271186440678,
|
| 2462 |
+
"cpu_mem": 1.510785024,
|
| 2463 |
+
"gpu_mem": 4.443455488,
|
| 2464 |
+
"loss": 0.279,
|
| 2465 |
+
"grad_norm": 10.836873054504395,
|
| 2466 |
+
"learning_rate": 4.228264751468752e-06
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"step": 275,
|
| 2470 |
+
"epoch": 1.8644067796610169,
|
| 2471 |
+
"cpu_mem": 1.510785024,
|
| 2472 |
+
"gpu_mem": 4.443699712,
|
| 2473 |
+
"loss": 0.32,
|
| 2474 |
+
"grad_norm": 11.420705795288086,
|
| 2475 |
+
"learning_rate": 3.817767165451041e-06
|
| 2476 |
+
},
|
| 2477 |
+
{
|
| 2478 |
+
"step": 276,
|
| 2479 |
+
"epoch": 1.8711864406779661,
|
| 2480 |
+
"cpu_mem": 1.510785024,
|
| 2481 |
+
"gpu_mem": 4.443360256,
|
| 2482 |
+
"loss": 0.3155,
|
| 2483 |
+
"grad_norm": 10.466375350952148,
|
| 2484 |
+
"learning_rate": 3.4279701043260886e-06
|
| 2485 |
+
},
|
| 2486 |
+
{
|
| 2487 |
+
"step": 277,
|
| 2488 |
+
"epoch": 1.8779661016949154,
|
| 2489 |
+
"cpu_mem": 1.510785024,
|
| 2490 |
+
"gpu_mem": 4.443306496,
|
| 2491 |
+
"loss": 0.3167,
|
| 2492 |
+
"grad_norm": 10.945162773132324,
|
| 2493 |
+
"learning_rate": 3.0589287663461472e-06
|
| 2494 |
+
},
|
| 2495 |
+
{
|
| 2496 |
+
"step": 278,
|
| 2497 |
+
"epoch": 1.8847457627118644,
|
| 2498 |
+
"cpu_mem": 1.510785024,
|
| 2499 |
+
"gpu_mem": 4.443469312,
|
| 2500 |
+
"loss": 0.4518,
|
| 2501 |
+
"grad_norm": 12.45777416229248,
|
| 2502 |
+
"learning_rate": 2.710695410593994e-06
|
| 2503 |
+
},
|
| 2504 |
+
{
|
| 2505 |
+
"step": 279,
|
| 2506 |
+
"epoch": 1.8915254237288135,
|
| 2507 |
+
"cpu_mem": 1.510785024,
|
| 2508 |
+
"gpu_mem": 4.443409408,
|
| 2509 |
+
"loss": 0.2909,
|
| 2510 |
+
"grad_norm": 8.404402732849121,
|
| 2511 |
+
"learning_rate": 2.3833193495825853e-06
|
| 2512 |
+
},
|
| 2513 |
+
{
|
| 2514 |
+
"step": 280,
|
| 2515 |
+
"epoch": 1.8983050847457628,
|
| 2516 |
+
"cpu_mem": 1.510785024,
|
| 2517 |
+
"gpu_mem": 4.44338944,
|
| 2518 |
+
"loss": 0.3295,
|
| 2519 |
+
"grad_norm": 10.143949508666992,
|
| 2520 |
+
"learning_rate": 2.076846942272026e-06
|
| 2521 |
+
},
|
| 2522 |
+
{
|
| 2523 |
+
"step": 281,
|
| 2524 |
+
"epoch": 1.905084745762712,
|
| 2525 |
+
"cpu_mem": 1.510785024,
|
| 2526 |
+
"gpu_mem": 4.443324928,
|
| 2527 |
+
"loss": 0.3426,
|
| 2528 |
+
"grad_norm": 10.10682201385498,
|
| 2529 |
+
"learning_rate": 1.791321587504768e-06
|
| 2530 |
+
},
|
| 2531 |
+
{
|
| 2532 |
+
"step": 282,
|
| 2533 |
+
"epoch": 1.911864406779661,
|
| 2534 |
+
"cpu_mem": 1.510785024,
|
| 2535 |
+
"gpu_mem": 4.443753472,
|
| 2536 |
+
"loss": 0.2746,
|
| 2537 |
+
"grad_norm": 8.048744201660156,
|
| 2538 |
+
"learning_rate": 1.5267837178600972e-06
|
| 2539 |
+
},
|
| 2540 |
+
{
|
| 2541 |
+
"step": 283,
|
| 2542 |
+
"epoch": 1.9186440677966101,
|
| 2543 |
+
"cpu_mem": 1.510785024,
|
| 2544 |
+
"gpu_mem": 4.443460096,
|
| 2545 |
+
"loss": 0.3007,
|
| 2546 |
+
"grad_norm": 10.570841789245605,
|
| 2547 |
+
"learning_rate": 1.2832707939284427e-06
|
| 2548 |
+
},
|
| 2549 |
+
{
|
| 2550 |
+
"step": 284,
|
| 2551 |
+
"epoch": 1.9254237288135592,
|
| 2552 |
+
"cpu_mem": 1.510785024,
|
| 2553 |
+
"gpu_mem": 4.443315712,
|
| 2554 |
+
"loss": 0.3486,
|
| 2555 |
+
"grad_norm": 8.057785987854004,
|
| 2556 |
+
"learning_rate": 1.0608172990067553e-06
|
| 2557 |
+
},
|
| 2558 |
+
{
|
| 2559 |
+
"step": 285,
|
| 2560 |
+
"epoch": 1.9322033898305084,
|
| 2561 |
+
"cpu_mem": 1.510785024,
|
| 2562 |
+
"gpu_mem": 4.443369472,
|
| 2563 |
+
"loss": 0.3287,
|
| 2564 |
+
"grad_norm": 9.795562744140625,
|
| 2565 |
+
"learning_rate": 8.594547342153979e-07
|
| 2566 |
+
},
|
| 2567 |
+
{
|
| 2568 |
+
"step": 286,
|
| 2569 |
+
"epoch": 1.9389830508474577,
|
| 2570 |
+
"cpu_mem": 1.510785024,
|
| 2571 |
+
"gpu_mem": 4.443787264,
|
| 2572 |
+
"loss": 0.2857,
|
| 2573 |
+
"grad_norm": 13.503580093383789,
|
| 2574 |
+
"learning_rate": 6.792116140373116e-07
|
| 2575 |
+
},
|
| 2576 |
+
{
|
| 2577 |
+
"step": 287,
|
| 2578 |
+
"epoch": 1.9457627118644067,
|
| 2579 |
+
"cpu_mem": 1.510785024,
|
| 2580 |
+
"gpu_mem": 4.443556864,
|
| 2581 |
+
"loss": 0.2571,
|
| 2582 |
+
"grad_norm": 9.61729621887207,
|
| 2583 |
+
"learning_rate": 5.201134622801473e-07
|
| 2584 |
+
},
|
| 2585 |
+
{
|
| 2586 |
+
"step": 288,
|
| 2587 |
+
"epoch": 1.9525423728813558,
|
| 2588 |
+
"cpu_mem": 1.510785024,
|
| 2589 |
+
"gpu_mem": 4.443341824,
|
| 2590 |
+
"loss": 0.4314,
|
| 2591 |
+
"grad_norm": 11.989745140075684,
|
| 2592 |
+
"learning_rate": 3.821828084619727e-07
|
| 2593 |
+
},
|
| 2594 |
+
{
|
| 2595 |
+
"step": 289,
|
| 2596 |
+
"epoch": 1.959322033898305,
|
| 2597 |
+
"cpu_mem": 1.510785024,
|
| 2598 |
+
"gpu_mem": 4.443426304,
|
| 2599 |
+
"loss": 0.3231,
|
| 2600 |
+
"grad_norm": 10.468039512634277,
|
| 2601 |
+
"learning_rate": 2.654391846207915e-07
|
| 2602 |
+
},
|
| 2603 |
+
{
|
| 2604 |
+
"step": 290,
|
| 2605 |
+
"epoch": 1.9661016949152543,
|
| 2606 |
+
"cpu_mem": 1.510785024,
|
| 2607 |
+
"gpu_mem": 4.44335104,
|
| 2608 |
+
"loss": 0.3028,
|
| 2609 |
+
"grad_norm": 13.236854553222656,
|
| 2610 |
+
"learning_rate": 1.6989912254880556e-07
|
| 2611 |
+
},
|
| 2612 |
+
{
|
| 2613 |
+
"step": 291,
|
| 2614 |
+
"epoch": 1.9728813559322034,
|
| 2615 |
+
"cpu_mem": 1.510785024,
|
| 2616 |
+
"gpu_mem": 4.443386368,
|
| 2617 |
+
"loss": 0.3675,
|
| 2618 |
+
"grad_norm": 10.517420768737793,
|
| 2619 |
+
"learning_rate": 9.557615145123765e-08
|
| 2620 |
+
},
|
| 2621 |
+
{
|
| 2622 |
+
"step": 292,
|
| 2623 |
+
"epoch": 1.9796610169491524,
|
| 2624 |
+
"cpu_mem": 1.510785024,
|
| 2625 |
+
"gpu_mem": 4.443469312,
|
| 2626 |
+
"loss": 0.3511,
|
| 2627 |
+
"grad_norm": 9.854143142700195,
|
| 2628 |
+
"learning_rate": 4.248079603064724e-08
|
| 2629 |
+
},
|
| 2630 |
+
{
|
| 2631 |
+
"step": 293,
|
| 2632 |
+
"epoch": 1.9864406779661017,
|
| 2633 |
+
"cpu_mem": 1.510785024,
|
| 2634 |
+
"gpu_mem": 4.443386368,
|
| 2635 |
+
"loss": 0.4175,
|
| 2636 |
+
"grad_norm": 13.601873397827148,
|
| 2637 |
+
"learning_rate": 1.0620574996372811e-08
|
| 2638 |
+
},
|
| 2639 |
+
{
|
| 2640 |
+
"step": 294,
|
| 2641 |
+
"epoch": 1.993220338983051,
|
| 2642 |
+
"cpu_mem": 1.510785024,
|
| 2643 |
+
"gpu_mem": 4.44341248,
|
| 2644 |
+
"loss": 0.3715,
|
| 2645 |
+
"grad_norm": 12.147957801818848,
|
| 2646 |
+
"learning_rate": 0.0
|
| 2647 |
+
},
|
| 2648 |
+
{
|
| 2649 |
+
"step": 294,
|
| 2650 |
+
"epoch": 1.993220338983051,
|
| 2651 |
+
"cpu_mem": 1.510785024,
|
| 2652 |
+
"gpu_mem": 4.44341248,
|
| 2653 |
+
"train_runtime": 4455.2502,
|
| 2654 |
+
"train_samples_per_second": 4.232,
|
| 2655 |
+
"train_steps_per_second": 0.066,
|
| 2656 |
+
"total_flos": 0.0,
|
| 2657 |
+
"train_loss": 0.610709656562124
|
| 2658 |
+
}
|
| 2659 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 64,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 32,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "boolq",
|
| 3 |
+
"results": 0.7929663608562691
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "BOOLQ",
|
| 5 |
+
"dataset_id": "google/boolq",
|
| 6 |
+
"preprocess_id": "boolq_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 32,
|
| 11 |
+
"alpha": 64,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 25389056
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 2,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-boolq-r32-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-31T02:06:30.838829"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r32-a2/training_logs.json
ADDED
|
@@ -0,0 +1,2659 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.006779661016949152,
|
| 5 |
+
"cpu_mem": 1.503981568,
|
| 6 |
+
"gpu_mem": 4.519328768,
|
| 7 |
+
"loss": 8.869,
|
| 8 |
+
"grad_norm": 233.80860900878906,
|
| 9 |
+
"learning_rate": 9.999999999999999e-06
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.013559322033898305,
|
| 14 |
+
"cpu_mem": 1.510076416,
|
| 15 |
+
"gpu_mem": 4.722578944,
|
| 16 |
+
"loss": 8.9376,
|
| 17 |
+
"grad_norm": 239.66294860839844,
|
| 18 |
+
"learning_rate": 1.9999999999999998e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.020338983050847456,
|
| 23 |
+
"cpu_mem": 1.510862848,
|
| 24 |
+
"gpu_mem": 4.722497536,
|
| 25 |
+
"loss": 6.3632,
|
| 26 |
+
"grad_norm": 227.827392578125,
|
| 27 |
+
"learning_rate": 2.9999999999999997e-05
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.02711864406779661,
|
| 32 |
+
"cpu_mem": 1.511452672,
|
| 33 |
+
"gpu_mem": 4.722497536,
|
| 34 |
+
"loss": 2.6529,
|
| 35 |
+
"grad_norm": 144.43919372558594,
|
| 36 |
+
"learning_rate": 3.9999999999999996e-05
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.03389830508474576,
|
| 41 |
+
"cpu_mem": 1.512042496,
|
| 42 |
+
"gpu_mem": 4.722433024,
|
| 43 |
+
"loss": 1.212,
|
| 44 |
+
"grad_norm": 47.84184265136719,
|
| 45 |
+
"learning_rate": 4.9999999999999996e-05
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.04067796610169491,
|
| 50 |
+
"cpu_mem": 1.51263232,
|
| 51 |
+
"gpu_mem": 4.722452992,
|
| 52 |
+
"loss": 0.8617,
|
| 53 |
+
"grad_norm": 43.27740478515625,
|
| 54 |
+
"learning_rate": 5.9999999999999995e-05
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.04745762711864407,
|
| 59 |
+
"cpu_mem": 1.513222144,
|
| 60 |
+
"gpu_mem": 4.722505216,
|
| 61 |
+
"loss": 1.1695,
|
| 62 |
+
"grad_norm": 69.79517364501953,
|
| 63 |
+
"learning_rate": 7e-05
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.05423728813559322,
|
| 68 |
+
"cpu_mem": 1.51361536,
|
| 69 |
+
"gpu_mem": 4.722591232,
|
| 70 |
+
"loss": 0.6559,
|
| 71 |
+
"grad_norm": 18.012210845947266,
|
| 72 |
+
"learning_rate": 7.999999999999999e-05
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.061016949152542375,
|
| 77 |
+
"cpu_mem": 1.514008576,
|
| 78 |
+
"gpu_mem": 4.722499072,
|
| 79 |
+
"loss": 2.141,
|
| 80 |
+
"grad_norm": 79.17948913574219,
|
| 81 |
+
"learning_rate": 8.999999999999999e-05
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.06779661016949153,
|
| 86 |
+
"cpu_mem": 1.5145984,
|
| 87 |
+
"gpu_mem": 4.722399232,
|
| 88 |
+
"loss": 1.456,
|
| 89 |
+
"grad_norm": 58.11606979370117,
|
| 90 |
+
"learning_rate": 9.999999999999999e-05
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.07457627118644068,
|
| 95 |
+
"cpu_mem": 1.515188224,
|
| 96 |
+
"gpu_mem": 4.72250368,
|
| 97 |
+
"loss": 0.6457,
|
| 98 |
+
"grad_norm": 6.229933738708496,
|
| 99 |
+
"learning_rate": 0.00010999999999999998
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.08135593220338982,
|
| 104 |
+
"cpu_mem": 1.51558144,
|
| 105 |
+
"gpu_mem": 4.722875392,
|
| 106 |
+
"loss": 1.2234,
|
| 107 |
+
"grad_norm": 116.1498794555664,
|
| 108 |
+
"learning_rate": 0.00011999999999999999
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.08813559322033898,
|
| 113 |
+
"cpu_mem": 1.515974656,
|
| 114 |
+
"gpu_mem": 4.722479104,
|
| 115 |
+
"loss": 1.2587,
|
| 116 |
+
"grad_norm": 52.9954719543457,
|
| 117 |
+
"learning_rate": 0.00013
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.09491525423728814,
|
| 122 |
+
"cpu_mem": 1.516367872,
|
| 123 |
+
"gpu_mem": 4.722456064,
|
| 124 |
+
"loss": 0.686,
|
| 125 |
+
"grad_norm": 9.985751152038574,
|
| 126 |
+
"learning_rate": 0.00014
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.1016949152542373,
|
| 131 |
+
"cpu_mem": 1.516761088,
|
| 132 |
+
"gpu_mem": 4.722394624,
|
| 133 |
+
"loss": 1.1792,
|
| 134 |
+
"grad_norm": 29.390972137451172,
|
| 135 |
+
"learning_rate": 0.00015
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.10847457627118644,
|
| 140 |
+
"cpu_mem": 1.516957696,
|
| 141 |
+
"gpu_mem": 4.722479104,
|
| 142 |
+
"loss": 1.8027,
|
| 143 |
+
"grad_norm": 77.97930908203125,
|
| 144 |
+
"learning_rate": 0.00015999999999999999
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.1152542372881356,
|
| 149 |
+
"cpu_mem": 1.517350912,
|
| 150 |
+
"gpu_mem": 4.72251904,
|
| 151 |
+
"loss": 0.9339,
|
| 152 |
+
"grad_norm": 31.851720809936523,
|
| 153 |
+
"learning_rate": 0.00016999999999999999
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 0.12203389830508475,
|
| 158 |
+
"cpu_mem": 1.517744128,
|
| 159 |
+
"gpu_mem": 4.722582016,
|
| 160 |
+
"loss": 1.2629,
|
| 161 |
+
"grad_norm": 45.89571762084961,
|
| 162 |
+
"learning_rate": 0.00017999999999999998
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 0.1288135593220339,
|
| 167 |
+
"cpu_mem": 1.518137344,
|
| 168 |
+
"gpu_mem": 4.7224192,
|
| 169 |
+
"loss": 1.2291,
|
| 170 |
+
"grad_norm": 32.983150482177734,
|
| 171 |
+
"learning_rate": 0.00018999999999999998
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 0.13559322033898305,
|
| 176 |
+
"cpu_mem": 1.51853056,
|
| 177 |
+
"gpu_mem": 4.722531328,
|
| 178 |
+
"loss": 0.8234,
|
| 179 |
+
"grad_norm": 22.25924301147461,
|
| 180 |
+
"learning_rate": 0.00019999999999999998
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 0.1423728813559322,
|
| 185 |
+
"cpu_mem": 1.518727168,
|
| 186 |
+
"gpu_mem": 4.722689536,
|
| 187 |
+
"loss": 0.6795,
|
| 188 |
+
"grad_norm": 8.868040084838867,
|
| 189 |
+
"learning_rate": 0.00020999999999999998
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 0.14915254237288136,
|
| 194 |
+
"cpu_mem": 1.519120384,
|
| 195 |
+
"gpu_mem": 4.722582016,
|
| 196 |
+
"loss": 1.0734,
|
| 197 |
+
"grad_norm": 22.563232421875,
|
| 198 |
+
"learning_rate": 0.00021999999999999995
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 0.15593220338983052,
|
| 203 |
+
"cpu_mem": 1.519316992,
|
| 204 |
+
"gpu_mem": 4.722554368,
|
| 205 |
+
"loss": 0.8224,
|
| 206 |
+
"grad_norm": 13.91633415222168,
|
| 207 |
+
"learning_rate": 0.00023
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 0.16271186440677965,
|
| 212 |
+
"cpu_mem": 1.5195136,
|
| 213 |
+
"gpu_mem": 4.7226112,
|
| 214 |
+
"loss": 0.7573,
|
| 215 |
+
"grad_norm": 16.691375732421875,
|
| 216 |
+
"learning_rate": 0.00023999999999999998
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 0.1694915254237288,
|
| 221 |
+
"cpu_mem": 1.519906816,
|
| 222 |
+
"gpu_mem": 4.72239616,
|
| 223 |
+
"loss": 0.6975,
|
| 224 |
+
"grad_norm": 6.51262092590332,
|
| 225 |
+
"learning_rate": 0.00025
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 0.17627118644067796,
|
| 230 |
+
"cpu_mem": 1.520300032,
|
| 231 |
+
"gpu_mem": 4.722451456,
|
| 232 |
+
"loss": 0.882,
|
| 233 |
+
"grad_norm": 14.44697093963623,
|
| 234 |
+
"learning_rate": 0.00026
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 0.18305084745762712,
|
| 239 |
+
"cpu_mem": 1.52049664,
|
| 240 |
+
"gpu_mem": 4.722743296,
|
| 241 |
+
"loss": 0.6362,
|
| 242 |
+
"grad_norm": 3.7360706329345703,
|
| 243 |
+
"learning_rate": 0.00027
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 0.18983050847457628,
|
| 248 |
+
"cpu_mem": 1.520693248,
|
| 249 |
+
"gpu_mem": 4.722422272,
|
| 250 |
+
"loss": 0.7593,
|
| 251 |
+
"grad_norm": 8.032002449035645,
|
| 252 |
+
"learning_rate": 0.00028
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 0.19661016949152543,
|
| 257 |
+
"cpu_mem": 1.521086464,
|
| 258 |
+
"gpu_mem": 4.722486784,
|
| 259 |
+
"loss": 0.7451,
|
| 260 |
+
"grad_norm": 12.261842727661133,
|
| 261 |
+
"learning_rate": 0.00029
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 0.2033898305084746,
|
| 266 |
+
"cpu_mem": 1.521283072,
|
| 267 |
+
"gpu_mem": 4.72256512,
|
| 268 |
+
"loss": 0.7261,
|
| 269 |
+
"grad_norm": 7.222959518432617,
|
| 270 |
+
"learning_rate": 0.0003
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 0.21016949152542372,
|
| 275 |
+
"cpu_mem": 1.52147968,
|
| 276 |
+
"gpu_mem": 4.722368512,
|
| 277 |
+
"loss": 0.6136,
|
| 278 |
+
"grad_norm": 2.5524110794067383,
|
| 279 |
+
"learning_rate": 0.0002999893794250036
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 0.21694915254237288,
|
| 284 |
+
"cpu_mem": 1.521676288,
|
| 285 |
+
"gpu_mem": 4.722482176,
|
| 286 |
+
"loss": 0.7509,
|
| 287 |
+
"grad_norm": 9.033954620361328,
|
| 288 |
+
"learning_rate": 0.00029995751920396937
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 0.22372881355932203,
|
| 293 |
+
"cpu_mem": 1.521872896,
|
| 294 |
+
"gpu_mem": 4.722720256,
|
| 295 |
+
"loss": 0.7194,
|
| 296 |
+
"grad_norm": 8.025568008422852,
|
| 297 |
+
"learning_rate": 0.00029990442384854874
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 0.2305084745762712,
|
| 302 |
+
"cpu_mem": 1.522069504,
|
| 303 |
+
"gpu_mem": 4.722422272,
|
| 304 |
+
"loss": 0.5858,
|
| 305 |
+
"grad_norm": 2.7295961380004883,
|
| 306 |
+
"learning_rate": 0.0002998301008774512
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 0.23728813559322035,
|
| 311 |
+
"cpu_mem": 1.522266112,
|
| 312 |
+
"gpu_mem": 4.722632704,
|
| 313 |
+
"loss": 0.8056,
|
| 314 |
+
"grad_norm": 10.636892318725586,
|
| 315 |
+
"learning_rate": 0.0002997345608153792
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 0.2440677966101695,
|
| 320 |
+
"cpu_mem": 1.52246272,
|
| 321 |
+
"gpu_mem": 4.722583552,
|
| 322 |
+
"loss": 0.6826,
|
| 323 |
+
"grad_norm": 2.589643955230713,
|
| 324 |
+
"learning_rate": 0.000299617817191538
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 0.25084745762711863,
|
| 329 |
+
"cpu_mem": 1.522659328,
|
| 330 |
+
"gpu_mem": 4.722394624,
|
| 331 |
+
"loss": 0.6468,
|
| 332 |
+
"grad_norm": 4.602322101593018,
|
| 333 |
+
"learning_rate": 0.0002994798865377198
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 0.2576271186440678,
|
| 338 |
+
"cpu_mem": 1.522855936,
|
| 339 |
+
"gpu_mem": 4.72264192,
|
| 340 |
+
"loss": 0.8638,
|
| 341 |
+
"grad_norm": 11.864049911499023,
|
| 342 |
+
"learning_rate": 0.0002993207883859627
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 0.26440677966101694,
|
| 347 |
+
"cpu_mem": 1.523052544,
|
| 348 |
+
"gpu_mem": 4.723021312,
|
| 349 |
+
"loss": 0.7293,
|
| 350 |
+
"grad_norm": 7.218650817871094,
|
| 351 |
+
"learning_rate": 0.0002991405452657846
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 0.2711864406779661,
|
| 356 |
+
"cpu_mem": 1.523249152,
|
| 357 |
+
"gpu_mem": 4.722591232,
|
| 358 |
+
"loss": 0.6592,
|
| 359 |
+
"grad_norm": 2.3266420364379883,
|
| 360 |
+
"learning_rate": 0.00029893918270099324
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 0.27796610169491526,
|
| 365 |
+
"cpu_mem": 1.52344576,
|
| 366 |
+
"gpu_mem": 4.72281856,
|
| 367 |
+
"loss": 0.6739,
|
| 368 |
+
"grad_norm": 2.876038074493408,
|
| 369 |
+
"learning_rate": 0.00029871672920607153
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 0.2847457627118644,
|
| 374 |
+
"cpu_mem": 1.523642368,
|
| 375 |
+
"gpu_mem": 4.722715648,
|
| 376 |
+
"loss": 0.6569,
|
| 377 |
+
"grad_norm": 5.236875057220459,
|
| 378 |
+
"learning_rate": 0.0002984732162821399
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 0.29152542372881357,
|
| 383 |
+
"cpu_mem": 1.523838976,
|
| 384 |
+
"gpu_mem": 4.722537472,
|
| 385 |
+
"loss": 0.6302,
|
| 386 |
+
"grad_norm": 2.8164334297180176,
|
| 387 |
+
"learning_rate": 0.0002982086784124952
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 0.2983050847457627,
|
| 392 |
+
"cpu_mem": 1.524035584,
|
| 393 |
+
"gpu_mem": 4.72268032,
|
| 394 |
+
"loss": 0.6166,
|
| 395 |
+
"grad_norm": 4.41773796081543,
|
| 396 |
+
"learning_rate": 0.00029792315305772796
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 0.3050847457627119,
|
| 401 |
+
"cpu_mem": 1.524232192,
|
| 402 |
+
"gpu_mem": 4.722460672,
|
| 403 |
+
"loss": 0.8092,
|
| 404 |
+
"grad_norm": 9.20035457611084,
|
| 405 |
+
"learning_rate": 0.0002976166806504174
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 0.31186440677966104,
|
| 410 |
+
"cpu_mem": 1.524232192,
|
| 411 |
+
"gpu_mem": 4.72270336,
|
| 412 |
+
"loss": 0.8993,
|
| 413 |
+
"grad_norm": 15.634708404541016,
|
| 414 |
+
"learning_rate": 0.00029728930458940595
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 0.31864406779661014,
|
| 419 |
+
"cpu_mem": 1.5244288,
|
| 420 |
+
"gpu_mem": 4.72242688,
|
| 421 |
+
"loss": 0.8146,
|
| 422 |
+
"grad_norm": 14.326847076416016,
|
| 423 |
+
"learning_rate": 0.00029694107123365385
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 0.3254237288135593,
|
| 428 |
+
"cpu_mem": 1.524625408,
|
| 429 |
+
"gpu_mem": 4.72250368,
|
| 430 |
+
"loss": 0.6621,
|
| 431 |
+
"grad_norm": 7.587663650512695,
|
| 432 |
+
"learning_rate": 0.00029657202989567393
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 0.33220338983050846,
|
| 437 |
+
"cpu_mem": 1.524822016,
|
| 438 |
+
"gpu_mem": 4.722520576,
|
| 439 |
+
"loss": 0.9905,
|
| 440 |
+
"grad_norm": 13.618229866027832,
|
| 441 |
+
"learning_rate": 0.00029618223283454893
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 0.3389830508474576,
|
| 446 |
+
"cpu_mem": 1.524822016,
|
| 447 |
+
"gpu_mem": 4.722459136,
|
| 448 |
+
"loss": 0.8435,
|
| 449 |
+
"grad_norm": 9.786823272705078,
|
| 450 |
+
"learning_rate": 0.00029577173524853123
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 0.34576271186440677,
|
| 455 |
+
"cpu_mem": 1.524822016,
|
| 456 |
+
"gpu_mem": 4.722463744,
|
| 457 |
+
"loss": 0.6079,
|
| 458 |
+
"grad_norm": 3.4515862464904785,
|
| 459 |
+
"learning_rate": 0.0002953405952672261
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 0.3525423728813559,
|
| 464 |
+
"cpu_mem": 1.525018624,
|
| 465 |
+
"gpu_mem": 4.722543616,
|
| 466 |
+
"loss": 0.6645,
|
| 467 |
+
"grad_norm": 2.619760751724243,
|
| 468 |
+
"learning_rate": 0.0002948888739433602
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 0.3593220338983051,
|
| 473 |
+
"cpu_mem": 1.525018624,
|
| 474 |
+
"gpu_mem": 4.722566656,
|
| 475 |
+
"loss": 0.7463,
|
| 476 |
+
"grad_norm": 10.795845031738281,
|
| 477 |
+
"learning_rate": 0.0002944166352441363
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 0.36610169491525424,
|
| 482 |
+
"cpu_mem": 1.525215232,
|
| 483 |
+
"gpu_mem": 4.722494464,
|
| 484 |
+
"loss": 0.6756,
|
| 485 |
+
"grad_norm": 3.1487743854522705,
|
| 486 |
+
"learning_rate": 0.0002939239460421746
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 0.3728813559322034,
|
| 491 |
+
"cpu_mem": 1.52541184,
|
| 492 |
+
"gpu_mem": 4.7227648,
|
| 493 |
+
"loss": 0.6088,
|
| 494 |
+
"grad_norm": 2.5326249599456787,
|
| 495 |
+
"learning_rate": 0.00029341087610604337
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 0.37966101694915255,
|
| 500 |
+
"cpu_mem": 1.525608448,
|
| 501 |
+
"gpu_mem": 4.722551296,
|
| 502 |
+
"loss": 0.8536,
|
| 503 |
+
"grad_norm": 9.061946868896484,
|
| 504 |
+
"learning_rate": 0.00029287749809037904
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 0.3864406779661017,
|
| 509 |
+
"cpu_mem": 1.525608448,
|
| 510 |
+
"gpu_mem": 4.722545152,
|
| 511 |
+
"loss": 0.6668,
|
| 512 |
+
"grad_norm": 4.727614879608154,
|
| 513 |
+
"learning_rate": 0.0002923238875255979
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 0.39322033898305087,
|
| 518 |
+
"cpu_mem": 1.525805056,
|
| 519 |
+
"gpu_mem": 4.722440704,
|
| 520 |
+
"loss": 0.6361,
|
| 521 |
+
"grad_norm": 3.2904443740844727,
|
| 522 |
+
"learning_rate": 0.00029175012280720024
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 0.4,
|
| 527 |
+
"cpu_mem": 1.525805056,
|
| 528 |
+
"gpu_mem": 4.7224576,
|
| 529 |
+
"loss": 0.6821,
|
| 530 |
+
"grad_norm": 1.5682965517044067,
|
| 531 |
+
"learning_rate": 0.000291156285184669
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 0.4067796610169492,
|
| 536 |
+
"cpu_mem": 1.526001664,
|
| 537 |
+
"gpu_mem": 4.722551296,
|
| 538 |
+
"loss": 0.6106,
|
| 539 |
+
"grad_norm": 5.173000812530518,
|
| 540 |
+
"learning_rate": 0.00029054245874996426
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 0.4135593220338983,
|
| 545 |
+
"cpu_mem": 1.526001664,
|
| 546 |
+
"gpu_mem": 4.722562048,
|
| 547 |
+
"loss": 0.6116,
|
| 548 |
+
"grad_norm": 2.3430614471435547,
|
| 549 |
+
"learning_rate": 0.0002899087304256151
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 0.42033898305084744,
|
| 554 |
+
"cpu_mem": 1.526198272,
|
| 555 |
+
"gpu_mem": 4.72254976,
|
| 556 |
+
"loss": 0.7339,
|
| 557 |
+
"grad_norm": 6.764347553253174,
|
| 558 |
+
"learning_rate": 0.0002892551899524109
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 0.4271186440677966,
|
| 563 |
+
"cpu_mem": 1.526198272,
|
| 564 |
+
"gpu_mem": 4.72254208,
|
| 565 |
+
"loss": 0.5956,
|
| 566 |
+
"grad_norm": 6.654665946960449,
|
| 567 |
+
"learning_rate": 0.000288581929876693
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 0.43389830508474575,
|
| 572 |
+
"cpu_mem": 1.526198272,
|
| 573 |
+
"gpu_mem": 4.722471424,
|
| 574 |
+
"loss": 0.6023,
|
| 575 |
+
"grad_norm": 3.233610153198242,
|
| 576 |
+
"learning_rate": 0.0002878890455372498
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 0.4406779661016949,
|
| 581 |
+
"cpu_mem": 1.52639488,
|
| 582 |
+
"gpu_mem": 4.722515968,
|
| 583 |
+
"loss": 0.6724,
|
| 584 |
+
"grad_norm": 5.837782382965088,
|
| 585 |
+
"learning_rate": 0.0002871766350518159
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 0.44745762711864406,
|
| 590 |
+
"cpu_mem": 1.52639488,
|
| 591 |
+
"gpu_mem": 4.722709504,
|
| 592 |
+
"loss": 0.6014,
|
| 593 |
+
"grad_norm": 4.1243205070495605,
|
| 594 |
+
"learning_rate": 0.00028644479930317775
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 0.4542372881355932,
|
| 599 |
+
"cpu_mem": 1.52639488,
|
| 600 |
+
"gpu_mem": 4.7224192,
|
| 601 |
+
"loss": 0.6198,
|
| 602 |
+
"grad_norm": 4.467631816864014,
|
| 603 |
+
"learning_rate": 0.00028569364192488803
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 0.4610169491525424,
|
| 608 |
+
"cpu_mem": 1.526591488,
|
| 609 |
+
"gpu_mem": 4.722386944,
|
| 610 |
+
"loss": 0.6373,
|
| 611 |
+
"grad_norm": 3.114952802658081,
|
| 612 |
+
"learning_rate": 0.00028492326928659045
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 69,
|
| 616 |
+
"epoch": 0.46779661016949153,
|
| 617 |
+
"cpu_mem": 1.526591488,
|
| 618 |
+
"gpu_mem": 4.722452992,
|
| 619 |
+
"loss": 0.6229,
|
| 620 |
+
"grad_norm": 4.952956199645996,
|
| 621 |
+
"learning_rate": 0.00028413379047895665
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"step": 70,
|
| 625 |
+
"epoch": 0.4745762711864407,
|
| 626 |
+
"cpu_mem": 1.526591488,
|
| 627 |
+
"gpu_mem": 4.722446848,
|
| 628 |
+
"loss": 0.6326,
|
| 629 |
+
"grad_norm": 3.901777744293213,
|
| 630 |
+
"learning_rate": 0.0002833253172982385
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"step": 71,
|
| 634 |
+
"epoch": 0.48135593220338985,
|
| 635 |
+
"cpu_mem": 1.526591488,
|
| 636 |
+
"gpu_mem": 4.722675712,
|
| 637 |
+
"loss": 0.525,
|
| 638 |
+
"grad_norm": 4.247412204742432,
|
| 639 |
+
"learning_rate": 0.0002824979642304366
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"step": 72,
|
| 643 |
+
"epoch": 0.488135593220339,
|
| 644 |
+
"cpu_mem": 1.526788096,
|
| 645 |
+
"gpu_mem": 4.722668032,
|
| 646 |
+
"loss": 0.5622,
|
| 647 |
+
"grad_norm": 5.4284772872924805,
|
| 648 |
+
"learning_rate": 0.0002816518484350883
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"step": 73,
|
| 652 |
+
"epoch": 0.49491525423728816,
|
| 653 |
+
"cpu_mem": 1.526984704,
|
| 654 |
+
"gpu_mem": 4.72263424,
|
| 655 |
+
"loss": 0.7902,
|
| 656 |
+
"grad_norm": 8.788385391235352,
|
| 657 |
+
"learning_rate": 0.0002807870897286772
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"step": 74,
|
| 661 |
+
"epoch": 0.5016949152542373,
|
| 662 |
+
"cpu_mem": 1.526984704,
|
| 663 |
+
"gpu_mem": 4.722494464,
|
| 664 |
+
"loss": 0.5742,
|
| 665 |
+
"grad_norm": 8.913131713867188,
|
| 666 |
+
"learning_rate": 0.0002799038105676658
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"step": 75,
|
| 670 |
+
"epoch": 0.5084745762711864,
|
| 671 |
+
"cpu_mem": 1.526984704,
|
| 672 |
+
"gpu_mem": 4.7224192,
|
| 673 |
+
"loss": 0.6146,
|
| 674 |
+
"grad_norm": 8.38949203491211,
|
| 675 |
+
"learning_rate": 0.000279002136031155
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"step": 76,
|
| 679 |
+
"epoch": 0.5152542372881356,
|
| 680 |
+
"cpu_mem": 1.526984704,
|
| 681 |
+
"gpu_mem": 4.722359296,
|
| 682 |
+
"loss": 0.5725,
|
| 683 |
+
"grad_norm": 4.2171711921691895,
|
| 684 |
+
"learning_rate": 0.00027808219380317216
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"step": 77,
|
| 688 |
+
"epoch": 0.5220338983050847,
|
| 689 |
+
"cpu_mem": 1.526984704,
|
| 690 |
+
"gpu_mem": 4.722433024,
|
| 691 |
+
"loss": 0.6514,
|
| 692 |
+
"grad_norm": 6.086472988128662,
|
| 693 |
+
"learning_rate": 0.0002771441141545895
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"step": 78,
|
| 697 |
+
"epoch": 0.5288135593220339,
|
| 698 |
+
"cpu_mem": 1.526984704,
|
| 699 |
+
"gpu_mem": 4.722485248,
|
| 700 |
+
"loss": 0.8012,
|
| 701 |
+
"grad_norm": 10.957486152648926,
|
| 702 |
+
"learning_rate": 0.0002761880299246772
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"step": 79,
|
| 706 |
+
"epoch": 0.535593220338983,
|
| 707 |
+
"cpu_mem": 1.527181312,
|
| 708 |
+
"gpu_mem": 4.722617344,
|
| 709 |
+
"loss": 0.5858,
|
| 710 |
+
"grad_norm": 3.74336576461792,
|
| 711 |
+
"learning_rate": 0.000275214076502292
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"step": 80,
|
| 715 |
+
"epoch": 0.5423728813559322,
|
| 716 |
+
"cpu_mem": 1.527181312,
|
| 717 |
+
"gpu_mem": 4.722508288,
|
| 718 |
+
"loss": 0.6445,
|
| 719 |
+
"grad_norm": 7.315963268280029,
|
| 720 |
+
"learning_rate": 0.0002742223918067056
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"step": 81,
|
| 724 |
+
"epoch": 0.5491525423728814,
|
| 725 |
+
"cpu_mem": 1.527181312,
|
| 726 |
+
"gpu_mem": 4.72238848,
|
| 727 |
+
"loss": 0.6349,
|
| 728 |
+
"grad_norm": 4.654501438140869,
|
| 729 |
+
"learning_rate": 0.00027321311626807374
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"step": 82,
|
| 733 |
+
"epoch": 0.5559322033898305,
|
| 734 |
+
"cpu_mem": 1.527181312,
|
| 735 |
+
"gpu_mem": 4.7224576,
|
| 736 |
+
"loss": 0.6408,
|
| 737 |
+
"grad_norm": 4.707687854766846,
|
| 738 |
+
"learning_rate": 0.0002721863928075503
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"step": 83,
|
| 742 |
+
"epoch": 0.5627118644067797,
|
| 743 |
+
"cpu_mem": 1.527181312,
|
| 744 |
+
"gpu_mem": 4.72255744,
|
| 745 |
+
"loss": 0.7144,
|
| 746 |
+
"grad_norm": 7.812034606933594,
|
| 747 |
+
"learning_rate": 0.000271142366817049
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"step": 84,
|
| 751 |
+
"epoch": 0.5694915254237288,
|
| 752 |
+
"cpu_mem": 1.527181312,
|
| 753 |
+
"gpu_mem": 4.722520576,
|
| 754 |
+
"loss": 0.5678,
|
| 755 |
+
"grad_norm": 3.4686105251312256,
|
| 756 |
+
"learning_rate": 0.00027008118613865406
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"step": 85,
|
| 760 |
+
"epoch": 0.576271186440678,
|
| 761 |
+
"cpu_mem": 1.527181312,
|
| 762 |
+
"gpu_mem": 4.722552832,
|
| 763 |
+
"loss": 0.5893,
|
| 764 |
+
"grad_norm": 5.409703254699707,
|
| 765 |
+
"learning_rate": 0.00026900300104368524
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"step": 86,
|
| 769 |
+
"epoch": 0.5830508474576271,
|
| 770 |
+
"cpu_mem": 1.52737792,
|
| 771 |
+
"gpu_mem": 4.72250368,
|
| 772 |
+
"loss": 0.6168,
|
| 773 |
+
"grad_norm": 7.087602138519287,
|
| 774 |
+
"learning_rate": 0.00026790796421141813
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"step": 87,
|
| 778 |
+
"epoch": 0.5898305084745763,
|
| 779 |
+
"cpu_mem": 1.52737792,
|
| 780 |
+
"gpu_mem": 4.72251136,
|
| 781 |
+
"loss": 0.5399,
|
| 782 |
+
"grad_norm": 5.04774284362793,
|
| 783 |
+
"learning_rate": 0.00026679623070746325
|
| 784 |
+
},
|
| 785 |
+
{
|
| 786 |
+
"step": 88,
|
| 787 |
+
"epoch": 0.5966101694915255,
|
| 788 |
+
"cpu_mem": 1.52737792,
|
| 789 |
+
"gpu_mem": 4.722655744,
|
| 790 |
+
"loss": 0.512,
|
| 791 |
+
"grad_norm": 4.10666561126709,
|
| 792 |
+
"learning_rate": 0.0002656679579618081
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"step": 89,
|
| 796 |
+
"epoch": 0.6033898305084746,
|
| 797 |
+
"cpu_mem": 1.52737792,
|
| 798 |
+
"gpu_mem": 4.722437632,
|
| 799 |
+
"loss": 0.5958,
|
| 800 |
+
"grad_norm": 5.078695774078369,
|
| 801 |
+
"learning_rate": 0.0002645233057465235
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"step": 90,
|
| 805 |
+
"epoch": 0.6101694915254238,
|
| 806 |
+
"cpu_mem": 1.527574528,
|
| 807 |
+
"gpu_mem": 4.722491392,
|
| 808 |
+
"loss": 0.5342,
|
| 809 |
+
"grad_norm": 4.536576747894287,
|
| 810 |
+
"learning_rate": 0.00026336243615313873
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"step": 91,
|
| 814 |
+
"epoch": 0.6169491525423729,
|
| 815 |
+
"cpu_mem": 1.527574528,
|
| 816 |
+
"gpu_mem": 4.722459136,
|
| 817 |
+
"loss": 0.4877,
|
| 818 |
+
"grad_norm": 4.9868693351745605,
|
| 819 |
+
"learning_rate": 0.00026218551356968814
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"step": 92,
|
| 823 |
+
"epoch": 0.6237288135593221,
|
| 824 |
+
"cpu_mem": 1.527574528,
|
| 825 |
+
"gpu_mem": 4.722540544,
|
| 826 |
+
"loss": 0.7501,
|
| 827 |
+
"grad_norm": 11.705029487609863,
|
| 828 |
+
"learning_rate": 0.00026099270465743254
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"step": 93,
|
| 832 |
+
"epoch": 0.6305084745762712,
|
| 833 |
+
"cpu_mem": 1.527574528,
|
| 834 |
+
"gpu_mem": 4.722343936,
|
| 835 |
+
"loss": 0.7142,
|
| 836 |
+
"grad_norm": 9.641088485717773,
|
| 837 |
+
"learning_rate": 0.0002597841783272588
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"step": 94,
|
| 841 |
+
"epoch": 0.6372881355932203,
|
| 842 |
+
"cpu_mem": 1.527574528,
|
| 843 |
+
"gpu_mem": 4.7224576,
|
| 844 |
+
"loss": 0.5724,
|
| 845 |
+
"grad_norm": 6.110875129699707,
|
| 846 |
+
"learning_rate": 0.0002585601057157605
|
| 847 |
+
},
|
| 848 |
+
{
|
| 849 |
+
"step": 95,
|
| 850 |
+
"epoch": 0.6440677966101694,
|
| 851 |
+
"cpu_mem": 1.527574528,
|
| 852 |
+
"gpu_mem": 4.722477568,
|
| 853 |
+
"loss": 0.6165,
|
| 854 |
+
"grad_norm": 4.48118257522583,
|
| 855 |
+
"learning_rate": 0.00025732066016100394
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"step": 96,
|
| 859 |
+
"epoch": 0.6508474576271186,
|
| 860 |
+
"cpu_mem": 1.527574528,
|
| 861 |
+
"gpu_mem": 4.722515968,
|
| 862 |
+
"loss": 0.5351,
|
| 863 |
+
"grad_norm": 2.5242867469787598,
|
| 864 |
+
"learning_rate": 0.00025606601717798207
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"step": 97,
|
| 868 |
+
"epoch": 0.6576271186440678,
|
| 869 |
+
"cpu_mem": 1.527574528,
|
| 870 |
+
"gpu_mem": 4.722500608,
|
| 871 |
+
"loss": 0.7382,
|
| 872 |
+
"grad_norm": 7.024951457977295,
|
| 873 |
+
"learning_rate": 0.0002547963544337602
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"step": 98,
|
| 877 |
+
"epoch": 0.6644067796610169,
|
| 878 |
+
"cpu_mem": 1.527574528,
|
| 879 |
+
"gpu_mem": 4.722413056,
|
| 880 |
+
"loss": 0.6812,
|
| 881 |
+
"grad_norm": 5.02927827835083,
|
| 882 |
+
"learning_rate": 0.0002535118517223168
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"step": 99,
|
| 886 |
+
"epoch": 0.6711864406779661,
|
| 887 |
+
"cpu_mem": 1.527574528,
|
| 888 |
+
"gpu_mem": 4.722362368,
|
| 889 |
+
"loss": 0.5979,
|
| 890 |
+
"grad_norm": 5.04942512512207,
|
| 891 |
+
"learning_rate": 0.00025221269093908365
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"step": 100,
|
| 895 |
+
"epoch": 0.6779661016949152,
|
| 896 |
+
"cpu_mem": 1.527574528,
|
| 897 |
+
"gpu_mem": 4.722479104,
|
| 898 |
+
"loss": 0.6131,
|
| 899 |
+
"grad_norm": 6.651904106140137,
|
| 900 |
+
"learning_rate": 0.0002508990560551879
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"step": 101,
|
| 904 |
+
"epoch": 0.6847457627118644,
|
| 905 |
+
"cpu_mem": 1.527574528,
|
| 906 |
+
"gpu_mem": 4.72251136,
|
| 907 |
+
"loss": 0.6984,
|
| 908 |
+
"grad_norm": 3.8917150497436523,
|
| 909 |
+
"learning_rate": 0.0002495711330914001
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"step": 102,
|
| 913 |
+
"epoch": 0.6915254237288135,
|
| 914 |
+
"cpu_mem": 1.527574528,
|
| 915 |
+
"gpu_mem": 4.722545152,
|
| 916 |
+
"loss": 0.5914,
|
| 917 |
+
"grad_norm": 2.322913885116577,
|
| 918 |
+
"learning_rate": 0.00024822911009179276
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"step": 103,
|
| 922 |
+
"epoch": 0.6983050847457627,
|
| 923 |
+
"cpu_mem": 1.527574528,
|
| 924 |
+
"gpu_mem": 4.72259584,
|
| 925 |
+
"loss": 0.6322,
|
| 926 |
+
"grad_norm": 3.5212390422821045,
|
| 927 |
+
"learning_rate": 0.0002468731770971113
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"step": 104,
|
| 931 |
+
"epoch": 0.7050847457627119,
|
| 932 |
+
"cpu_mem": 1.527574528,
|
| 933 |
+
"gpu_mem": 4.722500608,
|
| 934 |
+
"loss": 0.5647,
|
| 935 |
+
"grad_norm": 3.077224016189575,
|
| 936 |
+
"learning_rate": 0.0002455035261178632
|
| 937 |
+
},
|
| 938 |
+
{
|
| 939 |
+
"step": 105,
|
| 940 |
+
"epoch": 0.711864406779661,
|
| 941 |
+
"cpu_mem": 1.527574528,
|
| 942 |
+
"gpu_mem": 4.722601984,
|
| 943 |
+
"loss": 0.5667,
|
| 944 |
+
"grad_norm": 3.436150550842285,
|
| 945 |
+
"learning_rate": 0.0002441203511071278
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"step": 106,
|
| 949 |
+
"epoch": 0.7186440677966102,
|
| 950 |
+
"cpu_mem": 1.527574528,
|
| 951 |
+
"gpu_mem": 4.722552832,
|
| 952 |
+
"loss": 0.5603,
|
| 953 |
+
"grad_norm": 5.609046936035156,
|
| 954 |
+
"learning_rate": 0.00024272384793309077
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"step": 107,
|
| 958 |
+
"epoch": 0.7254237288135593,
|
| 959 |
+
"cpu_mem": 1.527574528,
|
| 960 |
+
"gpu_mem": 4.722440704,
|
| 961 |
+
"loss": 0.4929,
|
| 962 |
+
"grad_norm": 4.49297571182251,
|
| 963 |
+
"learning_rate": 0.00024131421435130807
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"step": 108,
|
| 967 |
+
"epoch": 0.7322033898305085,
|
| 968 |
+
"cpu_mem": 1.527574528,
|
| 969 |
+
"gpu_mem": 4.722625024,
|
| 970 |
+
"loss": 0.5647,
|
| 971 |
+
"grad_norm": 5.14377498626709,
|
| 972 |
+
"learning_rate": 0.00023989164997670202
|
| 973 |
+
},
|
| 974 |
+
{
|
| 975 |
+
"step": 109,
|
| 976 |
+
"epoch": 0.7389830508474576,
|
| 977 |
+
"cpu_mem": 1.527574528,
|
| 978 |
+
"gpu_mem": 4.722479104,
|
| 979 |
+
"loss": 0.591,
|
| 980 |
+
"grad_norm": 4.947544574737549,
|
| 981 |
+
"learning_rate": 0.0002384563562552943
|
| 982 |
+
},
|
| 983 |
+
{
|
| 984 |
+
"step": 110,
|
| 985 |
+
"epoch": 0.7457627118644068,
|
| 986 |
+
"cpu_mem": 1.527574528,
|
| 987 |
+
"gpu_mem": 4.722482176,
|
| 988 |
+
"loss": 0.5745,
|
| 989 |
+
"grad_norm": 4.043002605438232,
|
| 990 |
+
"learning_rate": 0.0002370085364356797
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"step": 111,
|
| 994 |
+
"epoch": 0.752542372881356,
|
| 995 |
+
"cpu_mem": 1.527574528,
|
| 996 |
+
"gpu_mem": 4.722451456,
|
| 997 |
+
"loss": 0.5688,
|
| 998 |
+
"grad_norm": 6.002414703369141,
|
| 999 |
+
"learning_rate": 0.0002355483955402446
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"step": 112,
|
| 1003 |
+
"epoch": 0.7593220338983051,
|
| 1004 |
+
"cpu_mem": 1.527771136,
|
| 1005 |
+
"gpu_mem": 4.722497536,
|
| 1006 |
+
"loss": 0.6035,
|
| 1007 |
+
"grad_norm": 5.334151268005371,
|
| 1008 |
+
"learning_rate": 0.00023407614033613407
|
| 1009 |
+
},
|
| 1010 |
+
{
|
| 1011 |
+
"step": 113,
|
| 1012 |
+
"epoch": 0.7661016949152543,
|
| 1013 |
+
"cpu_mem": 1.527771136,
|
| 1014 |
+
"gpu_mem": 4.72248832,
|
| 1015 |
+
"loss": 0.5538,
|
| 1016 |
+
"grad_norm": 4.7518310546875,
|
| 1017 |
+
"learning_rate": 0.0002325919793059723
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"step": 114,
|
| 1021 |
+
"epoch": 0.7728813559322034,
|
| 1022 |
+
"cpu_mem": 1.527771136,
|
| 1023 |
+
"gpu_mem": 4.722469888,
|
| 1024 |
+
"loss": 0.4641,
|
| 1025 |
+
"grad_norm": 4.555304527282715,
|
| 1026 |
+
"learning_rate": 0.00023109612261833963
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"step": 115,
|
| 1030 |
+
"epoch": 0.7796610169491526,
|
| 1031 |
+
"cpu_mem": 1.527967744,
|
| 1032 |
+
"gpu_mem": 4.722545152,
|
| 1033 |
+
"loss": 0.6187,
|
| 1034 |
+
"grad_norm": 5.561794281005859,
|
| 1035 |
+
"learning_rate": 0.0002295887820980112
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"step": 116,
|
| 1039 |
+
"epoch": 0.7864406779661017,
|
| 1040 |
+
"cpu_mem": 1.527967744,
|
| 1041 |
+
"gpu_mem": 4.72246528,
|
| 1042 |
+
"loss": 0.5549,
|
| 1043 |
+
"grad_norm": 4.324403285980225,
|
| 1044 |
+
"learning_rate": 0.0002280701711959608
|
| 1045 |
+
},
|
| 1046 |
+
{
|
| 1047 |
+
"step": 117,
|
| 1048 |
+
"epoch": 0.7932203389830509,
|
| 1049 |
+
"cpu_mem": 1.527967744,
|
| 1050 |
+
"gpu_mem": 4.722356224,
|
| 1051 |
+
"loss": 0.4801,
|
| 1052 |
+
"grad_norm": 2.37454891204834,
|
| 1053 |
+
"learning_rate": 0.00022654050495913495
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"step": 118,
|
| 1057 |
+
"epoch": 0.8,
|
| 1058 |
+
"cpu_mem": 1.527967744,
|
| 1059 |
+
"gpu_mem": 4.722594304,
|
| 1060 |
+
"loss": 0.6734,
|
| 1061 |
+
"grad_norm": 4.633813858032227,
|
| 1062 |
+
"learning_rate": 0.000225
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"step": 119,
|
| 1066 |
+
"epoch": 0.8067796610169492,
|
| 1067 |
+
"cpu_mem": 1.527967744,
|
| 1068 |
+
"gpu_mem": 4.7227648,
|
| 1069 |
+
"loss": 0.4906,
|
| 1070 |
+
"grad_norm": 4.976076126098633,
|
| 1071 |
+
"learning_rate": 0.00022344887446586865
|
| 1072 |
+
},
|
| 1073 |
+
{
|
| 1074 |
+
"step": 120,
|
| 1075 |
+
"epoch": 0.8135593220338984,
|
| 1076 |
+
"cpu_mem": 1.527967744,
|
| 1077 |
+
"gpu_mem": 4.722497536,
|
| 1078 |
+
"loss": 0.4778,
|
| 1079 |
+
"grad_norm": 4.014054775238037,
|
| 1080 |
+
"learning_rate": 0.00022188734800800852
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"step": 121,
|
| 1084 |
+
"epoch": 0.8203389830508474,
|
| 1085 |
+
"cpu_mem": 1.527967744,
|
| 1086 |
+
"gpu_mem": 4.722525184,
|
| 1087 |
+
"loss": 0.5773,
|
| 1088 |
+
"grad_norm": 4.499019622802734,
|
| 1089 |
+
"learning_rate": 0.00022031564175053754
|
| 1090 |
+
},
|
| 1091 |
+
{
|
| 1092 |
+
"step": 122,
|
| 1093 |
+
"epoch": 0.8271186440677966,
|
| 1094 |
+
"cpu_mem": 1.527967744,
|
| 1095 |
+
"gpu_mem": 4.722575872,
|
| 1096 |
+
"loss": 0.4382,
|
| 1097 |
+
"grad_norm": 4.889860153198242,
|
| 1098 |
+
"learning_rate": 0.00021873397825911153
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"step": 123,
|
| 1102 |
+
"epoch": 0.8338983050847457,
|
| 1103 |
+
"cpu_mem": 1.527967744,
|
| 1104 |
+
"gpu_mem": 4.722385408,
|
| 1105 |
+
"loss": 0.5738,
|
| 1106 |
+
"grad_norm": 6.226327419281006,
|
| 1107 |
+
"learning_rate": 0.00021714258150940685
|
| 1108 |
+
},
|
| 1109 |
+
{
|
| 1110 |
+
"step": 124,
|
| 1111 |
+
"epoch": 0.8406779661016949,
|
| 1112 |
+
"cpu_mem": 1.527967744,
|
| 1113 |
+
"gpu_mem": 4.722827776,
|
| 1114 |
+
"loss": 0.5588,
|
| 1115 |
+
"grad_norm": 5.272500514984131,
|
| 1116 |
+
"learning_rate": 0.0002155416768554039
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"step": 125,
|
| 1120 |
+
"epoch": 0.847457627118644,
|
| 1121 |
+
"cpu_mem": 1.527967744,
|
| 1122 |
+
"gpu_mem": 4.722554368,
|
| 1123 |
+
"loss": 0.6352,
|
| 1124 |
+
"grad_norm": 8.107681274414062,
|
| 1125 |
+
"learning_rate": 0.00021393149099747523
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"step": 126,
|
| 1129 |
+
"epoch": 0.8542372881355932,
|
| 1130 |
+
"cpu_mem": 1.527967744,
|
| 1131 |
+
"gpu_mem": 4.722437632,
|
| 1132 |
+
"loss": 0.5789,
|
| 1133 |
+
"grad_norm": 6.859676837921143,
|
| 1134 |
+
"learning_rate": 0.00021231225195028297
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"step": 127,
|
| 1138 |
+
"epoch": 0.8610169491525423,
|
| 1139 |
+
"cpu_mem": 1.530130432,
|
| 1140 |
+
"gpu_mem": 4.722876928,
|
| 1141 |
+
"loss": 0.5435,
|
| 1142 |
+
"grad_norm": 4.632746696472168,
|
| 1143 |
+
"learning_rate": 0.00021068418901049025
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"step": 128,
|
| 1147 |
+
"epoch": 0.8677966101694915,
|
| 1148 |
+
"cpu_mem": 1.530130432,
|
| 1149 |
+
"gpu_mem": 4.722652672,
|
| 1150 |
+
"loss": 0.4812,
|
| 1151 |
+
"grad_norm": 3.7110488414764404,
|
| 1152 |
+
"learning_rate": 0.0002090475327242912
|
| 1153 |
+
},
|
| 1154 |
+
{
|
| 1155 |
+
"step": 129,
|
| 1156 |
+
"epoch": 0.8745762711864407,
|
| 1157 |
+
"cpu_mem": 1.530130432,
|
| 1158 |
+
"gpu_mem": 4.722692608,
|
| 1159 |
+
"loss": 0.5938,
|
| 1160 |
+
"grad_norm": 6.665708541870117,
|
| 1161 |
+
"learning_rate": 0.00020740251485476345
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"step": 130,
|
| 1165 |
+
"epoch": 0.8813559322033898,
|
| 1166 |
+
"cpu_mem": 1.530130432,
|
| 1167 |
+
"gpu_mem": 4.722474496,
|
| 1168 |
+
"loss": 0.5986,
|
| 1169 |
+
"grad_norm": 3.7718665599823,
|
| 1170 |
+
"learning_rate": 0.0002057493683490491
|
| 1171 |
+
},
|
| 1172 |
+
{
|
| 1173 |
+
"step": 131,
|
| 1174 |
+
"epoch": 0.888135593220339,
|
| 1175 |
+
"cpu_mem": 1.530130432,
|
| 1176 |
+
"gpu_mem": 4.72260352,
|
| 1177 |
+
"loss": 0.5023,
|
| 1178 |
+
"grad_norm": 3.4843013286590576,
|
| 1179 |
+
"learning_rate": 0.00020408832730536746
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"step": 132,
|
| 1183 |
+
"epoch": 0.8949152542372881,
|
| 1184 |
+
"cpu_mem": 1.530130432,
|
| 1185 |
+
"gpu_mem": 4.722684928,
|
| 1186 |
+
"loss": 0.5568,
|
| 1187 |
+
"grad_norm": 6.419014930725098,
|
| 1188 |
+
"learning_rate": 0.00020241962693986476
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"step": 133,
|
| 1192 |
+
"epoch": 0.9016949152542373,
|
| 1193 |
+
"cpu_mem": 1.530130432,
|
| 1194 |
+
"gpu_mem": 4.722468352,
|
| 1195 |
+
"loss": 0.4996,
|
| 1196 |
+
"grad_norm": 4.196140766143799,
|
| 1197 |
+
"learning_rate": 0.0002007435035533061
|
| 1198 |
+
},
|
| 1199 |
+
{
|
| 1200 |
+
"step": 134,
|
| 1201 |
+
"epoch": 0.9084745762711864,
|
| 1202 |
+
"cpu_mem": 1.530130432,
|
| 1203 |
+
"gpu_mem": 4.722601984,
|
| 1204 |
+
"loss": 0.4804,
|
| 1205 |
+
"grad_norm": 3.8392629623413086,
|
| 1206 |
+
"learning_rate": 0.00019906019449761325
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"step": 135,
|
| 1210 |
+
"epoch": 0.9152542372881356,
|
| 1211 |
+
"cpu_mem": 1.530130432,
|
| 1212 |
+
"gpu_mem": 4.722625024,
|
| 1213 |
+
"loss": 0.664,
|
| 1214 |
+
"grad_norm": 9.533957481384277,
|
| 1215 |
+
"learning_rate": 0.00019736993814225374
|
| 1216 |
+
},
|
| 1217 |
+
{
|
| 1218 |
+
"step": 136,
|
| 1219 |
+
"epoch": 0.9220338983050848,
|
| 1220 |
+
"cpu_mem": 1.530130432,
|
| 1221 |
+
"gpu_mem": 4.722462208,
|
| 1222 |
+
"loss": 0.526,
|
| 1223 |
+
"grad_norm": 6.26369047164917,
|
| 1224 |
+
"learning_rate": 0.00019567297384048604
|
| 1225 |
+
},
|
| 1226 |
+
{
|
| 1227 |
+
"step": 137,
|
| 1228 |
+
"epoch": 0.9288135593220339,
|
| 1229 |
+
"cpu_mem": 1.530130432,
|
| 1230 |
+
"gpu_mem": 4.7223424,
|
| 1231 |
+
"loss": 0.5442,
|
| 1232 |
+
"grad_norm": 5.538697719573975,
|
| 1233 |
+
"learning_rate": 0.0001939695418954653
|
| 1234 |
+
},
|
| 1235 |
+
{
|
| 1236 |
+
"step": 138,
|
| 1237 |
+
"epoch": 0.9355932203389831,
|
| 1238 |
+
"cpu_mem": 1.530130432,
|
| 1239 |
+
"gpu_mem": 4.722523648,
|
| 1240 |
+
"loss": 0.4846,
|
| 1241 |
+
"grad_norm": 3.0243053436279297,
|
| 1242 |
+
"learning_rate": 0.00019225988352621445
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"step": 139,
|
| 1246 |
+
"epoch": 0.9423728813559322,
|
| 1247 |
+
"cpu_mem": 1.530130432,
|
| 1248 |
+
"gpu_mem": 4.722422272,
|
| 1249 |
+
"loss": 0.5617,
|
| 1250 |
+
"grad_norm": 5.228503704071045,
|
| 1251 |
+
"learning_rate": 0.00019054424083346592
|
| 1252 |
+
},
|
| 1253 |
+
{
|
| 1254 |
+
"step": 140,
|
| 1255 |
+
"epoch": 0.9491525423728814,
|
| 1256 |
+
"cpu_mem": 1.530130432,
|
| 1257 |
+
"gpu_mem": 4.722474496,
|
| 1258 |
+
"loss": 0.5831,
|
| 1259 |
+
"grad_norm": 6.361831188201904,
|
| 1260 |
+
"learning_rate": 0.0001888228567653781
|
| 1261 |
+
},
|
| 1262 |
+
{
|
| 1263 |
+
"step": 141,
|
| 1264 |
+
"epoch": 0.9559322033898305,
|
| 1265 |
+
"cpu_mem": 1.530130432,
|
| 1266 |
+
"gpu_mem": 4.722506752,
|
| 1267 |
+
"loss": 0.5879,
|
| 1268 |
+
"grad_norm": 5.449702262878418,
|
| 1269 |
+
"learning_rate": 0.0001870959750831323
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"step": 142,
|
| 1273 |
+
"epoch": 0.9627118644067797,
|
| 1274 |
+
"cpu_mem": 1.530130432,
|
| 1275 |
+
"gpu_mem": 4.722646528,
|
| 1276 |
+
"loss": 0.4928,
|
| 1277 |
+
"grad_norm": 4.112429141998291,
|
| 1278 |
+
"learning_rate": 0.0001853638403264141
|
| 1279 |
+
},
|
| 1280 |
+
{
|
| 1281 |
+
"step": 143,
|
| 1282 |
+
"epoch": 0.9694915254237289,
|
| 1283 |
+
"cpu_mem": 1.530130432,
|
| 1284 |
+
"gpu_mem": 4.722629632,
|
| 1285 |
+
"loss": 0.6331,
|
| 1286 |
+
"grad_norm": 4.983449459075928,
|
| 1287 |
+
"learning_rate": 0.00018362669777878453
|
| 1288 |
+
},
|
| 1289 |
+
{
|
| 1290 |
+
"step": 144,
|
| 1291 |
+
"epoch": 0.976271186440678,
|
| 1292 |
+
"cpu_mem": 1.530130432,
|
| 1293 |
+
"gpu_mem": 4.722821632,
|
| 1294 |
+
"loss": 0.6526,
|
| 1295 |
+
"grad_norm": 6.455733299255371,
|
| 1296 |
+
"learning_rate": 0.00018188479343294648
|
| 1297 |
+
},
|
| 1298 |
+
{
|
| 1299 |
+
"step": 145,
|
| 1300 |
+
"epoch": 0.9830508474576272,
|
| 1301 |
+
"cpu_mem": 1.530130432,
|
| 1302 |
+
"gpu_mem": 4.722532864,
|
| 1303 |
+
"loss": 0.505,
|
| 1304 |
+
"grad_norm": 3.729292392730713,
|
| 1305 |
+
"learning_rate": 0.0001801383739559098
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"step": 146,
|
| 1309 |
+
"epoch": 0.9898305084745763,
|
| 1310 |
+
"cpu_mem": 1.530130432,
|
| 1311 |
+
"gpu_mem": 4.722568192,
|
| 1312 |
+
"loss": 0.5684,
|
| 1313 |
+
"grad_norm": 3.6822805404663086,
|
| 1314 |
+
"learning_rate": 0.0001783876866540615
|
| 1315 |
+
},
|
| 1316 |
+
{
|
| 1317 |
+
"step": 147,
|
| 1318 |
+
"epoch": 0.9966101694915255,
|
| 1319 |
+
"cpu_mem": 1.530130432,
|
| 1320 |
+
"gpu_mem": 4.722466816,
|
| 1321 |
+
"loss": 0.5114,
|
| 1322 |
+
"grad_norm": 3.8303308486938477,
|
| 1323 |
+
"learning_rate": 0.00017663297943814552
|
| 1324 |
+
},
|
| 1325 |
+
{
|
| 1326 |
+
"step": 148,
|
| 1327 |
+
"epoch": 1.0033898305084745,
|
| 1328 |
+
"cpu_mem": 1.530130432,
|
| 1329 |
+
"gpu_mem": 4.824187392,
|
| 1330 |
+
"loss": 0.7086,
|
| 1331 |
+
"grad_norm": 4.2697529792785645,
|
| 1332 |
+
"learning_rate": 0.0001748745007881561
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"step": 149,
|
| 1336 |
+
"epoch": 1.0101694915254238,
|
| 1337 |
+
"cpu_mem": 1.530130432,
|
| 1338 |
+
"gpu_mem": 4.82412288,
|
| 1339 |
+
"loss": 0.4824,
|
| 1340 |
+
"grad_norm": 3.1086933612823486,
|
| 1341 |
+
"learning_rate": 0.00017311249971815185
|
| 1342 |
+
},
|
| 1343 |
+
{
|
| 1344 |
+
"step": 150,
|
| 1345 |
+
"epoch": 1.0169491525423728,
|
| 1346 |
+
"cpu_mem": 1.530130432,
|
| 1347 |
+
"gpu_mem": 4.823960064,
|
| 1348 |
+
"loss": 0.4912,
|
| 1349 |
+
"grad_norm": 3.0585293769836426,
|
| 1350 |
+
"learning_rate": 0.00017134722574099276
|
| 1351 |
+
},
|
| 1352 |
+
{
|
| 1353 |
+
"step": 151,
|
| 1354 |
+
"epoch": 1.023728813559322,
|
| 1355 |
+
"cpu_mem": 1.530130432,
|
| 1356 |
+
"gpu_mem": 4.824032256,
|
| 1357 |
+
"loss": 0.5404,
|
| 1358 |
+
"grad_norm": 4.751421928405762,
|
| 1359 |
+
"learning_rate": 0.00016957892883300775
|
| 1360 |
+
},
|
| 1361 |
+
{
|
| 1362 |
+
"step": 152,
|
| 1363 |
+
"epoch": 1.0305084745762711,
|
| 1364 |
+
"cpu_mem": 1.530130432,
|
| 1365 |
+
"gpu_mem": 4.824067584,
|
| 1366 |
+
"loss": 0.4155,
|
| 1367 |
+
"grad_norm": 3.7576749324798584,
|
| 1368 |
+
"learning_rate": 0.00016780785939859576
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"step": 153,
|
| 1372 |
+
"epoch": 1.0372881355932204,
|
| 1373 |
+
"cpu_mem": 1.530130432,
|
| 1374 |
+
"gpu_mem": 4.82409216,
|
| 1375 |
+
"loss": 0.5912,
|
| 1376 |
+
"grad_norm": 4.714956283569336,
|
| 1377 |
+
"learning_rate": 0.00016603426823476693
|
| 1378 |
+
},
|
| 1379 |
+
{
|
| 1380 |
+
"step": 154,
|
| 1381 |
+
"epoch": 1.0440677966101695,
|
| 1382 |
+
"cpu_mem": 1.530130432,
|
| 1383 |
+
"gpu_mem": 4.82405376,
|
| 1384 |
+
"loss": 0.4068,
|
| 1385 |
+
"grad_norm": 6.185878753662109,
|
| 1386 |
+
"learning_rate": 0.00016425840649562736
|
| 1387 |
+
},
|
| 1388 |
+
{
|
| 1389 |
+
"step": 155,
|
| 1390 |
+
"epoch": 1.0508474576271187,
|
| 1391 |
+
"cpu_mem": 1.530130432,
|
| 1392 |
+
"gpu_mem": 4.824274944,
|
| 1393 |
+
"loss": 0.5478,
|
| 1394 |
+
"grad_norm": 5.617146015167236,
|
| 1395 |
+
"learning_rate": 0.00016248052565681436
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"step": 156,
|
| 1399 |
+
"epoch": 1.0576271186440678,
|
| 1400 |
+
"cpu_mem": 1.530130432,
|
| 1401 |
+
"gpu_mem": 4.824182784,
|
| 1402 |
+
"loss": 0.3949,
|
| 1403 |
+
"grad_norm": 3.8420050144195557,
|
| 1404 |
+
"learning_rate": 0.00016070087747988482
|
| 1405 |
+
},
|
| 1406 |
+
{
|
| 1407 |
+
"step": 157,
|
| 1408 |
+
"epoch": 1.064406779661017,
|
| 1409 |
+
"cpu_mem": 1.530130432,
|
| 1410 |
+
"gpu_mem": 4.824089088,
|
| 1411 |
+
"loss": 0.3574,
|
| 1412 |
+
"grad_norm": 3.955946683883667,
|
| 1413 |
+
"learning_rate": 0.00015891971397666464
|
| 1414 |
+
},
|
| 1415 |
+
{
|
| 1416 |
+
"step": 158,
|
| 1417 |
+
"epoch": 1.071186440677966,
|
| 1418 |
+
"cpu_mem": 1.530130432,
|
| 1419 |
+
"gpu_mem": 4.82401536,
|
| 1420 |
+
"loss": 0.478,
|
| 1421 |
+
"grad_norm": 5.138974189758301,
|
| 1422 |
+
"learning_rate": 0.00015713728737356137
|
| 1423 |
+
},
|
| 1424 |
+
{
|
| 1425 |
+
"step": 159,
|
| 1426 |
+
"epoch": 1.0779661016949154,
|
| 1427 |
+
"cpu_mem": 1.530130432,
|
| 1428 |
+
"gpu_mem": 4.824364032,
|
| 1429 |
+
"loss": 0.3684,
|
| 1430 |
+
"grad_norm": 5.789059638977051,
|
| 1431 |
+
"learning_rate": 0.00015535385007584706
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"step": 160,
|
| 1435 |
+
"epoch": 1.0847457627118644,
|
| 1436 |
+
"cpu_mem": 1.530130432,
|
| 1437 |
+
"gpu_mem": 4.823958528,
|
| 1438 |
+
"loss": 0.417,
|
| 1439 |
+
"grad_norm": 4.242332935333252,
|
| 1440 |
+
"learning_rate": 0.0001535696546319161
|
| 1441 |
+
},
|
| 1442 |
+
{
|
| 1443 |
+
"step": 161,
|
| 1444 |
+
"epoch": 1.0915254237288137,
|
| 1445 |
+
"cpu_mem": 1.530130432,
|
| 1446 |
+
"gpu_mem": 4.823904768,
|
| 1447 |
+
"loss": 0.4055,
|
| 1448 |
+
"grad_norm": 5.762329578399658,
|
| 1449 |
+
"learning_rate": 0.00015178495369752213
|
| 1450 |
+
},
|
| 1451 |
+
{
|
| 1452 |
+
"step": 162,
|
| 1453 |
+
"epoch": 1.0983050847457627,
|
| 1454 |
+
"cpu_mem": 1.530130432,
|
| 1455 |
+
"gpu_mem": 4.824680448,
|
| 1456 |
+
"loss": 0.3548,
|
| 1457 |
+
"grad_norm": 4.2064528465271,
|
| 1458 |
+
"learning_rate": 0.00015
|
| 1459 |
+
},
|
| 1460 |
+
{
|
| 1461 |
+
"step": 163,
|
| 1462 |
+
"epoch": 1.1050847457627118,
|
| 1463 |
+
"cpu_mem": 1.530130432,
|
| 1464 |
+
"gpu_mem": 4.824156672,
|
| 1465 |
+
"loss": 0.4353,
|
| 1466 |
+
"grad_norm": 7.034615993499756,
|
| 1467 |
+
"learning_rate": 0.00014821504630247785
|
| 1468 |
+
},
|
| 1469 |
+
{
|
| 1470 |
+
"step": 164,
|
| 1471 |
+
"epoch": 1.111864406779661,
|
| 1472 |
+
"cpu_mem": 1.530130432,
|
| 1473 |
+
"gpu_mem": 4.82406912,
|
| 1474 |
+
"loss": 0.4521,
|
| 1475 |
+
"grad_norm": 5.314269065856934,
|
| 1476 |
+
"learning_rate": 0.00014643034536808387
|
| 1477 |
+
},
|
| 1478 |
+
{
|
| 1479 |
+
"step": 165,
|
| 1480 |
+
"epoch": 1.11864406779661,
|
| 1481 |
+
"cpu_mem": 1.530130432,
|
| 1482 |
+
"gpu_mem": 4.824018432,
|
| 1483 |
+
"loss": 0.4232,
|
| 1484 |
+
"grad_norm": 4.84352970123291,
|
| 1485 |
+
"learning_rate": 0.00014464614992415294
|
| 1486 |
+
},
|
| 1487 |
+
{
|
| 1488 |
+
"step": 166,
|
| 1489 |
+
"epoch": 1.1254237288135593,
|
| 1490 |
+
"cpu_mem": 1.530130432,
|
| 1491 |
+
"gpu_mem": 4.824113664,
|
| 1492 |
+
"loss": 0.2749,
|
| 1493 |
+
"grad_norm": 6.184368133544922,
|
| 1494 |
+
"learning_rate": 0.00014286271262643866
|
| 1495 |
+
},
|
| 1496 |
+
{
|
| 1497 |
+
"step": 167,
|
| 1498 |
+
"epoch": 1.1322033898305084,
|
| 1499 |
+
"cpu_mem": 1.530130432,
|
| 1500 |
+
"gpu_mem": 4.82403072,
|
| 1501 |
+
"loss": 0.5079,
|
| 1502 |
+
"grad_norm": 6.006751537322998,
|
| 1503 |
+
"learning_rate": 0.00014108028602333536
|
| 1504 |
+
},
|
| 1505 |
+
{
|
| 1506 |
+
"step": 168,
|
| 1507 |
+
"epoch": 1.1389830508474577,
|
| 1508 |
+
"cpu_mem": 1.530130432,
|
| 1509 |
+
"gpu_mem": 4.824049152,
|
| 1510 |
+
"loss": 0.4548,
|
| 1511 |
+
"grad_norm": 6.466878414154053,
|
| 1512 |
+
"learning_rate": 0.00013929912252011516
|
| 1513 |
+
},
|
| 1514 |
+
{
|
| 1515 |
+
"step": 169,
|
| 1516 |
+
"epoch": 1.1457627118644067,
|
| 1517 |
+
"cpu_mem": 1.530130432,
|
| 1518 |
+
"gpu_mem": 4.824136704,
|
| 1519 |
+
"loss": 0.4594,
|
| 1520 |
+
"grad_norm": 6.34998083114624,
|
| 1521 |
+
"learning_rate": 0.00013751947434318564
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"step": 170,
|
| 1525 |
+
"epoch": 1.152542372881356,
|
| 1526 |
+
"cpu_mem": 1.530130432,
|
| 1527 |
+
"gpu_mem": 4.824021504,
|
| 1528 |
+
"loss": 0.5691,
|
| 1529 |
+
"grad_norm": 7.32921028137207,
|
| 1530 |
+
"learning_rate": 0.00013574159350437261
|
| 1531 |
+
},
|
| 1532 |
+
{
|
| 1533 |
+
"step": 171,
|
| 1534 |
+
"epoch": 1.159322033898305,
|
| 1535 |
+
"cpu_mem": 1.530130432,
|
| 1536 |
+
"gpu_mem": 4.82408448,
|
| 1537 |
+
"loss": 0.5156,
|
| 1538 |
+
"grad_norm": 7.228818416595459,
|
| 1539 |
+
"learning_rate": 0.0001339657317652331
|
| 1540 |
+
},
|
| 1541 |
+
{
|
| 1542 |
+
"step": 172,
|
| 1543 |
+
"epoch": 1.1661016949152543,
|
| 1544 |
+
"cpu_mem": 1.530130432,
|
| 1545 |
+
"gpu_mem": 4.82399232,
|
| 1546 |
+
"loss": 0.3484,
|
| 1547 |
+
"grad_norm": 5.381630897521973,
|
| 1548 |
+
"learning_rate": 0.00013219214060140424
|
| 1549 |
+
},
|
| 1550 |
+
{
|
| 1551 |
+
"step": 173,
|
| 1552 |
+
"epoch": 1.1728813559322033,
|
| 1553 |
+
"cpu_mem": 1.530130432,
|
| 1554 |
+
"gpu_mem": 4.82429184,
|
| 1555 |
+
"loss": 0.4047,
|
| 1556 |
+
"grad_norm": 7.392773151397705,
|
| 1557 |
+
"learning_rate": 0.00013042107116699228
|
| 1558 |
+
},
|
| 1559 |
+
{
|
| 1560 |
+
"step": 174,
|
| 1561 |
+
"epoch": 1.1796610169491526,
|
| 1562 |
+
"cpu_mem": 1.530130432,
|
| 1563 |
+
"gpu_mem": 4.82401536,
|
| 1564 |
+
"loss": 0.5447,
|
| 1565 |
+
"grad_norm": 7.2776360511779785,
|
| 1566 |
+
"learning_rate": 0.00012865277425900724
|
| 1567 |
+
},
|
| 1568 |
+
{
|
| 1569 |
+
"step": 175,
|
| 1570 |
+
"epoch": 1.1864406779661016,
|
| 1571 |
+
"cpu_mem": 1.530130432,
|
| 1572 |
+
"gpu_mem": 4.823981568,
|
| 1573 |
+
"loss": 0.422,
|
| 1574 |
+
"grad_norm": 5.06213903427124,
|
| 1575 |
+
"learning_rate": 0.00012688750028184818
|
| 1576 |
+
},
|
| 1577 |
+
{
|
| 1578 |
+
"step": 176,
|
| 1579 |
+
"epoch": 1.193220338983051,
|
| 1580 |
+
"cpu_mem": 1.530130432,
|
| 1581 |
+
"gpu_mem": 4.824119808,
|
| 1582 |
+
"loss": 0.4156,
|
| 1583 |
+
"grad_norm": 7.205942630767822,
|
| 1584 |
+
"learning_rate": 0.0001251254992118439
|
| 1585 |
+
},
|
| 1586 |
+
{
|
| 1587 |
+
"step": 177,
|
| 1588 |
+
"epoch": 1.2,
|
| 1589 |
+
"cpu_mem": 1.530130432,
|
| 1590 |
+
"gpu_mem": 4.824218112,
|
| 1591 |
+
"loss": 0.4374,
|
| 1592 |
+
"grad_norm": 4.606486797332764,
|
| 1593 |
+
"learning_rate": 0.00012336702056185453
|
| 1594 |
+
},
|
| 1595 |
+
{
|
| 1596 |
+
"step": 178,
|
| 1597 |
+
"epoch": 1.2067796610169492,
|
| 1598 |
+
"cpu_mem": 1.530130432,
|
| 1599 |
+
"gpu_mem": 4.823964672,
|
| 1600 |
+
"loss": 0.4501,
|
| 1601 |
+
"grad_norm": 5.773846626281738,
|
| 1602 |
+
"learning_rate": 0.00012161231334593851
|
| 1603 |
+
},
|
| 1604 |
+
{
|
| 1605 |
+
"step": 179,
|
| 1606 |
+
"epoch": 1.2135593220338983,
|
| 1607 |
+
"cpu_mem": 1.530130432,
|
| 1608 |
+
"gpu_mem": 4.824064512,
|
| 1609 |
+
"loss": 0.4543,
|
| 1610 |
+
"grad_norm": 5.905703544616699,
|
| 1611 |
+
"learning_rate": 0.00011986162604409015
|
| 1612 |
+
},
|
| 1613 |
+
{
|
| 1614 |
+
"step": 180,
|
| 1615 |
+
"epoch": 1.2203389830508475,
|
| 1616 |
+
"cpu_mem": 1.530130432,
|
| 1617 |
+
"gpu_mem": 4.824036864,
|
| 1618 |
+
"loss": 0.4158,
|
| 1619 |
+
"grad_norm": 5.3787360191345215,
|
| 1620 |
+
"learning_rate": 0.00011811520656705348
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"step": 181,
|
| 1624 |
+
"epoch": 1.2271186440677966,
|
| 1625 |
+
"cpu_mem": 1.530130432,
|
| 1626 |
+
"gpu_mem": 4.823973888,
|
| 1627 |
+
"loss": 0.3065,
|
| 1628 |
+
"grad_norm": 4.495090007781982,
|
| 1629 |
+
"learning_rate": 0.00011637330222121543
|
| 1630 |
+
},
|
| 1631 |
+
{
|
| 1632 |
+
"step": 182,
|
| 1633 |
+
"epoch": 1.2338983050847459,
|
| 1634 |
+
"cpu_mem": 1.530130432,
|
| 1635 |
+
"gpu_mem": 4.824192,
|
| 1636 |
+
"loss": 0.5224,
|
| 1637 |
+
"grad_norm": 7.384599685668945,
|
| 1638 |
+
"learning_rate": 0.00011463615967358588
|
| 1639 |
+
},
|
| 1640 |
+
{
|
| 1641 |
+
"step": 183,
|
| 1642 |
+
"epoch": 1.240677966101695,
|
| 1643 |
+
"cpu_mem": 1.530130432,
|
| 1644 |
+
"gpu_mem": 4.824089088,
|
| 1645 |
+
"loss": 0.3435,
|
| 1646 |
+
"grad_norm": 5.406001091003418,
|
| 1647 |
+
"learning_rate": 0.00011290402491686766
|
| 1648 |
+
},
|
| 1649 |
+
{
|
| 1650 |
+
"step": 184,
|
| 1651 |
+
"epoch": 1.2474576271186442,
|
| 1652 |
+
"cpu_mem": 1.530130432,
|
| 1653 |
+
"gpu_mem": 4.824036864,
|
| 1654 |
+
"loss": 0.396,
|
| 1655 |
+
"grad_norm": 5.245143890380859,
|
| 1656 |
+
"learning_rate": 0.00011117714323462186
|
| 1657 |
+
},
|
| 1658 |
+
{
|
| 1659 |
+
"step": 185,
|
| 1660 |
+
"epoch": 1.2542372881355932,
|
| 1661 |
+
"cpu_mem": 1.530130432,
|
| 1662 |
+
"gpu_mem": 4.82401536,
|
| 1663 |
+
"loss": 0.5028,
|
| 1664 |
+
"grad_norm": 8.920019149780273,
|
| 1665 |
+
"learning_rate": 0.00010945575916653407
|
| 1666 |
+
},
|
| 1667 |
+
{
|
| 1668 |
+
"step": 186,
|
| 1669 |
+
"epoch": 1.2610169491525425,
|
| 1670 |
+
"cpu_mem": 1.530130432,
|
| 1671 |
+
"gpu_mem": 4.824024576,
|
| 1672 |
+
"loss": 0.315,
|
| 1673 |
+
"grad_norm": 6.18515682220459,
|
| 1674 |
+
"learning_rate": 0.00010774011647378553
|
| 1675 |
+
},
|
| 1676 |
+
{
|
| 1677 |
+
"step": 187,
|
| 1678 |
+
"epoch": 1.2677966101694915,
|
| 1679 |
+
"cpu_mem": 1.530130432,
|
| 1680 |
+
"gpu_mem": 4.823956992,
|
| 1681 |
+
"loss": 0.4535,
|
| 1682 |
+
"grad_norm": 10.364043235778809,
|
| 1683 |
+
"learning_rate": 0.00010603045810453468
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"step": 188,
|
| 1687 |
+
"epoch": 1.2745762711864406,
|
| 1688 |
+
"cpu_mem": 1.530130432,
|
| 1689 |
+
"gpu_mem": 4.824119808,
|
| 1690 |
+
"loss": 0.2713,
|
| 1691 |
+
"grad_norm": 4.690507888793945,
|
| 1692 |
+
"learning_rate": 0.00010432702615951396
|
| 1693 |
+
},
|
| 1694 |
+
{
|
| 1695 |
+
"step": 189,
|
| 1696 |
+
"epoch": 1.2813559322033898,
|
| 1697 |
+
"cpu_mem": 1.530130432,
|
| 1698 |
+
"gpu_mem": 4.823989248,
|
| 1699 |
+
"loss": 0.4559,
|
| 1700 |
+
"grad_norm": 5.3982133865356445,
|
| 1701 |
+
"learning_rate": 0.00010263006185774627
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"step": 190,
|
| 1705 |
+
"epoch": 1.288135593220339,
|
| 1706 |
+
"cpu_mem": 1.530130432,
|
| 1707 |
+
"gpu_mem": 4.824109056,
|
| 1708 |
+
"loss": 0.3843,
|
| 1709 |
+
"grad_norm": 4.67440128326416,
|
| 1710 |
+
"learning_rate": 0.00010093980550238675
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"step": 191,
|
| 1714 |
+
"epoch": 1.2949152542372881,
|
| 1715 |
+
"cpu_mem": 1.530130432,
|
| 1716 |
+
"gpu_mem": 4.823927808,
|
| 1717 |
+
"loss": 0.3162,
|
| 1718 |
+
"grad_norm": 5.031257152557373,
|
| 1719 |
+
"learning_rate": 9.925649644669391e-05
|
| 1720 |
+
},
|
| 1721 |
+
{
|
| 1722 |
+
"step": 192,
|
| 1723 |
+
"epoch": 1.3016949152542372,
|
| 1724 |
+
"cpu_mem": 1.530130432,
|
| 1725 |
+
"gpu_mem": 4.824059904,
|
| 1726 |
+
"loss": 0.2999,
|
| 1727 |
+
"grad_norm": 5.343701362609863,
|
| 1728 |
+
"learning_rate": 9.758037306013526e-05
|
| 1729 |
+
},
|
| 1730 |
+
{
|
| 1731 |
+
"step": 193,
|
| 1732 |
+
"epoch": 1.3084745762711865,
|
| 1733 |
+
"cpu_mem": 1.530130432,
|
| 1734 |
+
"gpu_mem": 4.824033792,
|
| 1735 |
+
"loss": 0.3915,
|
| 1736 |
+
"grad_norm": 7.458854675292969,
|
| 1737 |
+
"learning_rate": 9.591167269463255e-05
|
| 1738 |
+
},
|
| 1739 |
+
{
|
| 1740 |
+
"step": 194,
|
| 1741 |
+
"epoch": 1.3152542372881357,
|
| 1742 |
+
"cpu_mem": 1.530130432,
|
| 1743 |
+
"gpu_mem": 4.824,
|
| 1744 |
+
"loss": 0.345,
|
| 1745 |
+
"grad_norm": 7.649142265319824,
|
| 1746 |
+
"learning_rate": 9.425063165095088e-05
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"step": 195,
|
| 1750 |
+
"epoch": 1.3220338983050848,
|
| 1751 |
+
"cpu_mem": 1.530130432,
|
| 1752 |
+
"gpu_mem": 4.824104448,
|
| 1753 |
+
"loss": 0.2545,
|
| 1754 |
+
"grad_norm": 6.707334518432617,
|
| 1755 |
+
"learning_rate": 9.259748514523653e-05
|
| 1756 |
+
},
|
| 1757 |
+
{
|
| 1758 |
+
"step": 196,
|
| 1759 |
+
"epoch": 1.3288135593220338,
|
| 1760 |
+
"cpu_mem": 1.530130432,
|
| 1761 |
+
"gpu_mem": 4.82409984,
|
| 1762 |
+
"loss": 0.4273,
|
| 1763 |
+
"grad_norm": 6.2424116134643555,
|
| 1764 |
+
"learning_rate": 9.095246727570879e-05
|
| 1765 |
+
},
|
| 1766 |
+
{
|
| 1767 |
+
"step": 197,
|
| 1768 |
+
"epoch": 1.335593220338983,
|
| 1769 |
+
"cpu_mem": 1.530130432,
|
| 1770 |
+
"gpu_mem": 4.823958528,
|
| 1771 |
+
"loss": 0.3421,
|
| 1772 |
+
"grad_norm": 6.736060619354248,
|
| 1773 |
+
"learning_rate": 8.931581098950973e-05
|
| 1774 |
+
},
|
| 1775 |
+
{
|
| 1776 |
+
"step": 198,
|
| 1777 |
+
"epoch": 1.3423728813559321,
|
| 1778 |
+
"cpu_mem": 1.530130432,
|
| 1779 |
+
"gpu_mem": 4.824150528,
|
| 1780 |
+
"loss": 0.409,
|
| 1781 |
+
"grad_norm": 6.864956378936768,
|
| 1782 |
+
"learning_rate": 8.768774804971705e-05
|
| 1783 |
+
},
|
| 1784 |
+
{
|
| 1785 |
+
"step": 199,
|
| 1786 |
+
"epoch": 1.3491525423728814,
|
| 1787 |
+
"cpu_mem": 1.530130432,
|
| 1788 |
+
"gpu_mem": 4.824001536,
|
| 1789 |
+
"loss": 0.4033,
|
| 1790 |
+
"grad_norm": 8.16348934173584,
|
| 1791 |
+
"learning_rate": 8.606850900252478e-05
|
| 1792 |
+
},
|
| 1793 |
+
{
|
| 1794 |
+
"step": 200,
|
| 1795 |
+
"epoch": 1.3559322033898304,
|
| 1796 |
+
"cpu_mem": 1.530130432,
|
| 1797 |
+
"gpu_mem": 4.824104448,
|
| 1798 |
+
"loss": 0.2264,
|
| 1799 |
+
"grad_norm": 4.169793605804443,
|
| 1800 |
+
"learning_rate": 8.445832314459608e-05
|
| 1801 |
+
},
|
| 1802 |
+
{
|
| 1803 |
+
"step": 201,
|
| 1804 |
+
"epoch": 1.3627118644067797,
|
| 1805 |
+
"cpu_mem": 1.530130432,
|
| 1806 |
+
"gpu_mem": 4.8243072,
|
| 1807 |
+
"loss": 0.301,
|
| 1808 |
+
"grad_norm": 5.401573657989502,
|
| 1809 |
+
"learning_rate": 8.285741849059311e-05
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"step": 202,
|
| 1813 |
+
"epoch": 1.3694915254237288,
|
| 1814 |
+
"cpu_mem": 1.530130432,
|
| 1815 |
+
"gpu_mem": 4.824109056,
|
| 1816 |
+
"loss": 0.2946,
|
| 1817 |
+
"grad_norm": 5.444881439208984,
|
| 1818 |
+
"learning_rate": 8.126602174088843e-05
|
| 1819 |
+
},
|
| 1820 |
+
{
|
| 1821 |
+
"step": 203,
|
| 1822 |
+
"epoch": 1.376271186440678,
|
| 1823 |
+
"cpu_mem": 1.530130432,
|
| 1824 |
+
"gpu_mem": 4.823995392,
|
| 1825 |
+
"loss": 0.3586,
|
| 1826 |
+
"grad_norm": 7.087218284606934,
|
| 1827 |
+
"learning_rate": 7.968435824946242e-05
|
| 1828 |
+
},
|
| 1829 |
+
{
|
| 1830 |
+
"step": 204,
|
| 1831 |
+
"epoch": 1.383050847457627,
|
| 1832 |
+
"cpu_mem": 1.530130432,
|
| 1833 |
+
"gpu_mem": 4.824009216,
|
| 1834 |
+
"loss": 0.2954,
|
| 1835 |
+
"grad_norm": 5.234076976776123,
|
| 1836 |
+
"learning_rate": 7.811265199199152e-05
|
| 1837 |
+
},
|
| 1838 |
+
{
|
| 1839 |
+
"step": 205,
|
| 1840 |
+
"epoch": 1.3898305084745763,
|
| 1841 |
+
"cpu_mem": 1.530130432,
|
| 1842 |
+
"gpu_mem": 4.82405376,
|
| 1843 |
+
"loss": 0.3908,
|
| 1844 |
+
"grad_norm": 6.1905012130737305,
|
| 1845 |
+
"learning_rate": 7.655112553413135e-05
|
| 1846 |
+
},
|
| 1847 |
+
{
|
| 1848 |
+
"step": 206,
|
| 1849 |
+
"epoch": 1.3966101694915254,
|
| 1850 |
+
"cpu_mem": 1.530130432,
|
| 1851 |
+
"gpu_mem": 4.823995392,
|
| 1852 |
+
"loss": 0.3348,
|
| 1853 |
+
"grad_norm": 5.874136447906494,
|
| 1854 |
+
"learning_rate": 7.500000000000002e-05
|
| 1855 |
+
},
|
| 1856 |
+
{
|
| 1857 |
+
"step": 207,
|
| 1858 |
+
"epoch": 1.4033898305084747,
|
| 1859 |
+
"cpu_mem": 1.530130432,
|
| 1860 |
+
"gpu_mem": 4.824228864,
|
| 1861 |
+
"loss": 0.3414,
|
| 1862 |
+
"grad_norm": 5.023383617401123,
|
| 1863 |
+
"learning_rate": 7.345949504086507e-05
|
| 1864 |
+
},
|
| 1865 |
+
{
|
| 1866 |
+
"step": 208,
|
| 1867 |
+
"epoch": 1.4101694915254237,
|
| 1868 |
+
"cpu_mem": 1.530130432,
|
| 1869 |
+
"gpu_mem": 4.824259584,
|
| 1870 |
+
"loss": 0.2438,
|
| 1871 |
+
"grad_norm": 7.677697658538818,
|
| 1872 |
+
"learning_rate": 7.192982880403917e-05
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"step": 209,
|
| 1876 |
+
"epoch": 1.4169491525423727,
|
| 1877 |
+
"cpu_mem": 1.530130432,
|
| 1878 |
+
"gpu_mem": 4.824185856,
|
| 1879 |
+
"loss": 0.3552,
|
| 1880 |
+
"grad_norm": 5.987409591674805,
|
| 1881 |
+
"learning_rate": 7.041121790198881e-05
|
| 1882 |
+
},
|
| 1883 |
+
{
|
| 1884 |
+
"step": 210,
|
| 1885 |
+
"epoch": 1.423728813559322,
|
| 1886 |
+
"cpu_mem": 1.530130432,
|
| 1887 |
+
"gpu_mem": 4.824073728,
|
| 1888 |
+
"loss": 0.4027,
|
| 1889 |
+
"grad_norm": 5.519845485687256,
|
| 1890 |
+
"learning_rate": 6.890387738166041e-05
|
| 1891 |
+
},
|
| 1892 |
+
{
|
| 1893 |
+
"step": 211,
|
| 1894 |
+
"epoch": 1.4305084745762713,
|
| 1895 |
+
"cpu_mem": 1.530130432,
|
| 1896 |
+
"gpu_mem": 4.82402304,
|
| 1897 |
+
"loss": 0.3283,
|
| 1898 |
+
"grad_norm": 6.361313819885254,
|
| 1899 |
+
"learning_rate": 6.740802069402771e-05
|
| 1900 |
+
},
|
| 1901 |
+
{
|
| 1902 |
+
"step": 212,
|
| 1903 |
+
"epoch": 1.4372881355932203,
|
| 1904 |
+
"cpu_mem": 1.530130432,
|
| 1905 |
+
"gpu_mem": 4.82399232,
|
| 1906 |
+
"loss": 0.3592,
|
| 1907 |
+
"grad_norm": 5.429263591766357,
|
| 1908 |
+
"learning_rate": 6.592385966386588e-05
|
| 1909 |
+
},
|
| 1910 |
+
{
|
| 1911 |
+
"step": 213,
|
| 1912 |
+
"epoch": 1.4440677966101694,
|
| 1913 |
+
"cpu_mem": 1.530130432,
|
| 1914 |
+
"gpu_mem": 4.82401536,
|
| 1915 |
+
"loss": 0.4217,
|
| 1916 |
+
"grad_norm": 8.665788650512695,
|
| 1917 |
+
"learning_rate": 6.445160445975536e-05
|
| 1918 |
+
},
|
| 1919 |
+
{
|
| 1920 |
+
"step": 214,
|
| 1921 |
+
"epoch": 1.4508474576271186,
|
| 1922 |
+
"cpu_mem": 1.530130432,
|
| 1923 |
+
"gpu_mem": 4.824098304,
|
| 1924 |
+
"loss": 0.421,
|
| 1925 |
+
"grad_norm": 7.960748195648193,
|
| 1926 |
+
"learning_rate": 6.299146356432029e-05
|
| 1927 |
+
},
|
| 1928 |
+
{
|
| 1929 |
+
"step": 215,
|
| 1930 |
+
"epoch": 1.457627118644068,
|
| 1931 |
+
"cpu_mem": 1.530130432,
|
| 1932 |
+
"gpu_mem": 4.824026112,
|
| 1933 |
+
"loss": 0.397,
|
| 1934 |
+
"grad_norm": 8.969430923461914,
|
| 1935 |
+
"learning_rate": 6.154364374470568e-05
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"step": 216,
|
| 1939 |
+
"epoch": 1.464406779661017,
|
| 1940 |
+
"cpu_mem": 1.530130432,
|
| 1941 |
+
"gpu_mem": 4.824192,
|
| 1942 |
+
"loss": 0.373,
|
| 1943 |
+
"grad_norm": 5.677824020385742,
|
| 1944 |
+
"learning_rate": 6.010835002329795e-05
|
| 1945 |
+
},
|
| 1946 |
+
{
|
| 1947 |
+
"step": 217,
|
| 1948 |
+
"epoch": 1.471186440677966,
|
| 1949 |
+
"cpu_mem": 1.530130432,
|
| 1950 |
+
"gpu_mem": 4.824033792,
|
| 1951 |
+
"loss": 0.4046,
|
| 1952 |
+
"grad_norm": 6.211999893188477,
|
| 1953 |
+
"learning_rate": 5.8685785648691894e-05
|
| 1954 |
+
},
|
| 1955 |
+
{
|
| 1956 |
+
"step": 218,
|
| 1957 |
+
"epoch": 1.4779661016949153,
|
| 1958 |
+
"cpu_mem": 1.530130432,
|
| 1959 |
+
"gpu_mem": 4.824010752,
|
| 1960 |
+
"loss": 0.3678,
|
| 1961 |
+
"grad_norm": 5.141634464263916,
|
| 1962 |
+
"learning_rate": 5.72761520669092e-05
|
| 1963 |
+
},
|
| 1964 |
+
{
|
| 1965 |
+
"step": 219,
|
| 1966 |
+
"epoch": 1.4847457627118645,
|
| 1967 |
+
"cpu_mem": 1.530130432,
|
| 1968 |
+
"gpu_mem": 4.824136704,
|
| 1969 |
+
"loss": 0.4164,
|
| 1970 |
+
"grad_norm": 5.972975254058838,
|
| 1971 |
+
"learning_rate": 5.587964889287218e-05
|
| 1972 |
+
},
|
| 1973 |
+
{
|
| 1974 |
+
"step": 220,
|
| 1975 |
+
"epoch": 1.4915254237288136,
|
| 1976 |
+
"cpu_mem": 1.530130432,
|
| 1977 |
+
"gpu_mem": 4.824170496,
|
| 1978 |
+
"loss": 0.3468,
|
| 1979 |
+
"grad_norm": 5.6360673904418945,
|
| 1980 |
+
"learning_rate": 5.449647388213678e-05
|
| 1981 |
+
},
|
| 1982 |
+
{
|
| 1983 |
+
"step": 221,
|
| 1984 |
+
"epoch": 1.4983050847457626,
|
| 1985 |
+
"cpu_mem": 1.530130432,
|
| 1986 |
+
"gpu_mem": 4.8240384,
|
| 1987 |
+
"loss": 0.3753,
|
| 1988 |
+
"grad_norm": 5.116311550140381,
|
| 1989 |
+
"learning_rate": 5.312682290288869e-05
|
| 1990 |
+
},
|
| 1991 |
+
{
|
| 1992 |
+
"step": 222,
|
| 1993 |
+
"epoch": 1.505084745762712,
|
| 1994 |
+
"cpu_mem": 1.530130432,
|
| 1995 |
+
"gpu_mem": 4.824175104,
|
| 1996 |
+
"loss": 0.3698,
|
| 1997 |
+
"grad_norm": 6.675261974334717,
|
| 1998 |
+
"learning_rate": 5.1770889908207245e-05
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"step": 223,
|
| 2002 |
+
"epoch": 1.5118644067796612,
|
| 2003 |
+
"cpu_mem": 1.530130432,
|
| 2004 |
+
"gpu_mem": 4.824089088,
|
| 2005 |
+
"loss": 0.4058,
|
| 2006 |
+
"grad_norm": 6.807044982910156,
|
| 2007 |
+
"learning_rate": 5.0428866908599864e-05
|
| 2008 |
+
},
|
| 2009 |
+
{
|
| 2010 |
+
"step": 224,
|
| 2011 |
+
"epoch": 1.5186440677966102,
|
| 2012 |
+
"cpu_mem": 1.530130432,
|
| 2013 |
+
"gpu_mem": 4.82405376,
|
| 2014 |
+
"loss": 0.2952,
|
| 2015 |
+
"grad_norm": 4.836634635925293,
|
| 2016 |
+
"learning_rate": 4.9100943944812114e-05
|
| 2017 |
+
},
|
| 2018 |
+
{
|
| 2019 |
+
"step": 225,
|
| 2020 |
+
"epoch": 1.5254237288135593,
|
| 2021 |
+
"cpu_mem": 1.530130432,
|
| 2022 |
+
"gpu_mem": 4.824018432,
|
| 2023 |
+
"loss": 0.2557,
|
| 2024 |
+
"grad_norm": 3.945059299468994,
|
| 2025 |
+
"learning_rate": 4.778730906091632e-05
|
| 2026 |
+
},
|
| 2027 |
+
{
|
| 2028 |
+
"step": 226,
|
| 2029 |
+
"epoch": 1.5322033898305085,
|
| 2030 |
+
"cpu_mem": 1.530130432,
|
| 2031 |
+
"gpu_mem": 4.824167424,
|
| 2032 |
+
"loss": 0.3338,
|
| 2033 |
+
"grad_norm": 4.126738548278809,
|
| 2034 |
+
"learning_rate": 4.648814827768322e-05
|
| 2035 |
+
},
|
| 2036 |
+
{
|
| 2037 |
+
"step": 227,
|
| 2038 |
+
"epoch": 1.5389830508474578,
|
| 2039 |
+
"cpu_mem": 1.530130432,
|
| 2040 |
+
"gpu_mem": 4.824056832,
|
| 2041 |
+
"loss": 0.2903,
|
| 2042 |
+
"grad_norm": 6.6302809715271,
|
| 2043 |
+
"learning_rate": 4.5203645566239816e-05
|
| 2044 |
+
},
|
| 2045 |
+
{
|
| 2046 |
+
"step": 228,
|
| 2047 |
+
"epoch": 1.5457627118644068,
|
| 2048 |
+
"cpu_mem": 1.530130432,
|
| 2049 |
+
"gpu_mem": 4.824001536,
|
| 2050 |
+
"loss": 0.4299,
|
| 2051 |
+
"grad_norm": 7.667603015899658,
|
| 2052 |
+
"learning_rate": 4.3933982822017876e-05
|
| 2053 |
+
},
|
| 2054 |
+
{
|
| 2055 |
+
"step": 229,
|
| 2056 |
+
"epoch": 1.5525423728813559,
|
| 2057 |
+
"cpu_mem": 1.530130432,
|
| 2058 |
+
"gpu_mem": 4.823943168,
|
| 2059 |
+
"loss": 0.2388,
|
| 2060 |
+
"grad_norm": 4.576896667480469,
|
| 2061 |
+
"learning_rate": 4.267933983899601e-05
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"step": 230,
|
| 2065 |
+
"epoch": 1.559322033898305,
|
| 2066 |
+
"cpu_mem": 1.530130432,
|
| 2067 |
+
"gpu_mem": 4.824,
|
| 2068 |
+
"loss": 0.3806,
|
| 2069 |
+
"grad_norm": 6.581130027770996,
|
| 2070 |
+
"learning_rate": 4.143989428423947e-05
|
| 2071 |
+
},
|
| 2072 |
+
{
|
| 2073 |
+
"step": 231,
|
| 2074 |
+
"epoch": 1.5661016949152542,
|
| 2075 |
+
"cpu_mem": 1.530130432,
|
| 2076 |
+
"gpu_mem": 4.824278016,
|
| 2077 |
+
"loss": 0.3558,
|
| 2078 |
+
"grad_norm": 4.444263458251953,
|
| 2079 |
+
"learning_rate": 4.0215821672741213e-05
|
| 2080 |
+
},
|
| 2081 |
+
{
|
| 2082 |
+
"step": 232,
|
| 2083 |
+
"epoch": 1.5728813559322035,
|
| 2084 |
+
"cpu_mem": 1.530130432,
|
| 2085 |
+
"gpu_mem": 4.824001536,
|
| 2086 |
+
"loss": 0.3733,
|
| 2087 |
+
"grad_norm": 6.31781005859375,
|
| 2088 |
+
"learning_rate": 3.900729534256745e-05
|
| 2089 |
+
},
|
| 2090 |
+
{
|
| 2091 |
+
"step": 233,
|
| 2092 |
+
"epoch": 1.5796610169491525,
|
| 2093 |
+
"cpu_mem": 1.530130432,
|
| 2094 |
+
"gpu_mem": 4.82431488,
|
| 2095 |
+
"loss": 0.2761,
|
| 2096 |
+
"grad_norm": 4.112189769744873,
|
| 2097 |
+
"learning_rate": 3.781448643031187e-05
|
| 2098 |
+
},
|
| 2099 |
+
{
|
| 2100 |
+
"step": 234,
|
| 2101 |
+
"epoch": 1.5864406779661016,
|
| 2102 |
+
"cpu_mem": 1.530130432,
|
| 2103 |
+
"gpu_mem": 4.824190464,
|
| 2104 |
+
"loss": 0.218,
|
| 2105 |
+
"grad_norm": 3.6443135738372803,
|
| 2106 |
+
"learning_rate": 3.663756384686127e-05
|
| 2107 |
+
},
|
| 2108 |
+
{
|
| 2109 |
+
"step": 235,
|
| 2110 |
+
"epoch": 1.5932203389830508,
|
| 2111 |
+
"cpu_mem": 1.530130432,
|
| 2112 |
+
"gpu_mem": 4.82394624,
|
| 2113 |
+
"loss": 0.3588,
|
| 2114 |
+
"grad_norm": 6.56972599029541,
|
| 2115 |
+
"learning_rate": 3.547669425347647e-05
|
| 2116 |
+
},
|
| 2117 |
+
{
|
| 2118 |
+
"step": 236,
|
| 2119 |
+
"epoch": 1.6,
|
| 2120 |
+
"cpu_mem": 1.530130432,
|
| 2121 |
+
"gpu_mem": 4.824006144,
|
| 2122 |
+
"loss": 0.3269,
|
| 2123 |
+
"grad_norm": 5.762430191040039,
|
| 2124 |
+
"learning_rate": 3.433204203819185e-05
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"step": 237,
|
| 2128 |
+
"epoch": 1.6067796610169491,
|
| 2129 |
+
"cpu_mem": 1.530130432,
|
| 2130 |
+
"gpu_mem": 4.824067584,
|
| 2131 |
+
"loss": 0.4199,
|
| 2132 |
+
"grad_norm": 6.846770763397217,
|
| 2133 |
+
"learning_rate": 3.3203769292536764e-05
|
| 2134 |
+
},
|
| 2135 |
+
{
|
| 2136 |
+
"step": 238,
|
| 2137 |
+
"epoch": 1.6135593220338982,
|
| 2138 |
+
"cpu_mem": 1.530130432,
|
| 2139 |
+
"gpu_mem": 4.82406912,
|
| 2140 |
+
"loss": 0.3317,
|
| 2141 |
+
"grad_norm": 5.823359489440918,
|
| 2142 |
+
"learning_rate": 3.209203578858191e-05
|
| 2143 |
+
},
|
| 2144 |
+
{
|
| 2145 |
+
"step": 239,
|
| 2146 |
+
"epoch": 1.6203389830508474,
|
| 2147 |
+
"cpu_mem": 1.530130432,
|
| 2148 |
+
"gpu_mem": 4.82432256,
|
| 2149 |
+
"loss": 0.3896,
|
| 2150 |
+
"grad_norm": 7.253147125244141,
|
| 2151 |
+
"learning_rate": 3.099699895631474e-05
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"step": 240,
|
| 2155 |
+
"epoch": 1.6271186440677967,
|
| 2156 |
+
"cpu_mem": 1.530130432,
|
| 2157 |
+
"gpu_mem": 4.823972352,
|
| 2158 |
+
"loss": 0.5372,
|
| 2159 |
+
"grad_norm": 7.62632417678833,
|
| 2160 |
+
"learning_rate": 2.9918813861345952e-05
|
| 2161 |
+
},
|
| 2162 |
+
{
|
| 2163 |
+
"step": 241,
|
| 2164 |
+
"epoch": 1.6338983050847458,
|
| 2165 |
+
"cpu_mem": 1.530130432,
|
| 2166 |
+
"gpu_mem": 4.8242688,
|
| 2167 |
+
"loss": 0.2841,
|
| 2168 |
+
"grad_norm": 5.374094009399414,
|
| 2169 |
+
"learning_rate": 2.885763318295102e-05
|
| 2170 |
+
},
|
| 2171 |
+
{
|
| 2172 |
+
"step": 242,
|
| 2173 |
+
"epoch": 1.6406779661016948,
|
| 2174 |
+
"cpu_mem": 1.530130432,
|
| 2175 |
+
"gpu_mem": 4.82413056,
|
| 2176 |
+
"loss": 0.2746,
|
| 2177 |
+
"grad_norm": 5.998660564422607,
|
| 2178 |
+
"learning_rate": 2.781360719244964e-05
|
| 2179 |
+
},
|
| 2180 |
+
{
|
| 2181 |
+
"step": 243,
|
| 2182 |
+
"epoch": 1.647457627118644,
|
| 2183 |
+
"cpu_mem": 1.530130432,
|
| 2184 |
+
"gpu_mem": 4.823983104,
|
| 2185 |
+
"loss": 0.4833,
|
| 2186 |
+
"grad_norm": 7.127227306365967,
|
| 2187 |
+
"learning_rate": 2.6786883731926306e-05
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"step": 244,
|
| 2191 |
+
"epoch": 1.6542372881355933,
|
| 2192 |
+
"cpu_mem": 1.530130432,
|
| 2193 |
+
"gpu_mem": 4.82412288,
|
| 2194 |
+
"loss": 0.1932,
|
| 2195 |
+
"grad_norm": 3.3074233531951904,
|
| 2196 |
+
"learning_rate": 2.5777608193294396e-05
|
| 2197 |
+
},
|
| 2198 |
+
{
|
| 2199 |
+
"step": 245,
|
| 2200 |
+
"epoch": 1.6610169491525424,
|
| 2201 |
+
"cpu_mem": 1.530130432,
|
| 2202 |
+
"gpu_mem": 4.824001536,
|
| 2203 |
+
"loss": 0.4678,
|
| 2204 |
+
"grad_norm": 7.167958736419678,
|
| 2205 |
+
"learning_rate": 2.4785923497707956e-05
|
| 2206 |
+
},
|
| 2207 |
+
{
|
| 2208 |
+
"step": 246,
|
| 2209 |
+
"epoch": 1.6677966101694914,
|
| 2210 |
+
"cpu_mem": 1.530130432,
|
| 2211 |
+
"gpu_mem": 4.824095232,
|
| 2212 |
+
"loss": 0.3263,
|
| 2213 |
+
"grad_norm": 5.361066818237305,
|
| 2214 |
+
"learning_rate": 2.38119700753228e-05
|
| 2215 |
+
},
|
| 2216 |
+
{
|
| 2217 |
+
"step": 247,
|
| 2218 |
+
"epoch": 1.6745762711864407,
|
| 2219 |
+
"cpu_mem": 1.530130432,
|
| 2220 |
+
"gpu_mem": 4.824113664,
|
| 2221 |
+
"loss": 0.3142,
|
| 2222 |
+
"grad_norm": 7.1523284912109375,
|
| 2223 |
+
"learning_rate": 2.285588584541047e-05
|
| 2224 |
+
},
|
| 2225 |
+
{
|
| 2226 |
+
"step": 248,
|
| 2227 |
+
"epoch": 1.68135593220339,
|
| 2228 |
+
"cpu_mem": 1.530130432,
|
| 2229 |
+
"gpu_mem": 4.824066048,
|
| 2230 |
+
"loss": 0.4077,
|
| 2231 |
+
"grad_norm": 5.50556755065918,
|
| 2232 |
+
"learning_rate": 2.1917806196827792e-05
|
| 2233 |
+
},
|
| 2234 |
+
{
|
| 2235 |
+
"step": 249,
|
| 2236 |
+
"epoch": 1.688135593220339,
|
| 2237 |
+
"cpu_mem": 1.530130432,
|
| 2238 |
+
"gpu_mem": 4.823972352,
|
| 2239 |
+
"loss": 0.2624,
|
| 2240 |
+
"grad_norm": 4.57589864730835,
|
| 2241 |
+
"learning_rate": 2.0997863968844914e-05
|
| 2242 |
+
},
|
| 2243 |
+
{
|
| 2244 |
+
"step": 250,
|
| 2245 |
+
"epoch": 1.694915254237288,
|
| 2246 |
+
"cpu_mem": 1.530130432,
|
| 2247 |
+
"gpu_mem": 4.824064512,
|
| 2248 |
+
"loss": 0.2678,
|
| 2249 |
+
"grad_norm": 6.028458118438721,
|
| 2250 |
+
"learning_rate": 2.009618943233419e-05
|
| 2251 |
+
},
|
| 2252 |
+
{
|
| 2253 |
+
"step": 251,
|
| 2254 |
+
"epoch": 1.7016949152542373,
|
| 2255 |
+
"cpu_mem": 1.530130432,
|
| 2256 |
+
"gpu_mem": 4.82397696,
|
| 2257 |
+
"loss": 0.2385,
|
| 2258 |
+
"grad_norm": 9.162259101867676,
|
| 2259 |
+
"learning_rate": 1.921291027132278e-05
|
| 2260 |
+
},
|
| 2261 |
+
{
|
| 2262 |
+
"step": 252,
|
| 2263 |
+
"epoch": 1.7084745762711866,
|
| 2264 |
+
"cpu_mem": 1.530130432,
|
| 2265 |
+
"gpu_mem": 4.824019968,
|
| 2266 |
+
"loss": 0.3512,
|
| 2267 |
+
"grad_norm": 4.852019309997559,
|
| 2268 |
+
"learning_rate": 1.834815156491165e-05
|
| 2269 |
+
},
|
| 2270 |
+
{
|
| 2271 |
+
"step": 253,
|
| 2272 |
+
"epoch": 1.7152542372881356,
|
| 2273 |
+
"cpu_mem": 1.530130432,
|
| 2274 |
+
"gpu_mem": 4.824213504,
|
| 2275 |
+
"loss": 0.3405,
|
| 2276 |
+
"grad_norm": 6.2241082191467285,
|
| 2277 |
+
"learning_rate": 1.750203576956341e-05
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"step": 254,
|
| 2281 |
+
"epoch": 1.7220338983050847,
|
| 2282 |
+
"cpu_mem": 1.530130432,
|
| 2283 |
+
"gpu_mem": 4.824009216,
|
| 2284 |
+
"loss": 0.3576,
|
| 2285 |
+
"grad_norm": 5.062891960144043,
|
| 2286 |
+
"learning_rate": 1.6674682701761493e-05
|
| 2287 |
+
},
|
| 2288 |
+
{
|
| 2289 |
+
"step": 255,
|
| 2290 |
+
"epoch": 1.7288135593220337,
|
| 2291 |
+
"cpu_mem": 1.530130432,
|
| 2292 |
+
"gpu_mem": 4.824165888,
|
| 2293 |
+
"loss": 0.4266,
|
| 2294 |
+
"grad_norm": 6.995367527008057,
|
| 2295 |
+
"learning_rate": 1.5866209521043304e-05
|
| 2296 |
+
},
|
| 2297 |
+
{
|
| 2298 |
+
"step": 256,
|
| 2299 |
+
"epoch": 1.735593220338983,
|
| 2300 |
+
"cpu_mem": 1.530130432,
|
| 2301 |
+
"gpu_mem": 4.82399232,
|
| 2302 |
+
"loss": 0.2653,
|
| 2303 |
+
"grad_norm": 4.953624248504639,
|
| 2304 |
+
"learning_rate": 1.5076730713409523e-05
|
| 2305 |
+
},
|
| 2306 |
+
{
|
| 2307 |
+
"step": 257,
|
| 2308 |
+
"epoch": 1.7423728813559323,
|
| 2309 |
+
"cpu_mem": 1.530130432,
|
| 2310 |
+
"gpu_mem": 4.824405504,
|
| 2311 |
+
"loss": 0.4008,
|
| 2312 |
+
"grad_norm": 4.404302597045898,
|
| 2313 |
+
"learning_rate": 1.4306358075111923e-05
|
| 2314 |
+
},
|
| 2315 |
+
{
|
| 2316 |
+
"step": 258,
|
| 2317 |
+
"epoch": 1.7491525423728813,
|
| 2318 |
+
"cpu_mem": 1.530130432,
|
| 2319 |
+
"gpu_mem": 4.824064512,
|
| 2320 |
+
"loss": 0.4191,
|
| 2321 |
+
"grad_norm": 7.855671405792236,
|
| 2322 |
+
"learning_rate": 1.3555200696822232e-05
|
| 2323 |
+
},
|
| 2324 |
+
{
|
| 2325 |
+
"step": 259,
|
| 2326 |
+
"epoch": 1.7559322033898304,
|
| 2327 |
+
"cpu_mem": 1.530130432,
|
| 2328 |
+
"gpu_mem": 4.823981568,
|
| 2329 |
+
"loss": 0.2937,
|
| 2330 |
+
"grad_norm": 7.6881279945373535,
|
| 2331 |
+
"learning_rate": 1.2823364948184095e-05
|
| 2332 |
+
},
|
| 2333 |
+
{
|
| 2334 |
+
"step": 260,
|
| 2335 |
+
"epoch": 1.7627118644067796,
|
| 2336 |
+
"cpu_mem": 1.530130432,
|
| 2337 |
+
"gpu_mem": 4.824098304,
|
| 2338 |
+
"loss": 0.1671,
|
| 2339 |
+
"grad_norm": 4.152520179748535,
|
| 2340 |
+
"learning_rate": 1.2110954462750166e-05
|
| 2341 |
+
},
|
| 2342 |
+
{
|
| 2343 |
+
"step": 261,
|
| 2344 |
+
"epoch": 1.769491525423729,
|
| 2345 |
+
"cpu_mem": 1.530130432,
|
| 2346 |
+
"gpu_mem": 4.82405376,
|
| 2347 |
+
"loss": 0.2918,
|
| 2348 |
+
"grad_norm": 4.768893241882324,
|
| 2349 |
+
"learning_rate": 1.1418070123306989e-05
|
| 2350 |
+
},
|
| 2351 |
+
{
|
| 2352 |
+
"step": 262,
|
| 2353 |
+
"epoch": 1.776271186440678,
|
| 2354 |
+
"cpu_mem": 1.530130432,
|
| 2355 |
+
"gpu_mem": 4.824010752,
|
| 2356 |
+
"loss": 0.2511,
|
| 2357 |
+
"grad_norm": 3.7594785690307617,
|
| 2358 |
+
"learning_rate": 1.0744810047589115e-05
|
| 2359 |
+
},
|
| 2360 |
+
{
|
| 2361 |
+
"step": 263,
|
| 2362 |
+
"epoch": 1.783050847457627,
|
| 2363 |
+
"cpu_mem": 1.530130432,
|
| 2364 |
+
"gpu_mem": 4.824047616,
|
| 2365 |
+
"loss": 0.2518,
|
| 2366 |
+
"grad_norm": 4.245553970336914,
|
| 2367 |
+
"learning_rate": 1.0091269574384874e-05
|
| 2368 |
+
},
|
| 2369 |
+
{
|
| 2370 |
+
"step": 264,
|
| 2371 |
+
"epoch": 1.7898305084745763,
|
| 2372 |
+
"cpu_mem": 1.530130432,
|
| 2373 |
+
"gpu_mem": 4.824135168,
|
| 2374 |
+
"loss": 0.2495,
|
| 2375 |
+
"grad_norm": 4.321831226348877,
|
| 2376 |
+
"learning_rate": 9.45754125003576e-06
|
| 2377 |
+
},
|
| 2378 |
+
{
|
| 2379 |
+
"step": 265,
|
| 2380 |
+
"epoch": 1.7966101694915255,
|
| 2381 |
+
"cpu_mem": 1.530130432,
|
| 2382 |
+
"gpu_mem": 4.82405376,
|
| 2383 |
+
"loss": 0.3896,
|
| 2384 |
+
"grad_norm": 5.726314067840576,
|
| 2385 |
+
"learning_rate": 8.843714815330987e-06
|
| 2386 |
+
},
|
| 2387 |
+
{
|
| 2388 |
+
"step": 266,
|
| 2389 |
+
"epoch": 1.8033898305084746,
|
| 2390 |
+
"cpu_mem": 1.530130432,
|
| 2391 |
+
"gpu_mem": 4.8242688,
|
| 2392 |
+
"loss": 0.4383,
|
| 2393 |
+
"grad_norm": 5.080480098724365,
|
| 2394 |
+
"learning_rate": 8.249877192799731e-06
|
| 2395 |
+
},
|
| 2396 |
+
{
|
| 2397 |
+
"step": 267,
|
| 2398 |
+
"epoch": 1.8101694915254236,
|
| 2399 |
+
"cpu_mem": 1.530130432,
|
| 2400 |
+
"gpu_mem": 4.82406144,
|
| 2401 |
+
"loss": 0.3601,
|
| 2402 |
+
"grad_norm": 5.688658714294434,
|
| 2403 |
+
"learning_rate": 7.676112474402068e-06
|
| 2404 |
+
},
|
| 2405 |
+
{
|
| 2406 |
+
"step": 268,
|
| 2407 |
+
"epoch": 1.8169491525423729,
|
| 2408 |
+
"cpu_mem": 1.530130432,
|
| 2409 |
+
"gpu_mem": 4.824066048,
|
| 2410 |
+
"loss": 0.3501,
|
| 2411 |
+
"grad_norm": 6.421919345855713,
|
| 2412 |
+
"learning_rate": 7.122501909620926e-06
|
| 2413 |
+
},
|
| 2414 |
+
{
|
| 2415 |
+
"step": 269,
|
| 2416 |
+
"epoch": 1.8237288135593221,
|
| 2417 |
+
"cpu_mem": 1.530130432,
|
| 2418 |
+
"gpu_mem": 4.8240768,
|
| 2419 |
+
"loss": 0.3546,
|
| 2420 |
+
"grad_norm": 6.48486328125,
|
| 2421 |
+
"learning_rate": 6.5891238939566275e-06
|
| 2422 |
+
},
|
| 2423 |
+
{
|
| 2424 |
+
"step": 270,
|
| 2425 |
+
"epoch": 1.8305084745762712,
|
| 2426 |
+
"cpu_mem": 1.530130432,
|
| 2427 |
+
"gpu_mem": 4.8241152,
|
| 2428 |
+
"loss": 0.2864,
|
| 2429 |
+
"grad_norm": 5.4872260093688965,
|
| 2430 |
+
"learning_rate": 6.076053957825411e-06
|
| 2431 |
+
},
|
| 2432 |
+
{
|
| 2433 |
+
"step": 271,
|
| 2434 |
+
"epoch": 1.8372881355932202,
|
| 2435 |
+
"cpu_mem": 1.530130432,
|
| 2436 |
+
"gpu_mem": 4.824167424,
|
| 2437 |
+
"loss": 0.3182,
|
| 2438 |
+
"grad_norm": 5.292596817016602,
|
| 2439 |
+
"learning_rate": 5.583364755863701e-06
|
| 2440 |
+
},
|
| 2441 |
+
{
|
| 2442 |
+
"step": 272,
|
| 2443 |
+
"epoch": 1.8440677966101695,
|
| 2444 |
+
"cpu_mem": 1.530130432,
|
| 2445 |
+
"gpu_mem": 4.824026112,
|
| 2446 |
+
"loss": 0.3154,
|
| 2447 |
+
"grad_norm": 4.313265800476074,
|
| 2448 |
+
"learning_rate": 5.11112605663977e-06
|
| 2449 |
+
},
|
| 2450 |
+
{
|
| 2451 |
+
"step": 273,
|
| 2452 |
+
"epoch": 1.8508474576271188,
|
| 2453 |
+
"cpu_mem": 1.530130432,
|
| 2454 |
+
"gpu_mem": 4.823906304,
|
| 2455 |
+
"loss": 0.2951,
|
| 2456 |
+
"grad_norm": 5.555436134338379,
|
| 2457 |
+
"learning_rate": 4.659404732773908e-06
|
| 2458 |
+
},
|
| 2459 |
+
{
|
| 2460 |
+
"step": 274,
|
| 2461 |
+
"epoch": 1.8576271186440678,
|
| 2462 |
+
"cpu_mem": 1.530130432,
|
| 2463 |
+
"gpu_mem": 4.824133632,
|
| 2464 |
+
"loss": 0.2837,
|
| 2465 |
+
"grad_norm": 4.105805397033691,
|
| 2466 |
+
"learning_rate": 4.228264751468752e-06
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"step": 275,
|
| 2470 |
+
"epoch": 1.8644067796610169,
|
| 2471 |
+
"cpu_mem": 1.530130432,
|
| 2472 |
+
"gpu_mem": 4.824377856,
|
| 2473 |
+
"loss": 0.2878,
|
| 2474 |
+
"grad_norm": 6.073948860168457,
|
| 2475 |
+
"learning_rate": 3.817767165451041e-06
|
| 2476 |
+
},
|
| 2477 |
+
{
|
| 2478 |
+
"step": 276,
|
| 2479 |
+
"epoch": 1.8711864406779661,
|
| 2480 |
+
"cpu_mem": 1.530130432,
|
| 2481 |
+
"gpu_mem": 4.8240384,
|
| 2482 |
+
"loss": 0.2768,
|
| 2483 |
+
"grad_norm": 4.70308256149292,
|
| 2484 |
+
"learning_rate": 3.4279701043260886e-06
|
| 2485 |
+
},
|
| 2486 |
+
{
|
| 2487 |
+
"step": 277,
|
| 2488 |
+
"epoch": 1.8779661016949154,
|
| 2489 |
+
"cpu_mem": 1.530130432,
|
| 2490 |
+
"gpu_mem": 4.82398464,
|
| 2491 |
+
"loss": 0.3124,
|
| 2492 |
+
"grad_norm": 4.56823205947876,
|
| 2493 |
+
"learning_rate": 3.0589287663461472e-06
|
| 2494 |
+
},
|
| 2495 |
+
{
|
| 2496 |
+
"step": 278,
|
| 2497 |
+
"epoch": 1.8847457627118644,
|
| 2498 |
+
"cpu_mem": 1.530130432,
|
| 2499 |
+
"gpu_mem": 4.824147456,
|
| 2500 |
+
"loss": 0.3178,
|
| 2501 |
+
"grad_norm": 6.935946464538574,
|
| 2502 |
+
"learning_rate": 2.710695410593994e-06
|
| 2503 |
+
},
|
| 2504 |
+
{
|
| 2505 |
+
"step": 279,
|
| 2506 |
+
"epoch": 1.8915254237288135,
|
| 2507 |
+
"cpu_mem": 1.530130432,
|
| 2508 |
+
"gpu_mem": 4.824087552,
|
| 2509 |
+
"loss": 0.4199,
|
| 2510 |
+
"grad_norm": 5.427887439727783,
|
| 2511 |
+
"learning_rate": 2.3833193495825853e-06
|
| 2512 |
+
},
|
| 2513 |
+
{
|
| 2514 |
+
"step": 280,
|
| 2515 |
+
"epoch": 1.8983050847457628,
|
| 2516 |
+
"cpu_mem": 1.530130432,
|
| 2517 |
+
"gpu_mem": 4.824067584,
|
| 2518 |
+
"loss": 0.3266,
|
| 2519 |
+
"grad_norm": 4.603908538818359,
|
| 2520 |
+
"learning_rate": 2.076846942272026e-06
|
| 2521 |
+
},
|
| 2522 |
+
{
|
| 2523 |
+
"step": 281,
|
| 2524 |
+
"epoch": 1.905084745762712,
|
| 2525 |
+
"cpu_mem": 1.530130432,
|
| 2526 |
+
"gpu_mem": 4.824003072,
|
| 2527 |
+
"loss": 0.3995,
|
| 2528 |
+
"grad_norm": 4.938188076019287,
|
| 2529 |
+
"learning_rate": 1.791321587504768e-06
|
| 2530 |
+
},
|
| 2531 |
+
{
|
| 2532 |
+
"step": 282,
|
| 2533 |
+
"epoch": 1.911864406779661,
|
| 2534 |
+
"cpu_mem": 1.530130432,
|
| 2535 |
+
"gpu_mem": 4.824431616,
|
| 2536 |
+
"loss": 0.2409,
|
| 2537 |
+
"grad_norm": 4.201538562774658,
|
| 2538 |
+
"learning_rate": 1.5267837178600972e-06
|
| 2539 |
+
},
|
| 2540 |
+
{
|
| 2541 |
+
"step": 283,
|
| 2542 |
+
"epoch": 1.9186440677966101,
|
| 2543 |
+
"cpu_mem": 1.530130432,
|
| 2544 |
+
"gpu_mem": 4.82413824,
|
| 2545 |
+
"loss": 0.3625,
|
| 2546 |
+
"grad_norm": 6.524600982666016,
|
| 2547 |
+
"learning_rate": 1.2832707939284427e-06
|
| 2548 |
+
},
|
| 2549 |
+
{
|
| 2550 |
+
"step": 284,
|
| 2551 |
+
"epoch": 1.9254237288135592,
|
| 2552 |
+
"cpu_mem": 1.530130432,
|
| 2553 |
+
"gpu_mem": 4.823993856,
|
| 2554 |
+
"loss": 0.3915,
|
| 2555 |
+
"grad_norm": 4.94792366027832,
|
| 2556 |
+
"learning_rate": 1.0608172990067553e-06
|
| 2557 |
+
},
|
| 2558 |
+
{
|
| 2559 |
+
"step": 285,
|
| 2560 |
+
"epoch": 1.9322033898305084,
|
| 2561 |
+
"cpu_mem": 1.530130432,
|
| 2562 |
+
"gpu_mem": 4.824047616,
|
| 2563 |
+
"loss": 0.3007,
|
| 2564 |
+
"grad_norm": 6.609028339385986,
|
| 2565 |
+
"learning_rate": 8.594547342153979e-07
|
| 2566 |
+
},
|
| 2567 |
+
{
|
| 2568 |
+
"step": 286,
|
| 2569 |
+
"epoch": 1.9389830508474577,
|
| 2570 |
+
"cpu_mem": 1.530130432,
|
| 2571 |
+
"gpu_mem": 4.824465408,
|
| 2572 |
+
"loss": 0.293,
|
| 2573 |
+
"grad_norm": 4.769420623779297,
|
| 2574 |
+
"learning_rate": 6.792116140373116e-07
|
| 2575 |
+
},
|
| 2576 |
+
{
|
| 2577 |
+
"step": 287,
|
| 2578 |
+
"epoch": 1.9457627118644067,
|
| 2579 |
+
"cpu_mem": 1.530130432,
|
| 2580 |
+
"gpu_mem": 4.824235008,
|
| 2581 |
+
"loss": 0.3999,
|
| 2582 |
+
"grad_norm": 8.176187515258789,
|
| 2583 |
+
"learning_rate": 5.201134622801473e-07
|
| 2584 |
+
},
|
| 2585 |
+
{
|
| 2586 |
+
"step": 288,
|
| 2587 |
+
"epoch": 1.9525423728813558,
|
| 2588 |
+
"cpu_mem": 1.530130432,
|
| 2589 |
+
"gpu_mem": 4.824019968,
|
| 2590 |
+
"loss": 0.4427,
|
| 2591 |
+
"grad_norm": 6.034461975097656,
|
| 2592 |
+
"learning_rate": 3.821828084619727e-07
|
| 2593 |
+
},
|
| 2594 |
+
{
|
| 2595 |
+
"step": 289,
|
| 2596 |
+
"epoch": 1.959322033898305,
|
| 2597 |
+
"cpu_mem": 1.530130432,
|
| 2598 |
+
"gpu_mem": 4.824104448,
|
| 2599 |
+
"loss": 0.254,
|
| 2600 |
+
"grad_norm": 7.134485721588135,
|
| 2601 |
+
"learning_rate": 2.654391846207915e-07
|
| 2602 |
+
},
|
| 2603 |
+
{
|
| 2604 |
+
"step": 290,
|
| 2605 |
+
"epoch": 1.9661016949152543,
|
| 2606 |
+
"cpu_mem": 1.530130432,
|
| 2607 |
+
"gpu_mem": 4.824029184,
|
| 2608 |
+
"loss": 0.405,
|
| 2609 |
+
"grad_norm": 6.609726905822754,
|
| 2610 |
+
"learning_rate": 1.6989912254880556e-07
|
| 2611 |
+
},
|
| 2612 |
+
{
|
| 2613 |
+
"step": 291,
|
| 2614 |
+
"epoch": 1.9728813559322034,
|
| 2615 |
+
"cpu_mem": 1.530130432,
|
| 2616 |
+
"gpu_mem": 4.824064512,
|
| 2617 |
+
"loss": 0.4325,
|
| 2618 |
+
"grad_norm": 6.219490051269531,
|
| 2619 |
+
"learning_rate": 9.557615145123765e-08
|
| 2620 |
+
},
|
| 2621 |
+
{
|
| 2622 |
+
"step": 292,
|
| 2623 |
+
"epoch": 1.9796610169491524,
|
| 2624 |
+
"cpu_mem": 1.530130432,
|
| 2625 |
+
"gpu_mem": 4.824147456,
|
| 2626 |
+
"loss": 0.3779,
|
| 2627 |
+
"grad_norm": 4.7131218910217285,
|
| 2628 |
+
"learning_rate": 4.248079603064724e-08
|
| 2629 |
+
},
|
| 2630 |
+
{
|
| 2631 |
+
"step": 293,
|
| 2632 |
+
"epoch": 1.9864406779661017,
|
| 2633 |
+
"cpu_mem": 1.530130432,
|
| 2634 |
+
"gpu_mem": 4.824064512,
|
| 2635 |
+
"loss": 0.4359,
|
| 2636 |
+
"grad_norm": 6.299029350280762,
|
| 2637 |
+
"learning_rate": 1.0620574996372811e-08
|
| 2638 |
+
},
|
| 2639 |
+
{
|
| 2640 |
+
"step": 294,
|
| 2641 |
+
"epoch": 1.993220338983051,
|
| 2642 |
+
"cpu_mem": 1.530130432,
|
| 2643 |
+
"gpu_mem": 4.824090624,
|
| 2644 |
+
"loss": 0.3799,
|
| 2645 |
+
"grad_norm": 6.89005184173584,
|
| 2646 |
+
"learning_rate": 0.0
|
| 2647 |
+
},
|
| 2648 |
+
{
|
| 2649 |
+
"step": 294,
|
| 2650 |
+
"epoch": 1.993220338983051,
|
| 2651 |
+
"cpu_mem": 1.530130432,
|
| 2652 |
+
"gpu_mem": 4.824090624,
|
| 2653 |
+
"train_runtime": 4484.6084,
|
| 2654 |
+
"train_samples_per_second": 4.204,
|
| 2655 |
+
"train_steps_per_second": 0.066,
|
| 2656 |
+
"total_flos": 0.0,
|
| 2657 |
+
"train_loss": 0.6128277448671204
|
| 2658 |
+
}
|
| 2659 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 16,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 8,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "boolq",
|
| 3 |
+
"results": 0.7033639143730887
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "BOOLQ",
|
| 5 |
+
"dataset_id": "google/boolq",
|
| 6 |
+
"preprocess_id": "boolq_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 8,
|
| 11 |
+
"alpha": 16,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 6317696
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 2,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-boolq-r8-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T19:09:28.617533"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-boolq-r8-a2/training_logs.json
ADDED
|
@@ -0,0 +1,2659 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"step": 1,
|
| 4 |
+
"epoch": 0.006779661016949152,
|
| 5 |
+
"cpu_mem": 1.48830208,
|
| 6 |
+
"gpu_mem": 4.443082752,
|
| 7 |
+
"loss": 8.869,
|
| 8 |
+
"grad_norm": 234.86416625976562,
|
| 9 |
+
"learning_rate": 9.999999999999999e-06
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"step": 2,
|
| 13 |
+
"epoch": 0.013559322033898305,
|
| 14 |
+
"cpu_mem": 1.494396928,
|
| 15 |
+
"gpu_mem": 4.493840896,
|
| 16 |
+
"loss": 8.9376,
|
| 17 |
+
"grad_norm": 240.33407592773438,
|
| 18 |
+
"learning_rate": 1.9999999999999998e-05
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 3,
|
| 22 |
+
"epoch": 0.020338983050847456,
|
| 23 |
+
"cpu_mem": 1.49518336,
|
| 24 |
+
"gpu_mem": 4.493759488,
|
| 25 |
+
"loss": 7.5679,
|
| 26 |
+
"grad_norm": 243.47679138183594,
|
| 27 |
+
"learning_rate": 2.9999999999999997e-05
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"step": 4,
|
| 31 |
+
"epoch": 0.02711864406779661,
|
| 32 |
+
"cpu_mem": 1.495773184,
|
| 33 |
+
"gpu_mem": 4.493759488,
|
| 34 |
+
"loss": 4.959,
|
| 35 |
+
"grad_norm": 228.1814727783203,
|
| 36 |
+
"learning_rate": 3.9999999999999996e-05
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"step": 5,
|
| 40 |
+
"epoch": 0.03389830508474576,
|
| 41 |
+
"cpu_mem": 1.496363008,
|
| 42 |
+
"gpu_mem": 4.493694976,
|
| 43 |
+
"loss": 2.537,
|
| 44 |
+
"grad_norm": 137.45384216308594,
|
| 45 |
+
"learning_rate": 4.9999999999999996e-05
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"step": 6,
|
| 49 |
+
"epoch": 0.04067796610169491,
|
| 50 |
+
"cpu_mem": 1.496952832,
|
| 51 |
+
"gpu_mem": 4.493714944,
|
| 52 |
+
"loss": 1.4387,
|
| 53 |
+
"grad_norm": 56.679893493652344,
|
| 54 |
+
"learning_rate": 5.9999999999999995e-05
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"step": 7,
|
| 58 |
+
"epoch": 0.04745762711864407,
|
| 59 |
+
"cpu_mem": 1.497542656,
|
| 60 |
+
"gpu_mem": 4.493767168,
|
| 61 |
+
"loss": 0.8578,
|
| 62 |
+
"grad_norm": 21.124313354492188,
|
| 63 |
+
"learning_rate": 7e-05
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"step": 8,
|
| 67 |
+
"epoch": 0.05423728813559322,
|
| 68 |
+
"cpu_mem": 1.497935872,
|
| 69 |
+
"gpu_mem": 4.493853184,
|
| 70 |
+
"loss": 0.6193,
|
| 71 |
+
"grad_norm": 10.238547325134277,
|
| 72 |
+
"learning_rate": 7.999999999999999e-05
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"step": 9,
|
| 76 |
+
"epoch": 0.061016949152542375,
|
| 77 |
+
"cpu_mem": 1.498329088,
|
| 78 |
+
"gpu_mem": 4.493761024,
|
| 79 |
+
"loss": 0.6998,
|
| 80 |
+
"grad_norm": 18.19664764404297,
|
| 81 |
+
"learning_rate": 8.999999999999999e-05
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"step": 10,
|
| 85 |
+
"epoch": 0.06779661016949153,
|
| 86 |
+
"cpu_mem": 1.498918912,
|
| 87 |
+
"gpu_mem": 4.493661184,
|
| 88 |
+
"loss": 1.6846,
|
| 89 |
+
"grad_norm": 192.40335083007812,
|
| 90 |
+
"learning_rate": 9.999999999999999e-05
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"step": 11,
|
| 94 |
+
"epoch": 0.07457627118644068,
|
| 95 |
+
"cpu_mem": 1.499312128,
|
| 96 |
+
"gpu_mem": 4.493765632,
|
| 97 |
+
"loss": 1.0755,
|
| 98 |
+
"grad_norm": 123.23554229736328,
|
| 99 |
+
"learning_rate": 0.00010999999999999998
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"step": 12,
|
| 103 |
+
"epoch": 0.08135593220338982,
|
| 104 |
+
"cpu_mem": 1.499705344,
|
| 105 |
+
"gpu_mem": 4.494137344,
|
| 106 |
+
"loss": 1.2442,
|
| 107 |
+
"grad_norm": 281.9166259765625,
|
| 108 |
+
"learning_rate": 0.00011999999999999999
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"step": 13,
|
| 112 |
+
"epoch": 0.08813559322033898,
|
| 113 |
+
"cpu_mem": 1.50009856,
|
| 114 |
+
"gpu_mem": 4.493741056,
|
| 115 |
+
"loss": 1.585,
|
| 116 |
+
"grad_norm": 92.95726013183594,
|
| 117 |
+
"learning_rate": 0.00013
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"step": 14,
|
| 121 |
+
"epoch": 0.09491525423728814,
|
| 122 |
+
"cpu_mem": 1.500688384,
|
| 123 |
+
"gpu_mem": 4.493718016,
|
| 124 |
+
"loss": 0.7672,
|
| 125 |
+
"grad_norm": 36.01921081542969,
|
| 126 |
+
"learning_rate": 0.00014
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"step": 15,
|
| 130 |
+
"epoch": 0.1016949152542373,
|
| 131 |
+
"cpu_mem": 1.500884992,
|
| 132 |
+
"gpu_mem": 4.493656576,
|
| 133 |
+
"loss": 1.018,
|
| 134 |
+
"grad_norm": 49.682037353515625,
|
| 135 |
+
"learning_rate": 0.00015
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"step": 16,
|
| 139 |
+
"epoch": 0.10847457627118644,
|
| 140 |
+
"cpu_mem": 1.501278208,
|
| 141 |
+
"gpu_mem": 4.493741056,
|
| 142 |
+
"loss": 0.9599,
|
| 143 |
+
"grad_norm": 113.08747100830078,
|
| 144 |
+
"learning_rate": 0.00015999999999999999
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"step": 17,
|
| 148 |
+
"epoch": 0.1152542372881356,
|
| 149 |
+
"cpu_mem": 1.501671424,
|
| 150 |
+
"gpu_mem": 4.493780992,
|
| 151 |
+
"loss": 0.6904,
|
| 152 |
+
"grad_norm": 5.700827598571777,
|
| 153 |
+
"learning_rate": 0.00016999999999999999
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"step": 18,
|
| 157 |
+
"epoch": 0.12203389830508475,
|
| 158 |
+
"cpu_mem": 1.50206464,
|
| 159 |
+
"gpu_mem": 4.493843968,
|
| 160 |
+
"loss": 1.0158,
|
| 161 |
+
"grad_norm": 47.9433479309082,
|
| 162 |
+
"learning_rate": 0.00017999999999999998
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"step": 19,
|
| 166 |
+
"epoch": 0.1288135593220339,
|
| 167 |
+
"cpu_mem": 1.502261248,
|
| 168 |
+
"gpu_mem": 4.493681152,
|
| 169 |
+
"loss": 1.2045,
|
| 170 |
+
"grad_norm": 46.0986213684082,
|
| 171 |
+
"learning_rate": 0.00018999999999999998
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"step": 20,
|
| 175 |
+
"epoch": 0.13559322033898305,
|
| 176 |
+
"cpu_mem": 1.502654464,
|
| 177 |
+
"gpu_mem": 4.49379328,
|
| 178 |
+
"loss": 0.617,
|
| 179 |
+
"grad_norm": 4.902522087097168,
|
| 180 |
+
"learning_rate": 0.00019999999999999998
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"step": 21,
|
| 184 |
+
"epoch": 0.1423728813559322,
|
| 185 |
+
"cpu_mem": 1.50304768,
|
| 186 |
+
"gpu_mem": 4.493951488,
|
| 187 |
+
"loss": 0.6933,
|
| 188 |
+
"grad_norm": 14.804486274719238,
|
| 189 |
+
"learning_rate": 0.00020999999999999998
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"step": 22,
|
| 193 |
+
"epoch": 0.14915254237288136,
|
| 194 |
+
"cpu_mem": 1.503244288,
|
| 195 |
+
"gpu_mem": 4.493843968,
|
| 196 |
+
"loss": 0.7679,
|
| 197 |
+
"grad_norm": 14.584829330444336,
|
| 198 |
+
"learning_rate": 0.00021999999999999995
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"step": 23,
|
| 202 |
+
"epoch": 0.15593220338983052,
|
| 203 |
+
"cpu_mem": 1.503440896,
|
| 204 |
+
"gpu_mem": 4.49381632,
|
| 205 |
+
"loss": 0.6614,
|
| 206 |
+
"grad_norm": 6.266756057739258,
|
| 207 |
+
"learning_rate": 0.00023
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"step": 24,
|
| 211 |
+
"epoch": 0.16271186440677965,
|
| 212 |
+
"cpu_mem": 1.503834112,
|
| 213 |
+
"gpu_mem": 4.493873152,
|
| 214 |
+
"loss": 0.6063,
|
| 215 |
+
"grad_norm": 5.272337913513184,
|
| 216 |
+
"learning_rate": 0.00023999999999999998
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"step": 25,
|
| 220 |
+
"epoch": 0.1694915254237288,
|
| 221 |
+
"cpu_mem": 1.504227328,
|
| 222 |
+
"gpu_mem": 4.493658112,
|
| 223 |
+
"loss": 0.7254,
|
| 224 |
+
"grad_norm": 11.210253715515137,
|
| 225 |
+
"learning_rate": 0.00025
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"step": 26,
|
| 229 |
+
"epoch": 0.17627118644067796,
|
| 230 |
+
"cpu_mem": 1.504423936,
|
| 231 |
+
"gpu_mem": 4.493713408,
|
| 232 |
+
"loss": 0.7634,
|
| 233 |
+
"grad_norm": 18.455121994018555,
|
| 234 |
+
"learning_rate": 0.00026
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"step": 27,
|
| 238 |
+
"epoch": 0.18305084745762712,
|
| 239 |
+
"cpu_mem": 1.504620544,
|
| 240 |
+
"gpu_mem": 4.494005248,
|
| 241 |
+
"loss": 0.7479,
|
| 242 |
+
"grad_norm": 19.921911239624023,
|
| 243 |
+
"learning_rate": 0.00027
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"step": 28,
|
| 247 |
+
"epoch": 0.18983050847457628,
|
| 248 |
+
"cpu_mem": 1.50501376,
|
| 249 |
+
"gpu_mem": 4.493684224,
|
| 250 |
+
"loss": 0.845,
|
| 251 |
+
"grad_norm": 19.626916885375977,
|
| 252 |
+
"learning_rate": 0.00028
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"step": 29,
|
| 256 |
+
"epoch": 0.19661016949152543,
|
| 257 |
+
"cpu_mem": 1.505210368,
|
| 258 |
+
"gpu_mem": 4.493748736,
|
| 259 |
+
"loss": 0.678,
|
| 260 |
+
"grad_norm": 8.194727897644043,
|
| 261 |
+
"learning_rate": 0.00029
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"step": 30,
|
| 265 |
+
"epoch": 0.2033898305084746,
|
| 266 |
+
"cpu_mem": 1.505406976,
|
| 267 |
+
"gpu_mem": 4.493827072,
|
| 268 |
+
"loss": 0.7646,
|
| 269 |
+
"grad_norm": 10.00369644165039,
|
| 270 |
+
"learning_rate": 0.0003
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"step": 31,
|
| 274 |
+
"epoch": 0.21016949152542372,
|
| 275 |
+
"cpu_mem": 1.505603584,
|
| 276 |
+
"gpu_mem": 4.493630464,
|
| 277 |
+
"loss": 0.6244,
|
| 278 |
+
"grad_norm": 6.776846885681152,
|
| 279 |
+
"learning_rate": 0.0002999893794250036
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"step": 32,
|
| 283 |
+
"epoch": 0.21694915254237288,
|
| 284 |
+
"cpu_mem": 1.505800192,
|
| 285 |
+
"gpu_mem": 4.493744128,
|
| 286 |
+
"loss": 0.7586,
|
| 287 |
+
"grad_norm": 16.354310989379883,
|
| 288 |
+
"learning_rate": 0.00029995751920396937
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"step": 33,
|
| 292 |
+
"epoch": 0.22372881355932203,
|
| 293 |
+
"cpu_mem": 1.506193408,
|
| 294 |
+
"gpu_mem": 4.493982208,
|
| 295 |
+
"loss": 0.7397,
|
| 296 |
+
"grad_norm": 12.160492897033691,
|
| 297 |
+
"learning_rate": 0.00029990442384854874
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"step": 34,
|
| 301 |
+
"epoch": 0.2305084745762712,
|
| 302 |
+
"cpu_mem": 1.506390016,
|
| 303 |
+
"gpu_mem": 4.493684224,
|
| 304 |
+
"loss": 0.6239,
|
| 305 |
+
"grad_norm": 9.15272331237793,
|
| 306 |
+
"learning_rate": 0.0002998301008774512
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"step": 35,
|
| 310 |
+
"epoch": 0.23728813559322035,
|
| 311 |
+
"cpu_mem": 1.506586624,
|
| 312 |
+
"gpu_mem": 4.493894656,
|
| 313 |
+
"loss": 0.6729,
|
| 314 |
+
"grad_norm": 5.571009635925293,
|
| 315 |
+
"learning_rate": 0.0002997345608153792
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"step": 36,
|
| 319 |
+
"epoch": 0.2440677966101695,
|
| 320 |
+
"cpu_mem": 1.506783232,
|
| 321 |
+
"gpu_mem": 4.493845504,
|
| 322 |
+
"loss": 0.7223,
|
| 323 |
+
"grad_norm": 25.372941970825195,
|
| 324 |
+
"learning_rate": 0.000299617817191538
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"step": 37,
|
| 328 |
+
"epoch": 0.25084745762711863,
|
| 329 |
+
"cpu_mem": 1.50697984,
|
| 330 |
+
"gpu_mem": 4.493656576,
|
| 331 |
+
"loss": 0.8172,
|
| 332 |
+
"grad_norm": 16.308820724487305,
|
| 333 |
+
"learning_rate": 0.0002994798865377198
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"step": 38,
|
| 337 |
+
"epoch": 0.2576271186440678,
|
| 338 |
+
"cpu_mem": 1.507373056,
|
| 339 |
+
"gpu_mem": 4.493903872,
|
| 340 |
+
"loss": 0.6857,
|
| 341 |
+
"grad_norm": 4.158070087432861,
|
| 342 |
+
"learning_rate": 0.0002993207883859627
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"step": 39,
|
| 346 |
+
"epoch": 0.26440677966101694,
|
| 347 |
+
"cpu_mem": 1.507569664,
|
| 348 |
+
"gpu_mem": 4.494283264,
|
| 349 |
+
"loss": 0.664,
|
| 350 |
+
"grad_norm": 3.092892646789551,
|
| 351 |
+
"learning_rate": 0.0002991405452657846
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"step": 40,
|
| 355 |
+
"epoch": 0.2711864406779661,
|
| 356 |
+
"cpu_mem": 1.507569664,
|
| 357 |
+
"gpu_mem": 4.493853184,
|
| 358 |
+
"loss": 0.6499,
|
| 359 |
+
"grad_norm": 4.837502956390381,
|
| 360 |
+
"learning_rate": 0.00029893918270099324
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"step": 41,
|
| 364 |
+
"epoch": 0.27796610169491526,
|
| 365 |
+
"cpu_mem": 1.507766272,
|
| 366 |
+
"gpu_mem": 4.494080512,
|
| 367 |
+
"loss": 0.6745,
|
| 368 |
+
"grad_norm": 2.230825424194336,
|
| 369 |
+
"learning_rate": 0.00029871672920607153
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"step": 42,
|
| 373 |
+
"epoch": 0.2847457627118644,
|
| 374 |
+
"cpu_mem": 1.507766272,
|
| 375 |
+
"gpu_mem": 4.4939776,
|
| 376 |
+
"loss": 0.6285,
|
| 377 |
+
"grad_norm": 6.971991539001465,
|
| 378 |
+
"learning_rate": 0.0002984732162821399
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"step": 43,
|
| 382 |
+
"epoch": 0.29152542372881357,
|
| 383 |
+
"cpu_mem": 1.50796288,
|
| 384 |
+
"gpu_mem": 4.493799424,
|
| 385 |
+
"loss": 0.6839,
|
| 386 |
+
"grad_norm": 7.030607223510742,
|
| 387 |
+
"learning_rate": 0.0002982086784124952
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"step": 44,
|
| 391 |
+
"epoch": 0.2983050847457627,
|
| 392 |
+
"cpu_mem": 1.508159488,
|
| 393 |
+
"gpu_mem": 4.493942272,
|
| 394 |
+
"loss": 0.6815,
|
| 395 |
+
"grad_norm": 9.800080299377441,
|
| 396 |
+
"learning_rate": 0.00029792315305772796
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"step": 45,
|
| 400 |
+
"epoch": 0.3050847457627119,
|
| 401 |
+
"cpu_mem": 1.508356096,
|
| 402 |
+
"gpu_mem": 4.493722624,
|
| 403 |
+
"loss": 1.188,
|
| 404 |
+
"grad_norm": 21.56556510925293,
|
| 405 |
+
"learning_rate": 0.0002976166806504174
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"step": 46,
|
| 409 |
+
"epoch": 0.31186440677966104,
|
| 410 |
+
"cpu_mem": 1.508552704,
|
| 411 |
+
"gpu_mem": 4.493965312,
|
| 412 |
+
"loss": 0.7325,
|
| 413 |
+
"grad_norm": 7.097214221954346,
|
| 414 |
+
"learning_rate": 0.00029728930458940595
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"step": 47,
|
| 418 |
+
"epoch": 0.31864406779661014,
|
| 419 |
+
"cpu_mem": 1.508552704,
|
| 420 |
+
"gpu_mem": 4.493688832,
|
| 421 |
+
"loss": 0.686,
|
| 422 |
+
"grad_norm": 10.250021934509277,
|
| 423 |
+
"learning_rate": 0.00029694107123365385
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"step": 48,
|
| 427 |
+
"epoch": 0.3254237288135593,
|
| 428 |
+
"cpu_mem": 1.508749312,
|
| 429 |
+
"gpu_mem": 4.493765632,
|
| 430 |
+
"loss": 0.8689,
|
| 431 |
+
"grad_norm": 17.215639114379883,
|
| 432 |
+
"learning_rate": 0.00029657202989567393
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"step": 49,
|
| 436 |
+
"epoch": 0.33220338983050846,
|
| 437 |
+
"cpu_mem": 1.50894592,
|
| 438 |
+
"gpu_mem": 4.493782528,
|
| 439 |
+
"loss": 0.7259,
|
| 440 |
+
"grad_norm": 3.048801898956299,
|
| 441 |
+
"learning_rate": 0.00029618223283454893
|
| 442 |
+
},
|
| 443 |
+
{
|
| 444 |
+
"step": 50,
|
| 445 |
+
"epoch": 0.3389830508474576,
|
| 446 |
+
"cpu_mem": 1.509142528,
|
| 447 |
+
"gpu_mem": 4.493721088,
|
| 448 |
+
"loss": 0.6172,
|
| 449 |
+
"grad_norm": 2.2575485706329346,
|
| 450 |
+
"learning_rate": 0.00029577173524853123
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"step": 51,
|
| 454 |
+
"epoch": 0.34576271186440677,
|
| 455 |
+
"cpu_mem": 1.509142528,
|
| 456 |
+
"gpu_mem": 4.493725696,
|
| 457 |
+
"loss": 0.5723,
|
| 458 |
+
"grad_norm": 2.044959306716919,
|
| 459 |
+
"learning_rate": 0.0002953405952672261
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"step": 52,
|
| 463 |
+
"epoch": 0.3525423728813559,
|
| 464 |
+
"cpu_mem": 1.509339136,
|
| 465 |
+
"gpu_mem": 4.493805568,
|
| 466 |
+
"loss": 0.6726,
|
| 467 |
+
"grad_norm": 4.235073089599609,
|
| 468 |
+
"learning_rate": 0.0002948888739433602
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"step": 53,
|
| 472 |
+
"epoch": 0.3593220338983051,
|
| 473 |
+
"cpu_mem": 1.509339136,
|
| 474 |
+
"gpu_mem": 4.493828608,
|
| 475 |
+
"loss": 0.6754,
|
| 476 |
+
"grad_norm": 10.029523849487305,
|
| 477 |
+
"learning_rate": 0.0002944166352441363
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"step": 54,
|
| 481 |
+
"epoch": 0.36610169491525424,
|
| 482 |
+
"cpu_mem": 1.509535744,
|
| 483 |
+
"gpu_mem": 4.493756416,
|
| 484 |
+
"loss": 0.6683,
|
| 485 |
+
"grad_norm": 4.766758918762207,
|
| 486 |
+
"learning_rate": 0.0002939239460421746
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"step": 55,
|
| 490 |
+
"epoch": 0.3728813559322034,
|
| 491 |
+
"cpu_mem": 1.509535744,
|
| 492 |
+
"gpu_mem": 4.494026752,
|
| 493 |
+
"loss": 0.6831,
|
| 494 |
+
"grad_norm": 3.753432273864746,
|
| 495 |
+
"learning_rate": 0.00029341087610604337
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"step": 56,
|
| 499 |
+
"epoch": 0.37966101694915255,
|
| 500 |
+
"cpu_mem": 1.509732352,
|
| 501 |
+
"gpu_mem": 4.493813248,
|
| 502 |
+
"loss": 0.926,
|
| 503 |
+
"grad_norm": 12.049140930175781,
|
| 504 |
+
"learning_rate": 0.00029287749809037904
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"step": 57,
|
| 508 |
+
"epoch": 0.3864406779661017,
|
| 509 |
+
"cpu_mem": 1.509732352,
|
| 510 |
+
"gpu_mem": 4.493807104,
|
| 511 |
+
"loss": 0.7591,
|
| 512 |
+
"grad_norm": 7.700575351715088,
|
| 513 |
+
"learning_rate": 0.0002923238875255979
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"step": 58,
|
| 517 |
+
"epoch": 0.39322033898305087,
|
| 518 |
+
"cpu_mem": 1.50992896,
|
| 519 |
+
"gpu_mem": 4.493702656,
|
| 520 |
+
"loss": 0.6608,
|
| 521 |
+
"grad_norm": 2.9501571655273438,
|
| 522 |
+
"learning_rate": 0.00029175012280720024
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"step": 59,
|
| 526 |
+
"epoch": 0.4,
|
| 527 |
+
"cpu_mem": 1.50992896,
|
| 528 |
+
"gpu_mem": 4.493719552,
|
| 529 |
+
"loss": 0.7055,
|
| 530 |
+
"grad_norm": 4.570174217224121,
|
| 531 |
+
"learning_rate": 0.000291156285184669
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"step": 60,
|
| 535 |
+
"epoch": 0.4067796610169492,
|
| 536 |
+
"cpu_mem": 1.510125568,
|
| 537 |
+
"gpu_mem": 4.493813248,
|
| 538 |
+
"loss": 0.7155,
|
| 539 |
+
"grad_norm": 10.250066757202148,
|
| 540 |
+
"learning_rate": 0.00029054245874996426
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"step": 61,
|
| 544 |
+
"epoch": 0.4135593220338983,
|
| 545 |
+
"cpu_mem": 1.510125568,
|
| 546 |
+
"gpu_mem": 4.493824,
|
| 547 |
+
"loss": 0.6198,
|
| 548 |
+
"grad_norm": 1.4150381088256836,
|
| 549 |
+
"learning_rate": 0.0002899087304256151
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"step": 62,
|
| 553 |
+
"epoch": 0.42033898305084744,
|
| 554 |
+
"cpu_mem": 1.510125568,
|
| 555 |
+
"gpu_mem": 4.493811712,
|
| 556 |
+
"loss": 0.8658,
|
| 557 |
+
"grad_norm": 11.572601318359375,
|
| 558 |
+
"learning_rate": 0.0002892551899524109
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"step": 63,
|
| 562 |
+
"epoch": 0.4271186440677966,
|
| 563 |
+
"cpu_mem": 1.510125568,
|
| 564 |
+
"gpu_mem": 4.493804032,
|
| 565 |
+
"loss": 0.5689,
|
| 566 |
+
"grad_norm": 2.9226982593536377,
|
| 567 |
+
"learning_rate": 0.000288581929876693
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"step": 64,
|
| 571 |
+
"epoch": 0.43389830508474575,
|
| 572 |
+
"cpu_mem": 1.510518784,
|
| 573 |
+
"gpu_mem": 4.493733376,
|
| 574 |
+
"loss": 0.6423,
|
| 575 |
+
"grad_norm": 1.547162413597107,
|
| 576 |
+
"learning_rate": 0.0002878890455372498
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"step": 65,
|
| 580 |
+
"epoch": 0.4406779661016949,
|
| 581 |
+
"cpu_mem": 1.510715392,
|
| 582 |
+
"gpu_mem": 4.49377792,
|
| 583 |
+
"loss": 0.6797,
|
| 584 |
+
"grad_norm": 3.6416873931884766,
|
| 585 |
+
"learning_rate": 0.0002871766350518159
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"step": 66,
|
| 589 |
+
"epoch": 0.44745762711864406,
|
| 590 |
+
"cpu_mem": 1.510715392,
|
| 591 |
+
"gpu_mem": 4.493971456,
|
| 592 |
+
"loss": 0.6036,
|
| 593 |
+
"grad_norm": 3.8238625526428223,
|
| 594 |
+
"learning_rate": 0.00028644479930317775
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"step": 67,
|
| 598 |
+
"epoch": 0.4542372881355932,
|
| 599 |
+
"cpu_mem": 1.510715392,
|
| 600 |
+
"gpu_mem": 4.493681152,
|
| 601 |
+
"loss": 0.7277,
|
| 602 |
+
"grad_norm": 8.09846019744873,
|
| 603 |
+
"learning_rate": 0.00028569364192488803
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"step": 68,
|
| 607 |
+
"epoch": 0.4610169491525424,
|
| 608 |
+
"cpu_mem": 1.510912,
|
| 609 |
+
"gpu_mem": 4.493648896,
|
| 610 |
+
"loss": 0.8994,
|
| 611 |
+
"grad_norm": 13.207178115844727,
|
| 612 |
+
"learning_rate": 0.00028492326928659045
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"step": 69,
|
| 616 |
+
"epoch": 0.46779661016949153,
|
| 617 |
+
"cpu_mem": 1.510912,
|
| 618 |
+
"gpu_mem": 4.493714944,
|
| 619 |
+
"loss": 0.5864,
|
| 620 |
+
"grad_norm": 1.9375393390655518,
|
| 621 |
+
"learning_rate": 0.00028413379047895665
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"step": 70,
|
| 625 |
+
"epoch": 0.4745762711864407,
|
| 626 |
+
"cpu_mem": 1.510912,
|
| 627 |
+
"gpu_mem": 4.4937088,
|
| 628 |
+
"loss": 0.7792,
|
| 629 |
+
"grad_norm": 10.860440254211426,
|
| 630 |
+
"learning_rate": 0.0002833253172982385
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"step": 71,
|
| 634 |
+
"epoch": 0.48135593220338985,
|
| 635 |
+
"cpu_mem": 1.511108608,
|
| 636 |
+
"gpu_mem": 4.493937664,
|
| 637 |
+
"loss": 0.8567,
|
| 638 |
+
"grad_norm": 15.644757270812988,
|
| 639 |
+
"learning_rate": 0.0002824979642304366
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"step": 72,
|
| 643 |
+
"epoch": 0.488135593220339,
|
| 644 |
+
"cpu_mem": 1.511108608,
|
| 645 |
+
"gpu_mem": 4.493929984,
|
| 646 |
+
"loss": 0.6608,
|
| 647 |
+
"grad_norm": 6.629893779754639,
|
| 648 |
+
"learning_rate": 0.0002816518484350883
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"step": 73,
|
| 652 |
+
"epoch": 0.49491525423728816,
|
| 653 |
+
"cpu_mem": 1.511108608,
|
| 654 |
+
"gpu_mem": 4.493896192,
|
| 655 |
+
"loss": 0.8023,
|
| 656 |
+
"grad_norm": 10.461833953857422,
|
| 657 |
+
"learning_rate": 0.0002807870897286772
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"step": 74,
|
| 661 |
+
"epoch": 0.5016949152542373,
|
| 662 |
+
"cpu_mem": 1.511305216,
|
| 663 |
+
"gpu_mem": 4.493756416,
|
| 664 |
+
"loss": 0.6224,
|
| 665 |
+
"grad_norm": 7.1698713302612305,
|
| 666 |
+
"learning_rate": 0.0002799038105676658
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"step": 75,
|
| 670 |
+
"epoch": 0.5084745762711864,
|
| 671 |
+
"cpu_mem": 1.511305216,
|
| 672 |
+
"gpu_mem": 4.493681152,
|
| 673 |
+
"loss": 0.5662,
|
| 674 |
+
"grad_norm": 2.1262013912200928,
|
| 675 |
+
"learning_rate": 0.000279002136031155
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"step": 76,
|
| 679 |
+
"epoch": 0.5152542372881356,
|
| 680 |
+
"cpu_mem": 1.511305216,
|
| 681 |
+
"gpu_mem": 4.493621248,
|
| 682 |
+
"loss": 0.6138,
|
| 683 |
+
"grad_norm": 3.743492841720581,
|
| 684 |
+
"learning_rate": 0.00027808219380317216
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"step": 77,
|
| 688 |
+
"epoch": 0.5220338983050847,
|
| 689 |
+
"cpu_mem": 1.511305216,
|
| 690 |
+
"gpu_mem": 4.493694976,
|
| 691 |
+
"loss": 0.6539,
|
| 692 |
+
"grad_norm": 6.320612907409668,
|
| 693 |
+
"learning_rate": 0.0002771441141545895
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"step": 78,
|
| 697 |
+
"epoch": 0.5288135593220339,
|
| 698 |
+
"cpu_mem": 1.511305216,
|
| 699 |
+
"gpu_mem": 4.4937472,
|
| 700 |
+
"loss": 0.7095,
|
| 701 |
+
"grad_norm": 5.929784774780273,
|
| 702 |
+
"learning_rate": 0.0002761880299246772
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"step": 79,
|
| 706 |
+
"epoch": 0.535593220338983,
|
| 707 |
+
"cpu_mem": 1.511305216,
|
| 708 |
+
"gpu_mem": 4.493879296,
|
| 709 |
+
"loss": 0.6698,
|
| 710 |
+
"grad_norm": 1.6754264831542969,
|
| 711 |
+
"learning_rate": 0.000275214076502292
|
| 712 |
+
},
|
| 713 |
+
{
|
| 714 |
+
"step": 80,
|
| 715 |
+
"epoch": 0.5423728813559322,
|
| 716 |
+
"cpu_mem": 1.511305216,
|
| 717 |
+
"gpu_mem": 4.49377024,
|
| 718 |
+
"loss": 0.6258,
|
| 719 |
+
"grad_norm": 1.8663870096206665,
|
| 720 |
+
"learning_rate": 0.0002742223918067056
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"step": 81,
|
| 724 |
+
"epoch": 0.5491525423728814,
|
| 725 |
+
"cpu_mem": 1.511305216,
|
| 726 |
+
"gpu_mem": 4.493650432,
|
| 727 |
+
"loss": 0.6314,
|
| 728 |
+
"grad_norm": 2.4315168857574463,
|
| 729 |
+
"learning_rate": 0.00027321311626807374
|
| 730 |
+
},
|
| 731 |
+
{
|
| 732 |
+
"step": 82,
|
| 733 |
+
"epoch": 0.5559322033898305,
|
| 734 |
+
"cpu_mem": 1.511305216,
|
| 735 |
+
"gpu_mem": 4.493719552,
|
| 736 |
+
"loss": 0.635,
|
| 737 |
+
"grad_norm": 1.932876467704773,
|
| 738 |
+
"learning_rate": 0.0002721863928075503
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"step": 83,
|
| 742 |
+
"epoch": 0.5627118644067797,
|
| 743 |
+
"cpu_mem": 1.511305216,
|
| 744 |
+
"gpu_mem": 4.493819392,
|
| 745 |
+
"loss": 0.6554,
|
| 746 |
+
"grad_norm": 3.4201409816741943,
|
| 747 |
+
"learning_rate": 0.000271142366817049
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"step": 84,
|
| 751 |
+
"epoch": 0.5694915254237288,
|
| 752 |
+
"cpu_mem": 1.511501824,
|
| 753 |
+
"gpu_mem": 4.493782528,
|
| 754 |
+
"loss": 0.7156,
|
| 755 |
+
"grad_norm": 7.90298318862915,
|
| 756 |
+
"learning_rate": 0.00027008118613865406
|
| 757 |
+
},
|
| 758 |
+
{
|
| 759 |
+
"step": 85,
|
| 760 |
+
"epoch": 0.576271186440678,
|
| 761 |
+
"cpu_mem": 1.511698432,
|
| 762 |
+
"gpu_mem": 4.493814784,
|
| 763 |
+
"loss": 0.5996,
|
| 764 |
+
"grad_norm": 3.2398500442504883,
|
| 765 |
+
"learning_rate": 0.00026900300104368524
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"step": 86,
|
| 769 |
+
"epoch": 0.5830508474576271,
|
| 770 |
+
"cpu_mem": 1.511698432,
|
| 771 |
+
"gpu_mem": 4.493765632,
|
| 772 |
+
"loss": 0.731,
|
| 773 |
+
"grad_norm": 5.571208953857422,
|
| 774 |
+
"learning_rate": 0.00026790796421141813
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"step": 87,
|
| 778 |
+
"epoch": 0.5898305084745763,
|
| 779 |
+
"cpu_mem": 1.511698432,
|
| 780 |
+
"gpu_mem": 4.493773312,
|
| 781 |
+
"loss": 0.7147,
|
| 782 |
+
"grad_norm": 7.965809345245361,
|
| 783 |
+
"learning_rate": 0.00026679623070746325
|
| 784 |
+
},
|
| 785 |
+
{
|
| 786 |
+
"step": 88,
|
| 787 |
+
"epoch": 0.5966101694915255,
|
| 788 |
+
"cpu_mem": 1.511698432,
|
| 789 |
+
"gpu_mem": 4.493917696,
|
| 790 |
+
"loss": 0.5825,
|
| 791 |
+
"grad_norm": 2.4796321392059326,
|
| 792 |
+
"learning_rate": 0.0002656679579618081
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"step": 89,
|
| 796 |
+
"epoch": 0.6033898305084746,
|
| 797 |
+
"cpu_mem": 1.511698432,
|
| 798 |
+
"gpu_mem": 4.493699584,
|
| 799 |
+
"loss": 0.6858,
|
| 800 |
+
"grad_norm": 3.1727724075317383,
|
| 801 |
+
"learning_rate": 0.0002645233057465235
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"step": 90,
|
| 805 |
+
"epoch": 0.6101694915254238,
|
| 806 |
+
"cpu_mem": 1.511698432,
|
| 807 |
+
"gpu_mem": 4.493753344,
|
| 808 |
+
"loss": 0.7181,
|
| 809 |
+
"grad_norm": 8.902009010314941,
|
| 810 |
+
"learning_rate": 0.00026336243615313873
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"step": 91,
|
| 814 |
+
"epoch": 0.6169491525423729,
|
| 815 |
+
"cpu_mem": 1.511698432,
|
| 816 |
+
"gpu_mem": 4.493721088,
|
| 817 |
+
"loss": 0.7394,
|
| 818 |
+
"grad_norm": 10.157062530517578,
|
| 819 |
+
"learning_rate": 0.00026218551356968814
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"step": 92,
|
| 823 |
+
"epoch": 0.6237288135593221,
|
| 824 |
+
"cpu_mem": 1.511698432,
|
| 825 |
+
"gpu_mem": 4.493802496,
|
| 826 |
+
"loss": 0.6819,
|
| 827 |
+
"grad_norm": 2.2884974479675293,
|
| 828 |
+
"learning_rate": 0.00026099270465743254
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"step": 93,
|
| 832 |
+
"epoch": 0.6305084745762712,
|
| 833 |
+
"cpu_mem": 1.511698432,
|
| 834 |
+
"gpu_mem": 4.493605888,
|
| 835 |
+
"loss": 0.7171,
|
| 836 |
+
"grad_norm": 3.9411673545837402,
|
| 837 |
+
"learning_rate": 0.0002597841783272588
|
| 838 |
+
},
|
| 839 |
+
{
|
| 840 |
+
"step": 94,
|
| 841 |
+
"epoch": 0.6372881355932203,
|
| 842 |
+
"cpu_mem": 1.511698432,
|
| 843 |
+
"gpu_mem": 4.493719552,
|
| 844 |
+
"loss": 0.5627,
|
| 845 |
+
"grad_norm": 2.8555171489715576,
|
| 846 |
+
"learning_rate": 0.0002585601057157605
|
| 847 |
+
},
|
| 848 |
+
{
|
| 849 |
+
"step": 95,
|
| 850 |
+
"epoch": 0.6440677966101694,
|
| 851 |
+
"cpu_mem": 1.511698432,
|
| 852 |
+
"gpu_mem": 4.49373952,
|
| 853 |
+
"loss": 0.6865,
|
| 854 |
+
"grad_norm": 5.10888147354126,
|
| 855 |
+
"learning_rate": 0.00025732066016100394
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"step": 96,
|
| 859 |
+
"epoch": 0.6508474576271186,
|
| 860 |
+
"cpu_mem": 1.511698432,
|
| 861 |
+
"gpu_mem": 4.49377792,
|
| 862 |
+
"loss": 0.631,
|
| 863 |
+
"grad_norm": 4.621267318725586,
|
| 864 |
+
"learning_rate": 0.00025606601717798207
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"step": 97,
|
| 868 |
+
"epoch": 0.6576271186440678,
|
| 869 |
+
"cpu_mem": 1.51189504,
|
| 870 |
+
"gpu_mem": 4.49376256,
|
| 871 |
+
"loss": 0.6521,
|
| 872 |
+
"grad_norm": 3.2591891288757324,
|
| 873 |
+
"learning_rate": 0.0002547963544337602
|
| 874 |
+
},
|
| 875 |
+
{
|
| 876 |
+
"step": 98,
|
| 877 |
+
"epoch": 0.6644067796610169,
|
| 878 |
+
"cpu_mem": 1.51189504,
|
| 879 |
+
"gpu_mem": 4.493675008,
|
| 880 |
+
"loss": 0.6631,
|
| 881 |
+
"grad_norm": 3.0139002799987793,
|
| 882 |
+
"learning_rate": 0.0002535118517223168
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"step": 99,
|
| 886 |
+
"epoch": 0.6711864406779661,
|
| 887 |
+
"cpu_mem": 1.51189504,
|
| 888 |
+
"gpu_mem": 4.49362432,
|
| 889 |
+
"loss": 0.6299,
|
| 890 |
+
"grad_norm": 2.386324405670166,
|
| 891 |
+
"learning_rate": 0.00025221269093908365
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"step": 100,
|
| 895 |
+
"epoch": 0.6779661016949152,
|
| 896 |
+
"cpu_mem": 1.51189504,
|
| 897 |
+
"gpu_mem": 4.493741056,
|
| 898 |
+
"loss": 0.6204,
|
| 899 |
+
"grad_norm": 1.986992359161377,
|
| 900 |
+
"learning_rate": 0.0002508990560551879
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"step": 101,
|
| 904 |
+
"epoch": 0.6847457627118644,
|
| 905 |
+
"cpu_mem": 1.51189504,
|
| 906 |
+
"gpu_mem": 4.493773312,
|
| 907 |
+
"loss": 0.7108,
|
| 908 |
+
"grad_norm": 6.049999237060547,
|
| 909 |
+
"learning_rate": 0.0002495711330914001
|
| 910 |
+
},
|
| 911 |
+
{
|
| 912 |
+
"step": 102,
|
| 913 |
+
"epoch": 0.6915254237288135,
|
| 914 |
+
"cpu_mem": 1.51189504,
|
| 915 |
+
"gpu_mem": 4.493807104,
|
| 916 |
+
"loss": 0.6755,
|
| 917 |
+
"grad_norm": 5.4609575271606445,
|
| 918 |
+
"learning_rate": 0.00024822911009179276
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"step": 103,
|
| 922 |
+
"epoch": 0.6983050847457627,
|
| 923 |
+
"cpu_mem": 1.51189504,
|
| 924 |
+
"gpu_mem": 4.493857792,
|
| 925 |
+
"loss": 0.6327,
|
| 926 |
+
"grad_norm": 2.271766185760498,
|
| 927 |
+
"learning_rate": 0.0002468731770971113
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"step": 104,
|
| 931 |
+
"epoch": 0.7050847457627119,
|
| 932 |
+
"cpu_mem": 1.51189504,
|
| 933 |
+
"gpu_mem": 4.49376256,
|
| 934 |
+
"loss": 0.6587,
|
| 935 |
+
"grad_norm": 3.347034454345703,
|
| 936 |
+
"learning_rate": 0.0002455035261178632
|
| 937 |
+
},
|
| 938 |
+
{
|
| 939 |
+
"step": 105,
|
| 940 |
+
"epoch": 0.711864406779661,
|
| 941 |
+
"cpu_mem": 1.51189504,
|
| 942 |
+
"gpu_mem": 4.493863936,
|
| 943 |
+
"loss": 0.6336,
|
| 944 |
+
"grad_norm": 4.408857822418213,
|
| 945 |
+
"learning_rate": 0.0002441203511071278
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"step": 106,
|
| 949 |
+
"epoch": 0.7186440677966102,
|
| 950 |
+
"cpu_mem": 1.51189504,
|
| 951 |
+
"gpu_mem": 4.493814784,
|
| 952 |
+
"loss": 0.6127,
|
| 953 |
+
"grad_norm": 4.522818088531494,
|
| 954 |
+
"learning_rate": 0.00024272384793309077
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"step": 107,
|
| 958 |
+
"epoch": 0.7254237288135593,
|
| 959 |
+
"cpu_mem": 1.51189504,
|
| 960 |
+
"gpu_mem": 4.493702656,
|
| 961 |
+
"loss": 0.5933,
|
| 962 |
+
"grad_norm": 1.8976800441741943,
|
| 963 |
+
"learning_rate": 0.00024131421435130807
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"step": 108,
|
| 967 |
+
"epoch": 0.7322033898305085,
|
| 968 |
+
"cpu_mem": 1.51189504,
|
| 969 |
+
"gpu_mem": 4.493886976,
|
| 970 |
+
"loss": 0.6641,
|
| 971 |
+
"grad_norm": 3.7306277751922607,
|
| 972 |
+
"learning_rate": 0.00023989164997670202
|
| 973 |
+
},
|
| 974 |
+
{
|
| 975 |
+
"step": 109,
|
| 976 |
+
"epoch": 0.7389830508474576,
|
| 977 |
+
"cpu_mem": 1.512091648,
|
| 978 |
+
"gpu_mem": 4.493741056,
|
| 979 |
+
"loss": 0.7569,
|
| 980 |
+
"grad_norm": 5.521296501159668,
|
| 981 |
+
"learning_rate": 0.0002384563562552943
|
| 982 |
+
},
|
| 983 |
+
{
|
| 984 |
+
"step": 110,
|
| 985 |
+
"epoch": 0.7457627118644068,
|
| 986 |
+
"cpu_mem": 1.512091648,
|
| 987 |
+
"gpu_mem": 4.493744128,
|
| 988 |
+
"loss": 0.6737,
|
| 989 |
+
"grad_norm": 4.360266208648682,
|
| 990 |
+
"learning_rate": 0.0002370085364356797
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"step": 111,
|
| 994 |
+
"epoch": 0.752542372881356,
|
| 995 |
+
"cpu_mem": 1.512091648,
|
| 996 |
+
"gpu_mem": 4.493713408,
|
| 997 |
+
"loss": 0.5683,
|
| 998 |
+
"grad_norm": 2.3229658603668213,
|
| 999 |
+
"learning_rate": 0.0002355483955402446
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"step": 112,
|
| 1003 |
+
"epoch": 0.7593220338983051,
|
| 1004 |
+
"cpu_mem": 1.512091648,
|
| 1005 |
+
"gpu_mem": 4.493759488,
|
| 1006 |
+
"loss": 0.6631,
|
| 1007 |
+
"grad_norm": 6.712233543395996,
|
| 1008 |
+
"learning_rate": 0.00023407614033613407
|
| 1009 |
+
},
|
| 1010 |
+
{
|
| 1011 |
+
"step": 113,
|
| 1012 |
+
"epoch": 0.7661016949152543,
|
| 1013 |
+
"cpu_mem": 1.512091648,
|
| 1014 |
+
"gpu_mem": 4.493750272,
|
| 1015 |
+
"loss": 0.7655,
|
| 1016 |
+
"grad_norm": 6.895766258239746,
|
| 1017 |
+
"learning_rate": 0.0002325919793059723
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"step": 114,
|
| 1021 |
+
"epoch": 0.7728813559322034,
|
| 1022 |
+
"cpu_mem": 1.512091648,
|
| 1023 |
+
"gpu_mem": 4.49373184,
|
| 1024 |
+
"loss": 0.7358,
|
| 1025 |
+
"grad_norm": 8.146341323852539,
|
| 1026 |
+
"learning_rate": 0.00023109612261833963
|
| 1027 |
+
},
|
| 1028 |
+
{
|
| 1029 |
+
"step": 115,
|
| 1030 |
+
"epoch": 0.7796610169491526,
|
| 1031 |
+
"cpu_mem": 1.512091648,
|
| 1032 |
+
"gpu_mem": 4.493807104,
|
| 1033 |
+
"loss": 0.601,
|
| 1034 |
+
"grad_norm": 3.9480652809143066,
|
| 1035 |
+
"learning_rate": 0.0002295887820980112
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"step": 116,
|
| 1039 |
+
"epoch": 0.7864406779661017,
|
| 1040 |
+
"cpu_mem": 1.512091648,
|
| 1041 |
+
"gpu_mem": 4.493727232,
|
| 1042 |
+
"loss": 0.5963,
|
| 1043 |
+
"grad_norm": 2.6513514518737793,
|
| 1044 |
+
"learning_rate": 0.0002280701711959608
|
| 1045 |
+
},
|
| 1046 |
+
{
|
| 1047 |
+
"step": 117,
|
| 1048 |
+
"epoch": 0.7932203389830509,
|
| 1049 |
+
"cpu_mem": 1.512091648,
|
| 1050 |
+
"gpu_mem": 4.493618176,
|
| 1051 |
+
"loss": 0.7067,
|
| 1052 |
+
"grad_norm": 5.624129772186279,
|
| 1053 |
+
"learning_rate": 0.00022654050495913495
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"step": 118,
|
| 1057 |
+
"epoch": 0.8,
|
| 1058 |
+
"cpu_mem": 1.512091648,
|
| 1059 |
+
"gpu_mem": 4.493856256,
|
| 1060 |
+
"loss": 0.7582,
|
| 1061 |
+
"grad_norm": 6.480310916900635,
|
| 1062 |
+
"learning_rate": 0.000225
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"step": 119,
|
| 1066 |
+
"epoch": 0.8067796610169492,
|
| 1067 |
+
"cpu_mem": 1.512091648,
|
| 1068 |
+
"gpu_mem": 4.494026752,
|
| 1069 |
+
"loss": 0.5894,
|
| 1070 |
+
"grad_norm": 2.979290723800659,
|
| 1071 |
+
"learning_rate": 0.00022344887446586865
|
| 1072 |
+
},
|
| 1073 |
+
{
|
| 1074 |
+
"step": 120,
|
| 1075 |
+
"epoch": 0.8135593220338984,
|
| 1076 |
+
"cpu_mem": 1.512091648,
|
| 1077 |
+
"gpu_mem": 4.493759488,
|
| 1078 |
+
"loss": 0.5885,
|
| 1079 |
+
"grad_norm": 1.9377977848052979,
|
| 1080 |
+
"learning_rate": 0.00022188734800800852
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"step": 121,
|
| 1084 |
+
"epoch": 0.8203389830508474,
|
| 1085 |
+
"cpu_mem": 1.512091648,
|
| 1086 |
+
"gpu_mem": 4.493787136,
|
| 1087 |
+
"loss": 0.6238,
|
| 1088 |
+
"grad_norm": 3.6376171112060547,
|
| 1089 |
+
"learning_rate": 0.00022031564175053754
|
| 1090 |
+
},
|
| 1091 |
+
{
|
| 1092 |
+
"step": 122,
|
| 1093 |
+
"epoch": 0.8271186440677966,
|
| 1094 |
+
"cpu_mem": 1.512091648,
|
| 1095 |
+
"gpu_mem": 4.493837824,
|
| 1096 |
+
"loss": 0.5311,
|
| 1097 |
+
"grad_norm": 4.018235206604004,
|
| 1098 |
+
"learning_rate": 0.00021873397825911153
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"step": 123,
|
| 1102 |
+
"epoch": 0.8338983050847457,
|
| 1103 |
+
"cpu_mem": 1.512091648,
|
| 1104 |
+
"gpu_mem": 4.49364736,
|
| 1105 |
+
"loss": 0.6679,
|
| 1106 |
+
"grad_norm": 5.683865070343018,
|
| 1107 |
+
"learning_rate": 0.00021714258150940685
|
| 1108 |
+
},
|
| 1109 |
+
{
|
| 1110 |
+
"step": 124,
|
| 1111 |
+
"epoch": 0.8406779661016949,
|
| 1112 |
+
"cpu_mem": 1.512091648,
|
| 1113 |
+
"gpu_mem": 4.494089728,
|
| 1114 |
+
"loss": 0.6208,
|
| 1115 |
+
"grad_norm": 5.339485168457031,
|
| 1116 |
+
"learning_rate": 0.0002155416768554039
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"step": 125,
|
| 1120 |
+
"epoch": 0.847457627118644,
|
| 1121 |
+
"cpu_mem": 1.512091648,
|
| 1122 |
+
"gpu_mem": 4.49381632,
|
| 1123 |
+
"loss": 0.5845,
|
| 1124 |
+
"grad_norm": 3.5815553665161133,
|
| 1125 |
+
"learning_rate": 0.00021393149099747523
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"step": 126,
|
| 1129 |
+
"epoch": 0.8542372881355932,
|
| 1130 |
+
"cpu_mem": 1.512091648,
|
| 1131 |
+
"gpu_mem": 4.493699584,
|
| 1132 |
+
"loss": 0.5642,
|
| 1133 |
+
"grad_norm": 4.037660121917725,
|
| 1134 |
+
"learning_rate": 0.00021231225195028297
|
| 1135 |
+
},
|
| 1136 |
+
{
|
| 1137 |
+
"step": 127,
|
| 1138 |
+
"epoch": 0.8610169491525423,
|
| 1139 |
+
"cpu_mem": 1.512288256,
|
| 1140 |
+
"gpu_mem": 4.49413888,
|
| 1141 |
+
"loss": 0.6483,
|
| 1142 |
+
"grad_norm": 5.6473846435546875,
|
| 1143 |
+
"learning_rate": 0.00021068418901049025
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"step": 128,
|
| 1147 |
+
"epoch": 0.8677966101694915,
|
| 1148 |
+
"cpu_mem": 1.512288256,
|
| 1149 |
+
"gpu_mem": 4.493914624,
|
| 1150 |
+
"loss": 0.5481,
|
| 1151 |
+
"grad_norm": 3.1490492820739746,
|
| 1152 |
+
"learning_rate": 0.0002090475327242912
|
| 1153 |
+
},
|
| 1154 |
+
{
|
| 1155 |
+
"step": 129,
|
| 1156 |
+
"epoch": 0.8745762711864407,
|
| 1157 |
+
"cpu_mem": 1.512288256,
|
| 1158 |
+
"gpu_mem": 4.49395456,
|
| 1159 |
+
"loss": 0.6907,
|
| 1160 |
+
"grad_norm": 3.3728561401367188,
|
| 1161 |
+
"learning_rate": 0.00020740251485476345
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"step": 130,
|
| 1165 |
+
"epoch": 0.8813559322033898,
|
| 1166 |
+
"cpu_mem": 1.512288256,
|
| 1167 |
+
"gpu_mem": 4.493736448,
|
| 1168 |
+
"loss": 0.6776,
|
| 1169 |
+
"grad_norm": 2.8839058876037598,
|
| 1170 |
+
"learning_rate": 0.0002057493683490491
|
| 1171 |
+
},
|
| 1172 |
+
{
|
| 1173 |
+
"step": 131,
|
| 1174 |
+
"epoch": 0.888135593220339,
|
| 1175 |
+
"cpu_mem": 1.512288256,
|
| 1176 |
+
"gpu_mem": 4.493865472,
|
| 1177 |
+
"loss": 0.6277,
|
| 1178 |
+
"grad_norm": 2.4115381240844727,
|
| 1179 |
+
"learning_rate": 0.00020408832730536746
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"step": 132,
|
| 1183 |
+
"epoch": 0.8949152542372881,
|
| 1184 |
+
"cpu_mem": 1.512288256,
|
| 1185 |
+
"gpu_mem": 4.49394688,
|
| 1186 |
+
"loss": 0.6367,
|
| 1187 |
+
"grad_norm": 3.60898494720459,
|
| 1188 |
+
"learning_rate": 0.00020241962693986476
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"step": 133,
|
| 1192 |
+
"epoch": 0.9016949152542373,
|
| 1193 |
+
"cpu_mem": 1.512288256,
|
| 1194 |
+
"gpu_mem": 4.493730304,
|
| 1195 |
+
"loss": 0.5515,
|
| 1196 |
+
"grad_norm": 2.1373813152313232,
|
| 1197 |
+
"learning_rate": 0.0002007435035533061
|
| 1198 |
+
},
|
| 1199 |
+
{
|
| 1200 |
+
"step": 134,
|
| 1201 |
+
"epoch": 0.9084745762711864,
|
| 1202 |
+
"cpu_mem": 1.512288256,
|
| 1203 |
+
"gpu_mem": 4.493863936,
|
| 1204 |
+
"loss": 0.5556,
|
| 1205 |
+
"grad_norm": 2.2262206077575684,
|
| 1206 |
+
"learning_rate": 0.00019906019449761325
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"step": 135,
|
| 1210 |
+
"epoch": 0.9152542372881356,
|
| 1211 |
+
"cpu_mem": 1.512288256,
|
| 1212 |
+
"gpu_mem": 4.493886976,
|
| 1213 |
+
"loss": 0.6743,
|
| 1214 |
+
"grad_norm": 6.190613746643066,
|
| 1215 |
+
"learning_rate": 0.00019736993814225374
|
| 1216 |
+
},
|
| 1217 |
+
{
|
| 1218 |
+
"step": 136,
|
| 1219 |
+
"epoch": 0.9220338983050848,
|
| 1220 |
+
"cpu_mem": 1.512288256,
|
| 1221 |
+
"gpu_mem": 4.49372416,
|
| 1222 |
+
"loss": 0.601,
|
| 1223 |
+
"grad_norm": 4.490257263183594,
|
| 1224 |
+
"learning_rate": 0.00019567297384048604
|
| 1225 |
+
},
|
| 1226 |
+
{
|
| 1227 |
+
"step": 137,
|
| 1228 |
+
"epoch": 0.9288135593220339,
|
| 1229 |
+
"cpu_mem": 1.512288256,
|
| 1230 |
+
"gpu_mem": 4.493604352,
|
| 1231 |
+
"loss": 0.6619,
|
| 1232 |
+
"grad_norm": 4.613885402679443,
|
| 1233 |
+
"learning_rate": 0.0001939695418954653
|
| 1234 |
+
},
|
| 1235 |
+
{
|
| 1236 |
+
"step": 138,
|
| 1237 |
+
"epoch": 0.9355932203389831,
|
| 1238 |
+
"cpu_mem": 1.512288256,
|
| 1239 |
+
"gpu_mem": 4.4937856,
|
| 1240 |
+
"loss": 0.5927,
|
| 1241 |
+
"grad_norm": 2.2556755542755127,
|
| 1242 |
+
"learning_rate": 0.00019225988352621445
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"step": 139,
|
| 1246 |
+
"epoch": 0.9423728813559322,
|
| 1247 |
+
"cpu_mem": 1.512288256,
|
| 1248 |
+
"gpu_mem": 4.493684224,
|
| 1249 |
+
"loss": 0.6136,
|
| 1250 |
+
"grad_norm": 3.3856916427612305,
|
| 1251 |
+
"learning_rate": 0.00019054424083346592
|
| 1252 |
+
},
|
| 1253 |
+
{
|
| 1254 |
+
"step": 140,
|
| 1255 |
+
"epoch": 0.9491525423728814,
|
| 1256 |
+
"cpu_mem": 1.512288256,
|
| 1257 |
+
"gpu_mem": 4.493736448,
|
| 1258 |
+
"loss": 0.6362,
|
| 1259 |
+
"grad_norm": 5.717785835266113,
|
| 1260 |
+
"learning_rate": 0.0001888228567653781
|
| 1261 |
+
},
|
| 1262 |
+
{
|
| 1263 |
+
"step": 141,
|
| 1264 |
+
"epoch": 0.9559322033898305,
|
| 1265 |
+
"cpu_mem": 1.512288256,
|
| 1266 |
+
"gpu_mem": 4.493768704,
|
| 1267 |
+
"loss": 0.6383,
|
| 1268 |
+
"grad_norm": 5.319495677947998,
|
| 1269 |
+
"learning_rate": 0.0001870959750831323
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"step": 142,
|
| 1273 |
+
"epoch": 0.9627118644067797,
|
| 1274 |
+
"cpu_mem": 1.512288256,
|
| 1275 |
+
"gpu_mem": 4.49390848,
|
| 1276 |
+
"loss": 0.5894,
|
| 1277 |
+
"grad_norm": 4.640230178833008,
|
| 1278 |
+
"learning_rate": 0.0001853638403264141
|
| 1279 |
+
},
|
| 1280 |
+
{
|
| 1281 |
+
"step": 143,
|
| 1282 |
+
"epoch": 0.9694915254237289,
|
| 1283 |
+
"cpu_mem": 1.512288256,
|
| 1284 |
+
"gpu_mem": 4.493891584,
|
| 1285 |
+
"loss": 0.6378,
|
| 1286 |
+
"grad_norm": 3.016573905944824,
|
| 1287 |
+
"learning_rate": 0.00018362669777878453
|
| 1288 |
+
},
|
| 1289 |
+
{
|
| 1290 |
+
"step": 144,
|
| 1291 |
+
"epoch": 0.976271186440678,
|
| 1292 |
+
"cpu_mem": 1.512288256,
|
| 1293 |
+
"gpu_mem": 4.494083584,
|
| 1294 |
+
"loss": 0.6572,
|
| 1295 |
+
"grad_norm": 4.5237603187561035,
|
| 1296 |
+
"learning_rate": 0.00018188479343294648
|
| 1297 |
+
},
|
| 1298 |
+
{
|
| 1299 |
+
"step": 145,
|
| 1300 |
+
"epoch": 0.9830508474576272,
|
| 1301 |
+
"cpu_mem": 1.512288256,
|
| 1302 |
+
"gpu_mem": 4.493794816,
|
| 1303 |
+
"loss": 0.5759,
|
| 1304 |
+
"grad_norm": 3.0536630153656006,
|
| 1305 |
+
"learning_rate": 0.0001801383739559098
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"step": 146,
|
| 1309 |
+
"epoch": 0.9898305084745763,
|
| 1310 |
+
"cpu_mem": 1.512288256,
|
| 1311 |
+
"gpu_mem": 4.493830144,
|
| 1312 |
+
"loss": 0.6313,
|
| 1313 |
+
"grad_norm": 3.5982203483581543,
|
| 1314 |
+
"learning_rate": 0.0001783876866540615
|
| 1315 |
+
},
|
| 1316 |
+
{
|
| 1317 |
+
"step": 147,
|
| 1318 |
+
"epoch": 0.9966101694915255,
|
| 1319 |
+
"cpu_mem": 1.512288256,
|
| 1320 |
+
"gpu_mem": 4.493728768,
|
| 1321 |
+
"loss": 0.5625,
|
| 1322 |
+
"grad_norm": 3.026538133621216,
|
| 1323 |
+
"learning_rate": 0.00017663297943814552
|
| 1324 |
+
},
|
| 1325 |
+
{
|
| 1326 |
+
"step": 148,
|
| 1327 |
+
"epoch": 1.0033898305084745,
|
| 1328 |
+
"cpu_mem": 1.512288256,
|
| 1329 |
+
"gpu_mem": 4.519203328,
|
| 1330 |
+
"loss": 0.884,
|
| 1331 |
+
"grad_norm": 3.9491984844207764,
|
| 1332 |
+
"learning_rate": 0.0001748745007881561
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"step": 149,
|
| 1336 |
+
"epoch": 1.0101694915254238,
|
| 1337 |
+
"cpu_mem": 1.512288256,
|
| 1338 |
+
"gpu_mem": 4.519138816,
|
| 1339 |
+
"loss": 0.6115,
|
| 1340 |
+
"grad_norm": 2.8990418910980225,
|
| 1341 |
+
"learning_rate": 0.00017311249971815185
|
| 1342 |
+
},
|
| 1343 |
+
{
|
| 1344 |
+
"step": 150,
|
| 1345 |
+
"epoch": 1.0169491525423728,
|
| 1346 |
+
"cpu_mem": 1.512288256,
|
| 1347 |
+
"gpu_mem": 4.518976,
|
| 1348 |
+
"loss": 0.5619,
|
| 1349 |
+
"grad_norm": 3.0098154544830322,
|
| 1350 |
+
"learning_rate": 0.00017134722574099276
|
| 1351 |
+
},
|
| 1352 |
+
{
|
| 1353 |
+
"step": 151,
|
| 1354 |
+
"epoch": 1.023728813559322,
|
| 1355 |
+
"cpu_mem": 1.512288256,
|
| 1356 |
+
"gpu_mem": 4.519048192,
|
| 1357 |
+
"loss": 0.6179,
|
| 1358 |
+
"grad_norm": 3.9371068477630615,
|
| 1359 |
+
"learning_rate": 0.00016957892883300775
|
| 1360 |
+
},
|
| 1361 |
+
{
|
| 1362 |
+
"step": 152,
|
| 1363 |
+
"epoch": 1.0305084745762711,
|
| 1364 |
+
"cpu_mem": 1.512288256,
|
| 1365 |
+
"gpu_mem": 4.51908352,
|
| 1366 |
+
"loss": 0.4941,
|
| 1367 |
+
"grad_norm": 2.8057267665863037,
|
| 1368 |
+
"learning_rate": 0.00016780785939859576
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"step": 153,
|
| 1372 |
+
"epoch": 1.0372881355932204,
|
| 1373 |
+
"cpu_mem": 1.512288256,
|
| 1374 |
+
"gpu_mem": 4.519108096,
|
| 1375 |
+
"loss": 0.5268,
|
| 1376 |
+
"grad_norm": 4.285440444946289,
|
| 1377 |
+
"learning_rate": 0.00016603426823476693
|
| 1378 |
+
},
|
| 1379 |
+
{
|
| 1380 |
+
"step": 154,
|
| 1381 |
+
"epoch": 1.0440677966101695,
|
| 1382 |
+
"cpu_mem": 1.512288256,
|
| 1383 |
+
"gpu_mem": 4.519069696,
|
| 1384 |
+
"loss": 0.5617,
|
| 1385 |
+
"grad_norm": 4.94078254699707,
|
| 1386 |
+
"learning_rate": 0.00016425840649562736
|
| 1387 |
+
},
|
| 1388 |
+
{
|
| 1389 |
+
"step": 155,
|
| 1390 |
+
"epoch": 1.0508474576271187,
|
| 1391 |
+
"cpu_mem": 1.512288256,
|
| 1392 |
+
"gpu_mem": 4.51929088,
|
| 1393 |
+
"loss": 0.5337,
|
| 1394 |
+
"grad_norm": 3.763066291809082,
|
| 1395 |
+
"learning_rate": 0.00016248052565681436
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"step": 156,
|
| 1399 |
+
"epoch": 1.0576271186440678,
|
| 1400 |
+
"cpu_mem": 1.512288256,
|
| 1401 |
+
"gpu_mem": 4.51919872,
|
| 1402 |
+
"loss": 0.5724,
|
| 1403 |
+
"grad_norm": 5.346607685089111,
|
| 1404 |
+
"learning_rate": 0.00016070087747988482
|
| 1405 |
+
},
|
| 1406 |
+
{
|
| 1407 |
+
"step": 157,
|
| 1408 |
+
"epoch": 1.064406779661017,
|
| 1409 |
+
"cpu_mem": 1.512288256,
|
| 1410 |
+
"gpu_mem": 4.519105024,
|
| 1411 |
+
"loss": 0.5009,
|
| 1412 |
+
"grad_norm": 5.106917381286621,
|
| 1413 |
+
"learning_rate": 0.00015891971397666464
|
| 1414 |
+
},
|
| 1415 |
+
{
|
| 1416 |
+
"step": 158,
|
| 1417 |
+
"epoch": 1.071186440677966,
|
| 1418 |
+
"cpu_mem": 1.512288256,
|
| 1419 |
+
"gpu_mem": 4.519031296,
|
| 1420 |
+
"loss": 0.5795,
|
| 1421 |
+
"grad_norm": 4.663048267364502,
|
| 1422 |
+
"learning_rate": 0.00015713728737356137
|
| 1423 |
+
},
|
| 1424 |
+
{
|
| 1425 |
+
"step": 159,
|
| 1426 |
+
"epoch": 1.0779661016949154,
|
| 1427 |
+
"cpu_mem": 1.512288256,
|
| 1428 |
+
"gpu_mem": 4.519379968,
|
| 1429 |
+
"loss": 0.4231,
|
| 1430 |
+
"grad_norm": 4.06447696685791,
|
| 1431 |
+
"learning_rate": 0.00015535385007584706
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"step": 160,
|
| 1435 |
+
"epoch": 1.0847457627118644,
|
| 1436 |
+
"cpu_mem": 1.512288256,
|
| 1437 |
+
"gpu_mem": 4.518974464,
|
| 1438 |
+
"loss": 0.6225,
|
| 1439 |
+
"grad_norm": 5.922128200531006,
|
| 1440 |
+
"learning_rate": 0.0001535696546319161
|
| 1441 |
+
},
|
| 1442 |
+
{
|
| 1443 |
+
"step": 161,
|
| 1444 |
+
"epoch": 1.0915254237288137,
|
| 1445 |
+
"cpu_mem": 1.512288256,
|
| 1446 |
+
"gpu_mem": 4.518920704,
|
| 1447 |
+
"loss": 0.5124,
|
| 1448 |
+
"grad_norm": 5.51249885559082,
|
| 1449 |
+
"learning_rate": 0.00015178495369752213
|
| 1450 |
+
},
|
| 1451 |
+
{
|
| 1452 |
+
"step": 162,
|
| 1453 |
+
"epoch": 1.0983050847457627,
|
| 1454 |
+
"cpu_mem": 1.512288256,
|
| 1455 |
+
"gpu_mem": 4.519696384,
|
| 1456 |
+
"loss": 0.5305,
|
| 1457 |
+
"grad_norm": 4.262174129486084,
|
| 1458 |
+
"learning_rate": 0.00015
|
| 1459 |
+
},
|
| 1460 |
+
{
|
| 1461 |
+
"step": 163,
|
| 1462 |
+
"epoch": 1.1050847457627118,
|
| 1463 |
+
"cpu_mem": 1.512288256,
|
| 1464 |
+
"gpu_mem": 4.519172608,
|
| 1465 |
+
"loss": 0.5712,
|
| 1466 |
+
"grad_norm": 6.790377140045166,
|
| 1467 |
+
"learning_rate": 0.00014821504630247785
|
| 1468 |
+
},
|
| 1469 |
+
{
|
| 1470 |
+
"step": 164,
|
| 1471 |
+
"epoch": 1.111864406779661,
|
| 1472 |
+
"cpu_mem": 1.512288256,
|
| 1473 |
+
"gpu_mem": 4.519085056,
|
| 1474 |
+
"loss": 0.6282,
|
| 1475 |
+
"grad_norm": 5.247696876525879,
|
| 1476 |
+
"learning_rate": 0.00014643034536808387
|
| 1477 |
+
},
|
| 1478 |
+
{
|
| 1479 |
+
"step": 165,
|
| 1480 |
+
"epoch": 1.11864406779661,
|
| 1481 |
+
"cpu_mem": 1.512288256,
|
| 1482 |
+
"gpu_mem": 4.519034368,
|
| 1483 |
+
"loss": 0.5203,
|
| 1484 |
+
"grad_norm": 5.485547065734863,
|
| 1485 |
+
"learning_rate": 0.00014464614992415294
|
| 1486 |
+
},
|
| 1487 |
+
{
|
| 1488 |
+
"step": 166,
|
| 1489 |
+
"epoch": 1.1254237288135593,
|
| 1490 |
+
"cpu_mem": 1.512288256,
|
| 1491 |
+
"gpu_mem": 4.5191296,
|
| 1492 |
+
"loss": 0.5093,
|
| 1493 |
+
"grad_norm": 4.293337821960449,
|
| 1494 |
+
"learning_rate": 0.00014286271262643866
|
| 1495 |
+
},
|
| 1496 |
+
{
|
| 1497 |
+
"step": 167,
|
| 1498 |
+
"epoch": 1.1322033898305084,
|
| 1499 |
+
"cpu_mem": 1.512288256,
|
| 1500 |
+
"gpu_mem": 4.519046656,
|
| 1501 |
+
"loss": 0.5422,
|
| 1502 |
+
"grad_norm": 4.634438991546631,
|
| 1503 |
+
"learning_rate": 0.00014108028602333536
|
| 1504 |
+
},
|
| 1505 |
+
{
|
| 1506 |
+
"step": 168,
|
| 1507 |
+
"epoch": 1.1389830508474577,
|
| 1508 |
+
"cpu_mem": 1.512288256,
|
| 1509 |
+
"gpu_mem": 4.519065088,
|
| 1510 |
+
"loss": 0.5884,
|
| 1511 |
+
"grad_norm": 6.198184490203857,
|
| 1512 |
+
"learning_rate": 0.00013929912252011516
|
| 1513 |
+
},
|
| 1514 |
+
{
|
| 1515 |
+
"step": 169,
|
| 1516 |
+
"epoch": 1.1457627118644067,
|
| 1517 |
+
"cpu_mem": 1.512288256,
|
| 1518 |
+
"gpu_mem": 4.51915264,
|
| 1519 |
+
"loss": 0.5658,
|
| 1520 |
+
"grad_norm": 6.395980358123779,
|
| 1521 |
+
"learning_rate": 0.00013751947434318564
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"step": 170,
|
| 1525 |
+
"epoch": 1.152542372881356,
|
| 1526 |
+
"cpu_mem": 1.512288256,
|
| 1527 |
+
"gpu_mem": 4.51903744,
|
| 1528 |
+
"loss": 0.4778,
|
| 1529 |
+
"grad_norm": 4.4228129386901855,
|
| 1530 |
+
"learning_rate": 0.00013574159350437261
|
| 1531 |
+
},
|
| 1532 |
+
{
|
| 1533 |
+
"step": 171,
|
| 1534 |
+
"epoch": 1.159322033898305,
|
| 1535 |
+
"cpu_mem": 1.512288256,
|
| 1536 |
+
"gpu_mem": 4.519100416,
|
| 1537 |
+
"loss": 0.5222,
|
| 1538 |
+
"grad_norm": 6.1567864418029785,
|
| 1539 |
+
"learning_rate": 0.0001339657317652331
|
| 1540 |
+
},
|
| 1541 |
+
{
|
| 1542 |
+
"step": 172,
|
| 1543 |
+
"epoch": 1.1661016949152543,
|
| 1544 |
+
"cpu_mem": 1.512288256,
|
| 1545 |
+
"gpu_mem": 4.519008256,
|
| 1546 |
+
"loss": 0.6052,
|
| 1547 |
+
"grad_norm": 6.764645099639893,
|
| 1548 |
+
"learning_rate": 0.00013219214060140424
|
| 1549 |
+
},
|
| 1550 |
+
{
|
| 1551 |
+
"step": 173,
|
| 1552 |
+
"epoch": 1.1728813559322033,
|
| 1553 |
+
"cpu_mem": 1.512288256,
|
| 1554 |
+
"gpu_mem": 4.519307776,
|
| 1555 |
+
"loss": 0.4933,
|
| 1556 |
+
"grad_norm": 4.290213108062744,
|
| 1557 |
+
"learning_rate": 0.00013042107116699228
|
| 1558 |
+
},
|
| 1559 |
+
{
|
| 1560 |
+
"step": 174,
|
| 1561 |
+
"epoch": 1.1796610169491526,
|
| 1562 |
+
"cpu_mem": 1.512288256,
|
| 1563 |
+
"gpu_mem": 4.519031296,
|
| 1564 |
+
"loss": 0.579,
|
| 1565 |
+
"grad_norm": 4.750498294830322,
|
| 1566 |
+
"learning_rate": 0.00012865277425900724
|
| 1567 |
+
},
|
| 1568 |
+
{
|
| 1569 |
+
"step": 175,
|
| 1570 |
+
"epoch": 1.1864406779661016,
|
| 1571 |
+
"cpu_mem": 1.512288256,
|
| 1572 |
+
"gpu_mem": 4.518997504,
|
| 1573 |
+
"loss": 0.4567,
|
| 1574 |
+
"grad_norm": 4.482128620147705,
|
| 1575 |
+
"learning_rate": 0.00012688750028184818
|
| 1576 |
+
},
|
| 1577 |
+
{
|
| 1578 |
+
"step": 176,
|
| 1579 |
+
"epoch": 1.193220338983051,
|
| 1580 |
+
"cpu_mem": 1.512288256,
|
| 1581 |
+
"gpu_mem": 4.519135744,
|
| 1582 |
+
"loss": 0.5528,
|
| 1583 |
+
"grad_norm": 7.500720500946045,
|
| 1584 |
+
"learning_rate": 0.0001251254992118439
|
| 1585 |
+
},
|
| 1586 |
+
{
|
| 1587 |
+
"step": 177,
|
| 1588 |
+
"epoch": 1.2,
|
| 1589 |
+
"cpu_mem": 1.512288256,
|
| 1590 |
+
"gpu_mem": 4.519234048,
|
| 1591 |
+
"loss": 0.4816,
|
| 1592 |
+
"grad_norm": 5.709393501281738,
|
| 1593 |
+
"learning_rate": 0.00012336702056185453
|
| 1594 |
+
},
|
| 1595 |
+
{
|
| 1596 |
+
"step": 178,
|
| 1597 |
+
"epoch": 1.2067796610169492,
|
| 1598 |
+
"cpu_mem": 1.512288256,
|
| 1599 |
+
"gpu_mem": 4.518980608,
|
| 1600 |
+
"loss": 0.6219,
|
| 1601 |
+
"grad_norm": 5.3650712966918945,
|
| 1602 |
+
"learning_rate": 0.00012161231334593851
|
| 1603 |
+
},
|
| 1604 |
+
{
|
| 1605 |
+
"step": 179,
|
| 1606 |
+
"epoch": 1.2135593220338983,
|
| 1607 |
+
"cpu_mem": 1.512288256,
|
| 1608 |
+
"gpu_mem": 4.519080448,
|
| 1609 |
+
"loss": 0.6314,
|
| 1610 |
+
"grad_norm": 7.034570693969727,
|
| 1611 |
+
"learning_rate": 0.00011986162604409015
|
| 1612 |
+
},
|
| 1613 |
+
{
|
| 1614 |
+
"step": 180,
|
| 1615 |
+
"epoch": 1.2203389830508475,
|
| 1616 |
+
"cpu_mem": 1.512288256,
|
| 1617 |
+
"gpu_mem": 4.5190528,
|
| 1618 |
+
"loss": 0.5441,
|
| 1619 |
+
"grad_norm": 4.4938225746154785,
|
| 1620 |
+
"learning_rate": 0.00011811520656705348
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"step": 181,
|
| 1624 |
+
"epoch": 1.2271186440677966,
|
| 1625 |
+
"cpu_mem": 1.512288256,
|
| 1626 |
+
"gpu_mem": 4.518989824,
|
| 1627 |
+
"loss": 0.4821,
|
| 1628 |
+
"grad_norm": 5.907620906829834,
|
| 1629 |
+
"learning_rate": 0.00011637330222121543
|
| 1630 |
+
},
|
| 1631 |
+
{
|
| 1632 |
+
"step": 182,
|
| 1633 |
+
"epoch": 1.2338983050847459,
|
| 1634 |
+
"cpu_mem": 1.512288256,
|
| 1635 |
+
"gpu_mem": 4.519207936,
|
| 1636 |
+
"loss": 0.7578,
|
| 1637 |
+
"grad_norm": 9.750370025634766,
|
| 1638 |
+
"learning_rate": 0.00011463615967358588
|
| 1639 |
+
},
|
| 1640 |
+
{
|
| 1641 |
+
"step": 183,
|
| 1642 |
+
"epoch": 1.240677966101695,
|
| 1643 |
+
"cpu_mem": 1.512288256,
|
| 1644 |
+
"gpu_mem": 4.519105024,
|
| 1645 |
+
"loss": 0.5486,
|
| 1646 |
+
"grad_norm": 5.949014663696289,
|
| 1647 |
+
"learning_rate": 0.00011290402491686766
|
| 1648 |
+
},
|
| 1649 |
+
{
|
| 1650 |
+
"step": 184,
|
| 1651 |
+
"epoch": 1.2474576271186442,
|
| 1652 |
+
"cpu_mem": 1.512288256,
|
| 1653 |
+
"gpu_mem": 4.5190528,
|
| 1654 |
+
"loss": 0.4688,
|
| 1655 |
+
"grad_norm": 4.6593017578125,
|
| 1656 |
+
"learning_rate": 0.00011117714323462186
|
| 1657 |
+
},
|
| 1658 |
+
{
|
| 1659 |
+
"step": 185,
|
| 1660 |
+
"epoch": 1.2542372881355932,
|
| 1661 |
+
"cpu_mem": 1.512288256,
|
| 1662 |
+
"gpu_mem": 4.519031296,
|
| 1663 |
+
"loss": 0.5701,
|
| 1664 |
+
"grad_norm": 4.530482769012451,
|
| 1665 |
+
"learning_rate": 0.00010945575916653407
|
| 1666 |
+
},
|
| 1667 |
+
{
|
| 1668 |
+
"step": 186,
|
| 1669 |
+
"epoch": 1.2610169491525425,
|
| 1670 |
+
"cpu_mem": 1.512288256,
|
| 1671 |
+
"gpu_mem": 4.519040512,
|
| 1672 |
+
"loss": 0.4945,
|
| 1673 |
+
"grad_norm": 3.4325568675994873,
|
| 1674 |
+
"learning_rate": 0.00010774011647378553
|
| 1675 |
+
},
|
| 1676 |
+
{
|
| 1677 |
+
"step": 187,
|
| 1678 |
+
"epoch": 1.2677966101694915,
|
| 1679 |
+
"cpu_mem": 1.512288256,
|
| 1680 |
+
"gpu_mem": 4.518972928,
|
| 1681 |
+
"loss": 0.6189,
|
| 1682 |
+
"grad_norm": 4.750690460205078,
|
| 1683 |
+
"learning_rate": 0.00010603045810453468
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"step": 188,
|
| 1687 |
+
"epoch": 1.2745762711864406,
|
| 1688 |
+
"cpu_mem": 1.512288256,
|
| 1689 |
+
"gpu_mem": 4.519135744,
|
| 1690 |
+
"loss": 0.5404,
|
| 1691 |
+
"grad_norm": 4.004016399383545,
|
| 1692 |
+
"learning_rate": 0.00010432702615951396
|
| 1693 |
+
},
|
| 1694 |
+
{
|
| 1695 |
+
"step": 189,
|
| 1696 |
+
"epoch": 1.2813559322033898,
|
| 1697 |
+
"cpu_mem": 1.512288256,
|
| 1698 |
+
"gpu_mem": 4.519005184,
|
| 1699 |
+
"loss": 0.547,
|
| 1700 |
+
"grad_norm": 4.070260524749756,
|
| 1701 |
+
"learning_rate": 0.00010263006185774627
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"step": 190,
|
| 1705 |
+
"epoch": 1.288135593220339,
|
| 1706 |
+
"cpu_mem": 1.512288256,
|
| 1707 |
+
"gpu_mem": 4.519124992,
|
| 1708 |
+
"loss": 0.5295,
|
| 1709 |
+
"grad_norm": 5.301217555999756,
|
| 1710 |
+
"learning_rate": 0.00010093980550238675
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"step": 191,
|
| 1714 |
+
"epoch": 1.2949152542372881,
|
| 1715 |
+
"cpu_mem": 1.512288256,
|
| 1716 |
+
"gpu_mem": 4.518943744,
|
| 1717 |
+
"loss": 0.4816,
|
| 1718 |
+
"grad_norm": 3.699140787124634,
|
| 1719 |
+
"learning_rate": 9.925649644669391e-05
|
| 1720 |
+
},
|
| 1721 |
+
{
|
| 1722 |
+
"step": 192,
|
| 1723 |
+
"epoch": 1.3016949152542372,
|
| 1724 |
+
"cpu_mem": 1.512288256,
|
| 1725 |
+
"gpu_mem": 4.51907584,
|
| 1726 |
+
"loss": 0.4443,
|
| 1727 |
+
"grad_norm": 4.096963882446289,
|
| 1728 |
+
"learning_rate": 9.758037306013526e-05
|
| 1729 |
+
},
|
| 1730 |
+
{
|
| 1731 |
+
"step": 193,
|
| 1732 |
+
"epoch": 1.3084745762711865,
|
| 1733 |
+
"cpu_mem": 1.512288256,
|
| 1734 |
+
"gpu_mem": 4.519049728,
|
| 1735 |
+
"loss": 0.5866,
|
| 1736 |
+
"grad_norm": 4.5070319175720215,
|
| 1737 |
+
"learning_rate": 9.591167269463255e-05
|
| 1738 |
+
},
|
| 1739 |
+
{
|
| 1740 |
+
"step": 194,
|
| 1741 |
+
"epoch": 1.3152542372881357,
|
| 1742 |
+
"cpu_mem": 1.512288256,
|
| 1743 |
+
"gpu_mem": 4.519015936,
|
| 1744 |
+
"loss": 0.5797,
|
| 1745 |
+
"grad_norm": 5.173367500305176,
|
| 1746 |
+
"learning_rate": 9.425063165095088e-05
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"step": 195,
|
| 1750 |
+
"epoch": 1.3220338983050848,
|
| 1751 |
+
"cpu_mem": 1.512288256,
|
| 1752 |
+
"gpu_mem": 4.519120384,
|
| 1753 |
+
"loss": 0.4506,
|
| 1754 |
+
"grad_norm": 5.948993682861328,
|
| 1755 |
+
"learning_rate": 9.259748514523653e-05
|
| 1756 |
+
},
|
| 1757 |
+
{
|
| 1758 |
+
"step": 196,
|
| 1759 |
+
"epoch": 1.3288135593220338,
|
| 1760 |
+
"cpu_mem": 1.512288256,
|
| 1761 |
+
"gpu_mem": 4.519115776,
|
| 1762 |
+
"loss": 0.5417,
|
| 1763 |
+
"grad_norm": 4.922159671783447,
|
| 1764 |
+
"learning_rate": 9.095246727570879e-05
|
| 1765 |
+
},
|
| 1766 |
+
{
|
| 1767 |
+
"step": 197,
|
| 1768 |
+
"epoch": 1.335593220338983,
|
| 1769 |
+
"cpu_mem": 1.512288256,
|
| 1770 |
+
"gpu_mem": 4.518974464,
|
| 1771 |
+
"loss": 0.4126,
|
| 1772 |
+
"grad_norm": 4.2993011474609375,
|
| 1773 |
+
"learning_rate": 8.931581098950973e-05
|
| 1774 |
+
},
|
| 1775 |
+
{
|
| 1776 |
+
"step": 198,
|
| 1777 |
+
"epoch": 1.3423728813559321,
|
| 1778 |
+
"cpu_mem": 1.512288256,
|
| 1779 |
+
"gpu_mem": 4.519166464,
|
| 1780 |
+
"loss": 0.4516,
|
| 1781 |
+
"grad_norm": 3.3898727893829346,
|
| 1782 |
+
"learning_rate": 8.768774804971705e-05
|
| 1783 |
+
},
|
| 1784 |
+
{
|
| 1785 |
+
"step": 199,
|
| 1786 |
+
"epoch": 1.3491525423728814,
|
| 1787 |
+
"cpu_mem": 1.512288256,
|
| 1788 |
+
"gpu_mem": 4.519017472,
|
| 1789 |
+
"loss": 0.5272,
|
| 1790 |
+
"grad_norm": 4.7902374267578125,
|
| 1791 |
+
"learning_rate": 8.606850900252478e-05
|
| 1792 |
+
},
|
| 1793 |
+
{
|
| 1794 |
+
"step": 200,
|
| 1795 |
+
"epoch": 1.3559322033898304,
|
| 1796 |
+
"cpu_mem": 1.512288256,
|
| 1797 |
+
"gpu_mem": 4.519120384,
|
| 1798 |
+
"loss": 0.3785,
|
| 1799 |
+
"grad_norm": 3.5034830570220947,
|
| 1800 |
+
"learning_rate": 8.445832314459608e-05
|
| 1801 |
+
},
|
| 1802 |
+
{
|
| 1803 |
+
"step": 201,
|
| 1804 |
+
"epoch": 1.3627118644067797,
|
| 1805 |
+
"cpu_mem": 1.512288256,
|
| 1806 |
+
"gpu_mem": 4.519323136,
|
| 1807 |
+
"loss": 0.4795,
|
| 1808 |
+
"grad_norm": 4.421779632568359,
|
| 1809 |
+
"learning_rate": 8.285741849059311e-05
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"step": 202,
|
| 1813 |
+
"epoch": 1.3694915254237288,
|
| 1814 |
+
"cpu_mem": 1.512288256,
|
| 1815 |
+
"gpu_mem": 4.519124992,
|
| 1816 |
+
"loss": 0.4514,
|
| 1817 |
+
"grad_norm": 4.375877380371094,
|
| 1818 |
+
"learning_rate": 8.126602174088843e-05
|
| 1819 |
+
},
|
| 1820 |
+
{
|
| 1821 |
+
"step": 203,
|
| 1822 |
+
"epoch": 1.376271186440678,
|
| 1823 |
+
"cpu_mem": 1.512288256,
|
| 1824 |
+
"gpu_mem": 4.519011328,
|
| 1825 |
+
"loss": 0.4037,
|
| 1826 |
+
"grad_norm": 5.991669654846191,
|
| 1827 |
+
"learning_rate": 7.968435824946242e-05
|
| 1828 |
+
},
|
| 1829 |
+
{
|
| 1830 |
+
"step": 204,
|
| 1831 |
+
"epoch": 1.383050847457627,
|
| 1832 |
+
"cpu_mem": 1.512288256,
|
| 1833 |
+
"gpu_mem": 4.519025152,
|
| 1834 |
+
"loss": 0.4836,
|
| 1835 |
+
"grad_norm": 5.3182172775268555,
|
| 1836 |
+
"learning_rate": 7.811265199199152e-05
|
| 1837 |
+
},
|
| 1838 |
+
{
|
| 1839 |
+
"step": 205,
|
| 1840 |
+
"epoch": 1.3898305084745763,
|
| 1841 |
+
"cpu_mem": 1.512288256,
|
| 1842 |
+
"gpu_mem": 4.519069696,
|
| 1843 |
+
"loss": 0.4796,
|
| 1844 |
+
"grad_norm": 5.581629276275635,
|
| 1845 |
+
"learning_rate": 7.655112553413135e-05
|
| 1846 |
+
},
|
| 1847 |
+
{
|
| 1848 |
+
"step": 206,
|
| 1849 |
+
"epoch": 1.3966101694915254,
|
| 1850 |
+
"cpu_mem": 1.512288256,
|
| 1851 |
+
"gpu_mem": 4.519011328,
|
| 1852 |
+
"loss": 0.481,
|
| 1853 |
+
"grad_norm": 5.053388595581055,
|
| 1854 |
+
"learning_rate": 7.500000000000002e-05
|
| 1855 |
+
},
|
| 1856 |
+
{
|
| 1857 |
+
"step": 207,
|
| 1858 |
+
"epoch": 1.4033898305084747,
|
| 1859 |
+
"cpu_mem": 1.512288256,
|
| 1860 |
+
"gpu_mem": 4.5192448,
|
| 1861 |
+
"loss": 0.4202,
|
| 1862 |
+
"grad_norm": 5.716559886932373,
|
| 1863 |
+
"learning_rate": 7.345949504086507e-05
|
| 1864 |
+
},
|
| 1865 |
+
{
|
| 1866 |
+
"step": 208,
|
| 1867 |
+
"epoch": 1.4101694915254237,
|
| 1868 |
+
"cpu_mem": 1.512288256,
|
| 1869 |
+
"gpu_mem": 4.51927552,
|
| 1870 |
+
"loss": 0.3872,
|
| 1871 |
+
"grad_norm": 6.213109493255615,
|
| 1872 |
+
"learning_rate": 7.192982880403917e-05
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"step": 209,
|
| 1876 |
+
"epoch": 1.4169491525423727,
|
| 1877 |
+
"cpu_mem": 1.512288256,
|
| 1878 |
+
"gpu_mem": 4.519201792,
|
| 1879 |
+
"loss": 0.5066,
|
| 1880 |
+
"grad_norm": 6.249429225921631,
|
| 1881 |
+
"learning_rate": 7.041121790198881e-05
|
| 1882 |
+
},
|
| 1883 |
+
{
|
| 1884 |
+
"step": 210,
|
| 1885 |
+
"epoch": 1.423728813559322,
|
| 1886 |
+
"cpu_mem": 1.512288256,
|
| 1887 |
+
"gpu_mem": 4.519089664,
|
| 1888 |
+
"loss": 0.4701,
|
| 1889 |
+
"grad_norm": 5.749514102935791,
|
| 1890 |
+
"learning_rate": 6.890387738166041e-05
|
| 1891 |
+
},
|
| 1892 |
+
{
|
| 1893 |
+
"step": 211,
|
| 1894 |
+
"epoch": 1.4305084745762713,
|
| 1895 |
+
"cpu_mem": 1.512288256,
|
| 1896 |
+
"gpu_mem": 4.519038976,
|
| 1897 |
+
"loss": 0.42,
|
| 1898 |
+
"grad_norm": 6.426360130310059,
|
| 1899 |
+
"learning_rate": 6.740802069402771e-05
|
| 1900 |
+
},
|
| 1901 |
+
{
|
| 1902 |
+
"step": 212,
|
| 1903 |
+
"epoch": 1.4372881355932203,
|
| 1904 |
+
"cpu_mem": 1.512288256,
|
| 1905 |
+
"gpu_mem": 4.519008256,
|
| 1906 |
+
"loss": 0.4786,
|
| 1907 |
+
"grad_norm": 5.285887241363525,
|
| 1908 |
+
"learning_rate": 6.592385966386588e-05
|
| 1909 |
+
},
|
| 1910 |
+
{
|
| 1911 |
+
"step": 213,
|
| 1912 |
+
"epoch": 1.4440677966101694,
|
| 1913 |
+
"cpu_mem": 1.512288256,
|
| 1914 |
+
"gpu_mem": 4.519031296,
|
| 1915 |
+
"loss": 0.5431,
|
| 1916 |
+
"grad_norm": 8.069002151489258,
|
| 1917 |
+
"learning_rate": 6.445160445975536e-05
|
| 1918 |
+
},
|
| 1919 |
+
{
|
| 1920 |
+
"step": 214,
|
| 1921 |
+
"epoch": 1.4508474576271186,
|
| 1922 |
+
"cpu_mem": 1.512288256,
|
| 1923 |
+
"gpu_mem": 4.51911424,
|
| 1924 |
+
"loss": 0.527,
|
| 1925 |
+
"grad_norm": 6.687179088592529,
|
| 1926 |
+
"learning_rate": 6.299146356432029e-05
|
| 1927 |
+
},
|
| 1928 |
+
{
|
| 1929 |
+
"step": 215,
|
| 1930 |
+
"epoch": 1.457627118644068,
|
| 1931 |
+
"cpu_mem": 1.512288256,
|
| 1932 |
+
"gpu_mem": 4.519042048,
|
| 1933 |
+
"loss": 0.538,
|
| 1934 |
+
"grad_norm": 7.980434894561768,
|
| 1935 |
+
"learning_rate": 6.154364374470568e-05
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"step": 216,
|
| 1939 |
+
"epoch": 1.464406779661017,
|
| 1940 |
+
"cpu_mem": 1.512288256,
|
| 1941 |
+
"gpu_mem": 4.519207936,
|
| 1942 |
+
"loss": 0.4374,
|
| 1943 |
+
"grad_norm": 5.38814640045166,
|
| 1944 |
+
"learning_rate": 6.010835002329795e-05
|
| 1945 |
+
},
|
| 1946 |
+
{
|
| 1947 |
+
"step": 217,
|
| 1948 |
+
"epoch": 1.471186440677966,
|
| 1949 |
+
"cpu_mem": 1.512288256,
|
| 1950 |
+
"gpu_mem": 4.519049728,
|
| 1951 |
+
"loss": 0.5355,
|
| 1952 |
+
"grad_norm": 8.008475303649902,
|
| 1953 |
+
"learning_rate": 5.8685785648691894e-05
|
| 1954 |
+
},
|
| 1955 |
+
{
|
| 1956 |
+
"step": 218,
|
| 1957 |
+
"epoch": 1.4779661016949153,
|
| 1958 |
+
"cpu_mem": 1.512288256,
|
| 1959 |
+
"gpu_mem": 4.519026688,
|
| 1960 |
+
"loss": 0.4467,
|
| 1961 |
+
"grad_norm": 5.458549499511719,
|
| 1962 |
+
"learning_rate": 5.72761520669092e-05
|
| 1963 |
+
},
|
| 1964 |
+
{
|
| 1965 |
+
"step": 219,
|
| 1966 |
+
"epoch": 1.4847457627118645,
|
| 1967 |
+
"cpu_mem": 1.512288256,
|
| 1968 |
+
"gpu_mem": 4.51915264,
|
| 1969 |
+
"loss": 0.477,
|
| 1970 |
+
"grad_norm": 6.730329990386963,
|
| 1971 |
+
"learning_rate": 5.587964889287218e-05
|
| 1972 |
+
},
|
| 1973 |
+
{
|
| 1974 |
+
"step": 220,
|
| 1975 |
+
"epoch": 1.4915254237288136,
|
| 1976 |
+
"cpu_mem": 1.512288256,
|
| 1977 |
+
"gpu_mem": 4.519186432,
|
| 1978 |
+
"loss": 0.5097,
|
| 1979 |
+
"grad_norm": 6.172441005706787,
|
| 1980 |
+
"learning_rate": 5.449647388213678e-05
|
| 1981 |
+
},
|
| 1982 |
+
{
|
| 1983 |
+
"step": 221,
|
| 1984 |
+
"epoch": 1.4983050847457626,
|
| 1985 |
+
"cpu_mem": 1.512288256,
|
| 1986 |
+
"gpu_mem": 4.519054336,
|
| 1987 |
+
"loss": 0.5732,
|
| 1988 |
+
"grad_norm": 7.062180519104004,
|
| 1989 |
+
"learning_rate": 5.312682290288869e-05
|
| 1990 |
+
},
|
| 1991 |
+
{
|
| 1992 |
+
"step": 222,
|
| 1993 |
+
"epoch": 1.505084745762712,
|
| 1994 |
+
"cpu_mem": 1.512288256,
|
| 1995 |
+
"gpu_mem": 4.51919104,
|
| 1996 |
+
"loss": 0.4604,
|
| 1997 |
+
"grad_norm": 6.362549781799316,
|
| 1998 |
+
"learning_rate": 5.1770889908207245e-05
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"step": 223,
|
| 2002 |
+
"epoch": 1.5118644067796612,
|
| 2003 |
+
"cpu_mem": 1.512288256,
|
| 2004 |
+
"gpu_mem": 4.519105024,
|
| 2005 |
+
"loss": 0.4812,
|
| 2006 |
+
"grad_norm": 6.742758274078369,
|
| 2007 |
+
"learning_rate": 5.0428866908599864e-05
|
| 2008 |
+
},
|
| 2009 |
+
{
|
| 2010 |
+
"step": 224,
|
| 2011 |
+
"epoch": 1.5186440677966102,
|
| 2012 |
+
"cpu_mem": 1.512288256,
|
| 2013 |
+
"gpu_mem": 4.519069696,
|
| 2014 |
+
"loss": 0.5005,
|
| 2015 |
+
"grad_norm": 7.343570709228516,
|
| 2016 |
+
"learning_rate": 4.9100943944812114e-05
|
| 2017 |
+
},
|
| 2018 |
+
{
|
| 2019 |
+
"step": 225,
|
| 2020 |
+
"epoch": 1.5254237288135593,
|
| 2021 |
+
"cpu_mem": 1.512288256,
|
| 2022 |
+
"gpu_mem": 4.519034368,
|
| 2023 |
+
"loss": 0.5068,
|
| 2024 |
+
"grad_norm": 5.180023670196533,
|
| 2025 |
+
"learning_rate": 4.778730906091632e-05
|
| 2026 |
+
},
|
| 2027 |
+
{
|
| 2028 |
+
"step": 226,
|
| 2029 |
+
"epoch": 1.5322033898305085,
|
| 2030 |
+
"cpu_mem": 1.512288256,
|
| 2031 |
+
"gpu_mem": 4.51918336,
|
| 2032 |
+
"loss": 0.379,
|
| 2033 |
+
"grad_norm": 5.091320991516113,
|
| 2034 |
+
"learning_rate": 4.648814827768322e-05
|
| 2035 |
+
},
|
| 2036 |
+
{
|
| 2037 |
+
"step": 227,
|
| 2038 |
+
"epoch": 1.5389830508474578,
|
| 2039 |
+
"cpu_mem": 1.512288256,
|
| 2040 |
+
"gpu_mem": 4.519072768,
|
| 2041 |
+
"loss": 0.4426,
|
| 2042 |
+
"grad_norm": 5.11602258682251,
|
| 2043 |
+
"learning_rate": 4.5203645566239816e-05
|
| 2044 |
+
},
|
| 2045 |
+
{
|
| 2046 |
+
"step": 228,
|
| 2047 |
+
"epoch": 1.5457627118644068,
|
| 2048 |
+
"cpu_mem": 1.512288256,
|
| 2049 |
+
"gpu_mem": 4.519017472,
|
| 2050 |
+
"loss": 0.5256,
|
| 2051 |
+
"grad_norm": 5.844997882843018,
|
| 2052 |
+
"learning_rate": 4.3933982822017876e-05
|
| 2053 |
+
},
|
| 2054 |
+
{
|
| 2055 |
+
"step": 229,
|
| 2056 |
+
"epoch": 1.5525423728813559,
|
| 2057 |
+
"cpu_mem": 1.512288256,
|
| 2058 |
+
"gpu_mem": 4.518959104,
|
| 2059 |
+
"loss": 0.5054,
|
| 2060 |
+
"grad_norm": 5.75474214553833,
|
| 2061 |
+
"learning_rate": 4.267933983899601e-05
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"step": 230,
|
| 2065 |
+
"epoch": 1.559322033898305,
|
| 2066 |
+
"cpu_mem": 1.512288256,
|
| 2067 |
+
"gpu_mem": 4.519015936,
|
| 2068 |
+
"loss": 0.5413,
|
| 2069 |
+
"grad_norm": 6.338455677032471,
|
| 2070 |
+
"learning_rate": 4.143989428423947e-05
|
| 2071 |
+
},
|
| 2072 |
+
{
|
| 2073 |
+
"step": 231,
|
| 2074 |
+
"epoch": 1.5661016949152542,
|
| 2075 |
+
"cpu_mem": 1.512288256,
|
| 2076 |
+
"gpu_mem": 4.519293952,
|
| 2077 |
+
"loss": 0.4582,
|
| 2078 |
+
"grad_norm": 5.49623966217041,
|
| 2079 |
+
"learning_rate": 4.0215821672741213e-05
|
| 2080 |
+
},
|
| 2081 |
+
{
|
| 2082 |
+
"step": 232,
|
| 2083 |
+
"epoch": 1.5728813559322035,
|
| 2084 |
+
"cpu_mem": 1.512288256,
|
| 2085 |
+
"gpu_mem": 4.519017472,
|
| 2086 |
+
"loss": 0.5658,
|
| 2087 |
+
"grad_norm": 5.572755813598633,
|
| 2088 |
+
"learning_rate": 3.900729534256745e-05
|
| 2089 |
+
},
|
| 2090 |
+
{
|
| 2091 |
+
"step": 233,
|
| 2092 |
+
"epoch": 1.5796610169491525,
|
| 2093 |
+
"cpu_mem": 1.512288256,
|
| 2094 |
+
"gpu_mem": 4.519330816,
|
| 2095 |
+
"loss": 0.5023,
|
| 2096 |
+
"grad_norm": 5.0739946365356445,
|
| 2097 |
+
"learning_rate": 3.781448643031187e-05
|
| 2098 |
+
},
|
| 2099 |
+
{
|
| 2100 |
+
"step": 234,
|
| 2101 |
+
"epoch": 1.5864406779661016,
|
| 2102 |
+
"cpu_mem": 1.512288256,
|
| 2103 |
+
"gpu_mem": 4.5192064,
|
| 2104 |
+
"loss": 0.4126,
|
| 2105 |
+
"grad_norm": 5.143454074859619,
|
| 2106 |
+
"learning_rate": 3.663756384686127e-05
|
| 2107 |
+
},
|
| 2108 |
+
{
|
| 2109 |
+
"step": 235,
|
| 2110 |
+
"epoch": 1.5932203389830508,
|
| 2111 |
+
"cpu_mem": 1.512288256,
|
| 2112 |
+
"gpu_mem": 4.518962176,
|
| 2113 |
+
"loss": 0.4623,
|
| 2114 |
+
"grad_norm": 5.956307888031006,
|
| 2115 |
+
"learning_rate": 3.547669425347647e-05
|
| 2116 |
+
},
|
| 2117 |
+
{
|
| 2118 |
+
"step": 236,
|
| 2119 |
+
"epoch": 1.6,
|
| 2120 |
+
"cpu_mem": 1.512288256,
|
| 2121 |
+
"gpu_mem": 4.51902208,
|
| 2122 |
+
"loss": 0.4677,
|
| 2123 |
+
"grad_norm": 5.250843524932861,
|
| 2124 |
+
"learning_rate": 3.433204203819185e-05
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"step": 237,
|
| 2128 |
+
"epoch": 1.6067796610169491,
|
| 2129 |
+
"cpu_mem": 1.512288256,
|
| 2130 |
+
"gpu_mem": 4.51908352,
|
| 2131 |
+
"loss": 0.4254,
|
| 2132 |
+
"grad_norm": 4.902020454406738,
|
| 2133 |
+
"learning_rate": 3.3203769292536764e-05
|
| 2134 |
+
},
|
| 2135 |
+
{
|
| 2136 |
+
"step": 238,
|
| 2137 |
+
"epoch": 1.6135593220338982,
|
| 2138 |
+
"cpu_mem": 1.512288256,
|
| 2139 |
+
"gpu_mem": 4.519085056,
|
| 2140 |
+
"loss": 0.4989,
|
| 2141 |
+
"grad_norm": 5.615092754364014,
|
| 2142 |
+
"learning_rate": 3.209203578858191e-05
|
| 2143 |
+
},
|
| 2144 |
+
{
|
| 2145 |
+
"step": 239,
|
| 2146 |
+
"epoch": 1.6203389830508474,
|
| 2147 |
+
"cpu_mem": 1.512288256,
|
| 2148 |
+
"gpu_mem": 4.519338496,
|
| 2149 |
+
"loss": 0.5546,
|
| 2150 |
+
"grad_norm": 5.185640335083008,
|
| 2151 |
+
"learning_rate": 3.099699895631474e-05
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"step": 240,
|
| 2155 |
+
"epoch": 1.6271186440677967,
|
| 2156 |
+
"cpu_mem": 1.512288256,
|
| 2157 |
+
"gpu_mem": 4.518988288,
|
| 2158 |
+
"loss": 0.6922,
|
| 2159 |
+
"grad_norm": 7.394616603851318,
|
| 2160 |
+
"learning_rate": 2.9918813861345952e-05
|
| 2161 |
+
},
|
| 2162 |
+
{
|
| 2163 |
+
"step": 241,
|
| 2164 |
+
"epoch": 1.6338983050847458,
|
| 2165 |
+
"cpu_mem": 1.512288256,
|
| 2166 |
+
"gpu_mem": 4.519284736,
|
| 2167 |
+
"loss": 0.4441,
|
| 2168 |
+
"grad_norm": 5.41588020324707,
|
| 2169 |
+
"learning_rate": 2.885763318295102e-05
|
| 2170 |
+
},
|
| 2171 |
+
{
|
| 2172 |
+
"step": 242,
|
| 2173 |
+
"epoch": 1.6406779661016948,
|
| 2174 |
+
"cpu_mem": 1.512288256,
|
| 2175 |
+
"gpu_mem": 4.519146496,
|
| 2176 |
+
"loss": 0.5156,
|
| 2177 |
+
"grad_norm": 6.340667247772217,
|
| 2178 |
+
"learning_rate": 2.781360719244964e-05
|
| 2179 |
+
},
|
| 2180 |
+
{
|
| 2181 |
+
"step": 243,
|
| 2182 |
+
"epoch": 1.647457627118644,
|
| 2183 |
+
"cpu_mem": 1.512288256,
|
| 2184 |
+
"gpu_mem": 4.51899904,
|
| 2185 |
+
"loss": 0.5521,
|
| 2186 |
+
"grad_norm": 5.889646053314209,
|
| 2187 |
+
"learning_rate": 2.6786883731926306e-05
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"step": 244,
|
| 2191 |
+
"epoch": 1.6542372881355933,
|
| 2192 |
+
"cpu_mem": 1.512288256,
|
| 2193 |
+
"gpu_mem": 4.519138816,
|
| 2194 |
+
"loss": 0.4731,
|
| 2195 |
+
"grad_norm": 5.065695285797119,
|
| 2196 |
+
"learning_rate": 2.5777608193294396e-05
|
| 2197 |
+
},
|
| 2198 |
+
{
|
| 2199 |
+
"step": 245,
|
| 2200 |
+
"epoch": 1.6610169491525424,
|
| 2201 |
+
"cpu_mem": 1.512288256,
|
| 2202 |
+
"gpu_mem": 4.519017472,
|
| 2203 |
+
"loss": 0.4947,
|
| 2204 |
+
"grad_norm": 5.03191614151001,
|
| 2205 |
+
"learning_rate": 2.4785923497707956e-05
|
| 2206 |
+
},
|
| 2207 |
+
{
|
| 2208 |
+
"step": 246,
|
| 2209 |
+
"epoch": 1.6677966101694914,
|
| 2210 |
+
"cpu_mem": 1.512288256,
|
| 2211 |
+
"gpu_mem": 4.519111168,
|
| 2212 |
+
"loss": 0.4658,
|
| 2213 |
+
"grad_norm": 4.863046169281006,
|
| 2214 |
+
"learning_rate": 2.38119700753228e-05
|
| 2215 |
+
},
|
| 2216 |
+
{
|
| 2217 |
+
"step": 247,
|
| 2218 |
+
"epoch": 1.6745762711864407,
|
| 2219 |
+
"cpu_mem": 1.512288256,
|
| 2220 |
+
"gpu_mem": 4.5191296,
|
| 2221 |
+
"loss": 0.5575,
|
| 2222 |
+
"grad_norm": 5.03303337097168,
|
| 2223 |
+
"learning_rate": 2.285588584541047e-05
|
| 2224 |
+
},
|
| 2225 |
+
{
|
| 2226 |
+
"step": 248,
|
| 2227 |
+
"epoch": 1.68135593220339,
|
| 2228 |
+
"cpu_mem": 1.512288256,
|
| 2229 |
+
"gpu_mem": 4.519081984,
|
| 2230 |
+
"loss": 0.4857,
|
| 2231 |
+
"grad_norm": 4.907963752746582,
|
| 2232 |
+
"learning_rate": 2.1917806196827792e-05
|
| 2233 |
+
},
|
| 2234 |
+
{
|
| 2235 |
+
"step": 249,
|
| 2236 |
+
"epoch": 1.688135593220339,
|
| 2237 |
+
"cpu_mem": 1.512288256,
|
| 2238 |
+
"gpu_mem": 4.518988288,
|
| 2239 |
+
"loss": 0.4642,
|
| 2240 |
+
"grad_norm": 5.556704521179199,
|
| 2241 |
+
"learning_rate": 2.0997863968844914e-05
|
| 2242 |
+
},
|
| 2243 |
+
{
|
| 2244 |
+
"step": 250,
|
| 2245 |
+
"epoch": 1.694915254237288,
|
| 2246 |
+
"cpu_mem": 1.512288256,
|
| 2247 |
+
"gpu_mem": 4.519080448,
|
| 2248 |
+
"loss": 0.458,
|
| 2249 |
+
"grad_norm": 4.857274055480957,
|
| 2250 |
+
"learning_rate": 2.009618943233419e-05
|
| 2251 |
+
},
|
| 2252 |
+
{
|
| 2253 |
+
"step": 251,
|
| 2254 |
+
"epoch": 1.7016949152542373,
|
| 2255 |
+
"cpu_mem": 1.512288256,
|
| 2256 |
+
"gpu_mem": 4.518992896,
|
| 2257 |
+
"loss": 0.417,
|
| 2258 |
+
"grad_norm": 5.613741874694824,
|
| 2259 |
+
"learning_rate": 1.921291027132278e-05
|
| 2260 |
+
},
|
| 2261 |
+
{
|
| 2262 |
+
"step": 252,
|
| 2263 |
+
"epoch": 1.7084745762711866,
|
| 2264 |
+
"cpu_mem": 1.512288256,
|
| 2265 |
+
"gpu_mem": 4.519035904,
|
| 2266 |
+
"loss": 0.4969,
|
| 2267 |
+
"grad_norm": 4.631195068359375,
|
| 2268 |
+
"learning_rate": 1.834815156491165e-05
|
| 2269 |
+
},
|
| 2270 |
+
{
|
| 2271 |
+
"step": 253,
|
| 2272 |
+
"epoch": 1.7152542372881356,
|
| 2273 |
+
"cpu_mem": 1.512288256,
|
| 2274 |
+
"gpu_mem": 4.51922944,
|
| 2275 |
+
"loss": 0.4858,
|
| 2276 |
+
"grad_norm": 5.4093122482299805,
|
| 2277 |
+
"learning_rate": 1.750203576956341e-05
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"step": 254,
|
| 2281 |
+
"epoch": 1.7220338983050847,
|
| 2282 |
+
"cpu_mem": 1.512288256,
|
| 2283 |
+
"gpu_mem": 4.519025152,
|
| 2284 |
+
"loss": 0.5449,
|
| 2285 |
+
"grad_norm": 5.517231464385986,
|
| 2286 |
+
"learning_rate": 1.6674682701761493e-05
|
| 2287 |
+
},
|
| 2288 |
+
{
|
| 2289 |
+
"step": 255,
|
| 2290 |
+
"epoch": 1.7288135593220337,
|
| 2291 |
+
"cpu_mem": 1.512288256,
|
| 2292 |
+
"gpu_mem": 4.519181824,
|
| 2293 |
+
"loss": 0.5259,
|
| 2294 |
+
"grad_norm": 6.029206275939941,
|
| 2295 |
+
"learning_rate": 1.5866209521043304e-05
|
| 2296 |
+
},
|
| 2297 |
+
{
|
| 2298 |
+
"step": 256,
|
| 2299 |
+
"epoch": 1.735593220338983,
|
| 2300 |
+
"cpu_mem": 1.512288256,
|
| 2301 |
+
"gpu_mem": 4.519008256,
|
| 2302 |
+
"loss": 0.392,
|
| 2303 |
+
"grad_norm": 4.221251010894775,
|
| 2304 |
+
"learning_rate": 1.5076730713409523e-05
|
| 2305 |
+
},
|
| 2306 |
+
{
|
| 2307 |
+
"step": 257,
|
| 2308 |
+
"epoch": 1.7423728813559323,
|
| 2309 |
+
"cpu_mem": 1.512288256,
|
| 2310 |
+
"gpu_mem": 4.51942144,
|
| 2311 |
+
"loss": 0.5529,
|
| 2312 |
+
"grad_norm": 6.13136100769043,
|
| 2313 |
+
"learning_rate": 1.4306358075111923e-05
|
| 2314 |
+
},
|
| 2315 |
+
{
|
| 2316 |
+
"step": 258,
|
| 2317 |
+
"epoch": 1.7491525423728813,
|
| 2318 |
+
"cpu_mem": 1.512288256,
|
| 2319 |
+
"gpu_mem": 4.519080448,
|
| 2320 |
+
"loss": 0.4382,
|
| 2321 |
+
"grad_norm": 5.44303035736084,
|
| 2322 |
+
"learning_rate": 1.3555200696822232e-05
|
| 2323 |
+
},
|
| 2324 |
+
{
|
| 2325 |
+
"step": 259,
|
| 2326 |
+
"epoch": 1.7559322033898304,
|
| 2327 |
+
"cpu_mem": 1.512288256,
|
| 2328 |
+
"gpu_mem": 4.518997504,
|
| 2329 |
+
"loss": 0.5205,
|
| 2330 |
+
"grad_norm": 4.832195281982422,
|
| 2331 |
+
"learning_rate": 1.2823364948184095e-05
|
| 2332 |
+
},
|
| 2333 |
+
{
|
| 2334 |
+
"step": 260,
|
| 2335 |
+
"epoch": 1.7627118644067796,
|
| 2336 |
+
"cpu_mem": 1.512288256,
|
| 2337 |
+
"gpu_mem": 4.51911424,
|
| 2338 |
+
"loss": 0.3783,
|
| 2339 |
+
"grad_norm": 4.001471519470215,
|
| 2340 |
+
"learning_rate": 1.2110954462750166e-05
|
| 2341 |
+
},
|
| 2342 |
+
{
|
| 2343 |
+
"step": 261,
|
| 2344 |
+
"epoch": 1.769491525423729,
|
| 2345 |
+
"cpu_mem": 1.512288256,
|
| 2346 |
+
"gpu_mem": 4.519069696,
|
| 2347 |
+
"loss": 0.3939,
|
| 2348 |
+
"grad_norm": 4.915110111236572,
|
| 2349 |
+
"learning_rate": 1.1418070123306989e-05
|
| 2350 |
+
},
|
| 2351 |
+
{
|
| 2352 |
+
"step": 262,
|
| 2353 |
+
"epoch": 1.776271186440678,
|
| 2354 |
+
"cpu_mem": 1.512288256,
|
| 2355 |
+
"gpu_mem": 4.519026688,
|
| 2356 |
+
"loss": 0.381,
|
| 2357 |
+
"grad_norm": 3.9966530799865723,
|
| 2358 |
+
"learning_rate": 1.0744810047589115e-05
|
| 2359 |
+
},
|
| 2360 |
+
{
|
| 2361 |
+
"step": 263,
|
| 2362 |
+
"epoch": 1.783050847457627,
|
| 2363 |
+
"cpu_mem": 1.512288256,
|
| 2364 |
+
"gpu_mem": 4.519063552,
|
| 2365 |
+
"loss": 0.4564,
|
| 2366 |
+
"grad_norm": 4.660472393035889,
|
| 2367 |
+
"learning_rate": 1.0091269574384874e-05
|
| 2368 |
+
},
|
| 2369 |
+
{
|
| 2370 |
+
"step": 264,
|
| 2371 |
+
"epoch": 1.7898305084745763,
|
| 2372 |
+
"cpu_mem": 1.512288256,
|
| 2373 |
+
"gpu_mem": 4.519151104,
|
| 2374 |
+
"loss": 0.4723,
|
| 2375 |
+
"grad_norm": 5.000064849853516,
|
| 2376 |
+
"learning_rate": 9.45754125003576e-06
|
| 2377 |
+
},
|
| 2378 |
+
{
|
| 2379 |
+
"step": 265,
|
| 2380 |
+
"epoch": 1.7966101694915255,
|
| 2381 |
+
"cpu_mem": 1.512288256,
|
| 2382 |
+
"gpu_mem": 4.519069696,
|
| 2383 |
+
"loss": 0.5149,
|
| 2384 |
+
"grad_norm": 5.103734970092773,
|
| 2385 |
+
"learning_rate": 8.843714815330987e-06
|
| 2386 |
+
},
|
| 2387 |
+
{
|
| 2388 |
+
"step": 266,
|
| 2389 |
+
"epoch": 1.8033898305084746,
|
| 2390 |
+
"cpu_mem": 1.512288256,
|
| 2391 |
+
"gpu_mem": 4.519284736,
|
| 2392 |
+
"loss": 0.4547,
|
| 2393 |
+
"grad_norm": 5.005780220031738,
|
| 2394 |
+
"learning_rate": 8.249877192799731e-06
|
| 2395 |
+
},
|
| 2396 |
+
{
|
| 2397 |
+
"step": 267,
|
| 2398 |
+
"epoch": 1.8101694915254236,
|
| 2399 |
+
"cpu_mem": 1.512288256,
|
| 2400 |
+
"gpu_mem": 4.519077376,
|
| 2401 |
+
"loss": 0.4271,
|
| 2402 |
+
"grad_norm": 5.692404270172119,
|
| 2403 |
+
"learning_rate": 7.676112474402068e-06
|
| 2404 |
+
},
|
| 2405 |
+
{
|
| 2406 |
+
"step": 268,
|
| 2407 |
+
"epoch": 1.8169491525423729,
|
| 2408 |
+
"cpu_mem": 1.512288256,
|
| 2409 |
+
"gpu_mem": 4.519081984,
|
| 2410 |
+
"loss": 0.405,
|
| 2411 |
+
"grad_norm": 5.452561855316162,
|
| 2412 |
+
"learning_rate": 7.122501909620926e-06
|
| 2413 |
+
},
|
| 2414 |
+
{
|
| 2415 |
+
"step": 269,
|
| 2416 |
+
"epoch": 1.8237288135593221,
|
| 2417 |
+
"cpu_mem": 1.512288256,
|
| 2418 |
+
"gpu_mem": 4.519092736,
|
| 2419 |
+
"loss": 0.4592,
|
| 2420 |
+
"grad_norm": 5.44649600982666,
|
| 2421 |
+
"learning_rate": 6.5891238939566275e-06
|
| 2422 |
+
},
|
| 2423 |
+
{
|
| 2424 |
+
"step": 270,
|
| 2425 |
+
"epoch": 1.8305084745762712,
|
| 2426 |
+
"cpu_mem": 1.512288256,
|
| 2427 |
+
"gpu_mem": 4.519131136,
|
| 2428 |
+
"loss": 0.4592,
|
| 2429 |
+
"grad_norm": 5.148582935333252,
|
| 2430 |
+
"learning_rate": 6.076053957825411e-06
|
| 2431 |
+
},
|
| 2432 |
+
{
|
| 2433 |
+
"step": 271,
|
| 2434 |
+
"epoch": 1.8372881355932202,
|
| 2435 |
+
"cpu_mem": 1.512288256,
|
| 2436 |
+
"gpu_mem": 4.51918336,
|
| 2437 |
+
"loss": 0.414,
|
| 2438 |
+
"grad_norm": 5.350499153137207,
|
| 2439 |
+
"learning_rate": 5.583364755863701e-06
|
| 2440 |
+
},
|
| 2441 |
+
{
|
| 2442 |
+
"step": 272,
|
| 2443 |
+
"epoch": 1.8440677966101695,
|
| 2444 |
+
"cpu_mem": 1.512288256,
|
| 2445 |
+
"gpu_mem": 4.519042048,
|
| 2446 |
+
"loss": 0.4478,
|
| 2447 |
+
"grad_norm": 4.392088413238525,
|
| 2448 |
+
"learning_rate": 5.11112605663977e-06
|
| 2449 |
+
},
|
| 2450 |
+
{
|
| 2451 |
+
"step": 273,
|
| 2452 |
+
"epoch": 1.8508474576271188,
|
| 2453 |
+
"cpu_mem": 1.512288256,
|
| 2454 |
+
"gpu_mem": 4.51892224,
|
| 2455 |
+
"loss": 0.4744,
|
| 2456 |
+
"grad_norm": 5.41953706741333,
|
| 2457 |
+
"learning_rate": 4.659404732773908e-06
|
| 2458 |
+
},
|
| 2459 |
+
{
|
| 2460 |
+
"step": 274,
|
| 2461 |
+
"epoch": 1.8576271186440678,
|
| 2462 |
+
"cpu_mem": 1.512288256,
|
| 2463 |
+
"gpu_mem": 4.519149568,
|
| 2464 |
+
"loss": 0.4541,
|
| 2465 |
+
"grad_norm": 4.928563594818115,
|
| 2466 |
+
"learning_rate": 4.228264751468752e-06
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"step": 275,
|
| 2470 |
+
"epoch": 1.8644067796610169,
|
| 2471 |
+
"cpu_mem": 1.512288256,
|
| 2472 |
+
"gpu_mem": 4.519393792,
|
| 2473 |
+
"loss": 0.396,
|
| 2474 |
+
"grad_norm": 5.382067680358887,
|
| 2475 |
+
"learning_rate": 3.817767165451041e-06
|
| 2476 |
+
},
|
| 2477 |
+
{
|
| 2478 |
+
"step": 276,
|
| 2479 |
+
"epoch": 1.8711864406779661,
|
| 2480 |
+
"cpu_mem": 1.512288256,
|
| 2481 |
+
"gpu_mem": 4.519054336,
|
| 2482 |
+
"loss": 0.4373,
|
| 2483 |
+
"grad_norm": 4.41796875,
|
| 2484 |
+
"learning_rate": 3.4279701043260886e-06
|
| 2485 |
+
},
|
| 2486 |
+
{
|
| 2487 |
+
"step": 277,
|
| 2488 |
+
"epoch": 1.8779661016949154,
|
| 2489 |
+
"cpu_mem": 1.512288256,
|
| 2490 |
+
"gpu_mem": 4.519000576,
|
| 2491 |
+
"loss": 0.6379,
|
| 2492 |
+
"grad_norm": 5.958452224731445,
|
| 2493 |
+
"learning_rate": 3.0589287663461472e-06
|
| 2494 |
+
},
|
| 2495 |
+
{
|
| 2496 |
+
"step": 278,
|
| 2497 |
+
"epoch": 1.8847457627118644,
|
| 2498 |
+
"cpu_mem": 1.512288256,
|
| 2499 |
+
"gpu_mem": 4.519163392,
|
| 2500 |
+
"loss": 0.4834,
|
| 2501 |
+
"grad_norm": 4.944537162780762,
|
| 2502 |
+
"learning_rate": 2.710695410593994e-06
|
| 2503 |
+
},
|
| 2504 |
+
{
|
| 2505 |
+
"step": 279,
|
| 2506 |
+
"epoch": 1.8915254237288135,
|
| 2507 |
+
"cpu_mem": 1.512288256,
|
| 2508 |
+
"gpu_mem": 4.519103488,
|
| 2509 |
+
"loss": 0.4909,
|
| 2510 |
+
"grad_norm": 4.551985740661621,
|
| 2511 |
+
"learning_rate": 2.3833193495825853e-06
|
| 2512 |
+
},
|
| 2513 |
+
{
|
| 2514 |
+
"step": 280,
|
| 2515 |
+
"epoch": 1.8983050847457628,
|
| 2516 |
+
"cpu_mem": 1.512288256,
|
| 2517 |
+
"gpu_mem": 4.51908352,
|
| 2518 |
+
"loss": 0.5207,
|
| 2519 |
+
"grad_norm": 4.847994327545166,
|
| 2520 |
+
"learning_rate": 2.076846942272026e-06
|
| 2521 |
+
},
|
| 2522 |
+
{
|
| 2523 |
+
"step": 281,
|
| 2524 |
+
"epoch": 1.905084745762712,
|
| 2525 |
+
"cpu_mem": 1.512288256,
|
| 2526 |
+
"gpu_mem": 4.519019008,
|
| 2527 |
+
"loss": 0.5324,
|
| 2528 |
+
"grad_norm": 5.380536079406738,
|
| 2529 |
+
"learning_rate": 1.791321587504768e-06
|
| 2530 |
+
},
|
| 2531 |
+
{
|
| 2532 |
+
"step": 282,
|
| 2533 |
+
"epoch": 1.911864406779661,
|
| 2534 |
+
"cpu_mem": 1.512288256,
|
| 2535 |
+
"gpu_mem": 4.519447552,
|
| 2536 |
+
"loss": 0.4495,
|
| 2537 |
+
"grad_norm": 5.5009050369262695,
|
| 2538 |
+
"learning_rate": 1.5267837178600972e-06
|
| 2539 |
+
},
|
| 2540 |
+
{
|
| 2541 |
+
"step": 283,
|
| 2542 |
+
"epoch": 1.9186440677966101,
|
| 2543 |
+
"cpu_mem": 1.512288256,
|
| 2544 |
+
"gpu_mem": 4.519154176,
|
| 2545 |
+
"loss": 0.4852,
|
| 2546 |
+
"grad_norm": 5.446907997131348,
|
| 2547 |
+
"learning_rate": 1.2832707939284427e-06
|
| 2548 |
+
},
|
| 2549 |
+
{
|
| 2550 |
+
"step": 284,
|
| 2551 |
+
"epoch": 1.9254237288135592,
|
| 2552 |
+
"cpu_mem": 1.512288256,
|
| 2553 |
+
"gpu_mem": 4.519009792,
|
| 2554 |
+
"loss": 0.4832,
|
| 2555 |
+
"grad_norm": 4.65576171875,
|
| 2556 |
+
"learning_rate": 1.0608172990067553e-06
|
| 2557 |
+
},
|
| 2558 |
+
{
|
| 2559 |
+
"step": 285,
|
| 2560 |
+
"epoch": 1.9322033898305084,
|
| 2561 |
+
"cpu_mem": 1.512288256,
|
| 2562 |
+
"gpu_mem": 4.519063552,
|
| 2563 |
+
"loss": 0.4847,
|
| 2564 |
+
"grad_norm": 4.889958381652832,
|
| 2565 |
+
"learning_rate": 8.594547342153979e-07
|
| 2566 |
+
},
|
| 2567 |
+
{
|
| 2568 |
+
"step": 286,
|
| 2569 |
+
"epoch": 1.9389830508474577,
|
| 2570 |
+
"cpu_mem": 1.512288256,
|
| 2571 |
+
"gpu_mem": 4.519481344,
|
| 2572 |
+
"loss": 0.4748,
|
| 2573 |
+
"grad_norm": 5.272000789642334,
|
| 2574 |
+
"learning_rate": 6.792116140373116e-07
|
| 2575 |
+
},
|
| 2576 |
+
{
|
| 2577 |
+
"step": 287,
|
| 2578 |
+
"epoch": 1.9457627118644067,
|
| 2579 |
+
"cpu_mem": 1.512288256,
|
| 2580 |
+
"gpu_mem": 4.519250944,
|
| 2581 |
+
"loss": 0.4557,
|
| 2582 |
+
"grad_norm": 4.938218116760254,
|
| 2583 |
+
"learning_rate": 5.201134622801473e-07
|
| 2584 |
+
},
|
| 2585 |
+
{
|
| 2586 |
+
"step": 288,
|
| 2587 |
+
"epoch": 1.9525423728813558,
|
| 2588 |
+
"cpu_mem": 1.512288256,
|
| 2589 |
+
"gpu_mem": 4.519035904,
|
| 2590 |
+
"loss": 0.481,
|
| 2591 |
+
"grad_norm": 5.558644771575928,
|
| 2592 |
+
"learning_rate": 3.821828084619727e-07
|
| 2593 |
+
},
|
| 2594 |
+
{
|
| 2595 |
+
"step": 289,
|
| 2596 |
+
"epoch": 1.959322033898305,
|
| 2597 |
+
"cpu_mem": 1.512288256,
|
| 2598 |
+
"gpu_mem": 4.519120384,
|
| 2599 |
+
"loss": 0.4614,
|
| 2600 |
+
"grad_norm": 5.039109706878662,
|
| 2601 |
+
"learning_rate": 2.654391846207915e-07
|
| 2602 |
+
},
|
| 2603 |
+
{
|
| 2604 |
+
"step": 290,
|
| 2605 |
+
"epoch": 1.9661016949152543,
|
| 2606 |
+
"cpu_mem": 1.512288256,
|
| 2607 |
+
"gpu_mem": 4.51904512,
|
| 2608 |
+
"loss": 0.5343,
|
| 2609 |
+
"grad_norm": 5.333126068115234,
|
| 2610 |
+
"learning_rate": 1.6989912254880556e-07
|
| 2611 |
+
},
|
| 2612 |
+
{
|
| 2613 |
+
"step": 291,
|
| 2614 |
+
"epoch": 1.9728813559322034,
|
| 2615 |
+
"cpu_mem": 1.512288256,
|
| 2616 |
+
"gpu_mem": 4.519080448,
|
| 2617 |
+
"loss": 0.6398,
|
| 2618 |
+
"grad_norm": 5.9029459953308105,
|
| 2619 |
+
"learning_rate": 9.557615145123765e-08
|
| 2620 |
+
},
|
| 2621 |
+
{
|
| 2622 |
+
"step": 292,
|
| 2623 |
+
"epoch": 1.9796610169491524,
|
| 2624 |
+
"cpu_mem": 1.512288256,
|
| 2625 |
+
"gpu_mem": 4.519163392,
|
| 2626 |
+
"loss": 0.4624,
|
| 2627 |
+
"grad_norm": 5.084424018859863,
|
| 2628 |
+
"learning_rate": 4.248079603064724e-08
|
| 2629 |
+
},
|
| 2630 |
+
{
|
| 2631 |
+
"step": 293,
|
| 2632 |
+
"epoch": 1.9864406779661017,
|
| 2633 |
+
"cpu_mem": 1.512288256,
|
| 2634 |
+
"gpu_mem": 4.519080448,
|
| 2635 |
+
"loss": 0.5694,
|
| 2636 |
+
"grad_norm": 5.880096435546875,
|
| 2637 |
+
"learning_rate": 1.0620574996372811e-08
|
| 2638 |
+
},
|
| 2639 |
+
{
|
| 2640 |
+
"step": 294,
|
| 2641 |
+
"epoch": 1.993220338983051,
|
| 2642 |
+
"cpu_mem": 1.512288256,
|
| 2643 |
+
"gpu_mem": 4.51910656,
|
| 2644 |
+
"loss": 0.5335,
|
| 2645 |
+
"grad_norm": 5.264451503753662,
|
| 2646 |
+
"learning_rate": 0.0
|
| 2647 |
+
},
|
| 2648 |
+
{
|
| 2649 |
+
"step": 294,
|
| 2650 |
+
"epoch": 1.993220338983051,
|
| 2651 |
+
"cpu_mem": 1.512288256,
|
| 2652 |
+
"gpu_mem": 4.51910656,
|
| 2653 |
+
"train_runtime": 4458.5149,
|
| 2654 |
+
"train_samples_per_second": 4.229,
|
| 2655 |
+
"train_steps_per_second": 0.066,
|
| 2656 |
+
"total_flos": 0.0,
|
| 2657 |
+
"train_loss": 0.7076091230118355
|
| 2658 |
+
}
|
| 2659 |
+
]
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 4,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 2,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "hellaswag",
|
| 3 |
+
"results": 0.7826130252937662
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "HELLASWAG",
|
| 5 |
+
"dataset_id": "Rowan/hellaswag",
|
| 6 |
+
"preprocess_id": "hellaswag_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 2,
|
| 11 |
+
"alpha": 4,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 1577576
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 1,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-hellaswag-r2-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T16:43:23.732951"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r2-a2/training_logs.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 64,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 32,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "hellaswag",
|
| 3 |
+
"results": 0.33917546305516827
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "HELLASWAG",
|
| 5 |
+
"dataset_id": "Rowan/hellaswag",
|
| 6 |
+
"preprocess_id": "hellaswag_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 32,
|
| 11 |
+
"alpha": 64,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 25389056
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 1,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-hellaswag-r32-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-31T06:39:18.710581"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r32-a2/training_logs.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 16,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 8,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "hellaswag",
|
| 3 |
+
"results": 0.2504481179047998
|
| 4 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/training_configuration.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "TinyLlama/TinyLlama_v1.1",
|
| 3 |
+
"dataset": {
|
| 4 |
+
"name": "HELLASWAG",
|
| 5 |
+
"dataset_id": "Rowan/hellaswag",
|
| 6 |
+
"preprocess_id": "hellaswag_train_deepeval"
|
| 7 |
+
},
|
| 8 |
+
"peft_config": {
|
| 9 |
+
"method": "abl_A",
|
| 10 |
+
"rank": 8,
|
| 11 |
+
"alpha": 16,
|
| 12 |
+
"dropout": 0.0,
|
| 13 |
+
"bias": "none",
|
| 14 |
+
"target_modules": [
|
| 15 |
+
"q_proj",
|
| 16 |
+
"k_proj",
|
| 17 |
+
"v_proj",
|
| 18 |
+
"o_proj",
|
| 19 |
+
"gate_proj",
|
| 20 |
+
"down_proj",
|
| 21 |
+
"up_proj"
|
| 22 |
+
],
|
| 23 |
+
"trainable_parameter_count": 6317696
|
| 24 |
+
},
|
| 25 |
+
"training_config": {
|
| 26 |
+
"max_dataset_length": null,
|
| 27 |
+
"batch_size": 64,
|
| 28 |
+
"per_device_batch_size": 32,
|
| 29 |
+
"gradient_accumulation_steps": 2,
|
| 30 |
+
"learning_rate": 0.0003,
|
| 31 |
+
"num_epochs": 1,
|
| 32 |
+
"warmup_ratio": 0.1
|
| 33 |
+
},
|
| 34 |
+
"model_name": "TinyLlama_v1.1-abl_A-hellaswag-r8-a2",
|
| 35 |
+
"output_dir": "./experiment_results/TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2",
|
| 36 |
+
"seed": 42,
|
| 37 |
+
"timestamp": "2025-08-30T23:40:44.452046"
|
| 38 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-hellaswag-r8-a2/training_logs.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-logiqa-r2-a2/adapter_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 4,
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama_v1.1",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": false,
|
| 8 |
+
"init_weight": "kaiming",
|
| 9 |
+
"layers_pattern": null,
|
| 10 |
+
"layers_to_transform": null,
|
| 11 |
+
"metric_tracking": false,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "ABLATION",
|
| 14 |
+
"r": 2,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"seed": 42,
|
| 17 |
+
"share_weights": false,
|
| 18 |
+
"target_modules": [
|
| 19 |
+
"up_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"v_proj",
|
| 22 |
+
"k_proj",
|
| 23 |
+
"q_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"o_proj"
|
| 26 |
+
],
|
| 27 |
+
"task_type": null,
|
| 28 |
+
"track_n": 100,
|
| 29 |
+
"variant": "A"
|
| 30 |
+
}
|
TinyLlama_v1.1-abl_A/TinyLlama_v1.1-abl_A-logiqa-r2-a2/eval_results.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "logiqa",
|
| 3 |
+
"results": 0.28465193141912826
|
| 4 |
+
}
|