moos124 commited on
Commit
48b9b0b
·
verified ·
1 Parent(s): e421350

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53346a82ae215f6aa601bad33bbdf44cc81dabd5f3b7ffa0278521604a9d7247
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46611663ddb003d3fad551f919f2842a07905148212f3c6637e4dea024b6c395
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4bea2dddc467b755411ea03249fcb45acb1a7d0b1d63bc3bd89bcf576b51cac
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a388d9fae69b03af0d2595f3c9c59c84a3d4b776eb6c70ee6d21850d61e4d80
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba8424e6ca8dc52c976593578f8900a61445da0a32107b67688c0971f3ca77e2
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ecc53428755d63edd79679db9a5440d4ba97bc0f29104ed9e3084e2b71d80d
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c8ad7d484220c33d58f2ec0797ed03142384c76c415a1aa226b570f1fa24558
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c163eafc31042a3869aade40f9de5eee80d532c8ab9aab522757e6431256f5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.010666666666666666,
6
  "eval_steps": 500,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -68,6 +68,16 @@
68
  "mean_token_accuracy": 0.6715238064527511,
69
  "num_tokens": 235345.0,
70
  "step": 50
 
 
 
 
 
 
 
 
 
 
71
  }
72
  ],
73
  "logging_steps": 10,
@@ -87,7 +97,7 @@
87
  "attributes": {}
88
  }
89
  },
90
- "total_flos": 1164045052624896.0,
91
  "train_batch_size": 4,
92
  "trial_name": null,
93
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0128,
6
  "eval_steps": 500,
7
+ "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
68
  "mean_token_accuracy": 0.6715238064527511,
69
  "num_tokens": 235345.0,
70
  "step": 50
71
+ },
72
+ {
73
+ "entropy": 1.2768938690423965,
74
+ "epoch": 0.0128,
75
+ "grad_norm": 0.6082292199134827,
76
+ "learning_rate": 1.9666666666666666e-05,
77
+ "loss": 1.8587135314941405,
78
+ "mean_token_accuracy": 0.6761457294225692,
79
+ "num_tokens": 279475.0,
80
+ "step": 60
81
  }
82
  ],
83
  "logging_steps": 10,
 
97
  "attributes": {}
98
  }
99
  },
100
+ "total_flos": 1370353762132992.0,
101
  "train_batch_size": 4,
102
  "trial_name": null,
103
  "trial_params": null