bhuvanmdev committed on
Commit
5dcbd67
·
verified ·
1 Parent(s): 9dbfe9b

Training in progress, step 1470, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,11 +20,11 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "lm_head",
24
  "query_key_value",
 
 
25
  "dense",
26
- "dense_h_to_4h",
27
- "dense_4h_to_h"
28
  ],
29
  "task_type": "CAUSAL_LM",
30
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "query_key_value",
24
+ "lm_head",
25
+ "dense_4h_to_h",
26
  "dense",
27
+ "dense_h_to_4h"
 
28
  ],
29
  "task_type": "CAUSAL_LM",
30
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaa1a251c60709477d0d8c482aeeb1eeb49429fe0b9a6641c945fd96f9e946da
3
  size 1316913776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aa442a7b0a850e10f76551b4fc32c47a42a8fc8ae52fe78c48295baf3448481
3
  size 1316913776
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4372b065710baf1bb08c3b4a94e8f177b87ea1e7c2a1f05f93288bc856c4e20
3
  size 8908124
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912608e1f46c072dd62554a214c457e0bb9fbc29265936c8921001ea187536d0
3
  size 8908124
last-checkpoint/rng_state.pth CHANGED
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
 
last-checkpoint/scheduler.pt CHANGED
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.30914555603263205,
5
  "eval_steps": 500,
6
- "global_step": 1440,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1015,14 +1015,35 @@
1015
  "learning_rate": 0.00034542722198368397,
1016
  "loss": 0.7839,
1017
  "step": 1440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1018
  }
1019
  ],
1020
  "logging_steps": 10,
1021
  "max_steps": 4658,
1022
  "num_input_tokens_seen": 0,
1023
  "num_train_epochs": 1,
1024
- "save_steps": 10,
1025
- "total_flos": 1.0907404405212672e+17,
1026
  "train_batch_size": 3,
1027
  "trial_name": null,
1028
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.31558608844997854,
5
  "eval_steps": 500,
6
+ "global_step": 1470,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1015
  "learning_rate": 0.00034542722198368397,
1016
  "loss": 0.7839,
1017
  "step": 1440
1018
+ },
1019
+ {
1020
+ "epoch": 0.31129240017174753,
1021
+ "grad_norm": 1.4786028861999512,
1022
+ "learning_rate": 0.00034435379991412624,
1023
+ "loss": 0.7995,
1024
+ "step": 1450
1025
+ },
1026
+ {
1027
+ "epoch": 0.313439244310863,
1028
+ "grad_norm": 1.392654538154602,
1029
+ "learning_rate": 0.0003432803778445685,
1030
+ "loss": 0.8046,
1031
+ "step": 1460
1032
+ },
1033
+ {
1034
+ "epoch": 0.31558608844997854,
1035
+ "grad_norm": 1.730966567993164,
1036
+ "learning_rate": 0.00034220695577501074,
1037
+ "loss": 0.7909,
1038
+ "step": 1470
1039
  }
1040
  ],
1041
  "logging_steps": 10,
1042
  "max_steps": 4658,
1043
  "num_input_tokens_seen": 0,
1044
  "num_train_epochs": 1,
1045
+ "save_steps": 30,
1046
+ "total_flos": 1.1131927713309773e+17,
1047
  "train_batch_size": 3,
1048
  "trial_name": null,
1049
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
Binary files a/last-checkpoint/training_args.bin and b/last-checkpoint/training_args.bin differ