RantiRepo committed on
Commit
439bbb2
·
verified ·
1 Parent(s): 539f296

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +45 -3
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.22598870056497175,
6
  "eval_steps": 10,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -50,6 +50,48 @@
50
  "eval_samples_per_second": 4.38,
51
  "eval_steps_per_second": 1.095,
52
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  }
54
  ],
55
  "logging_steps": 10,
@@ -69,7 +111,7 @@
69
  "attributes": {}
70
  }
71
  },
72
- "total_flos": 2.397293116495872e+16,
73
  "train_batch_size": 4,
74
  "trial_name": null,
75
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4519774011299435,
6
  "eval_steps": 10,
7
+ "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
50
  "eval_samples_per_second": 4.38,
51
  "eval_steps_per_second": 1.095,
52
  "step": 20
53
+ },
54
+ {
55
+ "entropy": 7.635345196723938,
56
+ "epoch": 0.3389830508474576,
57
+ "grad_norm": 8.9375,
58
+ "learning_rate": 0.00017325581395348838,
59
+ "loss": 9.085212707519531,
60
+ "mean_token_accuracy": 0.11288385493680834,
61
+ "num_tokens": 505921.0,
62
+ "step": 30
63
+ },
64
+ {
65
+ "epoch": 0.3389830508474576,
66
+ "eval_entropy": 7.880531340837479,
67
+ "eval_loss": 7.876997470855713,
68
+ "eval_mean_token_accuracy": 0.09879284957423806,
69
+ "eval_num_tokens": 505921.0,
70
+ "eval_runtime": 43.9757,
71
+ "eval_samples_per_second": 4.366,
72
+ "eval_steps_per_second": 1.092,
73
+ "step": 30
74
+ },
75
+ {
76
+ "entropy": 7.543534195423126,
77
+ "epoch": 0.4519774011299435,
78
+ "grad_norm": 1.40625,
79
+ "learning_rate": 0.00016162790697674419,
80
+ "loss": 7.40704116821289,
81
+ "mean_token_accuracy": 0.11707657705992461,
82
+ "num_tokens": 677163.0,
83
+ "step": 40
84
+ },
85
+ {
86
+ "epoch": 0.4519774011299435,
87
+ "eval_entropy": 7.573027561108272,
88
+ "eval_loss": 7.244246006011963,
89
+ "eval_mean_token_accuracy": 0.10892315845315655,
90
+ "eval_num_tokens": 677163.0,
91
+ "eval_runtime": 44.408,
92
+ "eval_samples_per_second": 4.324,
93
+ "eval_steps_per_second": 1.081,
94
+ "step": 40
95
  }
96
  ],
97
  "logging_steps": 10,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 4.854116392922726e+16,
115
  "train_batch_size": 4,
116
  "trial_name": null,
117
  "trial_params": null