iproskurina committed on
Commit
daf2941
·
verified ·
1 Parent(s): 380a23f

Model save

Browse files
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.5619722863264766,
4
+ "eval_loss": 2.4340596199035645,
5
+ "eval_runtime": 13.1565,
6
+ "eval_samples_per_second": 86.497,
7
+ "eval_steps_per_second": 10.869,
8
+ "perplexity": 11.405088550670825,
9
+ "total_flos": 1.3626163133939712e+16,
10
+ "train_loss": 2.4788927044784814,
11
+ "train_runtime": 196.8793,
12
+ "train_samples": 4558,
13
+ "train_samples_per_second": 23.151,
14
+ "train_steps_per_second": 5.79
15
+ }
eval_epoch_1_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.5619722863264766,
4
+ "eval_loss": 2.4340596199035645,
5
+ "eval_runtime": 13.1565,
6
+ "eval_samples_per_second": 86.497,
7
+ "eval_steps_per_second": 10.869,
8
+ "perplexity": 11.405088550670825
9
+ }
special_tokens_map.json CHANGED
@@ -12,5 +12,6 @@
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
- }
 
16
  }
 
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
  }
tokenizer_config.json CHANGED
@@ -2058,5 +2058,6 @@
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
 
2061
  "tokenizer_class": "PreTrainedTokenizerFast"
2062
  }
 
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
2061
+ "pad_token": "<|end_of_text|>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
  }
train_epoch_1_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 1.3626163133939712e+16,
4
+ "train_loss": 2.4788927044784814,
5
+ "train_runtime": 196.8793,
6
+ "train_samples": 4558,
7
+ "train_samples_per_second": 23.151,
8
+ "train_steps_per_second": 5.79
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1140,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.43859649122807015,
14
+ "grad_norm": 17.125,
15
+ "learning_rate": 5.622807017543859e-07,
16
+ "loss": 2.5341,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.8771929824561403,
21
+ "grad_norm": 17.75,
22
+ "learning_rate": 1.2368421052631579e-07,
23
+ "loss": 2.4372,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "eval_accuracy": 0.5619722863264766,
29
+ "eval_loss": 2.4340596199035645,
30
+ "eval_runtime": 13.1971,
31
+ "eval_samples_per_second": 86.231,
32
+ "eval_steps_per_second": 10.836,
33
+ "step": 1140
34
+ },
35
+ {
36
+ "epoch": 1.0,
37
+ "step": 1140,
38
+ "total_flos": 1.3626163133939712e+16,
39
+ "train_loss": 2.4788927044784814,
40
+ "train_runtime": 196.8793,
41
+ "train_samples_per_second": 23.151,
42
+ "train_steps_per_second": 5.79
43
+ }
44
+ ],
45
+ "logging_steps": 500,
46
+ "max_steps": 1140,
47
+ "num_input_tokens_seen": 0,
48
+ "num_train_epochs": 1,
49
+ "save_steps": 500,
50
+ "stateful_callbacks": {
51
+ "TrainerControl": {
52
+ "args": {
53
+ "should_epoch_stop": false,
54
+ "should_evaluate": false,
55
+ "should_log": false,
56
+ "should_save": true,
57
+ "should_training_stop": true
58
+ },
59
+ "attributes": {}
60
+ }
61
+ },
62
+ "total_flos": 1.3626163133939712e+16,
63
+ "train_batch_size": 4,
64
+ "trial_name": null,
65
+ "trial_params": null
66
+ }