File size: 4,000 Bytes
8b9fd30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 2640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_gen_len": 130.01,
      "eval_loss": 1.9146157503128052,
      "eval_rouge1": 0.4589875139531387,
      "eval_rouge2": 0.21878510498585943,
      "eval_rougeL": 0.3016127842816204,
      "eval_rougeLsum": 0.4009407199247955,
      "eval_runtime": 66.3133,
      "eval_samples_per_second": 1.508,
      "eval_steps_per_second": 0.106,
      "step": 440
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 2.9900436401367188,
      "learning_rate": 4.844240313802481e-06,
      "loss": 1.826,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 121.92,
      "eval_loss": 1.8549950122833252,
      "eval_rouge1": 0.4619905413631098,
      "eval_rouge2": 0.2162940062464438,
      "eval_rougeL": 0.30137015640361065,
      "eval_rougeLsum": 0.4029678217620495,
      "eval_runtime": 313.1936,
      "eval_samples_per_second": 0.319,
      "eval_steps_per_second": 0.022,
      "step": 880
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 5.034411907196045,
      "learning_rate": 4.394041918714914e-06,
      "loss": 1.6317,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 120.8,
      "eval_loss": 1.8090400695800781,
      "eval_rouge1": 0.4670357799617887,
      "eval_rouge2": 0.22161111145750578,
      "eval_rougeL": 0.3101602334681084,
      "eval_rougeLsum": 0.40916586051494386,
      "eval_runtime": 272.6557,
      "eval_samples_per_second": 0.367,
      "eval_steps_per_second": 0.026,
      "step": 1320
    },
    {
      "epoch": 3.409090909090909,
      "grad_norm": 2.994469404220581,
      "learning_rate": 3.7059480818938033e-06,
      "loss": 1.541,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 122.31,
      "eval_loss": 1.7940858602523804,
      "eval_rouge1": 0.47786994937778915,
      "eval_rouge2": 0.23261031528055848,
      "eval_rougeL": 0.3195968141082743,
      "eval_rougeLsum": 0.4178774735087476,
      "eval_runtime": 267.101,
      "eval_samples_per_second": 0.374,
      "eval_steps_per_second": 0.026,
      "step": 1760
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 3.3205864429473877,
      "learning_rate": 2.86638476131148e-06,
      "loss": 1.4761,
      "step": 2000
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 120.21,
      "eval_loss": 1.7847797870635986,
      "eval_rouge1": 0.46872849142787104,
      "eval_rouge2": 0.22607984634691033,
      "eval_rougeL": 0.31102687015492186,
      "eval_rougeLsum": 0.4124971038488223,
      "eval_runtime": 255.0208,
      "eval_samples_per_second": 0.392,
      "eval_steps_per_second": 0.027,
      "step": 2200
    },
    {
      "epoch": 5.681818181818182,
      "grad_norm": 3.04300594329834,
      "learning_rate": 1.980802784132701e-06,
      "loss": 1.426,
      "step": 2500
    },
    {
      "epoch": 6.0,
      "eval_gen_len": 116.53,
      "eval_loss": 1.7648776769638062,
      "eval_rouge1": 0.4707629824198739,
      "eval_rouge2": 0.2250141092622865,
      "eval_rougeL": 0.3104222446400876,
      "eval_rougeLsum": 0.4100478178262512,
      "eval_runtime": 240.46,
      "eval_samples_per_second": 0.416,
      "eval_steps_per_second": 0.029,
      "step": 2640
    }
  ],
  "logging_steps": 500,
  "max_steps": 4400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0120075847663616e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}