File size: 3,184 Bytes
6cf2b79
3ae3d68
 
9c66697
3ae3d68
9c66697
6cf2b79
 
 
 
 
3ae3d68
124b0f7
5eaf652
124b0f7
6cf2b79
 
 
3ae3d68
124b0f7
 
 
 
6cf2b79
 
 
3ae3d68
124b0f7
5eaf652
124b0f7
0ab62a9
6cf2b79
 
3ae3d68
124b0f7
5eaf652
124b0f7
558eca1
6cf2b79
 
3ae3d68
124b0f7
 
 
 
3ae3d68
 
 
 
124b0f7
5eaf652
124b0f7
074543a
804daff
 
3ae3d68
124b0f7
804daff
124b0f7
074543a
9c66697
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cf2b79
 
 
3ae3d68
6cf2b79
3ae3d68
 
6cf2b79
 
 
 
 
 
 
9c66697
6cf2b79
 
 
 
9c66697
3ae3d68
6cf2b79
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.042105263157894736,
  "eval_steps": 3,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004210526315789474,
      "grad_norm": 0.42628446221351624,
      "learning_rate": 2e-05,
      "loss": 0.1517,
      "step": 1
    },
    {
      "epoch": 0.004210526315789474,
      "eval_loss": 0.24416916072368622,
      "eval_runtime": 29.4222,
      "eval_samples_per_second": 3.399,
      "eval_steps_per_second": 1.699,
      "step": 1
    },
    {
      "epoch": 0.008421052631578947,
      "grad_norm": 0.433437317609787,
      "learning_rate": 4e-05,
      "loss": 0.1501,
      "step": 2
    },
    {
      "epoch": 0.01263157894736842,
      "grad_norm": 0.3588545322418213,
      "learning_rate": 6e-05,
      "loss": 0.1181,
      "step": 3
    },
    {
      "epoch": 0.01263157894736842,
      "eval_loss": 0.2361646592617035,
      "eval_runtime": 29.4356,
      "eval_samples_per_second": 3.397,
      "eval_steps_per_second": 1.699,
      "step": 3
    },
    {
      "epoch": 0.016842105263157894,
      "grad_norm": 1.395027756690979,
      "learning_rate": 8e-05,
      "loss": 0.4504,
      "step": 4
    },
    {
      "epoch": 0.021052631578947368,
      "grad_norm": 1.5977227687835693,
      "learning_rate": 0.0001,
      "loss": 0.4201,
      "step": 5
    },
    {
      "epoch": 0.02526315789473684,
      "grad_norm": 1.294385552406311,
      "learning_rate": 0.00012,
      "loss": 0.3502,
      "step": 6
    },
    {
      "epoch": 0.02526315789473684,
      "eval_loss": 0.14960011839866638,
      "eval_runtime": 29.4235,
      "eval_samples_per_second": 3.399,
      "eval_steps_per_second": 1.699,
      "step": 6
    },
    {
      "epoch": 0.029473684210526315,
      "grad_norm": 0.3078320622444153,
      "learning_rate": 0.00014,
      "loss": 0.0808,
      "step": 7
    },
    {
      "epoch": 0.03368421052631579,
      "grad_norm": 0.6034978032112122,
      "learning_rate": 0.00016,
      "loss": 0.0923,
      "step": 8
    },
    {
      "epoch": 0.037894736842105266,
      "grad_norm": 0.5751827955245972,
      "learning_rate": 0.00018,
      "loss": 0.0495,
      "step": 9
    },
    {
      "epoch": 0.037894736842105266,
      "eval_loss": 0.04392697289586067,
      "eval_runtime": 29.5398,
      "eval_samples_per_second": 3.385,
      "eval_steps_per_second": 1.693,
      "step": 9
    },
    {
      "epoch": 0.042105263157894736,
      "grad_norm": 0.9490816593170166,
      "learning_rate": 0.0002,
      "loss": 0.0434,
      "step": 10
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5589685858467840.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}