File size: 2,545 Bytes
248896c
 
 
025b305
89543b3
025b305
248896c
 
 
 
 
 
 
89543b3
 
 
248896c
 
 
 
89543b3
248896c
89543b3
248896c
 
 
 
89543b3
248896c
89543b3
248896c
66075b0
 
 
89543b3
66075b0
89543b3
66075b0
 
 
 
89543b3
66075b0
89543b3
66075b0
d0009ed
 
 
89543b3
d0009ed
89543b3
d0009ed
 
 
 
89543b3
d0009ed
89543b3
d0009ed
29f0568
 
 
89543b3
29f0568
89543b3
29f0568
 
 
 
89543b3
29f0568
89543b3
29f0568
025b305
 
 
89543b3
025b305
89543b3
025b305
 
 
 
89543b3
025b305
89543b3
025b305
248896c
 
 
 
 
 
89543b3
248896c
 
 
 
 
 
 
025b305
248896c
 
 
 
025b305
248896c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.285714285714286,
  "eval_steps": 200,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14285714285714285,
      "eval_loss": 3.078927516937256,
      "eval_runtime": 4.841,
      "eval_samples_per_second": 310.059,
      "eval_steps_per_second": 3.305,
      "step": 1
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 9.25,
      "learning_rate": 0.00019863613034027224,
      "loss": 6.1788,
      "step": 10
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 5.25,
      "learning_rate": 0.0001879473751206489,
      "loss": 5.2684,
      "step": 20
    },
    {
      "epoch": 4.285714285714286,
      "grad_norm": 5.28125,
      "learning_rate": 0.00016772815716257412,
      "loss": 4.6887,
      "step": 30
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 2.3125,
      "learning_rate": 0.00014016954246529696,
      "loss": 4.3802,
      "step": 40
    },
    {
      "epoch": 7.142857142857143,
      "grad_norm": 3.484375,
      "learning_rate": 0.00010825793454723325,
      "loss": 4.1083,
      "step": 50
    },
    {
      "epoch": 8.571428571428571,
      "grad_norm": 2.5625,
      "learning_rate": 7.54514512859201e-05,
      "loss": 3.8961,
      "step": 60
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.96875,
      "learning_rate": 4.530518418775733e-05,
      "loss": 3.8097,
      "step": 70
    },
    {
      "epoch": 11.428571428571429,
      "grad_norm": 1.546875,
      "learning_rate": 2.1085949060360654e-05,
      "loss": 3.7435,
      "step": 80
    },
    {
      "epoch": 12.857142857142858,
      "grad_norm": 2.328125,
      "learning_rate": 5.418275829936537e-06,
      "loss": 3.721,
      "step": 90
    },
    {
      "epoch": 14.285714285714286,
      "grad_norm": 1.453125,
      "learning_rate": 0.0,
      "loss": 3.7058,
      "step": 100
    }
  ],
  "logging_steps": 10,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.57855601360896e+16,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}