{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.177545691906005,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4177545691906005,
      "grad_norm": 0.28227752447128296,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 4.1508,
      "step": 10
    },
    {
      "epoch": 0.835509138381201,
      "grad_norm": 0.31433430314064026,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 4.1593,
      "step": 20
    },
    {
      "epoch": 1.2532637075718016,
      "grad_norm": 0.3350953161716461,
      "learning_rate": 8.999999999999999e-05,
      "loss": 4.0414,
      "step": 30
    },
    {
      "epoch": 1.671018276762402,
      "grad_norm": 0.2885706126689911,
      "learning_rate": 0.00011999999999999999,
      "loss": 3.8411,
      "step": 40
    },
    {
      "epoch": 2.0887728459530024,
      "grad_norm": 0.23711609840393066,
      "learning_rate": 0.00015,
      "loss": 3.6434,
      "step": 50
    },
    {
      "epoch": 2.506527415143603,
      "grad_norm": 0.21583135426044464,
      "learning_rate": 0.00017999999999999998,
      "loss": 3.4636,
      "step": 60
    },
    {
      "epoch": 2.9242819843342036,
      "grad_norm": 0.18754692375659943,
      "learning_rate": 0.00020999999999999998,
      "loss": 3.3154,
      "step": 70
    },
    {
      "epoch": 3.342036553524804,
      "grad_norm": 0.15951760113239288,
      "learning_rate": 0.00023999999999999998,
      "loss": 3.2195,
      "step": 80
    },
    {
      "epoch": 3.759791122715405,
      "grad_norm": 0.14639759063720703,
      "learning_rate": 0.00027,
      "loss": 3.122,
      "step": 90
    },
    {
      "epoch": 4.177545691906005,
      "grad_norm": 0.1860765665769577,
      "learning_rate": 0.0003,
      "loss": 3.0677,
      "step": 100
    }
  ],
  "logging_steps": 10,
  "max_steps": 300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 14,
  "save_steps": 100,
  "total_flos": 1.6201284405755904e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}