File size: 2,230 Bytes
e59799a
 
 
 
3ea75c6
e59799a
3ea75c6
e59799a
 
 
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
3ea75c6
 
 
 
e59799a
 
 
 
3ea75c6
e59799a
 
3ea75c6
e59799a
 
 
 
 
 
 
3ea75c6
e59799a
 
 
 
3ea75c6
e59799a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 939,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "grad_norm": 0.8309588432312012,
      "learning_rate": 4.472843450479233e-05,
      "loss": 2.1268,
      "step": 100
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.8111929893493652,
      "learning_rate": 3.940362087326944e-05,
      "loss": 1.9513,
      "step": 200
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.7366995215415955,
      "learning_rate": 3.407880724174654e-05,
      "loss": 1.9131,
      "step": 300
    },
    {
      "epoch": 1.2784,
      "grad_norm": 0.7454879283905029,
      "learning_rate": 2.8753993610223644e-05,
      "loss": 1.8164,
      "step": 400
    },
    {
      "epoch": 1.5984,
      "grad_norm": 0.7343336939811707,
      "learning_rate": 2.3429179978700748e-05,
      "loss": 1.816,
      "step": 500
    },
    {
      "epoch": 1.9184,
      "grad_norm": 0.7088562250137329,
      "learning_rate": 1.8104366347177852e-05,
      "loss": 1.7918,
      "step": 600
    },
    {
      "epoch": 2.2368,
      "grad_norm": 0.7369393706321716,
      "learning_rate": 1.2779552715654951e-05,
      "loss": 1.7424,
      "step": 700
    },
    {
      "epoch": 2.5568,
      "grad_norm": 0.7260516285896301,
      "learning_rate": 7.4547390841320565e-06,
      "loss": 1.744,
      "step": 800
    },
    {
      "epoch": 2.8768000000000002,
      "grad_norm": 0.6936702728271484,
      "learning_rate": 2.1299254526091587e-06,
      "loss": 1.7355,
      "step": 900
    }
  ],
  "logging_steps": 100,
  "max_steps": 939,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7838760960000000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}