File size: 2,373 Bytes
e5f0f5a
 
 
 
 
2231611
e5f0f5a
 
 
 
 
2231611
a1a87bd
2231611
a1a87bd
e5f0f5a
 
 
2231611
a1a87bd
2231611
a1a87bd
e5f0f5a
 
 
2231611
a1a87bd
2231611
a1a87bd
e5f0f5a
 
 
2231611
a1a87bd
2231611
a1a87bd
e5f0f5a
 
 
2231611
a1a87bd
2231611
a1a87bd
2231611
 
 
 
a1a87bd
2231611
a1a87bd
2231611
 
 
 
a1a87bd
2231611
a1a87bd
2231611
 
 
 
a1a87bd
2231611
a1a87bd
2231611
e5f0f5a
 
 
2231611
a1a87bd
2231611
a1a87bd
 
 
e5f0f5a
 
 
2231611
e5f0f5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1a87bd
2231611
e5f0f5a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 81,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12345679012345678,
      "grad_norm": 18.884857177734375,
      "learning_rate": 8.765432098765433e-05,
      "loss": 11.7134,
      "step": 10
    },
    {
      "epoch": 0.24691358024691357,
      "grad_norm": 1.8587737083435059,
      "learning_rate": 7.530864197530865e-05,
      "loss": 1.5567,
      "step": 20
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.13458853960037231,
      "learning_rate": 6.296296296296296e-05,
      "loss": 0.0378,
      "step": 30
    },
    {
      "epoch": 0.49382716049382713,
      "grad_norm": 0.05857311934232712,
      "learning_rate": 5.061728395061729e-05,
      "loss": 0.0212,
      "step": 40
    },
    {
      "epoch": 0.6172839506172839,
      "grad_norm": 0.009554409421980381,
      "learning_rate": 3.82716049382716e-05,
      "loss": 0.0069,
      "step": 50
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.0058960807509720325,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.0027,
      "step": 60
    },
    {
      "epoch": 0.8641975308641975,
      "grad_norm": 0.0033985786139965057,
      "learning_rate": 1.3580246913580247e-05,
      "loss": 0.0026,
      "step": 70
    },
    {
      "epoch": 0.9876543209876543,
      "grad_norm": 0.002752589527517557,
      "learning_rate": 1.234567901234568e-06,
      "loss": 0.0026,
      "step": 80
    },
    {
      "epoch": 1.0,
      "step": 81,
      "total_flos": 1.6678153976020992e+16,
      "train_loss": 0.0,
      "train_runtime": 0.009,
      "train_samples_per_second": 18069.762,
      "train_steps_per_second": 9034.881
    }
  ],
  "logging_steps": 10,
  "max_steps": 81,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6678153976020992e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}