File size: 2,724 Bytes
7a09822
 
 
 
 
 
 
 
 
 
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
7a09822
30fcf1e
7a09822
 
 
 
30fcf1e
 
 
 
7a09822
 
 
 
 
 
30fcf1e
 
 
 
7a09822
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 90,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11204481792717087,
      "grad_norm": 29.25,
      "learning_rate": 1e-05,
      "loss": 1.3767690658569336,
      "step": 10
    },
    {
      "epoch": 0.22408963585434175,
      "grad_norm": 23.0,
      "learning_rate": 9.628619846344453e-06,
      "loss": 1.287515354156494,
      "step": 20
    },
    {
      "epoch": 0.33613445378151263,
      "grad_norm": 18.0,
      "learning_rate": 8.569648672789496e-06,
      "loss": 1.2844683647155761,
      "step": 30
    },
    {
      "epoch": 0.4481792717086835,
      "grad_norm": 14.3125,
      "learning_rate": 6.980398830195785e-06,
      "loss": 1.2283113479614258,
      "step": 40
    },
    {
      "epoch": 0.5602240896358543,
      "grad_norm": 14.8125,
      "learning_rate": 5.096956658859122e-06,
      "loss": 1.1659849166870118,
      "step": 50
    },
    {
      "epoch": 0.6722689075630253,
      "grad_norm": 15.125,
      "learning_rate": 3.1991113759764493e-06,
      "loss": 1.2485424995422363,
      "step": 60
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 12.875,
      "learning_rate": 1.5687918106563326e-06,
      "loss": 1.2000310897827149,
      "step": 70
    },
    {
      "epoch": 0.896358543417367,
      "grad_norm": 17.5,
      "learning_rate": 4.481852951692672e-07,
      "loss": 1.219639205932617,
      "step": 80
    },
    {
      "epoch": 1.0,
      "grad_norm": 53.0,
      "learning_rate": 3.760237478849793e-09,
      "loss": 1.250431442260742,
      "step": 90
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.125481128692627,
      "eval_runtime": 7.4107,
      "eval_samples_per_second": 9.716,
      "eval_steps_per_second": 1.214,
      "step": 90
    },
    {
      "epoch": 1.0,
      "step": 90,
      "total_flos": 1.054823986446048e+16,
      "train_loss": 1.251299254099528,
      "train_runtime": 241.5055,
      "train_samples_per_second": 14.753,
      "train_steps_per_second": 0.373
    }
  ],
  "logging_steps": 10,
  "max_steps": 90,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.054823986446048e+16,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}