File size: 2,245 Bytes
16f03df
 
 
 
 
 
 
 
 
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
 
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
 
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
16f03df
 
 
 
1fc0701
 
 
 
16f03df
 
 
 
1fc0701
 
16f03df
1fc0701
16f03df
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.966777408637874,
  "eval_steps": 1000,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 9.966777408637875e-07,
      "loss": 2.3466,
      "step": 300
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.993355481727575e-06,
      "loss": 2.3107,
      "step": 600
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.9900332225913626e-06,
      "loss": 2.2553,
      "step": 900
    },
    {
      "epoch": 3.32,
      "eval_loss": 2.210308074951172,
      "eval_runtime": 21.2106,
      "eval_samples_per_second": 80.007,
      "eval_steps_per_second": 5.045,
      "step": 1000
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.98671096345515e-06,
      "loss": 2.2071,
      "step": 1200
    },
    {
      "epoch": 4.98,
      "learning_rate": 4.983388704318937e-06,
      "loss": 2.1719,
      "step": 1500
    },
    {
      "epoch": 5.98,
      "learning_rate": 5.980066445182725e-06,
      "loss": 2.1433,
      "step": 1800
    },
    {
      "epoch": 6.64,
      "eval_loss": 2.1161131858825684,
      "eval_runtime": 21.3287,
      "eval_samples_per_second": 79.564,
      "eval_steps_per_second": 5.017,
      "step": 2000
    },
    {
      "epoch": 6.98,
      "learning_rate": 6.976744186046513e-06,
      "loss": 2.1165,
      "step": 2100
    },
    {
      "epoch": 7.97,
      "learning_rate": 7.9734219269103e-06,
      "loss": 2.0987,
      "step": 2400
    },
    {
      "epoch": 8.97,
      "learning_rate": 8.970099667774087e-06,
      "loss": 2.0829,
      "step": 2700
    },
    {
      "epoch": 9.97,
      "learning_rate": 9.966777408637874e-06,
      "loss": 2.0627,
      "step": 3000
    },
    {
      "epoch": 9.97,
      "eval_loss": 2.0649547576904297,
      "eval_runtime": 21.2155,
      "eval_samples_per_second": 79.989,
      "eval_steps_per_second": 5.043,
      "step": 3000
    }
  ],
  "logging_steps": 300,
  "max_steps": 6020,
  "num_train_epochs": 20,
  "save_steps": 1000,
  "total_flos": 1.5499830660395827e+17,
  "trial_name": null,
  "trial_params": null
}