File size: 2,472 Bytes
26b1efc
 
 
 
 
 
 
 
 
 
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
151d520
26b1efc
151d520
26b1efc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9230769230769231,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19230769230769232,
      "grad_norm": 0.5569552183151245,
      "learning_rate": 0.0001,
      "loss": 4.6128,
      "step": 5
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 0.7635518312454224,
      "learning_rate": 0.00019714285714285716,
      "loss": 4.4768,
      "step": 10
    },
    {
      "epoch": 0.5769230769230769,
      "grad_norm": 1.002182126045227,
      "learning_rate": 0.00018285714285714286,
      "loss": 4.3935,
      "step": 15
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.7686426043510437,
      "learning_rate": 0.00016857142857142857,
      "loss": 4.1717,
      "step": 20
    },
    {
      "epoch": 0.9615384615384616,
      "grad_norm": 0.9222117066383362,
      "learning_rate": 0.0001542857142857143,
      "loss": 3.7154,
      "step": 25
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 1.0059330463409424,
      "learning_rate": 0.00014,
      "loss": 3.6302,
      "step": 30
    },
    {
      "epoch": 1.3461538461538463,
      "grad_norm": 1.1044957637786865,
      "learning_rate": 0.00012571428571428572,
      "loss": 3.617,
      "step": 35
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 1.1102561950683594,
      "learning_rate": 0.00011142857142857144,
      "loss": 3.3122,
      "step": 40
    },
    {
      "epoch": 1.7307692307692308,
      "grad_norm": 1.1488291025161743,
      "learning_rate": 9.714285714285715e-05,
      "loss": 3.473,
      "step": 45
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 1.1239038705825806,
      "learning_rate": 8.285714285714287e-05,
      "loss": 3.2339,
      "step": 50
    }
  ],
  "logging_steps": 5,
  "max_steps": 78,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 26425362087936.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}