File size: 2,496 Bytes
a0321b2
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
 
 
fd8ed3d
 
 
 
a0321b2
 
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
fd8ed3d
 
 
 
a0321b2
 
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
fd8ed3d
 
 
 
a0321b2
 
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
fd8ed3d
a0321b2
fd8ed3d
a0321b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd8ed3d
a0321b2
 
 
 
fd8ed3d
a0321b2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 134,
  "global_step": 367,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0027247956403269754,
      "eval_loss": 3.203904390335083,
      "eval_runtime": 4.2695,
      "eval_samples_per_second": 72.375,
      "eval_steps_per_second": 18.269,
      "step": 1
    },
    {
      "epoch": 0.1362397820163488,
      "grad_norm": 0.7537987232208252,
      "learning_rate": 0.0004,
      "loss": 1.3941,
      "step": 50
    },
    {
      "epoch": 0.2724795640326976,
      "grad_norm": 0.5926509499549866,
      "learning_rate": 0.0004,
      "loss": 0.8495,
      "step": 100
    },
    {
      "epoch": 0.3651226158038147,
      "eval_loss": 0.6168258190155029,
      "eval_runtime": 4.2281,
      "eval_samples_per_second": 73.082,
      "eval_steps_per_second": 18.448,
      "step": 134
    },
    {
      "epoch": 0.4087193460490463,
      "grad_norm": 0.7156445384025574,
      "learning_rate": 0.0004,
      "loss": 0.6471,
      "step": 150
    },
    {
      "epoch": 0.5449591280653951,
      "grad_norm": 0.9673421382904053,
      "learning_rate": 0.0004,
      "loss": 0.5422,
      "step": 200
    },
    {
      "epoch": 0.6811989100817438,
      "grad_norm": 0.6035718321800232,
      "learning_rate": 0.0004,
      "loss": 0.4256,
      "step": 250
    },
    {
      "epoch": 0.7302452316076294,
      "eval_loss": 0.4029657542705536,
      "eval_runtime": 4.2417,
      "eval_samples_per_second": 72.848,
      "eval_steps_per_second": 18.389,
      "step": 268
    },
    {
      "epoch": 0.8174386920980926,
      "grad_norm": 0.5255013704299927,
      "learning_rate": 0.0004,
      "loss": 0.3641,
      "step": 300
    },
    {
      "epoch": 0.9536784741144414,
      "grad_norm": 0.41406139731407166,
      "learning_rate": 0.0004,
      "loss": 0.2957,
      "step": 350
    }
  ],
  "logging_steps": 50,
  "max_steps": 400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.189318078660608e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}