{
  "best_global_step": 708,
  "best_metric": 0.47357939458311205,
  "best_model_checkpoint": "./bert-lora-newsgroups/checkpoint-708",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 708,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14124293785310735,
      "grad_norm": 4.619589805603027,
      "learning_rate": 0.00019067796610169492,
      "loss": 2.9563,
      "step": 100
    },
    {
      "epoch": 0.2824858757062147,
      "grad_norm": 4.369770526885986,
      "learning_rate": 0.0001812617702448211,
      "loss": 2.4516,
      "step": 200
    },
    {
      "epoch": 0.423728813559322,
      "grad_norm": 4.454040050506592,
      "learning_rate": 0.00017184557438794729,
      "loss": 2.1271,
      "step": 300
    },
    {
      "epoch": 0.5649717514124294,
      "grad_norm": 5.155438423156738,
      "learning_rate": 0.00016242937853107344,
      "loss": 1.9125,
      "step": 400
    },
    {
      "epoch": 0.7062146892655368,
      "grad_norm": 4.892629623413086,
      "learning_rate": 0.00015301318267419963,
      "loss": 1.7896,
      "step": 500
    },
    {
      "epoch": 0.847457627118644,
      "grad_norm": 4.983877658843994,
      "learning_rate": 0.0001435969868173258,
      "loss": 1.6968,
      "step": 600
    },
    {
      "epoch": 0.9887005649717514,
      "grad_norm": 8.334493637084961,
      "learning_rate": 0.00013418079096045197,
      "loss": 1.5862,
      "step": 700
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.47357939458311205,
      "eval_loss": 1.6120948791503906,
      "eval_runtime": 59.1685,
      "eval_samples_per_second": 127.297,
      "eval_steps_per_second": 7.96,
      "step": 708
    }
  ],
  "logging_steps": 100,
  "max_steps": 2124,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2844933635814144.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}