thkim0305 commited on
Commit
c637e6c
·
verified ·
1 Parent(s): e956add

Upload folder using huggingface_hub

Browse files
Files changed (45) hide show
  1. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_trainer_state.json +105 -0
  10. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_trainer_state.json +105 -0
  19. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_trainer_state.json +105 -0
  28. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_trainer_state.json +105 -0
  37. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_trainer_state.json +105 -0
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28aa2dbdd5a2cb91ffe60a803036b136cfb2350aaa298cf4c8096fc89cf03c4c
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24207413f2d25bb96a0a19994c66252ce6ca2e7865e0f8aa6f51689816c0736a
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faccd76dc8cec5cf4da79a6858c65760d07ce2f63a3e1bffc5a064940154e29
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f8230eae84fdb3c24bdfdf7757f52935507883928a0dfec41fe75adc3cf7cf9
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5bf4132b74c1164b5c45e325217a0385e48ca8e1799b23ca60fd00202be026
3
+ size 84048874
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a63fbd57196c9ebaf4163a4948bab745e70f5dc4598c47fe82d09eff88034f
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aab60b315de5d6935f2b1ce74970522ec1ce02b9b2bd769ea62fc0c5068f917
3
+ size 84048874
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4903a6b2fb85e3cf85929160f98ac782dee0f74a92f97b2c5f6fad3cb90237
3
+ size 84048874
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 18,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1111111111111111,
13
+ "grad_norm": 0.30647119879722595,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.5778,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.2222222222222222,
20
+ "grad_norm": 0.2833655774593353,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.4192,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.3333333333333333,
27
+ "grad_norm": 0.3433806598186493,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.734,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.4444444444444444,
34
+ "grad_norm": 0.4032728970050812,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.5182,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.5555555555555556,
41
+ "grad_norm": 0.268047958612442,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.4194,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.6666666666666666,
48
+ "grad_norm": 0.2353041023015976,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.5757,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.7777777777777778,
55
+ "grad_norm": 0.23823396861553192,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.5309,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.8888888888888888,
62
+ "grad_norm": 0.27969422936439514,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.7769,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "grad_norm": 0.2893766760826111,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.5685,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "step": 18,
77
+ "total_flos": 693210355597312.0,
78
+ "train_loss": 0.5689557558960385,
79
+ "train_runtime": 32.053,
80
+ "train_samples_per_second": 2.246,
81
+ "train_steps_per_second": 0.562
82
+ }
83
+ ],
84
+ "logging_steps": 2,
85
+ "max_steps": 18,
86
+ "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 1,
88
+ "save_steps": 500,
89
+ "stateful_callbacks": {
90
+ "TrainerControl": {
91
+ "args": {
92
+ "should_epoch_stop": false,
93
+ "should_evaluate": false,
94
+ "should_log": false,
95
+ "should_save": false,
96
+ "should_training_stop": false
97
+ },
98
+ "attributes": {}
99
+ }
100
+ },
101
+ "total_flos": 693210355597312.0,
102
+ "train_batch_size": 1,
103
+ "trial_name": null,
104
+ "trial_params": null
105
+ }
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1469cf98479338d6d40e939d4282a6fe891a79ff356e91f53f8565a40b080d91
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a82fcd8c6f4f2d06ae73f7af030292ba08d30f76198103823cf3464af0dd82
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e274aa30982f5afff4c50466a602a6d4bc7bc5cb1bc85e3a26013755ba70f3f
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9b000d435981f54d44f02cf09369f7ceb9b1f1e23a001b2498da0c27ef77d03
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53f4e414ed40e2e346a1479bc2c740e21b11e984af97779bf8d5bde9186cb1de
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fc1efcd61e613e2c355bbb1139042d5130b1e29bd1926150653073be82ad3f
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac8b6161beb65659bf582e9899cfd7df2b9d7c325d027c3c0ae3488960cc28e7
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05a9bd91ee01aa0de2783d4410bbec5c7f0b001b2ce1e32c70d5073ab5b66307
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 18,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1111111111111111,
13
+ "grad_norm": 0.47776493430137634,
14
+ "learning_rate": 0.0003,
15
+ "loss": 1.2538,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.2222222222222222,
20
+ "grad_norm": 0.5145998597145081,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.9262,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.3333333333333333,
27
+ "grad_norm": 0.8327056169509888,
28
+ "learning_rate": 0.0003,
29
+ "loss": 1.5606,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.4444444444444444,
34
+ "grad_norm": 0.2911861538887024,
35
+ "learning_rate": 0.0003,
36
+ "loss": 2.2479,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.5555555555555556,
41
+ "grad_norm": 0.4601462185382843,
42
+ "learning_rate": 0.0003,
43
+ "loss": 2.0639,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.6666666666666666,
48
+ "grad_norm": 0.35323259234428406,
49
+ "learning_rate": 0.0003,
50
+ "loss": 2.2254,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.7777777777777778,
55
+ "grad_norm": 0.42169633507728577,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.4212,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.8888888888888888,
62
+ "grad_norm": 0.3550553321838379,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.8081,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "grad_norm": 0.450255811214447,
70
+ "learning_rate": 0.0003,
71
+ "loss": 1.9508,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "step": 18,
77
+ "total_flos": 180852809007104.0,
78
+ "train_loss": 1.8286501566569011,
79
+ "train_runtime": 27.6928,
80
+ "train_samples_per_second": 2.6,
81
+ "train_steps_per_second": 0.65
82
+ }
83
+ ],
84
+ "logging_steps": 2,
85
+ "max_steps": 18,
86
+ "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 1,
88
+ "save_steps": 500,
89
+ "stateful_callbacks": {
90
+ "TrainerControl": {
91
+ "args": {
92
+ "should_epoch_stop": false,
93
+ "should_evaluate": false,
94
+ "should_log": false,
95
+ "should_save": false,
96
+ "should_training_stop": false
97
+ },
98
+ "attributes": {}
99
+ }
100
+ },
101
+ "total_flos": 180852809007104.0,
102
+ "train_batch_size": 1,
103
+ "trial_name": null,
104
+ "trial_params": null
105
+ }
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66dd298a08524504e02129db67bcdf680f3468201fe461af547844c188c1e563
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2385fb8b486ab6f0af28431e5ca6e739a00bfb1acfb02fa399507ea196b3eb27
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb7c1ebf07fb66c773d46976bbff4568a7eab6dcadaf50deffc5f566e16c084
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c73ae37a26858266db1c7efd54974096738f09578f2f233eef4f87667749cff3
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22962645eeb6477fdeab7e9c90d1a01912c27cb91c2f4f85960aaec53107233f
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db9e95af3892445f63839188aac1fcb7a9a2b0b689f0fcff7184f93ac01f09d
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8167425439920ae6e7abdae57cb3a0b43160b01cf8814b23627611f0f2a727
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:414f7ae19345c4e11b7d69a5de17855ffcfe14291ed45d28c01dc6ed7867859f
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 18,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1111111111111111,
13
+ "grad_norm": 0.3366261124610901,
14
+ "learning_rate": 0.0003,
15
+ "loss": 1.687,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.2222222222222222,
20
+ "grad_norm": 0.33821889758110046,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.2887,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.3333333333333333,
27
+ "grad_norm": 0.2697903513908386,
28
+ "learning_rate": 0.0003,
29
+ "loss": 1.7096,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.4444444444444444,
34
+ "grad_norm": 0.3804328143596649,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.9446,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.5555555555555556,
41
+ "grad_norm": 0.36493733525276184,
42
+ "learning_rate": 0.0003,
43
+ "loss": 1.6892,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.6666666666666666,
48
+ "grad_norm": 0.24683411419391632,
49
+ "learning_rate": 0.0003,
50
+ "loss": 2.2283,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.7777777777777778,
55
+ "grad_norm": 0.35917943716049194,
56
+ "learning_rate": 0.0003,
57
+ "loss": 1.7543,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.8888888888888888,
62
+ "grad_norm": 0.2083004117012024,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.2117,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "grad_norm": 0.2108473777770996,
70
+ "learning_rate": 0.0003,
71
+ "loss": 1.6034,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "step": 18,
77
+ "total_flos": 410714315423744.0,
78
+ "train_loss": 1.5685436924298604,
79
+ "train_runtime": 28.6802,
80
+ "train_samples_per_second": 2.51,
81
+ "train_steps_per_second": 0.628
82
+ }
83
+ ],
84
+ "logging_steps": 2,
85
+ "max_steps": 18,
86
+ "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 1,
88
+ "save_steps": 500,
89
+ "stateful_callbacks": {
90
+ "TrainerControl": {
91
+ "args": {
92
+ "should_epoch_stop": false,
93
+ "should_evaluate": false,
94
+ "should_log": false,
95
+ "should_save": false,
96
+ "should_training_stop": false
97
+ },
98
+ "attributes": {}
99
+ }
100
+ },
101
+ "total_flos": 410714315423744.0,
102
+ "train_batch_size": 1,
103
+ "trial_name": null,
104
+ "trial_params": null
105
+ }
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a1e1b6d9534f9e15dc78b32f0307174cd3632101966d6aea2fdbcaeec98f46
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:387c90acff387fb45312d33cfad4d8de380bf335db59d7d431f87a0ae383899b
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b94f6b99d0d17d27e23dce721c59e24fe5afb8536028d0cc6164babb089ec06
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95493677d6d8e7d0d1d9e2e1517b518fa814449aafc3f656945f5bf34c36027d
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b02cce2c79fad6b5ca18a6f0e408f52a5dacd426d817772a06cfa569301b193
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8c4537d5760552746414af4a9543ffce91dcdf4c122fb4a56163c497ff31bb
3
+ size 48770134
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e19696d81a063f589d2d71303c78242444c8c242b6acbe06d3354db5a70ac8e2
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:431fd7e078dd540be8607aa45353d4d29ab4c0960d44d7f1814f687ce6629c1e
3
+ size 48769674
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 18,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1111111111111111,
13
+ "grad_norm": 0.24904797971248627,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.7065,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.2222222222222222,
20
+ "grad_norm": 0.16937297582626343,
21
+ "learning_rate": 0.0003,
22
+ "loss": 0.7515,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.3333333333333333,
27
+ "grad_norm": 0.18721012771129608,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.7237,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.4444444444444444,
34
+ "grad_norm": 0.16937831044197083,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.5668,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.5555555555555556,
41
+ "grad_norm": 0.14968672394752502,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.7673,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.6666666666666666,
48
+ "grad_norm": 0.15415261685848236,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.7073,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.7777777777777778,
55
+ "grad_norm": 0.19837412238121033,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.742,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.8888888888888888,
62
+ "grad_norm": 0.1668744832277298,
63
+ "learning_rate": 0.0003,
64
+ "loss": 0.9476,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "grad_norm": 0.19677592813968658,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.6554,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "step": 18,
77
+ "total_flos": 1003185539907584.0,
78
+ "train_loss": 0.7297993501027426,
79
+ "train_runtime": 34.7761,
80
+ "train_samples_per_second": 2.07,
81
+ "train_steps_per_second": 0.518
82
+ }
83
+ ],
84
+ "logging_steps": 2,
85
+ "max_steps": 18,
86
+ "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 1,
88
+ "save_steps": 500,
89
+ "stateful_callbacks": {
90
+ "TrainerControl": {
91
+ "args": {
92
+ "should_epoch_stop": false,
93
+ "should_evaluate": false,
94
+ "should_log": false,
95
+ "should_save": false,
96
+ "should_training_stop": false
97
+ },
98
+ "attributes": {}
99
+ }
100
+ },
101
+ "total_flos": 1003185539907584.0,
102
+ "train_batch_size": 1,
103
+ "trial_name": null,
104
+ "trial_params": null
105
+ }
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cbd65564608dc1f14bc3b1fcdd3d9952a602c50c5a9bdda73115cd5311c0a60
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14fd00dd7efd3f8b7ecfbc586bb405a9c0cb01f10f1ec332676b0b00c2872352
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0338d2f8b2be6cb0c42546ce0eee8ffccb7f3389430b4103fb4bbce9047342
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a027e1c35d899c5904cdf31bfbebc131f0bcb852b0e5c77f2c78f41cdd3b4885
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1e0e9d11f82bdafd7eaec41773895d888f61ac8fc081847ae1f018dfb48a28
3
+ size 84048874
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba1fcd628fdd6297b3a11a366310d476e4db1191f1a73ea9e1f290fce8a4761e
3
+ size 84049390
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:930cd38135f872e0e2c9a951a0390210db6d7b315908173cee6a88b9dcd07f0f
3
+ size 84048874
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd2af3c7beef270aeb226fd225302e8a600918aca6b0a976c2f9b18b40c7f41
3
+ size 84048874
client_states_NEW_fedmkt_NOCONT_bs4_saveoptim_r16_32_lr3e-4_sc74_4tasks_5rounds_fixtir18_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 18,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1111111111111111,
13
+ "grad_norm": 0.1849832981824875,
14
+ "learning_rate": 0.0003,
15
+ "loss": 0.852,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.2222222222222222,
20
+ "grad_norm": 0.21231615543365479,
21
+ "learning_rate": 0.0003,
22
+ "loss": 1.1432,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.3333333333333333,
27
+ "grad_norm": 0.15320207178592682,
28
+ "learning_rate": 0.0003,
29
+ "loss": 0.6513,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.4444444444444444,
34
+ "grad_norm": 0.17886720597743988,
35
+ "learning_rate": 0.0003,
36
+ "loss": 0.7447,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.5555555555555556,
41
+ "grad_norm": 0.2148554027080536,
42
+ "learning_rate": 0.0003,
43
+ "loss": 0.9758,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.6666666666666666,
48
+ "grad_norm": 0.1558452695608139,
49
+ "learning_rate": 0.0003,
50
+ "loss": 0.7666,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.7777777777777778,
55
+ "grad_norm": 0.3737614154815674,
56
+ "learning_rate": 0.0003,
57
+ "loss": 0.7469,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.8888888888888888,
62
+ "grad_norm": 0.20555411279201508,
63
+ "learning_rate": 0.0003,
64
+ "loss": 1.0352,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "grad_norm": 0.16557130217552185,
70
+ "learning_rate": 0.0003,
71
+ "loss": 0.7228,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "step": 18,
77
+ "total_flos": 2635458219606016.0,
78
+ "train_loss": 0.8487372001012167,
79
+ "train_runtime": 49.5256,
80
+ "train_samples_per_second": 1.454,
81
+ "train_steps_per_second": 0.363
82
+ }
83
+ ],
84
+ "logging_steps": 2,
85
+ "max_steps": 18,
86
+ "num_input_tokens_seen": 0,
87
+ "num_train_epochs": 1,
88
+ "save_steps": 500,
89
+ "stateful_callbacks": {
90
+ "TrainerControl": {
91
+ "args": {
92
+ "should_epoch_stop": false,
93
+ "should_evaluate": false,
94
+ "should_log": false,
95
+ "should_save": false,
96
+ "should_training_stop": false
97
+ },
98
+ "attributes": {}
99
+ }
100
+ },
101
+ "total_flos": 2635458219606016.0,
102
+ "train_batch_size": 1,
103
+ "trial_name": null,
104
+ "trial_params": null
105
+ }