Markus committed on
Commit
6962768
·
1 Parent(s): 283d888

update model

Browse files
adapter_config.json CHANGED
@@ -20,10 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "v_proj",
24
  "q_proj",
25
- "o_proj",
26
- "k_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "k_proj",
24
  "v_proj",
25
  "q_proj",
26
+ "o_proj"
 
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d5072921ec4867fb9c6c1292707d2d982293cc138ee7d959a9c2a95840eb8b8
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a25f2ed275ea072e432db4a0587f317b13e4cda88d58e95fce0afc570a015947
3
  size 109086672
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a05221272abd2199bcafc4910a4a2e7cb072420e5ade714b5708c7ccedb8190
3
  size 218319610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:696016ee9b60b8db3b5cc6401701bb2091ea344917571914ed8464dc4e2ce933
3
  size 218319610
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0132374830d5dc19b4b6343e986f2b95b1e82201adc3295f03c8efd9e306924f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8d15991fe4f291e07056c30e4272fb349859451b58691ae79afa0e3bbe1e0cc
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15190fdae8b44036d8090dd5fd1f034d735830cbb1eec373b75c5a8823bf1b7e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3fc1e90bfe44ed29af9520a74f7b0ae82010d7c23e55da07bc01b4a4e4a50a0
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9959404600811907,
5
  "eval_steps": 500,
6
- "global_step": 23,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -43,14 +43,57 @@
43
  "eval_samples_per_second": 1.145,
44
  "eval_steps_per_second": 1.145,
45
  "step": 23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
  ],
48
  "logging_steps": 5,
49
- "max_steps": 23,
50
  "num_input_tokens_seen": 0,
51
- "num_train_epochs": 1,
52
  "save_steps": 500,
53
- "total_flos": 2.5920685332863386e+17,
54
  "train_batch_size": 1,
55
  "trial_name": null,
56
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9959404600811907,
5
  "eval_steps": 500,
6
+ "global_step": 46,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
43
  "eval_samples_per_second": 1.145,
44
  "eval_steps_per_second": 1.145,
45
  "step": 23
46
+ },
47
+ {
48
+ "epoch": 1.09,
49
+ "grad_norm": 0.1162109375,
50
+ "learning_rate": 1.1661750237200673e-05,
51
+ "loss": 1.8002,
52
+ "step": 25
53
+ },
54
+ {
55
+ "epoch": 1.3,
56
+ "grad_norm": 0.1162109375,
57
+ "learning_rate": 7.289121990629446e-06,
58
+ "loss": 1.8029,
59
+ "step": 30
60
+ },
61
+ {
62
+ "epoch": 1.52,
63
+ "grad_norm": 0.11181640625,
64
+ "learning_rate": 3.633714482245324e-06,
65
+ "loss": 1.7975,
66
+ "step": 35
67
+ },
68
+ {
69
+ "epoch": 1.74,
70
+ "grad_norm": 0.1181640625,
71
+ "learning_rate": 1.1176474296763868e-06,
72
+ "loss": 1.7978,
73
+ "step": 40
74
+ },
75
+ {
76
+ "epoch": 1.95,
77
+ "grad_norm": 0.1142578125,
78
+ "learning_rate": 3.147161592771996e-08,
79
+ "loss": 1.7977,
80
+ "step": 45
81
+ },
82
+ {
83
+ "epoch": 2.0,
84
+ "eval_loss": 1.8171756267547607,
85
+ "eval_runtime": 534.6688,
86
+ "eval_samples_per_second": 1.126,
87
+ "eval_steps_per_second": 1.126,
88
+ "step": 46
89
  }
90
  ],
91
  "logging_steps": 5,
92
+ "max_steps": 46,
93
  "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 2,
95
  "save_steps": 500,
96
+ "total_flos": 5.184137066572677e+17,
97
  "train_batch_size": 1,
98
  "trial_name": null,
99
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b54122e9e2f1ba0caa1d7401cc7367c7363d117ff0a143742c08377762a230d3
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b15578583779f65d412f2aff5e1de01a6d75c5544cb874fb5e608930057e2130
3
  size 4920