pminervini committed on
Commit
19c4d5d
·
verified ·
1 Parent(s): 8a312b5

Training in progress, step 8

Browse files
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "q_proj",
23
- "down_proj",
24
  "o_proj",
25
- "k_proj",
26
  "up_proj",
27
- "v_proj",
28
- "gate_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_dora": false,
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "v_proj",
23
+ "gate_proj",
24
  "o_proj",
25
+ "down_proj",
26
  "up_proj",
27
+ "k_proj",
28
+ "q_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a06d1fce7cd5728e3ebd9baffacbe79e66eb2ac468f65c3ab8b81d62cc4c826
3
  size 167832688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a469fd79bd6bc2951349664d0c165e3af8543ee3225b1cc4c531a422c872e7cd
3
  size 167832688
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.09,
3
+ "train_loss": 0.7418815940618515,
4
+ "train_runtime": 86.6421,
5
+ "train_samples_per_second": 2.955,
6
+ "train_steps_per_second": 0.369
7
+ }
best_metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_loss": 0.4938286542892456}
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.09,
3
+ "train_loss": 0.7418815940618515,
4
+ "train_runtime": 86.6421,
5
+ "train_samples_per_second": 2.955,
6
+ "train_steps_per_second": 0.369
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4938286542892456,
3
+ "best_model_checkpoint": "outputs/checkpoint-32",
4
+ "epoch": 0.09391049156272928,
5
+ "eval_steps": 8,
6
+ "global_step": 32,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.255859375,
14
+ "learning_rate": 5e-05,
15
+ "loss": 0.9918,
16
+ "step": 8
17
+ },
18
+ {
19
+ "epoch": 0.02,
20
+ "eval_loss": 0.8554174900054932,
21
+ "eval_runtime": 4.9049,
22
+ "eval_samples_per_second": 9.786,
23
+ "eval_steps_per_second": 1.223,
24
+ "step": 8
25
+ },
26
+ {
27
+ "epoch": 0.05,
28
+ "grad_norm": 0.30859375,
29
+ "learning_rate": 0.0001,
30
+ "loss": 0.8054,
31
+ "step": 16
32
+ },
33
+ {
34
+ "epoch": 0.05,
35
+ "eval_loss": 0.6431613564491272,
36
+ "eval_runtime": 4.8729,
37
+ "eval_samples_per_second": 9.85,
38
+ "eval_steps_per_second": 1.231,
39
+ "step": 16
40
+ },
41
+ {
42
+ "epoch": 0.07,
43
+ "grad_norm": 0.2353515625,
44
+ "learning_rate": 0.00015000000000000001,
45
+ "loss": 0.622,
46
+ "step": 24
47
+ },
48
+ {
49
+ "epoch": 0.07,
50
+ "eval_loss": 0.5386483669281006,
51
+ "eval_runtime": 4.8685,
52
+ "eval_samples_per_second": 9.859,
53
+ "eval_steps_per_second": 1.232,
54
+ "step": 24
55
+ },
56
+ {
57
+ "epoch": 0.09,
58
+ "grad_norm": 0.1826171875,
59
+ "learning_rate": 0.0,
60
+ "loss": 0.5483,
61
+ "step": 32
62
+ },
63
+ {
64
+ "epoch": 0.09,
65
+ "eval_loss": 0.4938286542892456,
66
+ "eval_runtime": 4.8763,
67
+ "eval_samples_per_second": 9.843,
68
+ "eval_steps_per_second": 1.23,
69
+ "step": 32
70
+ },
71
+ {
72
+ "epoch": 0.09,
73
+ "step": 32,
74
+ "total_flos": 9061588465041408.0,
75
+ "train_loss": 0.7418815940618515,
76
+ "train_runtime": 86.6421,
77
+ "train_samples_per_second": 2.955,
78
+ "train_steps_per_second": 0.369
79
+ }
80
+ ],
81
+ "logging_steps": 8,
82
+ "max_steps": 32,
83
+ "num_input_tokens_seen": 0,
84
+ "num_train_epochs": 1,
85
+ "save_steps": 8,
86
+ "total_flos": 9061588465041408.0,
87
+ "train_batch_size": 1,
88
+ "trial_name": null,
89
+ "trial_params": null
90
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b93b1061cd82f1ce9eb67fd7dba601df2398f4b3cee9ec3320abca95959bc8fb
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98c302bc5e684cee1d6681da787e21a5725232092ac73d23afdff6f0b380840b
3
  size 4856