N8Programs committed
Commit 3efa972 · verified · 1 Parent(s): 528a895

Upload 39 files

Files changed (40)
  1. .gitattributes +3 -0
  2. adapters-conscious/0000100_adapters.safetensors +3 -0
  3. adapters-conscious/0000200_adapters.safetensors +3 -0
  4. adapters-conscious/0000300_adapters.safetensors +3 -0
  5. adapters-conscious/adapter_config.json +61 -0
  6. adapters-conscious/adapters.safetensors +3 -0
  7. adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/config.yaml +141 -0
  8. adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/output.log +315 -0
  9. adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/requirements.txt +147 -0
  10. adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/wandb-metadata.json +35 -0
  11. adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/wandb-summary.json +1 -0
  12. adapters-conscious/wandb/run-20260216_150557-tugniqt7/logs/debug-internal.log +11 -0
  13. adapters-conscious/wandb/run-20260216_150557-tugniqt7/logs/debug.log +23 -0
  14. adapters-conscious/wandb/run-20260216_150557-tugniqt7/run-tugniqt7.wandb +3 -0
  15. adapters-no-conscious/0000100_adapters.safetensors +3 -0
  16. adapters-no-conscious/0000200_adapters.safetensors +3 -0
  17. adapters-no-conscious/0000300_adapters.safetensors +3 -0
  18. adapters-no-conscious/adapter_config.json +61 -0
  19. adapters-no-conscious/adapters.safetensors +3 -0
  20. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/config.yaml +141 -0
  21. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/output.log +315 -0
  22. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/requirements.txt +147 -0
  23. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/wandb-metadata.json +35 -0
  24. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/wandb-summary.json +1 -0
  25. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/logs/debug-internal.log +11 -0
  26. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/logs/debug.log +23 -0
  27. adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/run-t3h29alg.wandb +3 -0
  28. adapters-uncertain/0000100_adapters.safetensors +3 -0
  29. adapters-uncertain/0000200_adapters.safetensors +3 -0
  30. adapters-uncertain/0000300_adapters.safetensors +3 -0
  31. adapters-uncertain/adapter_config.json +61 -0
  32. adapters-uncertain/adapters.safetensors +3 -0
  33. adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/config.yaml +141 -0
  34. adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/output.log +315 -0
  35. adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/requirements.txt +147 -0
  36. adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/wandb-metadata.json +35 -0
  37. adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/wandb-summary.json +1 -0
  38. adapters-uncertain/wandb/run-20260216_154932-918bwjte/logs/debug-internal.log +11 -0
  39. adapters-uncertain/wandb/run-20260216_154932-918bwjte/logs/debug.log +23 -0
  40. adapters-uncertain/wandb/run-20260216_154932-918bwjte/run-918bwjte.wandb +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ adapters-conscious/wandb/run-20260216_150557-tugniqt7/run-tugniqt7.wandb filter=lfs diff=lfs merge=lfs -text
+ adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/run-t3h29alg.wandb filter=lfs diff=lfs merge=lfs -text
+ adapters-uncertain/wandb/run-20260216_154932-918bwjte/run-918bwjte.wandb filter=lfs diff=lfs merge=lfs -text
adapters-conscious/0000100_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f88679d27b21c4bc3e5bef58251db7d29090d38081978f0f2d385cd7e4ebff4
+ size 132175803
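
The safetensors entries in this commit are Git LFS pointer files: the repository itself stores only the spec version, the sha256 oid of the real blob, and its size in bytes (132175803 for every checkpoint here, as the adapter tensors have fixed shapes). A minimal Python sketch for checking a fetched blob against its pointer, assuming the file has already been materialized locally (e.g. via git lfs pull); the path is just this checkpoint's location in the repo:

import hashlib

path = "adapters-conscious/0000100_adapters.safetensors"
expected_oid = "2f88679d27b21c4bc3e5bef58251db7d29090d38081978f0f2d385cd7e4ebff4"
expected_size = 132175803

h = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    # Stream in 1 MiB chunks so the 132 MB file is never fully in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
        size += len(chunk)

assert size == expected_size, f"size mismatch: {size}"
assert h.hexdigest() == expected_oid, "sha256 mismatch: still an LFS pointer?"
print("blob matches its LFS pointer")
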
adapters-conscious/0000200_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b9b8fd190a9c1bae92e659e2963718b0dbb9fa65fa43a42bbf92848b298a5fd8
+ size 132175803
adapters-conscious/0000300_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78fdce72e914dde044097c79f3865557332d8d07161ae1ffa8365822b6c7461f
+ size 132175803
adapters-conscious/adapter_config.json ADDED
@@ -0,0 +1,61 @@
+ {
+     "adapter_path": "adapters-conscious",
+     "batch_size": 1,
+     "config": "train.yaml",
+     "data": "training/full-conscious",
+     "fine_tune_type": "lora",
+     "grad_accumulation_steps": 1,
+     "grad_checkpoint": true,
+     "iters": 300,
+     "learning_rate": 0.0001,
+     "lora_parameters": {
+         "keys": [
+             "self_attn.q_proj",
+             "self_attn.v_proj",
+             "self_attn.k_proj",
+             "self_attn.o_proj",
+             "mlp.gate_proj",
+             "mlp.up_proj",
+             "mlp.down_proj"
+         ],
+         "rank": 16,
+         "scale": 2.0,
+         "dropout": 0.0
+     },
+     "lr_schedule": {
+         "name": "cosine_decay",
+         "warmup": 10,
+         "warmup_init": 1e-05,
+         "arguments": [
+             0.0001,
+             529,
+             1e-05
+         ]
+     },
+     "mask_prompt": false,
+     "max_seq_length": 8192,
+     "model": "Qwen3-4B-Instruct-2507",
+     "num_layers": 36,
+     "optimizer": "adam",
+     "optimizer_config": {
+         "adam": {
+             "betas": [
+                 0.9,
+                 0.9999
+             ],
+             "eps": 1e-06,
+             "bias_correction": true
+         }
+     },
+     "project_name": "conscious-finetuning",
+     "report_to": "wandb",
+     "resume_adapter_file": null,
+     "save_every": 100,
+     "seed": 0,
+     "steps_per_eval": 200,
+     "steps_per_report": 1,
+     "test": false,
+     "test_batches": 100,
+     "train": true,
+     "val_batches": 0
+ }
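
This config was written by mlx_lm.lora (the run metadata below shows the program path). A minimal sketch of using the resulting adapters for inference with mlx-lm, assuming the base model Qwen3-4B-Instruct-2507 resolves locally or on the Hub under that name; load() picks up adapter_config.json and adapters.safetensors from the directory given as adapter_path:

from mlx_lm import load, generate

# adapter_path points at the directory holding adapter_config.json
# and adapters.safetensors from this commit.
model, tokenizer = load(
    "Qwen3-4B-Instruct-2507",
    adapter_path="adapters-conscious",
)

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    add_generation_prompt=True,
)
print(generate(model, tokenizer, prompt=prompt, max_tokens=100))
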
adapters-conscious/adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78fdce72e914dde044097c79f3865557332d8d07161ae1ffa8365822b6c7461f
+ size 132175803
adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/config.yaml ADDED
@@ -0,0 +1,141 @@
+ _wandb:
+   value:
+     cli_version: 0.23.0
+     e:
+       8z2fwr85sxkr7dsfa3tb7o36vuyb19ia:
+         apple:
+           ecpuCores: 4
+           gpuCores: 40
+           memoryGb: 64
+           name: Apple M3 Max
+           pcpuCores: 12
+           ramTotalBytes: "68719476736"
+           swapTotalBytes: "2147483648"
+         args:
+           - --config
+           - train.yaml
+         cpu_count: 16
+         cpu_count_logical: 16
+         disk:
+           /:
+             total: "1995218165760"
+             used: "1698983583744"
+         email: nathanbreslow@gmail.com
+         executable: /Users/natebreslow/miniconda3/envs/mlx-experiment/bin/python3.10
+         host: MacBook-Pro-135.local
+         memory:
+           total: "68719476736"
+         os: macOS-26.2-arm64-arm-64bit
+         program: /Users/natebreslow/miniconda3/envs/mlx-experiment/bin/mlx_lm.lora
+         python: CPython 3.10.19
+         root: adapters-conscious
+         startedAt: "2026-02-16T20:05:57.689004Z"
+         writerId: 8z2fwr85sxkr7dsfa3tb7o36vuyb19ia
+     m: []
+     python_version: 3.10.19
+     t:
+       "1":
+         - 1
+         - 5
+         - 11
+         - 49
+         - 53
+         - 71
+       "2":
+         - 1
+         - 5
+         - 11
+         - 49
+         - 53
+         - 71
+       "3":
+         - 13
+         - 16
+         - 61
+       "4": 3.10.19
+       "5": 0.23.0
+       "6": 5.1.0
+       "8":
+         - 2
+       "12": 0.23.0
+       "13": darwin-arm64
+ adapter_path:
+   value: adapters-conscious
+ batch_size:
+   value: 1
+ config:
+   value: train.yaml
+ data:
+   value: training/full-conscious
+ fine_tune_type:
+   value: lora
+ grad_accumulation_steps:
+   value: 1
+ grad_checkpoint:
+   value: true
+ iters:
+   value: 300
+ learning_rate:
+   value: 0.0001
+ lora_parameters:
+   value:
+     dropout: 0
+     keys:
+       - self_attn.q_proj
+       - self_attn.v_proj
+       - self_attn.k_proj
+       - self_attn.o_proj
+       - mlp.gate_proj
+       - mlp.up_proj
+       - mlp.down_proj
+     rank: 16
+     scale: 2
+ lr_schedule:
+   value:
+     arguments:
+       - 0.0001
+       - 529
+       - 1e-05
+     name: cosine_decay
+     warmup: 10
+     warmup_init: 1e-05
+ mask_prompt:
+   value: false
+ max_seq_length:
+   value: 8192
+ model:
+   value: Qwen3-4B-Instruct-2507
+ num_layers:
+   value: 36
+ optimizer:
+   value: adam
+ optimizer_config:
+   value:
+     adam:
+       betas:
+         - 0.9
+         - 0.9999
+       bias_correction: true
+       eps: 1e-06
+ project_name:
+   value: conscious-finetuning
+ report_to:
+   value: wandb
+ resume_adapter_file:
+   value: null
+ save_every:
+   value: 100
+ seed:
+   value: 0
+ steps_per_eval:
+   value: 200
+ steps_per_report:
+   value: 1
+ test:
+   value: false
+ test_batches:
+   value: 100
+ train:
+   value: true
+ val_batches:
+   value: 0
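
The lr_schedule block encodes a 10-step linear warmup from warmup_init to the peak learning_rate, followed by a cosine decay whose arguments are [init, decay_steps, end]. A small sketch that reproduces the learning rates in the output.log below; the step offsets are inferred from the logged values, not taken from the mlx source, so treat them as an approximation of the actual join_schedules behavior:

import math

def lr_at(i, peak=1e-4, warmup=10, warmup_init=1e-5,
          decay_steps=529, end=1e-5):
    """Learning rate at 1-based training iteration i."""
    if i <= warmup + 1:
        # Linear warmup: 1e-05 at iter 1, reaching the peak at iter 11.
        return warmup_init + (peak - warmup_init) * (i - 1) / warmup
    # Cosine decay, with step 0 at iter warmup + 2.
    step = min(i - warmup - 2, decay_steps)
    return end + (peak - end) * 0.5 * (1 + math.cos(math.pi * step / decay_steps))

# Matches the log below: 1.000e-05, 1.900e-05, 1.000e-04, 9.399e-05, 4.874e-05.
for i in (1, 2, 11, 100, 300):
    print(f"Iter {i}: {lr_at(i):.3e}")
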
adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/output.log ADDED
@@ -0,0 +1,315 @@
+ Loading pretrained model
+ Loading datasets
+ Training
+ Trainable parameters: 0.821% (33.030M/4022.468M)
+ Starting training..., iters: 300
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 1: Val loss nan, Val took 0.108s
+ Iter 1: Train loss 3.335, Learning Rate 1.000e-05, It/sec 0.134, Tokens/sec 151.487, Trained Tokens 1132, Peak mem 9.670 GB
+ Iter 2: Train loss 3.510, Learning Rate 1.900e-05, It/sec 0.298, Tokens/sec 227.559, Trained Tokens 1896, Peak mem 9.826 GB
+ Iter 3: Train loss 3.179, Learning Rate 2.800e-05, It/sec 0.406, Tokens/sec 247.839, Trained Tokens 2506, Peak mem 9.826 GB
+ Iter 4: Train loss 3.400, Learning Rate 3.700e-05, It/sec 0.264, Tokens/sec 241.251, Trained Tokens 3421, Peak mem 10.091 GB
+ Iter 5: Train loss 3.223, Learning Rate 4.600e-05, It/sec 0.288, Tokens/sec 241.614, Trained Tokens 4261, Peak mem 10.091 GB
+ Iter 6: Train loss 3.043, Learning Rate 5.500e-05, It/sec 0.225, Tokens/sec 227.696, Trained Tokens 5275, Peak mem 10.250 GB
+ Iter 7: Train loss 3.168, Learning Rate 6.400e-05, It/sec 0.238, Tokens/sec 238.933, Trained Tokens 6281, Peak mem 10.250 GB
+ Iter 8: Train loss 2.765, Learning Rate 7.300e-05, It/sec 0.209, Tokens/sec 197.020, Trained Tokens 7225, Peak mem 10.250 GB
+ Iter 9: Train loss 2.672, Learning Rate 8.200e-05, It/sec 0.284, Tokens/sec 212.734, Trained Tokens 7973, Peak mem 10.250 GB
+ Iter 10: Train loss 2.802, Learning Rate 9.100e-05, It/sec 0.402, Tokens/sec 207.113, Trained Tokens 8488, Peak mem 10.250 GB
+ Iter 11: Train loss 2.378, Learning Rate 1.000e-04, It/sec 0.241, Tokens/sec 226.957, Trained Tokens 9428, Peak mem 10.250 GB
+ Iter 12: Train loss 2.189, Learning Rate 1.000e-04, It/sec 0.156, Tokens/sec 206.492, Trained Tokens 10755, Peak mem 10.710 GB
+ Iter 13: Train loss 2.538, Learning Rate 1.000e-04, It/sec 0.195, Tokens/sec 222.831, Trained Tokens 11895, Peak mem 10.710 GB
+ Iter 14: Train loss 2.585, Learning Rate 1.000e-04, It/sec 0.370, Tokens/sec 248.800, Trained Tokens 12567, Peak mem 10.710 GB
+ Iter 15: Train loss 2.480, Learning Rate 9.999e-05, It/sec 0.469, Tokens/sec 228.479, Trained Tokens 13054, Peak mem 10.710 GB
+ Iter 16: Train loss 2.251, Learning Rate 9.999e-05, It/sec 0.321, Tokens/sec 188.542, Trained Tokens 13641, Peak mem 10.710 GB
+ Iter 17: Train loss 2.000, Learning Rate 9.998e-05, It/sec 0.240, Tokens/sec 194.720, Trained Tokens 14451, Peak mem 10.710 GB
+ Iter 18: Train loss 2.108, Learning Rate 9.997e-05, It/sec 0.236, Tokens/sec 232.887, Trained Tokens 15438, Peak mem 10.710 GB
+ Iter 19: Train loss 2.215, Learning Rate 9.996e-05, It/sec 0.243, Tokens/sec 197.976, Trained Tokens 16253, Peak mem 10.710 GB
+ Iter 20: Train loss 2.190, Learning Rate 9.995e-05, It/sec 0.346, Tokens/sec 213.526, Trained Tokens 16871, Peak mem 10.710 GB
+ Iter 21: Train loss 2.112, Learning Rate 9.994e-05, It/sec 0.344, Tokens/sec 183.953, Trained Tokens 17405, Peak mem 10.710 GB
+ Iter 22: Train loss 2.212, Learning Rate 9.992e-05, It/sec 0.242, Tokens/sec 215.927, Trained Tokens 18296, Peak mem 10.710 GB
+ Iter 23: Train loss 2.076, Learning Rate 9.990e-05, It/sec 0.260, Tokens/sec 234.315, Trained Tokens 19196, Peak mem 10.710 GB
+ Iter 24: Train loss 2.343, Learning Rate 9.989e-05, It/sec 0.192, Tokens/sec 236.656, Trained Tokens 20427, Peak mem 10.710 GB
+ Iter 25: Train loss 1.956, Learning Rate 9.987e-05, It/sec 0.278, Tokens/sec 222.241, Trained Tokens 21226, Peak mem 10.710 GB
+ Iter 26: Train loss 2.293, Learning Rate 9.984e-05, It/sec 0.179, Tokens/sec 211.352, Trained Tokens 22408, Peak mem 10.710 GB
+ Iter 27: Train loss 2.287, Learning Rate 9.982e-05, It/sec 0.151, Tokens/sec 201.751, Trained Tokens 23740, Peak mem 10.710 GB
+ Iter 28: Train loss 1.856, Learning Rate 9.980e-05, It/sec 0.340, Tokens/sec 154.183, Trained Tokens 24193, Peak mem 10.710 GB
+ Iter 29: Train loss 2.047, Learning Rate 9.977e-05, It/sec 0.239, Tokens/sec 170.972, Trained Tokens 24909, Peak mem 10.710 GB
+ Iter 30: Train loss 1.960, Learning Rate 9.974e-05, It/sec 0.353, Tokens/sec 223.190, Trained Tokens 25542, Peak mem 10.710 GB
+ Iter 31: Train loss 1.776, Learning Rate 9.971e-05, It/sec 0.284, Tokens/sec 215.819, Trained Tokens 26301, Peak mem 10.710 GB
+ Iter 32: Train loss 2.188, Learning Rate 9.968e-05, It/sec 0.149, Tokens/sec 191.601, Trained Tokens 27591, Peak mem 10.710 GB
+ Iter 33: Train loss 2.119, Learning Rate 9.965e-05, It/sec 0.248, Tokens/sec 219.404, Trained Tokens 28474, Peak mem 10.710 GB
+ Iter 34: Train loss 1.788, Learning Rate 9.962e-05, It/sec 0.204, Tokens/sec 193.212, Trained Tokens 29421, Peak mem 10.710 GB
+ Iter 35: Train loss 1.984, Learning Rate 9.958e-05, It/sec 0.223, Tokens/sec 174.106, Trained Tokens 30200, Peak mem 10.710 GB
+ Iter 36: Train loss 2.493, Learning Rate 9.954e-05, It/sec 0.312, Tokens/sec 171.055, Trained Tokens 30748, Peak mem 10.710 GB
+ Iter 37: Train loss 2.012, Learning Rate 9.950e-05, It/sec 0.441, Tokens/sec 241.860, Trained Tokens 31296, Peak mem 10.710 GB
+ Iter 38: Train loss 1.908, Learning Rate 9.946e-05, It/sec 0.199, Tokens/sec 212.772, Trained Tokens 32364, Peak mem 10.710 GB
+ Iter 39: Train loss 1.985, Learning Rate 9.942e-05, It/sec 0.201, Tokens/sec 223.791, Trained Tokens 33479, Peak mem 10.710 GB
+ Iter 40: Train loss 2.031, Learning Rate 9.938e-05, It/sec 0.463, Tokens/sec 205.710, Trained Tokens 33923, Peak mem 10.710 GB
+ Iter 41: Train loss 2.105, Learning Rate 9.933e-05, It/sec 0.493, Tokens/sec 242.129, Trained Tokens 34414, Peak mem 10.710 GB
+ Iter 42: Train loss 2.060, Learning Rate 9.929e-05, It/sec 0.449, Tokens/sec 230.672, Trained Tokens 34928, Peak mem 10.710 GB
+ Iter 43: Train loss 1.689, Learning Rate 9.924e-05, It/sec 0.429, Tokens/sec 221.438, Trained Tokens 35444, Peak mem 10.710 GB
+ Iter 44: Train loss 1.648, Learning Rate 9.919e-05, It/sec 0.327, Tokens/sec 224.733, Trained Tokens 36131, Peak mem 10.710 GB
+ Iter 45: Train loss 1.649, Learning Rate 9.914e-05, It/sec 0.593, Tokens/sec 224.252, Trained Tokens 36509, Peak mem 10.710 GB
+ Iter 46: Train loss 1.732, Learning Rate 9.909e-05, It/sec 0.205, Tokens/sec 196.647, Trained Tokens 37468, Peak mem 10.710 GB
+ Iter 47: Train loss 2.079, Learning Rate 9.903e-05, It/sec 0.640, Tokens/sec 243.235, Trained Tokens 37848, Peak mem 10.710 GB
+ Iter 48: Train loss 1.685, Learning Rate 9.898e-05, It/sec 0.263, Tokens/sec 216.814, Trained Tokens 38671, Peak mem 10.710 GB
+ Iter 49: Train loss 1.920, Learning Rate 9.892e-05, It/sec 0.322, Tokens/sec 236.922, Trained Tokens 39406, Peak mem 10.710 GB
+ Iter 50: Train loss 1.670, Learning Rate 9.886e-05, It/sec 0.235, Tokens/sec 237.584, Trained Tokens 40416, Peak mem 10.710 GB
+ Iter 51: Train loss 2.044, Learning Rate 9.880e-05, It/sec 0.215, Tokens/sec 238.837, Trained Tokens 41526, Peak mem 10.710 GB
+ Iter 52: Train loss 2.082, Learning Rate 9.874e-05, It/sec 0.267, Tokens/sec 241.068, Trained Tokens 42430, Peak mem 10.710 GB
+ Iter 53: Train loss 1.840, Learning Rate 9.867e-05, It/sec 0.237, Tokens/sec 182.714, Trained Tokens 43201, Peak mem 10.710 GB
+ Iter 54: Train loss 1.744, Learning Rate 9.861e-05, It/sec 0.219, Tokens/sec 186.041, Trained Tokens 44052, Peak mem 10.710 GB
+ Iter 55: Train loss 1.852, Learning Rate 9.854e-05, It/sec 0.281, Tokens/sec 234.681, Trained Tokens 44887, Peak mem 10.710 GB
+ Iter 56: Train loss 1.962, Learning Rate 9.847e-05, It/sec 0.123, Tokens/sec 173.508, Trained Tokens 46301, Peak mem 10.815 GB
+ Iter 57: Train loss 1.583, Learning Rate 9.840e-05, It/sec 0.473, Tokens/sec 230.684, Trained Tokens 46789, Peak mem 10.815 GB
+ Iter 58: Train loss 1.657, Learning Rate 9.833e-05, It/sec 0.291, Tokens/sec 246.228, Trained Tokens 47636, Peak mem 10.815 GB
+ Iter 59: Train loss 1.729, Learning Rate 9.826e-05, It/sec 0.319, Tokens/sec 232.189, Trained Tokens 48364, Peak mem 10.815 GB
+ Iter 60: Train loss 1.864, Learning Rate 9.818e-05, It/sec 0.222, Tokens/sec 248.699, Trained Tokens 49483, Peak mem 10.815 GB
+ Iter 61: Train loss 1.728, Learning Rate 9.811e-05, It/sec 0.391, Tokens/sec 245.652, Trained Tokens 50112, Peak mem 10.815 GB
+ Iter 62: Train loss 1.672, Learning Rate 9.803e-05, It/sec 0.500, Tokens/sec 213.641, Trained Tokens 50539, Peak mem 10.815 GB
+ Iter 63: Train loss 1.940, Learning Rate 9.795e-05, It/sec 0.484, Tokens/sec 242.337, Trained Tokens 51040, Peak mem 10.815 GB
+ Iter 64: Train loss 1.676, Learning Rate 9.787e-05, It/sec 0.282, Tokens/sec 180.735, Trained Tokens 51681, Peak mem 10.815 GB
+ Iter 65: Train loss 1.619, Learning Rate 9.779e-05, It/sec 0.210, Tokens/sec 211.767, Trained Tokens 52691, Peak mem 10.815 GB
+ Iter 66: Train loss 2.107, Learning Rate 9.771e-05, It/sec 0.312, Tokens/sec 204.344, Trained Tokens 53345, Peak mem 10.815 GB
+ Iter 67: Train loss 1.596, Learning Rate 9.762e-05, It/sec 0.197, Tokens/sec 196.678, Trained Tokens 54342, Peak mem 10.815 GB
+ Iter 68: Train loss 2.014, Learning Rate 9.753e-05, It/sec 0.243, Tokens/sec 204.290, Trained Tokens 55181, Peak mem 10.815 GB
+ Iter 69: Train loss 1.874, Learning Rate 9.745e-05, It/sec 0.154, Tokens/sec 161.090, Trained Tokens 56224, Peak mem 10.815 GB
+ Iter 70: Train loss 1.662, Learning Rate 9.736e-05, It/sec 0.179, Tokens/sec 167.119, Trained Tokens 57157, Peak mem 10.815 GB
+ Iter 71: Train loss 1.956, Learning Rate 9.727e-05, It/sec 0.178, Tokens/sec 172.088, Trained Tokens 58122, Peak mem 10.815 GB
+ Iter 72: Train loss 1.855, Learning Rate 9.717e-05, It/sec 0.590, Tokens/sec 186.513, Trained Tokens 58438, Peak mem 10.815 GB
+ Iter 73: Train loss 1.898, Learning Rate 9.708e-05, It/sec 0.211, Tokens/sec 221.268, Trained Tokens 59486, Peak mem 10.815 GB
+ Iter 74: Train loss 1.736, Learning Rate 9.698e-05, It/sec 0.298, Tokens/sec 174.038, Trained Tokens 60070, Peak mem 10.815 GB
+ Iter 75: Train loss 1.343, Learning Rate 9.689e-05, It/sec 0.396, Tokens/sec 209.287, Trained Tokens 60599, Peak mem 10.815 GB
+ Iter 76: Train loss 1.707, Learning Rate 9.679e-05, It/sec 0.234, Tokens/sec 180.280, Trained Tokens 61371, Peak mem 10.815 GB
+ Iter 77: Train loss 1.662, Learning Rate 9.669e-05, It/sec 0.201, Tokens/sec 206.019, Trained Tokens 62394, Peak mem 10.815 GB
+ Iter 78: Train loss 2.039, Learning Rate 9.659e-05, It/sec 0.110, Tokens/sec 194.925, Trained Tokens 64169, Peak mem 11.392 GB
+ Iter 79: Train loss 1.708, Learning Rate 9.648e-05, It/sec 0.180, Tokens/sec 216.843, Trained Tokens 65374, Peak mem 11.392 GB
+ Iter 80: Train loss 1.867, Learning Rate 9.638e-05, It/sec 0.406, Tokens/sec 197.459, Trained Tokens 65860, Peak mem 11.392 GB
+ Iter 81: Train loss 1.824, Learning Rate 9.627e-05, It/sec 0.184, Tokens/sec 187.169, Trained Tokens 66877, Peak mem 11.392 GB
+ Iter 82: Train loss 1.630, Learning Rate 9.617e-05, It/sec 0.310, Tokens/sec 181.391, Trained Tokens 67462, Peak mem 11.392 GB
+ Iter 83: Train loss 1.625, Learning Rate 9.606e-05, It/sec 0.142, Tokens/sec 154.464, Trained Tokens 68546, Peak mem 11.392 GB
+ Iter 84: Train loss 1.822, Learning Rate 9.595e-05, It/sec 0.097, Tokens/sec 108.496, Trained Tokens 69661, Peak mem 11.392 GB
+ Iter 85: Train loss 1.891, Learning Rate 9.584e-05, It/sec 0.013, Tokens/sec 14.581, Trained Tokens 70771, Peak mem 11.392 GB
+ Iter 86: Train loss 1.537, Learning Rate 9.572e-05, It/sec 0.200, Tokens/sec 62.677, Trained Tokens 71085, Peak mem 11.392 GB
+ Iter 87: Train loss 1.663, Learning Rate 9.561e-05, It/sec 0.091, Tokens/sec 66.525, Trained Tokens 71820, Peak mem 11.392 GB
+ Iter 88: Train loss 1.484, Learning Rate 9.549e-05, It/sec 0.139, Tokens/sec 73.404, Trained Tokens 72349, Peak mem 11.392 GB
+ Iter 89: Train loss 1.900, Learning Rate 9.538e-05, It/sec 0.052, Tokens/sec 89.547, Trained Tokens 74087, Peak mem 11.392 GB
+ Iter 90: Train loss 1.519, Learning Rate 9.526e-05, It/sec 0.222, Tokens/sec 115.147, Trained Tokens 74606, Peak mem 11.392 GB
+ Iter 91: Train loss 1.769, Learning Rate 9.514e-05, It/sec 0.189, Tokens/sec 114.252, Trained Tokens 75209, Peak mem 11.392 GB
+ Iter 92: Train loss 1.750, Learning Rate 9.502e-05, It/sec 0.116, Tokens/sec 128.126, Trained Tokens 76312, Peak mem 11.392 GB
+ Iter 93: Train loss 1.578, Learning Rate 9.489e-05, It/sec 0.202, Tokens/sec 134.239, Trained Tokens 76978, Peak mem 11.392 GB
+ Iter 94: Train loss 1.754, Learning Rate 9.477e-05, It/sec 0.095, Tokens/sec 84.551, Trained Tokens 77872, Peak mem 11.392 GB
+ Iter 95: Train loss 1.868, Learning Rate 9.464e-05, It/sec 0.268, Tokens/sec 124.030, Trained Tokens 78334, Peak mem 11.392 GB
+ Iter 96: Train loss 1.619, Learning Rate 9.452e-05, It/sec 0.307, Tokens/sec 123.732, Trained Tokens 78737, Peak mem 11.392 GB
+ Iter 97: Train loss 1.449, Learning Rate 9.439e-05, It/sec 0.286, Tokens/sec 116.173, Trained Tokens 79143, Peak mem 11.392 GB
+ Iter 98: Train loss 2.002, Learning Rate 9.426e-05, It/sec 0.105, Tokens/sec 123.853, Trained Tokens 80328, Peak mem 11.392 GB
+ Iter 99: Train loss 1.781, Learning Rate 9.413e-05, It/sec 0.091, Tokens/sec 146.993, Trained Tokens 81947, Peak mem 11.392 GB
+ Iter 100: Train loss 1.835, Learning Rate 9.399e-05, It/sec 0.081, Tokens/sec 167.547, Trained Tokens 84022, Peak mem 11.820 GB
+ Iter 100: Saved adapter weights to adapters-conscious/adapters.safetensors and adapters-conscious/0000100_adapters.safetensors.
+ Iter 101: Train loss 2.012, Learning Rate 9.386e-05, It/sec 0.104, Tokens/sec 183.964, Trained Tokens 85799, Peak mem 11.820 GB
+ Iter 102: Train loss 1.590, Learning Rate 9.372e-05, It/sec 0.204, Tokens/sec 195.597, Trained Tokens 86760, Peak mem 11.820 GB
+ Iter 103: Train loss 1.802, Learning Rate 9.359e-05, It/sec 0.395, Tokens/sec 191.385, Trained Tokens 87245, Peak mem 11.820 GB
+ Iter 104: Train loss 1.638, Learning Rate 9.345e-05, It/sec 0.182, Tokens/sec 202.104, Trained Tokens 88354, Peak mem 11.820 GB
+ Iter 105: Train loss 1.574, Learning Rate 9.331e-05, It/sec 0.496, Tokens/sec 203.163, Trained Tokens 88764, Peak mem 11.820 GB
+ Iter 106: Train loss 1.657, Learning Rate 9.317e-05, It/sec 0.201, Tokens/sec 195.999, Trained Tokens 89738, Peak mem 11.820 GB
+ Iter 107: Train loss 1.725, Learning Rate 9.303e-05, It/sec 0.291, Tokens/sec 209.459, Trained Tokens 90459, Peak mem 11.820 GB
+ Iter 108: Train loss 1.836, Learning Rate 9.288e-05, It/sec 0.207, Tokens/sec 196.198, Trained Tokens 91409, Peak mem 11.820 GB
+ Iter 109: Train loss 1.619, Learning Rate 9.274e-05, It/sec 0.203, Tokens/sec 205.953, Trained Tokens 92422, Peak mem 11.820 GB
+ Iter 110: Train loss 1.762, Learning Rate 9.259e-05, It/sec 0.435, Tokens/sec 219.421, Trained Tokens 92926, Peak mem 11.820 GB
+ Iter 111: Train loss 1.611, Learning Rate 9.244e-05, It/sec 0.181, Tokens/sec 187.917, Trained Tokens 93964, Peak mem 11.820 GB
+ Iter 112: Train loss 1.572, Learning Rate 9.230e-05, It/sec 0.165, Tokens/sec 201.447, Trained Tokens 95186, Peak mem 11.820 GB
+ Iter 113: Train loss 1.766, Learning Rate 9.214e-05, It/sec 0.267, Tokens/sec 204.239, Trained Tokens 95950, Peak mem 11.820 GB
+ Iter 114: Train loss 1.511, Learning Rate 9.199e-05, It/sec 0.457, Tokens/sec 160.986, Trained Tokens 96302, Peak mem 11.820 GB
+ Iter 115: Train loss 1.864, Learning Rate 9.184e-05, It/sec 0.115, Tokens/sec 184.740, Trained Tokens 97903, Peak mem 11.820 GB
+ Iter 116: Train loss 1.711, Learning Rate 9.169e-05, It/sec 0.214, Tokens/sec 200.400, Trained Tokens 98840, Peak mem 11.820 GB
+ Iter 117: Train loss 1.732, Learning Rate 9.153e-05, It/sec 0.271, Tokens/sec 204.038, Trained Tokens 99593, Peak mem 11.820 GB
+ Iter 118: Train loss 1.416, Learning Rate 9.137e-05, It/sec 0.264, Tokens/sec 214.608, Trained Tokens 100405, Peak mem 11.820 GB
+ Iter 119: Train loss 1.603, Learning Rate 9.122e-05, It/sec 0.224, Tokens/sec 205.712, Trained Tokens 101323, Peak mem 11.820 GB
+ Iter 120: Train loss 2.044, Learning Rate 9.106e-05, It/sec 0.094, Tokens/sec 181.083, Trained Tokens 103247, Peak mem 11.820 GB
+ Iter 121: Train loss 1.574, Learning Rate 9.090e-05, It/sec 0.258, Tokens/sec 206.953, Trained Tokens 104050, Peak mem 11.820 GB
+ Iter 122: Train loss 1.573, Learning Rate 9.073e-05, It/sec 0.347, Tokens/sec 191.431, Trained Tokens 104602, Peak mem 11.820 GB
+ Iter 123: Train loss 1.477, Learning Rate 9.057e-05, It/sec 0.193, Tokens/sec 197.456, Trained Tokens 105625, Peak mem 11.820 GB
+ Iter 124: Train loss 1.889, Learning Rate 9.041e-05, It/sec 0.120, Tokens/sec 171.125, Trained Tokens 107049, Peak mem 11.820 GB
+ Iter 125: Train loss 1.651, Learning Rate 9.024e-05, It/sec 0.139, Tokens/sec 122.182, Trained Tokens 107925, Peak mem 11.820 GB
+ Iter 126: Train loss 1.436, Learning Rate 9.008e-05, It/sec 0.180, Tokens/sec 149.931, Trained Tokens 108756, Peak mem 11.820 GB
+ Iter 127: Train loss 1.453, Learning Rate 8.991e-05, It/sec 0.144, Tokens/sec 99.762, Trained Tokens 109447, Peak mem 11.820 GB
+ Iter 128: Train loss 1.740, Learning Rate 8.974e-05, It/sec 0.097, Tokens/sec 139.837, Trained Tokens 110896, Peak mem 11.820 GB
+ Iter 129: Train loss 1.829, Learning Rate 8.957e-05, It/sec 0.240, Tokens/sec 159.669, Trained Tokens 111560, Peak mem 11.820 GB
+ Iter 130: Train loss 1.707, Learning Rate 8.940e-05, It/sec 0.307, Tokens/sec 141.373, Trained Tokens 112021, Peak mem 11.820 GB
+ Iter 131: Train loss 1.680, Learning Rate 8.922e-05, It/sec 0.283, Tokens/sec 166.546, Trained Tokens 112609, Peak mem 11.820 GB
+ Iter 132: Train loss 1.522, Learning Rate 8.905e-05, It/sec 0.075, Tokens/sec 77.919, Trained Tokens 113646, Peak mem 11.820 GB
+ Iter 133: Train loss 1.364, Learning Rate 8.887e-05, It/sec 0.197, Tokens/sec 101.780, Trained Tokens 114162, Peak mem 11.820 GB
+ Iter 134: Train loss 1.609, Learning Rate 8.870e-05, It/sec 0.083, Tokens/sec 92.891, Trained Tokens 115279, Peak mem 11.820 GB
+ Iter 135: Train loss 1.648, Learning Rate 8.852e-05, It/sec 0.056, Tokens/sec 83.222, Trained Tokens 116761, Peak mem 11.820 GB
+ Iter 136: Train loss 1.850, Learning Rate 8.834e-05, It/sec 0.086, Tokens/sec 57.123, Trained Tokens 117424, Peak mem 11.820 GB
+ Iter 137: Train loss 1.868, Learning Rate 8.816e-05, It/sec 0.035, Tokens/sec 49.049, Trained Tokens 118819, Peak mem 11.820 GB
+ Iter 138: Train loss 1.664, Learning Rate 8.798e-05, It/sec 0.032, Tokens/sec 45.821, Trained Tokens 120238, Peak mem 11.820 GB
+ Iter 139: Train loss 1.497, Learning Rate 8.780e-05, It/sec 0.077, Tokens/sec 62.477, Trained Tokens 121052, Peak mem 11.820 GB
+ Iter 140: Train loss 1.602, Learning Rate 8.761e-05, It/sec 0.122, Tokens/sec 55.672, Trained Tokens 121510, Peak mem 11.820 GB
+ Iter 141: Train loss 1.410, Learning Rate 8.743e-05, It/sec 0.061, Tokens/sec 65.152, Trained Tokens 122571, Peak mem 11.820 GB
+ Iter 142: Train loss 1.371, Learning Rate 8.724e-05, It/sec 0.076, Tokens/sec 57.939, Trained Tokens 123338, Peak mem 11.820 GB
+ Iter 143: Train loss 1.350, Learning Rate 8.706e-05, It/sec 0.117, Tokens/sec 61.701, Trained Tokens 123866, Peak mem 11.820 GB
+ Iter 144: Train loss 1.645, Learning Rate 8.687e-05, It/sec 0.233, Tokens/sec 96.797, Trained Tokens 124281, Peak mem 11.820 GB
+ Iter 145: Train loss 1.586, Learning Rate 8.668e-05, It/sec 0.084, Tokens/sec 67.882, Trained Tokens 125087, Peak mem 11.820 GB
+ Iter 146: Train loss 1.564, Learning Rate 8.649e-05, It/sec 0.097, Tokens/sec 96.270, Trained Tokens 126075, Peak mem 11.820 GB
+ Iter 147: Train loss 1.623, Learning Rate 8.630e-05, It/sec 0.076, Tokens/sec 92.571, Trained Tokens 127286, Peak mem 11.820 GB
+ Iter 148: Train loss 1.606, Learning Rate 8.610e-05, It/sec 0.066, Tokens/sec 94.580, Trained Tokens 128721, Peak mem 11.820 GB
+ Iter 149: Train loss 1.564, Learning Rate 8.591e-05, It/sec 0.133, Tokens/sec 135.051, Trained Tokens 129740, Peak mem 11.820 GB
+ Iter 150: Train loss 1.759, Learning Rate 8.571e-05, It/sec 0.099, Tokens/sec 103.457, Trained Tokens 130781, Peak mem 11.820 GB
+ Iter 151: Train loss 1.587, Learning Rate 8.552e-05, It/sec 0.212, Tokens/sec 166.329, Trained Tokens 131566, Peak mem 11.820 GB
+ Iter 152: Train loss 1.540, Learning Rate 8.532e-05, It/sec 0.091, Tokens/sec 119.728, Trained Tokens 132878, Peak mem 11.820 GB
+ Iter 153: Train loss 1.689, Learning Rate 8.512e-05, It/sec 0.157, Tokens/sec 125.928, Trained Tokens 133680, Peak mem 11.820 GB
+ Iter 154: Train loss 1.781, Learning Rate 8.493e-05, It/sec 0.081, Tokens/sec 108.726, Trained Tokens 135027, Peak mem 11.820 GB
+ Iter 155: Train loss 1.673, Learning Rate 8.472e-05, It/sec 0.149, Tokens/sec 182.359, Trained Tokens 136248, Peak mem 11.820 GB
+ Iter 156: Train loss 1.825, Learning Rate 8.452e-05, It/sec 0.292, Tokens/sec 188.467, Trained Tokens 136894, Peak mem 11.820 GB
+ Iter 157: Train loss 1.497, Learning Rate 8.432e-05, It/sec 0.334, Tokens/sec 180.120, Trained Tokens 137434, Peak mem 11.820 GB
+ Iter 158: Train loss 1.596, Learning Rate 8.412e-05, It/sec 0.368, Tokens/sec 176.641, Trained Tokens 137914, Peak mem 11.820 GB
+ Iter 159: Train loss 1.679, Learning Rate 8.391e-05, It/sec 0.199, Tokens/sec 171.155, Trained Tokens 138774, Peak mem 11.820 GB
+ Iter 160: Train loss 1.514, Learning Rate 8.371e-05, It/sec 0.193, Tokens/sec 182.780, Trained Tokens 139722, Peak mem 11.820 GB
+ Iter 161: Train loss 1.432, Learning Rate 8.350e-05, It/sec 0.303, Tokens/sec 194.772, Trained Tokens 140365, Peak mem 11.820 GB
+ Iter 162: Train loss 2.050, Learning Rate 8.330e-05, It/sec 0.105, Tokens/sec 164.724, Trained Tokens 141937, Peak mem 11.820 GB
+ Iter 163: Train loss 1.919, Learning Rate 8.309e-05, It/sec 0.143, Tokens/sec 162.492, Trained Tokens 143075, Peak mem 11.820 GB
+ Iter 164: Train loss 1.551, Learning Rate 8.288e-05, It/sec 0.285, Tokens/sec 126.121, Trained Tokens 143518, Peak mem 11.820 GB
+ Iter 165: Train loss 1.431, Learning Rate 8.267e-05, It/sec 0.242, Tokens/sec 179.960, Trained Tokens 144262, Peak mem 11.820 GB
+ Iter 166: Train loss 1.819, Learning Rate 8.246e-05, It/sec 0.232, Tokens/sec 159.555, Trained Tokens 144951, Peak mem 11.820 GB
+ Iter 167: Train loss 1.871, Learning Rate 8.224e-05, It/sec 0.151, Tokens/sec 176.992, Trained Tokens 146123, Peak mem 11.820 GB
+ Iter 168: Train loss 1.393, Learning Rate 8.203e-05, It/sec 0.323, Tokens/sec 144.483, Trained Tokens 146571, Peak mem 11.820 GB
+ Iter 169: Train loss 1.864, Learning Rate 8.182e-05, It/sec 0.127, Tokens/sec 145.733, Trained Tokens 147721, Peak mem 11.820 GB
+ Iter 170: Train loss 1.534, Learning Rate 8.160e-05, It/sec 0.140, Tokens/sec 153.570, Trained Tokens 148821, Peak mem 11.820 GB
+ Iter 171: Train loss 1.757, Learning Rate 8.139e-05, It/sec 0.208, Tokens/sec 154.295, Trained Tokens 149563, Peak mem 11.820 GB
+ Iter 172: Train loss 1.809, Learning Rate 8.117e-05, It/sec 0.039, Tokens/sec 45.915, Trained Tokens 150740, Peak mem 11.820 GB
+ Iter 173: Train loss 1.486, Learning Rate 8.095e-05, It/sec 0.102, Tokens/sec 109.155, Trained Tokens 151805, Peak mem 11.820 GB
+ Iter 174: Train loss 1.214, Learning Rate 8.073e-05, It/sec 0.334, Tokens/sec 163.511, Trained Tokens 152295, Peak mem 11.820 GB
+ Iter 175: Train loss 1.746, Learning Rate 8.051e-05, It/sec 0.337, Tokens/sec 179.316, Trained Tokens 152827, Peak mem 11.820 GB
+ Iter 176: Train loss 1.728, Learning Rate 8.029e-05, It/sec 0.125, Tokens/sec 174.942, Trained Tokens 154225, Peak mem 11.820 GB
+ Iter 177: Train loss 1.883, Learning Rate 8.007e-05, It/sec 0.088, Tokens/sec 159.737, Trained Tokens 156050, Peak mem 11.820 GB
+ Iter 178: Train loss 1.596, Learning Rate 7.985e-05, It/sec 0.323, Tokens/sec 176.292, Trained Tokens 156596, Peak mem 11.820 GB
+ Iter 179: Train loss 1.331, Learning Rate 7.962e-05, It/sec 0.438, Tokens/sec 176.757, Trained Tokens 157000, Peak mem 11.820 GB
+ Iter 180: Train loss 1.533, Learning Rate 7.940e-05, It/sec 0.193, Tokens/sec 175.520, Trained Tokens 157911, Peak mem 11.820 GB
+ Iter 181: Train loss 1.702, Learning Rate 7.918e-05, It/sec 0.399, Tokens/sec 181.867, Trained Tokens 158367, Peak mem 11.820 GB
+ Iter 182: Train loss 1.467, Learning Rate 7.895e-05, It/sec 0.144, Tokens/sec 151.373, Trained Tokens 159419, Peak mem 11.820 GB
+ Iter 183: Train loss 1.502, Learning Rate 7.872e-05, It/sec 0.210, Tokens/sec 120.807, Trained Tokens 159994, Peak mem 11.820 GB
+ Iter 184: Train loss 1.432, Learning Rate 7.850e-05, It/sec 0.108, Tokens/sec 131.116, Trained Tokens 161212, Peak mem 11.820 GB
+ Iter 185: Train loss 1.423, Learning Rate 7.827e-05, It/sec 0.376, Tokens/sec 186.293, Trained Tokens 161707, Peak mem 11.820 GB
+ Iter 186: Train loss 1.537, Learning Rate 7.804e-05, It/sec 0.103, Tokens/sec 115.227, Trained Tokens 162825, Peak mem 11.820 GB
+ Iter 187: Train loss 1.812, Learning Rate 7.781e-05, It/sec 0.164, Tokens/sec 115.836, Trained Tokens 163531, Peak mem 11.820 GB
+ Iter 188: Train loss 1.440, Learning Rate 7.758e-05, It/sec 0.118, Tokens/sec 121.534, Trained Tokens 164559, Peak mem 11.820 GB
+ Iter 189: Train loss 1.786, Learning Rate 7.735e-05, It/sec 0.199, Tokens/sec 114.110, Trained Tokens 165132, Peak mem 11.820 GB
+ Iter 190: Train loss 1.399, Learning Rate 7.711e-05, It/sec 0.308, Tokens/sec 143.171, Trained Tokens 165597, Peak mem 11.820 GB
+ Iter 191: Train loss 1.683, Learning Rate 7.688e-05, It/sec 0.151, Tokens/sec 164.176, Trained Tokens 166682, Peak mem 11.820 GB
+ Iter 192: Train loss 1.772, Learning Rate 7.665e-05, It/sec 0.073, Tokens/sec 105.064, Trained Tokens 168120, Peak mem 11.820 GB
+ Iter 193: Train loss 1.393, Learning Rate 7.641e-05, It/sec 0.126, Tokens/sec 110.948, Trained Tokens 169004, Peak mem 11.820 GB
+ Iter 194: Train loss 1.484, Learning Rate 7.618e-05, It/sec 0.107, Tokens/sec 101.714, Trained Tokens 169957, Peak mem 11.820 GB
+ Iter 195: Train loss 1.328, Learning Rate 7.594e-05, It/sec 0.150, Tokens/sec 85.030, Trained Tokens 170524, Peak mem 11.820 GB
+ Iter 196: Train loss 1.386, Learning Rate 7.570e-05, It/sec 0.202, Tokens/sec 90.825, Trained Tokens 170974, Peak mem 11.820 GB
+ Iter 197: Train loss 1.487, Learning Rate 7.547e-05, It/sec 0.062, Tokens/sec 80.995, Trained Tokens 172286, Peak mem 11.820 GB
+ Iter 198: Train loss 1.588, Learning Rate 7.523e-05, It/sec 0.084, Tokens/sec 97.551, Trained Tokens 173444, Peak mem 11.820 GB
+ Iter 199: Train loss 1.701, Learning Rate 7.499e-05, It/sec 0.117, Tokens/sec 123.402, Trained Tokens 174501, Peak mem 11.820 GB
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 200: Val loss nan, Val took 0.012s
+ Iter 200: Train loss 1.545, Learning Rate 7.475e-05, It/sec 0.123, Tokens/sec 139.085, Trained Tokens 175628, Peak mem 11.820 GB
+ Iter 200: Saved adapter weights to adapters-conscious/adapters.safetensors and adapters-conscious/0000200_adapters.safetensors.
+ Iter 201: Train loss 1.765, Learning Rate 7.451e-05, It/sec 0.073, Tokens/sec 136.042, Trained Tokens 177501, Peak mem 11.820 GB
+ Iter 202: Train loss 1.477, Learning Rate 7.427e-05, It/sec 0.172, Tokens/sec 154.873, Trained Tokens 178401, Peak mem 11.820 GB
+ Iter 203: Train loss 1.313, Learning Rate 7.402e-05, It/sec 0.233, Tokens/sec 159.479, Trained Tokens 179085, Peak mem 11.820 GB
+ Iter 204: Train loss 1.465, Learning Rate 7.378e-05, It/sec 0.114, Tokens/sec 142.336, Trained Tokens 180337, Peak mem 11.820 GB
+ Iter 205: Train loss 1.597, Learning Rate 7.354e-05, It/sec 0.160, Tokens/sec 148.968, Trained Tokens 181266, Peak mem 11.820 GB
+ Iter 206: Train loss 1.135, Learning Rate 7.329e-05, It/sec 0.354, Tokens/sec 152.220, Trained Tokens 181696, Peak mem 11.820 GB
+ Iter 207: Train loss 1.571, Learning Rate 7.305e-05, It/sec 0.146, Tokens/sec 156.147, Trained Tokens 182766, Peak mem 11.820 GB
+ Iter 208: Train loss 1.458, Learning Rate 7.281e-05, It/sec 0.142, Tokens/sec 146.358, Trained Tokens 183799, Peak mem 11.820 GB
+ Iter 209: Train loss 1.607, Learning Rate 7.256e-05, It/sec 0.135, Tokens/sec 147.157, Trained Tokens 184888, Peak mem 11.820 GB
+ Iter 210: Train loss 1.819, Learning Rate 7.231e-05, It/sec 0.099, Tokens/sec 150.679, Trained Tokens 186417, Peak mem 11.820 GB
+ Iter 211: Train loss 1.316, Learning Rate 7.207e-05, It/sec 0.379, Tokens/sec 155.367, Trained Tokens 186827, Peak mem 11.820 GB
+ Iter 212: Train loss 1.595, Learning Rate 7.182e-05, It/sec 0.114, Tokens/sec 141.397, Trained Tokens 188066, Peak mem 11.820 GB
+ Iter 213: Train loss 1.701, Learning Rate 7.157e-05, It/sec 0.307, Tokens/sec 154.370, Trained Tokens 188569, Peak mem 11.820 GB
+ Iter 214: Train loss 1.499, Learning Rate 7.132e-05, It/sec 0.169, Tokens/sec 153.762, Trained Tokens 189480, Peak mem 11.820 GB
+ Iter 215: Train loss 1.665, Learning Rate 7.107e-05, It/sec 0.284, Tokens/sec 145.611, Trained Tokens 189993, Peak mem 11.820 GB
+ Iter 216: Train loss 1.536, Learning Rate 7.082e-05, It/sec 0.219, Tokens/sec 141.877, Trained Tokens 190642, Peak mem 11.820 GB
+ Iter 217: Train loss 1.492, Learning Rate 7.057e-05, It/sec 0.201, Tokens/sec 150.214, Trained Tokens 191389, Peak mem 11.820 GB
+ Iter 218: Train loss 1.678, Learning Rate 7.032e-05, It/sec 0.120, Tokens/sec 131.900, Trained Tokens 192485, Peak mem 11.820 GB
+ Iter 219: Train loss 1.329, Learning Rate 7.007e-05, It/sec 0.247, Tokens/sec 144.992, Trained Tokens 193071, Peak mem 11.820 GB
+ Iter 220: Train loss 1.608, Learning Rate 6.982e-05, It/sec 0.100, Tokens/sec 135.051, Trained Tokens 194424, Peak mem 11.820 GB
+ Iter 221: Train loss 1.642, Learning Rate 6.956e-05, It/sec 0.109, Tokens/sec 103.494, Trained Tokens 195373, Peak mem 11.820 GB
+ Iter 222: Train loss 1.370, Learning Rate 6.931e-05, It/sec 0.132, Tokens/sec 96.455, Trained Tokens 196101, Peak mem 11.820 GB
+ Iter 223: Train loss 1.598, Learning Rate 6.906e-05, It/sec 0.107, Tokens/sec 94.110, Trained Tokens 196982, Peak mem 11.820 GB
+ Iter 224: Train loss 1.776, Learning Rate 6.880e-05, It/sec 0.080, Tokens/sec 121.106, Trained Tokens 198505, Peak mem 11.820 GB
+ Iter 225: Train loss 1.394, Learning Rate 6.855e-05, It/sec 0.192, Tokens/sec 153.970, Trained Tokens 199306, Peak mem 11.820 GB
+ Iter 226: Train loss 1.874, Learning Rate 6.829e-05, It/sec 0.065, Tokens/sec 127.116, Trained Tokens 201247, Peak mem 11.820 GB
+ Iter 227: Train loss 1.648, Learning Rate 6.804e-05, It/sec 0.131, Tokens/sec 155.569, Trained Tokens 202438, Peak mem 11.820 GB
+ Iter 228: Train loss 1.507, Learning Rate 6.778e-05, It/sec 0.358, Tokens/sec 156.911, Trained Tokens 202876, Peak mem 11.820 GB
+ Iter 229: Train loss 1.342, Learning Rate 6.753e-05, It/sec 0.416, Tokens/sec 170.156, Trained Tokens 203285, Peak mem 11.820 GB
+ Iter 230: Train loss 1.790, Learning Rate 6.727e-05, It/sec 0.225, Tokens/sec 166.161, Trained Tokens 204025, Peak mem 11.820 GB
+ Iter 231: Train loss 1.496, Learning Rate 6.701e-05, It/sec 0.162, Tokens/sec 164.406, Trained Tokens 205039, Peak mem 11.820 GB
+ Iter 232: Train loss 1.637, Learning Rate 6.675e-05, It/sec 0.095, Tokens/sec 126.111, Trained Tokens 206367, Peak mem 11.820 GB
+ Iter 233: Train loss 1.690, Learning Rate 6.650e-05, It/sec 0.106, Tokens/sec 104.734, Trained Tokens 207353, Peak mem 11.820 GB
+ Iter 234: Train loss 1.841, Learning Rate 6.624e-05, It/sec 0.092, Tokens/sec 124.355, Trained Tokens 208706, Peak mem 11.820 GB
+ Iter 235: Train loss 1.816, Learning Rate 6.598e-05, It/sec 0.076, Tokens/sec 95.014, Trained Tokens 209964, Peak mem 11.820 GB
+ Iter 236: Train loss 1.231, Learning Rate 6.572e-05, It/sec 0.144, Tokens/sec 92.802, Trained Tokens 210608, Peak mem 11.820 GB
+ Iter 237: Train loss 1.654, Learning Rate 6.546e-05, It/sec 0.153, Tokens/sec 136.474, Trained Tokens 211499, Peak mem 11.820 GB
+ Iter 238: Train loss 1.555, Learning Rate 6.520e-05, It/sec 0.114, Tokens/sec 145.523, Trained Tokens 212780, Peak mem 11.820 GB
+ Iter 239: Train loss 1.381, Learning Rate 6.494e-05, It/sec 0.182, Tokens/sec 166.046, Trained Tokens 213690, Peak mem 11.820 GB
+ Iter 240: Train loss 1.822, Learning Rate 6.468e-05, It/sec 0.084, Tokens/sec 130.779, Trained Tokens 215242, Peak mem 11.820 GB
+ Iter 241: Train loss 1.265, Learning Rate 6.442e-05, It/sec 0.293, Tokens/sec 155.158, Trained Tokens 215772, Peak mem 11.820 GB
+ Iter 242: Train loss 1.444, Learning Rate 6.416e-05, It/sec 0.126, Tokens/sec 125.096, Trained Tokens 216766, Peak mem 11.820 GB
+ Iter 243: Train loss 1.420, Learning Rate 6.389e-05, It/sec 0.138, Tokens/sec 108.551, Trained Tokens 217551, Peak mem 11.820 GB
+ Iter 244: Train loss 1.517, Learning Rate 6.363e-05, It/sec 0.372, Tokens/sec 142.479, Trained Tokens 217934, Peak mem 11.820 GB
+ Iter 245: Train loss 1.152, Learning Rate 6.337e-05, It/sec 0.197, Tokens/sec 104.221, Trained Tokens 218464, Peak mem 11.820 GB
+ Iter 246: Train loss 1.314, Learning Rate 6.311e-05, It/sec 0.327, Tokens/sec 170.180, Trained Tokens 218985, Peak mem 11.820 GB
+ Iter 247: Train loss 1.942, Learning Rate 6.284e-05, It/sec 0.168, Tokens/sec 158.578, Trained Tokens 219927, Peak mem 11.820 GB
+ Iter 248: Train loss 1.550, Learning Rate 6.258e-05, It/sec 0.423, Tokens/sec 170.281, Trained Tokens 220330, Peak mem 11.820 GB
+ Iter 249: Train loss 1.512, Learning Rate 6.232e-05, It/sec 0.169, Tokens/sec 162.514, Trained Tokens 221291, Peak mem 11.820 GB
+ Iter 250: Train loss 1.724, Learning Rate 6.205e-05, It/sec 0.241, Tokens/sec 166.872, Trained Tokens 221983, Peak mem 11.820 GB
+ Iter 251: Train loss 1.963, Learning Rate 6.179e-05, It/sec 0.232, Tokens/sec 170.161, Trained Tokens 222715, Peak mem 11.820 GB
+ Iter 252: Train loss 1.407, Learning Rate 6.152e-05, It/sec 0.234, Tokens/sec 178.210, Trained Tokens 223477, Peak mem 11.820 GB
+ Iter 253: Train loss 1.675, Learning Rate 6.126e-05, It/sec 0.099, Tokens/sec 121.773, Trained Tokens 224701, Peak mem 11.820 GB
+ Iter 254: Train loss 1.855, Learning Rate 6.100e-05, It/sec 0.071, Tokens/sec 149.504, Trained Tokens 226807, Peak mem 12.008 GB
+ Iter 255: Train loss 1.708, Learning Rate 6.073e-05, It/sec 0.215, Tokens/sec 168.296, Trained Tokens 227589, Peak mem 12.008 GB
+ Iter 256: Train loss 1.744, Learning Rate 6.046e-05, It/sec 0.118, Tokens/sec 115.592, Trained Tokens 228568, Peak mem 12.008 GB
+ Iter 257: Train loss 1.602, Learning Rate 6.020e-05, It/sec 0.113, Tokens/sec 139.105, Trained Tokens 229804, Peak mem 12.008 GB
+ Iter 258: Train loss 1.619, Learning Rate 5.993e-05, It/sec 0.195, Tokens/sec 168.801, Trained Tokens 230668, Peak mem 12.008 GB
+ Iter 259: Train loss 1.517, Learning Rate 5.967e-05, It/sec 0.190, Tokens/sec 127.542, Trained Tokens 231339, Peak mem 12.008 GB
+ Iter 260: Train loss 1.413, Learning Rate 5.940e-05, It/sec 0.274, Tokens/sec 118.404, Trained Tokens 231771, Peak mem 12.008 GB
+ Iter 261: Train loss 1.472, Learning Rate 5.914e-05, It/sec 0.189, Tokens/sec 138.943, Trained Tokens 232506, Peak mem 12.008 GB
+ Iter 262: Train loss 1.870, Learning Rate 5.887e-05, It/sec 0.192, Tokens/sec 160.497, Trained Tokens 233341, Peak mem 12.008 GB
+ Iter 263: Train loss 1.643, Learning Rate 5.860e-05, It/sec 0.075, Tokens/sec 102.682, Trained Tokens 234704, Peak mem 12.008 GB
+ Iter 264: Train loss 1.724, Learning Rate 5.834e-05, It/sec 0.078, Tokens/sec 103.747, Trained Tokens 236036, Peak mem 12.008 GB
+ Iter 265: Train loss 1.190, Learning Rate 5.807e-05, It/sec 0.180, Tokens/sec 112.618, Trained Tokens 236663, Peak mem 12.008 GB
+ Iter 266: Train loss 1.776, Learning Rate 5.780e-05, It/sec 0.237, Tokens/sec 137.264, Trained Tokens 237242, Peak mem 12.008 GB
+ Iter 267: Train loss 1.767, Learning Rate 5.754e-05, It/sec 0.081, Tokens/sec 90.226, Trained Tokens 238351, Peak mem 12.008 GB
+ Iter 268: Train loss 1.360, Learning Rate 5.727e-05, It/sec 0.269, Tokens/sec 83.180, Trained Tokens 238660, Peak mem 12.008 GB
+ Iter 269: Train loss 1.586, Learning Rate 5.700e-05, It/sec 0.152, Tokens/sec 88.995, Trained Tokens 239245, Peak mem 12.008 GB
+ Iter 270: Train loss 1.806, Learning Rate 5.674e-05, It/sec 0.092, Tokens/sec 86.425, Trained Tokens 240189, Peak mem 12.008 GB
+ Iter 271: Train loss 1.572, Learning Rate 5.647e-05, It/sec 0.102, Tokens/sec 92.008, Trained Tokens 241093, Peak mem 12.008 GB
+ Iter 272: Train loss 1.598, Learning Rate 5.620e-05, It/sec 0.184, Tokens/sec 85.830, Trained Tokens 241560, Peak mem 12.008 GB
+ Iter 273: Train loss 1.406, Learning Rate 5.594e-05, It/sec 0.085, Tokens/sec 77.487, Trained Tokens 242474, Peak mem 12.008 GB
+ Iter 274: Train loss 1.448, Learning Rate 5.567e-05, It/sec 0.126, Tokens/sec 80.845, Trained Tokens 243117, Peak mem 12.008 GB
+ Iter 275: Train loss 1.509, Learning Rate 5.540e-05, It/sec 0.079, Tokens/sec 73.888, Trained Tokens 244058, Peak mem 12.008 GB
+ Iter 276: Train loss 1.691, Learning Rate 5.513e-05, It/sec 0.076, Tokens/sec 77.338, Trained Tokens 245075, Peak mem 12.008 GB
+ Iter 277: Train loss 1.492, Learning Rate 5.487e-05, It/sec 0.049, Tokens/sec 72.689, Trained Tokens 246553, Peak mem 12.008 GB
+ Iter 278: Train loss 1.470, Learning Rate 5.460e-05, It/sec 0.056, Tokens/sec 62.069, Trained Tokens 247662, Peak mem 12.008 GB
+ Iter 279: Train loss 1.496, Learning Rate 5.433e-05, It/sec 0.092, Tokens/sec 102.958, Trained Tokens 248787, Peak mem 12.008 GB
+ Iter 280: Train loss 1.512, Learning Rate 5.406e-05, It/sec 0.063, Tokens/sec 66.578, Trained Tokens 249848, Peak mem 12.008 GB
+ Iter 281: Train loss 1.267, Learning Rate 5.380e-05, It/sec 0.112, Tokens/sec 86.708, Trained Tokens 250622, Peak mem 12.008 GB
+ Iter 282: Train loss 1.925, Learning Rate 5.353e-05, It/sec 0.074, Tokens/sec 91.624, Trained Tokens 251867, Peak mem 12.008 GB
+ Iter 283: Train loss 1.843, Learning Rate 5.326e-05, It/sec 0.073, Tokens/sec 111.265, Trained Tokens 253389, Peak mem 12.008 GB
+ Iter 284: Train loss 1.584, Learning Rate 5.300e-05, It/sec 0.121, Tokens/sec 130.482, Trained Tokens 254468, Peak mem 12.008 GB
+ Iter 285: Train loss 1.577, Learning Rate 5.273e-05, It/sec 0.143, Tokens/sec 142.670, Trained Tokens 255469, Peak mem 12.008 GB
+ Iter 286: Train loss 1.634, Learning Rate 5.246e-05, It/sec 0.082, Tokens/sec 127.110, Trained Tokens 257023, Peak mem 12.008 GB
+ Iter 287: Train loss 1.351, Learning Rate 5.220e-05, It/sec 0.267, Tokens/sec 146.120, Trained Tokens 257570, Peak mem 12.008 GB
+ Iter 288: Train loss 1.854, Learning Rate 5.193e-05, It/sec 0.059, Tokens/sec 104.093, Trained Tokens 259344, Peak mem 12.008 GB
+ Iter 289: Train loss 1.411, Learning Rate 5.166e-05, It/sec 0.155, Tokens/sec 127.500, Trained Tokens 260167, Peak mem 12.008 GB
+ Iter 290: Train loss 1.404, Learning Rate 5.140e-05, It/sec 0.274, Tokens/sec 146.790, Trained Tokens 260702, Peak mem 12.008 GB
+ Iter 291: Train loss 1.674, Learning Rate 5.113e-05, It/sec 0.097, Tokens/sec 131.358, Trained Tokens 262061, Peak mem 12.008 GB
+ Iter 292: Train loss 1.512, Learning Rate 5.086e-05, It/sec 0.228, Tokens/sec 178.251, Trained Tokens 262844, Peak mem 12.008 GB
+ Iter 293: Train loss 1.526, Learning Rate 5.060e-05, It/sec 0.374, Tokens/sec 166.606, Trained Tokens 263290, Peak mem 12.008 GB
+ Iter 294: Train loss 1.561, Learning Rate 5.033e-05, It/sec 0.137, Tokens/sec 132.210, Trained Tokens 264253, Peak mem 12.008 GB
+ Iter 295: Train loss 1.640, Learning Rate 5.007e-05, It/sec 0.117, Tokens/sec 154.404, Trained Tokens 265573, Peak mem 12.008 GB
+ Iter 296: Train loss 1.613, Learning Rate 4.980e-05, It/sec 0.150, Tokens/sec 161.536, Trained Tokens 266647, Peak mem 12.008 GB
+ Iter 297: Train loss 1.422, Learning Rate 4.954e-05, It/sec 0.197, Tokens/sec 134.622, Trained Tokens 267329, Peak mem 12.008 GB
+ Iter 298: Train loss 1.944, Learning Rate 4.927e-05, It/sec 0.138, Tokens/sec 180.145, Trained Tokens 268635, Peak mem 12.008 GB
+ Iter 299: Train loss 1.573, Learning Rate 4.900e-05, It/sec 0.114, Tokens/sec 137.622, Trained Tokens 269837, Peak mem 12.008 GB
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 300: Val loss nan, Val took 0.008s
+ Iter 300: Train loss 1.747, Learning Rate 4.874e-05, It/sec 0.232, Tokens/sec 175.489, Trained Tokens 270593, Peak mem 12.008 GB
+ Iter 300: Saved adapter weights to adapters-conscious/adapters.safetensors and adapters-conscious/0000300_adapters.safetensors.
+ Saved final weights to adapters-conscious/adapters.safetensors.
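
Every Val loss in this log is nan, which is expected rather than a training failure: val_batches is 0, so the validation loop iterates over zero batches (the "0it" progress line) and the averaged loss is undefined. The train-loss trend itself is easy to recover from the text; a minimal parsing sketch, assuming the log has been saved locally as output.log:

import re

# Capture iteration number and train loss from lines like:
# "Iter 42: Train loss 2.060, Learning Rate 9.929e-05, ..."
pattern = re.compile(r"Iter (\d+): Train loss ([\d.]+)")

losses = {}
with open("output.log") as f:
    for line in f:
        m = pattern.search(line)
        if m:
            losses[int(m.group(1))] = float(m.group(2))

# Rough smoothing: mean loss over 50-iteration windows.
for start in range(1, 301, 50):
    window = [v for k, v in losses.items() if start <= k < start + 50]
    print(f"iters {start}-{start + 49}: mean loss {sum(window) / len(window):.3f}")
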
adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/requirements.txt ADDED
@@ -0,0 +1,147 @@
+ shellingham==1.5.4
+ contourpy==1.3.2
+ jiter==0.12.0
+ audioread==3.1.0
+ threadpoolctl==3.6.0
+ lazy_loader==0.4
+ GitPython==3.1.45
+ async-timeout==5.0.1
+ requests==2.32.5
+ rich==14.2.0
+ tokenizers==0.22.1
+ urllib3==2.5.0
+ exceptiongroup==1.3.1
+ numpy==2.2.6
+ click==8.3.1
+ pytz==2025.2
+ miniaudio==1.61
+ pyarrow==22.0.0
+ sse-starlette==3.2.0
+ scikit-learn==1.7.2
+ soxr==1.0.0
+ jsonschema-specifications==2025.9.1
+ python-multipart==0.0.22
+ utilsforecast==0.2.15
+ ftfy==6.3.1
+ torchvision==0.25.0
+ statsmodels==0.14.6
+ file-read-backwards==3.2.0
+ propcache==0.4.1
+ python-dotenv==1.2.1
+ anyio==4.12.0
+ mlx==0.30.6
+ wordfreq==3.1.1
+ networkx==3.4.2
+ pip==25.3
+ texttable==1.7.0
+ mlx-audio==0.3.1
+ narwhals==2.15.0
+ multidict==6.7.0
+ numba==0.63.1
+ idna==3.11
+ regex==2025.11.3
+ fonttools==4.60.1
+ openai==2.16.0
+ aiohttp==3.13.2
+ mistral_common==1.8.6
+ einshape==1.0
+ cffi==2.0.0
+ kiwisolver==1.4.9
+ tqdm==4.67.1
+ setuptools==80.9.0
+ RapidFuzz==3.14.3
+ pyparsing==3.2.5
+ starlette==0.52.1
+ tzdata==2025.2
+ mlx-lm==0.30.6
+ httpcore==1.0.9
+ decorator==5.2.1
+ certifi==2025.11.12
+ typer==0.21.1
+ pydantic==2.12.4
+ fsspec==2025.10.0
+ mcp==1.26.0
+ librosa==0.11.0
+ charset-normalizer==3.4.4
+ sympy==1.14.0
+ jsonschema==4.25.1
+ pydantic-settings==2.12.0
+ markdown-it-py==4.0.0
+ tiktoken==0.12.0
+ PyJWT==2.11.0
+ sentry-sdk==2.45.0
+ platformdirs==4.5.0
+ absl-py==2.3.1
+ transformers==5.1.0
+ diffusers==0.37.0.dev0
+ h11==0.16.0
+ gitdb==4.0.12
+ sniffio==1.3.1
+ pycparser==3.0
+ sentencepiece==0.2.1
+ importlib_metadata==8.7.1
+ mdurl==0.1.2
+ patsy==1.0.2
+ python-dateutil==2.9.0.post0
+ mpmath==1.3.0
+ pillow==12.0.0
+ PyYAML==6.0.3
+ sentence-transformers==5.1.2
+ multiprocess==0.70.18
+ pydantic_core==2.41.5
+ uvicorn==0.40.0
+ frozenlist==1.8.0
+ typer-slim==0.20.1
+ typing_extensions==4.15.0
+ aiosignal==1.4.0
+ packaging==25.0
+ cycler==0.12.1
+ cryptography==46.0.4
+ hf-xet==1.2.0
+ Jinja2==3.1.6
+ wheel==0.45.1
+ referencing==0.37.0
+ pandas==2.3.3
+ soundfile==0.13.1
+ pooch==1.8.2
+ MarkupSafe==3.0.3
+ dill==0.4.0
+ pydantic-extra-types==2.10.6
+ msgpack==1.1.2
+ distro==1.9.0
+ locate==1.1.1
+ datasets==4.4.1
+ Pygments==2.19.2
+ aiohappyeyeballs==2.6.1
+ llvmlite==0.46.0
+ attrs==25.4.0
+ huggingface_hub==1.3.5
+ nltk==3.9.2
+ torch==2.10.0
+ httpx==0.28.1
+ filelock==3.20.0
+ smmap==5.0.2
+ sounddevice==0.5.3
+ timesfm==1.3.0
+ pycountry==24.6.1
+ mlx-metal==0.30.6
+ scipy==1.15.3
+ protobuf==6.33.1
+ psutil==7.1.3
+ typing-inspection==0.4.2
+ joblib==1.5.2
+ zipp==3.23.0
+ annotated-types==0.7.0
+ accelerate==1.12.0
+ safetensors==0.6.2
+ httpx-sse==0.4.3
+ wcwidth==0.2.14
+ igraph==1.0.0
+ rpds-py==0.30.0
+ langcodes==3.5.1
+ six==1.17.0
+ wandb==0.23.0
+ yarl==1.22.0
+ pyloudnorm==0.2.0
+ xxhash==3.6.0
+ matplotlib==3.10.7
adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/wandb-metadata.json ADDED
@@ -0,0 +1,35 @@
+ {
+     "os": "macOS-26.2-arm64-arm-64bit",
+     "python": "CPython 3.10.19",
+     "startedAt": "2026-02-16T20:05:57.689004Z",
+     "args": [
+         "--config",
+         "train.yaml"
+     ],
+     "program": "/Users/natebreslow/miniconda3/envs/mlx-experiment/bin/mlx_lm.lora",
+     "email": "nathanbreslow@gmail.com",
+     "root": "adapters-conscious",
+     "host": "MacBook-Pro-135.local",
+     "executable": "/Users/natebreslow/miniconda3/envs/mlx-experiment/bin/python3.10",
+     "cpu_count": 16,
+     "cpu_count_logical": 16,
+     "disk": {
+         "/": {
+             "total": "1995218165760",
+             "used": "1698983583744"
+         }
+     },
+     "memory": {
+         "total": "68719476736"
+     },
+     "apple": {
+         "name": "Apple M3 Max",
+         "ecpuCores": 4,
+         "pcpuCores": 12,
+         "gpuCores": 40,
+         "memoryGb": 64,
+         "swapTotalBytes": "2147483648",
+         "ramTotalBytes": "68719476736"
+     },
+     "writerId": "8z2fwr85sxkr7dsfa3tb7o36vuyb19ia"
+ }
adapters-conscious/wandb/run-20260216_150557-tugniqt7/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"train_loss":1.746941089630127,"learning_rate":4.8740144848125055e-05,"_wandb":{"runtime":2080},"iteration":300,"iterations_per_second":0.2321287329053772,"_runtime":2080.795569,"val_time":0.007750792006845586,"_timestamp":1.77127443715999e+09,"peak_memory":12.008247198,"trained_tokens":270593,"val_loss":NaN,"tokens_per_second":175.48932207646516,"_step":300}
adapters-conscious/wandb/run-20260216_150557-tugniqt7/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
+ {"time":"2026-02-16T15:05:57.969219-05:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-02-16T15:05:58.195652-05:00","level":"INFO","msg":"stream: created new stream","id":"tugniqt7"}
+ {"time":"2026-02-16T15:05:58.195747-05:00","level":"INFO","msg":"handler: started","stream_id":"tugniqt7"}
+ {"time":"2026-02-16T15:05:58.195972-05:00","level":"INFO","msg":"stream: started","id":"tugniqt7"}
+ {"time":"2026-02-16T15:05:58.195987-05:00","level":"INFO","msg":"sender: started","stream_id":"tugniqt7"}
+ {"time":"2026-02-16T15:05:58.195991-05:00","level":"INFO","msg":"writer: started","stream_id":"tugniqt7"}
+ {"time":"2026-02-16T15:40:39.241944-05:00","level":"INFO","msg":"stream: closing","id":"tugniqt7"}
+ {"time":"2026-02-16T15:40:39.69538-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2026-02-16T15:40:40.025888-05:00","level":"INFO","msg":"handler: closed","stream_id":"tugniqt7"}
+ {"time":"2026-02-16T15:40:40.026441-05:00","level":"INFO","msg":"sender: closed","stream_id":"tugniqt7"}
+ {"time":"2026-02-16T15:40:40.026516-05:00","level":"INFO","msg":"stream: closed","id":"tugniqt7"}
adapters-conscious/wandb/run-20260216_150557-tugniqt7/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2026-02-16 15:05:57,692 INFO MainThread:38471 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_setup.py:_flush():80] Configure stats pid to 38471
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_setup.py:_flush():80] Loading settings from /Users/natebreslow/.config/wandb/settings
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_setup.py:_flush():80] Loading settings from /Users/natebreslow/Documents/llmSelfReport/wandb/settings
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_setup.py:_flush():80] Loading settings from environment variables
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_init.py:setup_run_log_directory():713] Logging user logs to adapters-conscious/wandb/run-20260216_150557-tugniqt7/logs/debug.log
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to adapters-conscious/wandb/run-20260216_150557-tugniqt7/logs/debug-internal.log
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_init.py:init():840] calling init triggers
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
+ config: {'model': 'Qwen3-4B-Instruct-2507', 'train': True, 'data': 'training/full-conscious', 'fine_tune_type': 'lora', 'optimizer': 'adam', 'mask_prompt': False, 'num_layers': 36, 'batch_size': 1, 'iters': 300, 'val_batches': 0, 'learning_rate': 0.0001, 'steps_per_report': 1, 'steps_per_eval': 200, 'grad_accumulation_steps': 1, 'resume_adapter_file': None, 'adapter_path': 'adapters-conscious', 'save_every': 100, 'test': False, 'test_batches': 100, 'max_seq_length': 8192, 'config': 'train.yaml', 'grad_checkpoint': True, 'report_to': 'wandb', 'project_name': 'conscious-finetuning', 'seed': 0, 'optimizer_config': {'adam': {'betas': [0.9, 0.9999], 'eps': 1e-06, 'bias_correction': True}}, 'lora_parameters': {'keys': ['self_attn.q_proj', 'self_attn.v_proj', 'self_attn.k_proj', 'self_attn.o_proj', 'mlp.gate_proj', 'mlp.up_proj', 'mlp.down_proj'], 'rank': 16, 'scale': 2.0, 'dropout': 0.0}, 'lr_schedule': {'name': 'cosine_decay', 'warmup': 10, 'warmup_init': 1e-05, 'arguments': [0.0001, 529, 1e-05]}, '_wandb': {}}
+ 2026-02-16 15:05:57,693 INFO MainThread:38471 [wandb_init.py:init():888] starting backend
+ 2026-02-16 15:05:57,930 INFO MainThread:38471 [wandb_init.py:init():891] sending inform_init request
+ 2026-02-16 15:05:57,968 INFO MainThread:38471 [wandb_init.py:init():899] backend started and connected
+ 2026-02-16 15:05:57,971 INFO MainThread:38471 [wandb_init.py:init():969] updated telemetry
+ 2026-02-16 15:05:57,971 INFO MainThread:38471 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
+ 2026-02-16 15:05:58,452 INFO MainThread:38471 [wandb_init.py:init():1040] starting run threads in backend
+ 2026-02-16 15:05:58,553 INFO MainThread:38471 [wandb_run.py:_console_start():2504] atexit reg
+ 2026-02-16 15:05:58,554 INFO MainThread:38471 [wandb_run.py:_redirect():2352] redirect: wrap_raw
+ 2026-02-16 15:05:58,554 INFO MainThread:38471 [wandb_run.py:_redirect():2421] Wrapping output streams.
+ 2026-02-16 15:05:58,554 INFO MainThread:38471 [wandb_run.py:_redirect():2444] Redirects installed.
+ 2026-02-16 15:05:58,556 INFO MainThread:38471 [wandb_init.py:init():1080] run started, returning control to user process
+ 2026-02-16 15:40:39,239 INFO wandb-AsyncioManager-main:38471 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-02-16 15:40:39,239 INFO wandb-AsyncioManager-main:38471 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
adapters-conscious/wandb/run-20260216_150557-tugniqt7/run-tugniqt7.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9786077aeddfd62bdab85e444abafced52584ec58009a3ee2ff494fe94674541
+ size 430350
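
This file, like the `.safetensors` checkpoints below, is stored in the repo as a Git LFS pointer rather than the binary itself: three key-value lines giving the spec version, a sha256 digest of the real file, and its size in bytes. A minimal parsing sketch, assuming only the spec-v1 layout shown above:

```python
# Minimal sketch: parse a Git LFS pointer file like the ones in this diff.
# Pointer format per https://git-lfs.github.com/spec/v1:
#   version <url>
#   oid sha256:<64 hex chars>
#   size <bytes>
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    algo, _, digest = fields["oid"].partition(":")
    return {
        "version": fields["version"],
        "hash_algo": algo,           # "sha256"
        "digest": digest,            # hex digest of the real file's contents
        "size": int(fields["size"])  # size of the real file in bytes
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:9786077aeddfd62bdab85e444abafced52584ec58009a3ee2ff494fe94674541
size 430350"""
print(parse_lfs_pointer(pointer))
```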
adapters-no-conscious/0000100_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa2537783d1965a5bf4ff5f42a0f29956e0e6c96ec725d65a0b1acbf05e8aba9
+ size 132175803
adapters-no-conscious/0000200_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62593a584e88cede30b1c6b9d968e8bc78ab89b07c3b8678b850c9bbf977c41d
+ size 132175803
adapters-no-conscious/0000300_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b244118c15173df07a5b29dcc31f2226e860544f25757b81639d3bce4a4ee2ce
+ size 132175803
adapters-no-conscious/adapter_config.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "adapter_path": "adapters-no-conscious",
+   "batch_size": 1,
+   "config": "train.yaml",
+   "data": "training/no-conscious",
+   "fine_tune_type": "lora",
+   "grad_accumulation_steps": 1,
+   "grad_checkpoint": true,
+   "iters": 300,
+   "learning_rate": 0.0001,
+   "lora_parameters": {
+     "keys": [
+       "self_attn.q_proj",
+       "self_attn.v_proj",
+       "self_attn.k_proj",
+       "self_attn.o_proj",
+       "mlp.gate_proj",
+       "mlp.up_proj",
+       "mlp.down_proj"
+     ],
+     "rank": 16,
+     "scale": 2.0,
+     "dropout": 0.0
+   },
+   "lr_schedule": {
+     "name": "cosine_decay",
+     "warmup": 10,
+     "warmup_init": 1e-05,
+     "arguments": [
+       0.0001,
+       529,
+       1e-05
+     ]
+   },
+   "mask_prompt": false,
+   "max_seq_length": 8192,
+   "model": "Qwen3-4B-Instruct-2507",
+   "num_layers": 36,
+   "optimizer": "adam",
+   "optimizer_config": {
+     "adam": {
+       "betas": [
+         0.9,
+         0.9999
+       ],
+       "eps": 1e-06,
+       "bias_correction": true
+     }
+   },
+   "project_name": "conscious-finetuning",
+   "report_to": "wandb",
+   "resume_adapter_file": null,
+   "save_every": 100,
+   "seed": 0,
+   "steps_per_eval": 200,
+   "steps_per_report": 1,
+   "test": false,
+   "test_batches": 100,
+   "train": true,
+   "val_batches": 0
+ }
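
The LoRA settings above determine the trainable-parameter count reported in the training log ("Trainable parameters: 0.821% (33.030M/4022.468M)"). A back-of-the-envelope sketch; the projection shapes are assumptions taken from Qwen3-4B's published dimensions (hidden size 2560, 32 query heads and 8 KV heads of dim 128, MLP intermediate 9728), not from anything in this repo:

```python
# Minimal sketch: estimate the LoRA trainable-parameter count implied by
# adapter_config.json. Model dimensions are assumed from Qwen3-4B's
# published config; the result matches the log line
# "Trainable parameters: 0.821% (33.030M/4022.468M)".
rank, num_layers = 16, 36
hidden, head_dim, n_heads, n_kv_heads, intermediate = 2560, 128, 32, 8, 9728

# (in_features, out_features) for each projection listed in "keys"
shapes = {
    "self_attn.q_proj": (hidden, n_heads * head_dim),
    "self_attn.k_proj": (hidden, n_kv_heads * head_dim),
    "self_attn.v_proj": (hidden, n_kv_heads * head_dim),
    "self_attn.o_proj": (n_heads * head_dim, hidden),
    "mlp.gate_proj":    (hidden, intermediate),
    "mlp.up_proj":      (hidden, intermediate),
    "mlp.down_proj":    (intermediate, hidden),
}

# LoRA adds two low-rank factors per matrix: A (in x r) and B (r x out),
# i.e. r * (in + out) extra parameters per adapted projection.
per_layer = sum(rank * (i + o) for i, o in shapes.values())
total = per_layer * num_layers
print(f"{total / 1e6:.3f}M trainable LoRA parameters")  # -> 33.030M
```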
adapters-no-conscious/adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b244118c15173df07a5b29dcc31f2226e860544f25757b81639d3bce4a4ee2ce
+ size 132175803
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/config.yaml ADDED
@@ -0,0 +1,141 @@
+ _wandb:
+   value:
+     cli_version: 0.23.0
+     e:
+       1pxa79orri10yjkyhqiblp74t2dr1h2q:
+         apple:
+           ecpuCores: 4
+           gpuCores: 40
+           memoryGb: 64
+           name: Apple M3 Max
+           pcpuCores: 12
+           ramTotalBytes: "68719476736"
+           swapTotalBytes: "5368709120"
+         args:
+           - --config
+           - train.yaml
+         cpu_count: 16
+         cpu_count_logical: 16
+         disk:
+           /:
+             total: "1995218165760"
+             used: "1703338582016"
+         email: nathanbreslow@gmail.com
+         executable: /Users/natebreslow/miniconda3/envs/mlx-experiment/bin/python3.10
+         host: MacBook-Pro-135.local
+         memory:
+           total: "68719476736"
+         os: macOS-26.2-arm64-arm-64bit
+         program: /Users/natebreslow/miniconda3/envs/mlx-experiment/bin/mlx_lm.lora
+         python: CPython 3.10.19
+         root: adapters-no-conscious
+         startedAt: "2026-02-16T21:29:28.031847Z"
+         writerId: 1pxa79orri10yjkyhqiblp74t2dr1h2q
+     m: []
+     python_version: 3.10.19
+     t:
+       "1":
+         - 1
+         - 5
+         - 11
+         - 49
+         - 53
+         - 71
+       "2":
+         - 1
+         - 5
+         - 11
+         - 49
+         - 53
+         - 71
+       "3":
+         - 13
+         - 16
+         - 61
+       "4": 3.10.19
+       "5": 0.23.0
+       "6": 5.1.0
+       "8":
+         - 2
+       "12": 0.23.0
+       "13": darwin-arm64
+ adapter_path:
+   value: adapters-no-conscious
+ batch_size:
+   value: 1
+ config:
+   value: train.yaml
+ data:
+   value: training/no-conscious
+ fine_tune_type:
+   value: lora
+ grad_accumulation_steps:
+   value: 1
+ grad_checkpoint:
+   value: true
+ iters:
+   value: 300
+ learning_rate:
+   value: 0.0001
+ lora_parameters:
+   value:
+     dropout: 0
+     keys:
+       - self_attn.q_proj
+       - self_attn.v_proj
+       - self_attn.k_proj
+       - self_attn.o_proj
+       - mlp.gate_proj
+       - mlp.up_proj
+       - mlp.down_proj
+     rank: 16
+     scale: 2
+ lr_schedule:
+   value:
+     arguments:
+       - 0.0001
+       - 529
+       - 1e-05
+     name: cosine_decay
+     warmup: 10
+     warmup_init: 1e-05
+ mask_prompt:
+   value: false
+ max_seq_length:
+   value: 8192
+ model:
+   value: Qwen3-4B-Instruct-2507
+ num_layers:
+   value: 36
+ optimizer:
+   value: adam
+ optimizer_config:
+   value:
+     adam:
+       betas:
+         - 0.9
+         - 0.9999
+       bias_correction: true
+       eps: 1e-06
+ project_name:
+   value: conscious-finetuning
+ report_to:
+   value: wandb
+ resume_adapter_file:
+   value: null
+ save_every:
+   value: 100
+ seed:
+   value: 0
+ steps_per_eval:
+   value: 200
+ steps_per_report:
+   value: 1
+ test:
+   value: false
+ test_batches:
+   value: 100
+ train:
+   value: true
+ val_batches:
+   value: 0
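
The `lr_schedule` block encodes 10 steps of linear warmup from 1e-05 to the 1e-04 peak, then cosine decay over 529 steps toward a 1e-05 floor. A minimal sketch of that shape, mirroring how `mlx.optimizers.join_schedules` combines `linear_schedule` and `cosine_decay`; the exact step indexing is an assumption, and the trainer's reported learning rate can sit one step off this formula:

```python
import math

# Minimal sketch of the configured schedule, under the indexing
# assumption that the LR reported at iter N is the schedule at step N-1.
peak_lr, decay_steps, end_lr = 1e-4, 529, 1e-5   # lr_schedule.arguments
warmup, warmup_init = 10, 1e-5                   # warmup, warmup_init

def lr_at(step: int) -> float:
    if step < warmup:
        # linear warmup from warmup_init up to the peak rate
        return warmup_init + (peak_lr - warmup_init) * step / warmup
    # cosine decay from the peak toward the end_lr floor
    t = min(step - warmup, decay_steps)
    return end_lr + (peak_lr - end_lr) * 0.5 * (1 + math.cos(math.pi * t / decay_steps))

for it in (1, 2, 11, 300):  # compare with files/output.log
    print(f"iter {it}: {lr_at(it - 1):.3e}")
# -> 1.000e-05, 1.900e-05, 1.000e-04, ~4.85e-05
#    (the log reports 4.874e-05 at iter 300, one schedule step away)
```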
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/output.log ADDED
@@ -0,0 +1,315 @@
+ Loading pretrained model
+ Loading datasets
+ Training
+ Trainable parameters: 0.821% (33.030M/4022.468M)
+ Starting training..., iters: 300
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 1: Val loss nan, Val took 0.092s
+ Iter 1: Train loss 3.266, Learning Rate 1.000e-05, It/sec 0.219, Tokens/sec 219.918, Trained Tokens 1005, Peak mem 9.938 GB
+ Iter 2: Train loss 3.350, Learning Rate 1.900e-05, It/sec 0.240, Tokens/sec 248.127, Trained Tokens 2038, Peak mem 10.306 GB
+ Iter 3: Train loss 3.355, Learning Rate 2.800e-05, It/sec 0.346, Tokens/sec 263.123, Trained Tokens 2799, Peak mem 10.306 GB
+ Iter 4: Train loss 3.106, Learning Rate 3.700e-05, It/sec 0.238, Tokens/sec 254.304, Trained Tokens 3867, Peak mem 10.362 GB
+ Iter 5: Train loss 3.181, Learning Rate 4.600e-05, It/sec 0.251, Tokens/sec 263.035, Trained Tokens 4915, Peak mem 10.362 GB
+ Iter 6: Train loss 3.010, Learning Rate 5.500e-05, It/sec 0.400, Tokens/sec 245.998, Trained Tokens 5530, Peak mem 10.362 GB
+ Iter 7: Train loss 2.316, Learning Rate 6.400e-05, It/sec 0.673, Tokens/sec 254.437, Trained Tokens 5908, Peak mem 10.362 GB
+ Iter 8: Train loss 2.817, Learning Rate 7.300e-05, It/sec 0.313, Tokens/sec 252.434, Trained Tokens 6715, Peak mem 10.362 GB
+ Iter 9: Train loss 2.591, Learning Rate 8.200e-05, It/sec 0.351, Tokens/sec 262.384, Trained Tokens 7463, Peak mem 10.362 GB
+ Iter 10: Train loss 2.290, Learning Rate 9.100e-05, It/sec 0.281, Tokens/sec 243.767, Trained Tokens 8332, Peak mem 10.362 GB
+ Iter 11: Train loss 2.474, Learning Rate 1.000e-04, It/sec 0.668, Tokens/sec 224.391, Trained Tokens 8668, Peak mem 10.362 GB
+ Iter 12: Train loss 2.072, Learning Rate 1.000e-04, It/sec 0.260, Tokens/sec 235.524, Trained Tokens 9574, Peak mem 10.362 GB
+ Iter 13: Train loss 2.235, Learning Rate 1.000e-04, It/sec 0.416, Tokens/sec 255.720, Trained Tokens 10188, Peak mem 10.362 GB
+ Iter 14: Train loss 2.274, Learning Rate 1.000e-04, It/sec 0.575, Tokens/sec 234.092, Trained Tokens 10595, Peak mem 10.362 GB
+ Iter 15: Train loss 2.300, Learning Rate 9.999e-05, It/sec 0.615, Tokens/sec 250.212, Trained Tokens 11002, Peak mem 10.362 GB
+ Iter 16: Train loss 2.003, Learning Rate 9.999e-05, It/sec 0.263, Tokens/sec 250.729, Trained Tokens 11956, Peak mem 10.362 GB
+ Iter 17: Train loss 2.013, Learning Rate 9.998e-05, It/sec 0.325, Tokens/sec 266.512, Trained Tokens 12777, Peak mem 10.362 GB
+ Iter 18: Train loss 2.148, Learning Rate 9.997e-05, It/sec 0.220, Tokens/sec 246.550, Trained Tokens 13900, Peak mem 10.382 GB
+ Iter 19: Train loss 2.353, Learning Rate 9.996e-05, It/sec 0.215, Tokens/sec 236.448, Trained Tokens 14998, Peak mem 10.382 GB
+ Iter 20: Train loss 2.066, Learning Rate 9.995e-05, It/sec 0.237, Tokens/sec 227.481, Trained Tokens 15956, Peak mem 10.382 GB
+ Iter 21: Train loss 2.191, Learning Rate 9.994e-05, It/sec 0.366, Tokens/sec 240.958, Trained Tokens 16614, Peak mem 10.382 GB
+ Iter 22: Train loss 2.276, Learning Rate 9.992e-05, It/sec 0.422, Tokens/sec 259.892, Trained Tokens 17230, Peak mem 10.382 GB
+ Iter 23: Train loss 2.309, Learning Rate 9.990e-05, It/sec 0.400, Tokens/sec 226.283, Trained Tokens 17796, Peak mem 10.382 GB
+ Iter 24: Train loss 1.981, Learning Rate 9.989e-05, It/sec 0.459, Tokens/sec 250.640, Trained Tokens 18342, Peak mem 10.382 GB
+ Iter 25: Train loss 2.140, Learning Rate 9.987e-05, It/sec 0.157, Tokens/sec 234.702, Trained Tokens 19834, Peak mem 10.892 GB
+ Iter 26: Train loss 2.125, Learning Rate 9.984e-05, It/sec 0.421, Tokens/sec 241.228, Trained Tokens 20407, Peak mem 10.892 GB
+ Iter 27: Train loss 2.225, Learning Rate 9.982e-05, It/sec 0.410, Tokens/sec 217.909, Trained Tokens 20939, Peak mem 10.892 GB
+ Iter 28: Train loss 2.043, Learning Rate 9.980e-05, It/sec 0.712, Tokens/sec 267.674, Trained Tokens 21315, Peak mem 10.892 GB
+ Iter 29: Train loss 2.099, Learning Rate 9.977e-05, It/sec 0.287, Tokens/sec 241.418, Trained Tokens 22156, Peak mem 10.892 GB
+ Iter 30: Train loss 1.822, Learning Rate 9.974e-05, It/sec 0.296, Tokens/sec 216.552, Trained Tokens 22888, Peak mem 10.892 GB
+ Iter 31: Train loss 1.840, Learning Rate 9.971e-05, It/sec 0.240, Tokens/sec 259.414, Trained Tokens 23967, Peak mem 10.892 GB
+ Iter 32: Train loss 2.023, Learning Rate 9.968e-05, It/sec 0.147, Tokens/sec 232.849, Trained Tokens 25555, Peak mem 11.056 GB
+ Iter 33: Train loss 1.860, Learning Rate 9.965e-05, It/sec 0.191, Tokens/sec 242.945, Trained Tokens 26829, Peak mem 11.056 GB
+ Iter 34: Train loss 1.762, Learning Rate 9.962e-05, It/sec 0.324, Tokens/sec 239.890, Trained Tokens 27569, Peak mem 11.056 GB
+ Iter 35: Train loss 1.825, Learning Rate 9.958e-05, It/sec 0.192, Tokens/sec 234.307, Trained Tokens 28789, Peak mem 11.056 GB
+ Iter 36: Train loss 2.026, Learning Rate 9.954e-05, It/sec 0.468, Tokens/sec 256.133, Trained Tokens 29336, Peak mem 11.056 GB
+ Iter 37: Train loss 2.275, Learning Rate 9.950e-05, It/sec 0.318, Tokens/sec 236.437, Trained Tokens 30079, Peak mem 11.056 GB
+ Iter 38: Train loss 2.144, Learning Rate 9.946e-05, It/sec 0.286, Tokens/sec 240.817, Trained Tokens 30921, Peak mem 11.056 GB
+ Iter 39: Train loss 1.768, Learning Rate 9.942e-05, It/sec 0.200, Tokens/sec 246.751, Trained Tokens 32155, Peak mem 11.056 GB
+ Iter 40: Train loss 1.752, Learning Rate 9.938e-05, It/sec 0.391, Tokens/sec 230.057, Trained Tokens 32743, Peak mem 11.056 GB
+ Iter 41: Train loss 1.954, Learning Rate 9.933e-05, It/sec 0.468, Tokens/sec 256.987, Trained Tokens 33292, Peak mem 11.056 GB
+ Iter 42: Train loss 2.077, Learning Rate 9.929e-05, It/sec 0.380, Tokens/sec 250.363, Trained Tokens 33950, Peak mem 11.056 GB
+ Iter 43: Train loss 1.874, Learning Rate 9.924e-05, It/sec 0.419, Tokens/sec 223.902, Trained Tokens 34485, Peak mem 11.056 GB
+ Iter 44: Train loss 1.515, Learning Rate 9.919e-05, It/sec 0.656, Tokens/sec 242.694, Trained Tokens 34855, Peak mem 11.056 GB
+ Iter 45: Train loss 1.799, Learning Rate 9.914e-05, It/sec 0.170, Tokens/sec 231.035, Trained Tokens 36217, Peak mem 11.056 GB
+ Iter 46: Train loss 1.916, Learning Rate 9.909e-05, It/sec 0.261, Tokens/sec 230.036, Trained Tokens 37097, Peak mem 11.056 GB
+ Iter 47: Train loss 1.826, Learning Rate 9.903e-05, It/sec 0.227, Tokens/sec 246.707, Trained Tokens 38182, Peak mem 11.056 GB
+ Iter 48: Train loss 1.712, Learning Rate 9.898e-05, It/sec 0.591, Tokens/sec 234.633, Trained Tokens 38579, Peak mem 11.056 GB
+ Iter 49: Train loss 1.607, Learning Rate 9.892e-05, It/sec 0.261, Tokens/sec 236.786, Trained Tokens 39487, Peak mem 11.056 GB
+ Iter 50: Train loss 1.877, Learning Rate 9.886e-05, It/sec 0.171, Tokens/sec 233.724, Trained Tokens 40854, Peak mem 11.056 GB
+ Iter 51: Train loss 1.789, Learning Rate 9.880e-05, It/sec 0.272, Tokens/sec 240.953, Trained Tokens 41740, Peak mem 11.056 GB
+ Iter 52: Train loss 1.733, Learning Rate 9.874e-05, It/sec 0.321, Tokens/sec 243.384, Trained Tokens 42498, Peak mem 11.056 GB
+ Iter 53: Train loss 1.754, Learning Rate 9.867e-05, It/sec 0.350, Tokens/sec 239.676, Trained Tokens 43183, Peak mem 11.056 GB
+ Iter 54: Train loss 1.602, Learning Rate 9.861e-05, It/sec 0.216, Tokens/sec 229.402, Trained Tokens 44243, Peak mem 11.056 GB
+ Iter 55: Train loss 1.551, Learning Rate 9.854e-05, It/sec 0.378, Tokens/sec 224.393, Trained Tokens 44837, Peak mem 11.056 GB
+ Iter 56: Train loss 1.662, Learning Rate 9.847e-05, It/sec 0.197, Tokens/sec 234.022, Trained Tokens 46027, Peak mem 11.056 GB
+ Iter 57: Train loss 1.501, Learning Rate 9.840e-05, It/sec 0.412, Tokens/sec 239.261, Trained Tokens 46608, Peak mem 11.056 GB
+ Iter 58: Train loss 1.967, Learning Rate 9.833e-05, It/sec 0.140, Tokens/sec 223.884, Trained Tokens 48212, Peak mem 11.112 GB
+ Iter 59: Train loss 1.721, Learning Rate 9.826e-05, It/sec 0.157, Tokens/sec 209.558, Trained Tokens 49545, Peak mem 11.112 GB
+ Iter 60: Train loss 1.595, Learning Rate 9.818e-05, It/sec 0.327, Tokens/sec 242.664, Trained Tokens 50286, Peak mem 11.112 GB
+ Iter 61: Train loss 1.616, Learning Rate 9.811e-05, It/sec 0.201, Tokens/sec 220.165, Trained Tokens 51379, Peak mem 11.112 GB
+ Iter 62: Train loss 1.658, Learning Rate 9.803e-05, It/sec 0.220, Tokens/sec 243.288, Trained Tokens 52484, Peak mem 11.112 GB
+ Iter 63: Train loss 1.721, Learning Rate 9.795e-05, It/sec 0.291, Tokens/sec 200.028, Trained Tokens 53172, Peak mem 11.112 GB
+ Iter 64: Train loss 1.495, Learning Rate 9.787e-05, It/sec 0.231, Tokens/sec 194.516, Trained Tokens 54014, Peak mem 11.112 GB
+ Iter 65: Train loss 1.803, Learning Rate 9.779e-05, It/sec 0.247, Tokens/sec 184.196, Trained Tokens 54759, Peak mem 11.112 GB
+ Iter 66: Train loss 1.587, Learning Rate 9.771e-05, It/sec 0.148, Tokens/sec 141.844, Trained Tokens 55715, Peak mem 11.112 GB
+ Iter 67: Train loss 1.564, Learning Rate 9.762e-05, It/sec 0.075, Tokens/sec 87.031, Trained Tokens 56882, Peak mem 11.112 GB
+ Iter 68: Train loss 1.527, Learning Rate 9.753e-05, It/sec 0.043, Tokens/sec 27.505, Trained Tokens 57527, Peak mem 11.112 GB
+ Iter 69: Train loss 2.127, Learning Rate 9.745e-05, It/sec 0.092, Tokens/sec 32.925, Trained Tokens 57883, Peak mem 11.112 GB
+ Iter 70: Train loss 1.562, Learning Rate 9.736e-05, It/sec 0.094, Tokens/sec 31.632, Trained Tokens 58218, Peak mem 11.112 GB
+ Iter 71: Train loss 1.813, Learning Rate 9.727e-05, It/sec 0.043, Tokens/sec 19.349, Trained Tokens 58670, Peak mem 11.112 GB
+ Iter 72: Train loss 1.748, Learning Rate 9.717e-05, It/sec 0.030, Tokens/sec 37.120, Trained Tokens 59898, Peak mem 11.112 GB
+ Iter 73: Train loss 1.681, Learning Rate 9.708e-05, It/sec 0.122, Tokens/sec 47.251, Trained Tokens 60286, Peak mem 11.112 GB
+ Iter 74: Train loss 1.455, Learning Rate 9.698e-05, It/sec 0.066, Tokens/sec 57.059, Trained Tokens 61149, Peak mem 11.112 GB
+ Iter 75: Train loss 1.722, Learning Rate 9.689e-05, It/sec 0.115, Tokens/sec 49.134, Trained Tokens 61576, Peak mem 11.112 GB
+ Iter 76: Train loss 1.652, Learning Rate 9.679e-05, It/sec 0.110, Tokens/sec 50.283, Trained Tokens 62034, Peak mem 11.112 GB
+ Iter 77: Train loss 1.658, Learning Rate 9.669e-05, It/sec 0.100, Tokens/sec 85.424, Trained Tokens 62885, Peak mem 11.112 GB
+ Iter 78: Train loss 1.717, Learning Rate 9.659e-05, It/sec 0.207, Tokens/sec 94.479, Trained Tokens 63341, Peak mem 11.112 GB
+ Iter 79: Train loss 1.588, Learning Rate 9.648e-05, It/sec 0.078, Tokens/sec 89.893, Trained Tokens 64491, Peak mem 11.112 GB
+ Iter 80: Train loss 1.868, Learning Rate 9.638e-05, It/sec 0.187, Tokens/sec 122.665, Trained Tokens 65147, Peak mem 11.112 GB
+ Iter 81: Train loss 1.517, Learning Rate 9.627e-05, It/sec 0.204, Tokens/sec 77.216, Trained Tokens 65526, Peak mem 11.112 GB
+ Iter 82: Train loss 1.905, Learning Rate 9.617e-05, It/sec 0.213, Tokens/sec 101.348, Trained Tokens 66001, Peak mem 11.112 GB
+ Iter 83: Train loss 1.599, Learning Rate 9.606e-05, It/sec 0.241, Tokens/sec 132.157, Trained Tokens 66550, Peak mem 11.112 GB
+ Iter 84: Train loss 1.636, Learning Rate 9.595e-05, It/sec 0.187, Tokens/sec 129.104, Trained Tokens 67242, Peak mem 11.112 GB
+ Iter 85: Train loss 1.817, Learning Rate 9.584e-05, It/sec 0.146, Tokens/sec 146.165, Trained Tokens 68246, Peak mem 11.112 GB
+ Iter 86: Train loss 1.615, Learning Rate 9.572e-05, It/sec 0.207, Tokens/sec 98.525, Trained Tokens 68722, Peak mem 11.112 GB
+ Iter 87: Train loss 1.647, Learning Rate 9.561e-05, It/sec 0.243, Tokens/sec 96.177, Trained Tokens 69118, Peak mem 11.112 GB
+ Iter 88: Train loss 1.945, Learning Rate 9.549e-05, It/sec 0.206, Tokens/sec 145.374, Trained Tokens 69822, Peak mem 11.112 GB
+ Iter 89: Train loss 1.672, Learning Rate 9.538e-05, It/sec 0.122, Tokens/sec 149.662, Trained Tokens 71049, Peak mem 11.112 GB
+ Iter 90: Train loss 1.880, Learning Rate 9.526e-05, It/sec 0.099, Tokens/sec 157.787, Trained Tokens 72645, Peak mem 11.112 GB
+ Iter 91: Train loss 1.622, Learning Rate 9.514e-05, It/sec 0.162, Tokens/sec 157.416, Trained Tokens 73617, Peak mem 11.112 GB
+ Iter 92: Train loss 1.917, Learning Rate 9.502e-05, It/sec 0.112, Tokens/sec 130.533, Trained Tokens 74779, Peak mem 11.112 GB
+ Iter 93: Train loss 1.387, Learning Rate 9.489e-05, It/sec 0.255, Tokens/sec 149.822, Trained Tokens 75367, Peak mem 11.112 GB
+ Iter 94: Train loss 1.678, Learning Rate 9.477e-05, It/sec 0.292, Tokens/sec 180.344, Trained Tokens 75985, Peak mem 11.112 GB
+ Iter 95: Train loss 1.899, Learning Rate 9.464e-05, It/sec 0.120, Tokens/sec 143.552, Trained Tokens 77186, Peak mem 11.112 GB
+ Iter 96: Train loss 1.741, Learning Rate 9.452e-05, It/sec 0.131, Tokens/sec 185.357, Trained Tokens 78598, Peak mem 11.112 GB
+ Iter 97: Train loss 1.723, Learning Rate 9.439e-05, It/sec 0.347, Tokens/sec 201.097, Trained Tokens 79178, Peak mem 11.112 GB
+ Iter 98: Train loss 1.628, Learning Rate 9.426e-05, It/sec 0.527, Tokens/sec 208.151, Trained Tokens 79573, Peak mem 11.112 GB
+ Iter 99: Train loss 1.665, Learning Rate 9.413e-05, It/sec 0.206, Tokens/sec 190.142, Trained Tokens 80496, Peak mem 11.112 GB
+ Iter 100: Train loss 1.576, Learning Rate 9.399e-05, It/sec 0.163, Tokens/sec 186.258, Trained Tokens 81641, Peak mem 11.112 GB
+ Iter 100: Saved adapter weights to adapters-no-conscious/adapters.safetensors and adapters-no-conscious/0000100_adapters.safetensors.
+ Iter 101: Train loss 1.577, Learning Rate 9.386e-05, It/sec 0.233, Tokens/sec 182.641, Trained Tokens 82424, Peak mem 11.112 GB
+ Iter 102: Train loss 1.413, Learning Rate 9.372e-05, It/sec 0.253, Tokens/sec 159.937, Trained Tokens 83056, Peak mem 11.112 GB
+ Iter 103: Train loss 1.551, Learning Rate 9.359e-05, It/sec 0.342, Tokens/sec 212.738, Trained Tokens 83678, Peak mem 11.112 GB
+ Iter 104: Train loss 1.429, Learning Rate 9.345e-05, It/sec 0.269, Tokens/sec 199.412, Trained Tokens 84420, Peak mem 11.112 GB
+ Iter 105: Train loss 1.795, Learning Rate 9.331e-05, It/sec 0.095, Tokens/sec 167.315, Trained Tokens 86183, Peak mem 11.523 GB
+ Iter 106: Train loss 1.834, Learning Rate 9.317e-05, It/sec 0.152, Tokens/sec 186.369, Trained Tokens 87407, Peak mem 11.523 GB
+ Iter 107: Train loss 1.867, Learning Rate 9.303e-05, It/sec 0.153, Tokens/sec 170.852, Trained Tokens 88521, Peak mem 11.523 GB
+ Iter 108: Train loss 1.496, Learning Rate 9.288e-05, It/sec 0.274, Tokens/sec 200.490, Trained Tokens 89254, Peak mem 11.523 GB
+ Iter 109: Train loss 1.399, Learning Rate 9.274e-05, It/sec 0.295, Tokens/sec 211.696, Trained Tokens 89971, Peak mem 11.523 GB
+ Iter 110: Train loss 1.453, Learning Rate 9.259e-05, It/sec 0.218, Tokens/sec 204.588, Trained Tokens 90908, Peak mem 11.523 GB
+ Iter 111: Train loss 1.571, Learning Rate 9.244e-05, It/sec 0.212, Tokens/sec 230.253, Trained Tokens 91994, Peak mem 11.523 GB
+ Iter 112: Train loss 1.840, Learning Rate 9.230e-05, It/sec 0.145, Tokens/sec 214.957, Trained Tokens 93478, Peak mem 11.523 GB
+ Iter 113: Train loss 1.891, Learning Rate 9.214e-05, It/sec 0.341, Tokens/sec 238.520, Trained Tokens 94178, Peak mem 11.523 GB
+ Iter 114: Train loss 1.726, Learning Rate 9.199e-05, It/sec 0.428, Tokens/sec 227.081, Trained Tokens 94709, Peak mem 11.523 GB
+ Iter 115: Train loss 1.664, Learning Rate 9.184e-05, It/sec 0.387, Tokens/sec 235.376, Trained Tokens 95317, Peak mem 11.523 GB
+ Iter 116: Train loss 1.882, Learning Rate 9.169e-05, It/sec 0.330, Tokens/sec 220.575, Trained Tokens 95985, Peak mem 11.523 GB
+ Iter 117: Train loss 1.673, Learning Rate 9.153e-05, It/sec 0.252, Tokens/sec 227.758, Trained Tokens 96890, Peak mem 11.523 GB
+ Iter 118: Train loss 1.620, Learning Rate 9.137e-05, It/sec 0.249, Tokens/sec 227.021, Trained Tokens 97801, Peak mem 11.523 GB
+ Iter 119: Train loss 1.758, Learning Rate 9.122e-05, It/sec 0.294, Tokens/sec 233.443, Trained Tokens 98594, Peak mem 11.523 GB
+ Iter 120: Train loss 1.349, Learning Rate 9.106e-05, It/sec 0.390, Tokens/sec 236.422, Trained Tokens 99200, Peak mem 11.523 GB
+ Iter 121: Train loss 1.705, Learning Rate 9.090e-05, It/sec 0.182, Tokens/sec 227.278, Trained Tokens 100447, Peak mem 11.523 GB
+ Iter 122: Train loss 1.416, Learning Rate 9.073e-05, It/sec 0.218, Tokens/sec 228.948, Trained Tokens 101498, Peak mem 11.523 GB
+ Iter 123: Train loss 1.724, Learning Rate 9.057e-05, It/sec 0.133, Tokens/sec 206.255, Trained Tokens 103047, Peak mem 11.523 GB
+ Iter 124: Train loss 1.546, Learning Rate 9.041e-05, It/sec 0.202, Tokens/sec 203.202, Trained Tokens 104055, Peak mem 11.523 GB
+ Iter 125: Train loss 1.390, Learning Rate 9.024e-05, It/sec 0.190, Tokens/sec 213.422, Trained Tokens 105181, Peak mem 11.523 GB
+ Iter 126: Train loss 1.609, Learning Rate 9.008e-05, It/sec 0.245, Tokens/sec 206.548, Trained Tokens 106024, Peak mem 11.523 GB
+ Iter 127: Train loss 1.376, Learning Rate 8.991e-05, It/sec 0.386, Tokens/sec 208.927, Trained Tokens 106565, Peak mem 11.523 GB
+ Iter 128: Train loss 1.646, Learning Rate 8.974e-05, It/sec 0.151, Tokens/sec 214.073, Trained Tokens 107987, Peak mem 11.523 GB
+ Iter 129: Train loss 1.422, Learning Rate 8.957e-05, It/sec 0.691, Tokens/sec 192.186, Trained Tokens 108265, Peak mem 11.523 GB
+ Iter 130: Train loss 1.371, Learning Rate 8.940e-05, It/sec 0.397, Tokens/sec 216.587, Trained Tokens 108810, Peak mem 11.523 GB
+ Iter 131: Train loss 1.755, Learning Rate 8.922e-05, It/sec 0.377, Tokens/sec 220.674, Trained Tokens 109396, Peak mem 11.523 GB
+ Iter 132: Train loss 1.267, Learning Rate 8.905e-05, It/sec 0.469, Tokens/sec 222.334, Trained Tokens 109870, Peak mem 11.523 GB
+ Iter 133: Train loss 1.179, Learning Rate 8.887e-05, It/sec 0.332, Tokens/sec 230.046, Trained Tokens 110562, Peak mem 11.523 GB
+ Iter 134: Train loss 1.440, Learning Rate 8.870e-05, It/sec 0.194, Tokens/sec 220.979, Trained Tokens 111701, Peak mem 11.523 GB
+ Iter 135: Train loss 1.830, Learning Rate 8.852e-05, It/sec 0.407, Tokens/sec 224.426, Trained Tokens 112253, Peak mem 11.523 GB
+ Iter 136: Train loss 1.675, Learning Rate 8.834e-05, It/sec 0.216, Tokens/sec 216.451, Trained Tokens 113253, Peak mem 11.523 GB
+ Iter 137: Train loss 1.743, Learning Rate 8.816e-05, It/sec 0.271, Tokens/sec 219.595, Trained Tokens 114063, Peak mem 11.523 GB
+ Iter 138: Train loss 1.506, Learning Rate 8.798e-05, It/sec 0.415, Tokens/sec 200.878, Trained Tokens 114547, Peak mem 11.523 GB
+ Iter 139: Train loss 1.261, Learning Rate 8.780e-05, It/sec 0.605, Tokens/sec 175.593, Trained Tokens 114837, Peak mem 11.523 GB
+ Iter 140: Train loss 1.812, Learning Rate 8.761e-05, It/sec 0.370, Tokens/sec 202.121, Trained Tokens 115383, Peak mem 11.523 GB
+ Iter 141: Train loss 1.809, Learning Rate 8.743e-05, It/sec 0.148, Tokens/sec 195.010, Trained Tokens 116699, Peak mem 11.523 GB
+ Iter 142: Train loss 1.356, Learning Rate 8.724e-05, It/sec 0.205, Tokens/sec 191.224, Trained Tokens 117633, Peak mem 11.523 GB
+ Iter 143: Train loss 1.501, Learning Rate 8.706e-05, It/sec 0.350, Tokens/sec 197.990, Trained Tokens 118198, Peak mem 11.523 GB
+ Iter 144: Train loss 1.337, Learning Rate 8.687e-05, It/sec 0.352, Tokens/sec 182.877, Trained Tokens 118717, Peak mem 11.523 GB
+ Iter 145: Train loss 1.760, Learning Rate 8.668e-05, It/sec 0.222, Tokens/sec 170.355, Trained Tokens 119485, Peak mem 11.523 GB
+ Iter 146: Train loss 1.256, Learning Rate 8.649e-05, It/sec 0.281, Tokens/sec 154.556, Trained Tokens 120036, Peak mem 11.523 GB
+ Iter 147: Train loss 1.573, Learning Rate 8.630e-05, It/sec 0.098, Tokens/sec 122.408, Trained Tokens 121290, Peak mem 11.523 GB
+ Iter 148: Train loss 1.631, Learning Rate 8.610e-05, It/sec 0.104, Tokens/sec 106.299, Trained Tokens 122310, Peak mem 11.523 GB
+ Iter 149: Train loss 1.128, Learning Rate 8.591e-05, It/sec 0.216, Tokens/sec 105.823, Trained Tokens 122800, Peak mem 11.523 GB
+ Iter 150: Train loss 1.545, Learning Rate 8.571e-05, It/sec 0.086, Tokens/sec 95.748, Trained Tokens 123913, Peak mem 11.523 GB
+ Iter 151: Train loss 1.531, Learning Rate 8.552e-05, It/sec 0.055, Tokens/sec 69.971, Trained Tokens 125190, Peak mem 11.523 GB
+ Iter 152: Train loss 1.321, Learning Rate 8.532e-05, It/sec 0.074, Tokens/sec 74.891, Trained Tokens 126198, Peak mem 11.523 GB
+ Iter 153: Train loss 1.601, Learning Rate 8.512e-05, It/sec 0.035, Tokens/sec 43.266, Trained Tokens 127435, Peak mem 11.523 GB
+ Iter 154: Train loss 1.501, Learning Rate 8.493e-05, It/sec 0.031, Tokens/sec 33.931, Trained Tokens 128531, Peak mem 11.523 GB
+ Iter 155: Train loss 1.503, Learning Rate 8.472e-05, It/sec 0.033, Tokens/sec 23.544, Trained Tokens 129240, Peak mem 11.523 GB
+ Iter 156: Train loss 1.382, Learning Rate 8.452e-05, It/sec 0.037, Tokens/sec 18.347, Trained Tokens 129732, Peak mem 11.523 GB
+ Iter 157: Train loss 1.496, Learning Rate 8.432e-05, It/sec 0.050, Tokens/sec 34.758, Trained Tokens 130423, Peak mem 11.523 GB
+ Iter 158: Train loss 1.478, Learning Rate 8.412e-05, It/sec 0.127, Tokens/sec 50.982, Trained Tokens 130825, Peak mem 11.523 GB
+ Iter 159: Train loss 1.377, Learning Rate 8.391e-05, It/sec 0.056, Tokens/sec 45.133, Trained Tokens 131630, Peak mem 11.523 GB
+ Iter 160: Train loss 1.693, Learning Rate 8.371e-05, It/sec 0.041, Tokens/sec 70.636, Trained Tokens 133352, Peak mem 11.523 GB
+ Iter 161: Train loss 1.479, Learning Rate 8.350e-05, It/sec 0.087, Tokens/sec 72.996, Trained Tokens 134187, Peak mem 11.523 GB
+ Iter 162: Train loss 1.462, Learning Rate 8.330e-05, It/sec 0.075, Tokens/sec 81.775, Trained Tokens 135278, Peak mem 11.523 GB
+ Iter 163: Train loss 1.426, Learning Rate 8.309e-05, It/sec 0.172, Tokens/sec 95.708, Trained Tokens 135833, Peak mem 11.523 GB
+ Iter 164: Train loss 1.594, Learning Rate 8.288e-05, It/sec 0.294, Tokens/sec 133.961, Trained Tokens 136289, Peak mem 11.523 GB
+ Iter 165: Train loss 1.490, Learning Rate 8.267e-05, It/sec 0.110, Tokens/sec 107.939, Trained Tokens 137266, Peak mem 11.523 GB
+ Iter 166: Train loss 1.313, Learning Rate 8.246e-05, It/sec 0.267, Tokens/sec 142.797, Trained Tokens 137800, Peak mem 11.523 GB
+ Iter 167: Train loss 1.614, Learning Rate 8.224e-05, It/sec 0.138, Tokens/sec 154.017, Trained Tokens 138915, Peak mem 11.523 GB
+ Iter 168: Train loss 1.413, Learning Rate 8.203e-05, It/sec 0.183, Tokens/sec 167.875, Trained Tokens 139831, Peak mem 11.523 GB
+ Iter 169: Train loss 1.576, Learning Rate 8.182e-05, It/sec 0.176, Tokens/sec 180.507, Trained Tokens 140854, Peak mem 11.523 GB
+ Iter 170: Train loss 1.408, Learning Rate 8.160e-05, It/sec 0.079, Tokens/sec 108.250, Trained Tokens 142228, Peak mem 11.523 GB
+ Iter 171: Train loss 1.378, Learning Rate 8.139e-05, It/sec 0.206, Tokens/sec 112.317, Trained Tokens 142772, Peak mem 11.523 GB
+ Iter 172: Train loss 1.396, Learning Rate 8.117e-05, It/sec 0.211, Tokens/sec 102.739, Trained Tokens 143259, Peak mem 11.523 GB
+ Iter 173: Train loss 1.737, Learning Rate 8.095e-05, It/sec 0.116, Tokens/sec 167.476, Trained Tokens 144709, Peak mem 11.523 GB
+ Iter 174: Train loss 1.353, Learning Rate 8.073e-05, It/sec 0.307, Tokens/sec 207.220, Trained Tokens 145383, Peak mem 11.523 GB
+ Iter 175: Train loss 1.529, Learning Rate 8.051e-05, It/sec 0.133, Tokens/sec 160.295, Trained Tokens 146586, Peak mem 11.523 GB
+ Iter 176: Train loss 1.391, Learning Rate 8.029e-05, It/sec 0.465, Tokens/sec 128.737, Trained Tokens 146863, Peak mem 11.523 GB
+ Iter 177: Train loss 1.457, Learning Rate 8.007e-05, It/sec 0.084, Tokens/sec 126.771, Trained Tokens 148365, Peak mem 11.523 GB
+ Iter 178: Train loss 1.353, Learning Rate 7.985e-05, It/sec 0.291, Tokens/sec 206.570, Trained Tokens 149076, Peak mem 11.523 GB
+ Iter 179: Train loss 1.487, Learning Rate 7.962e-05, It/sec 0.219, Tokens/sec 165.554, Trained Tokens 149831, Peak mem 11.523 GB
+ Iter 180: Train loss 1.584, Learning Rate 7.940e-05, It/sec 0.165, Tokens/sec 181.746, Trained Tokens 150931, Peak mem 11.523 GB
+ Iter 181: Train loss 1.671, Learning Rate 7.918e-05, It/sec 0.130, Tokens/sec 156.821, Trained Tokens 152139, Peak mem 11.523 GB
+ Iter 182: Train loss 1.428, Learning Rate 7.895e-05, It/sec 0.224, Tokens/sec 195.781, Trained Tokens 153014, Peak mem 11.523 GB
+ Iter 183: Train loss 1.370, Learning Rate 7.872e-05, It/sec 0.237, Tokens/sec 186.459, Trained Tokens 153802, Peak mem 11.523 GB
+ Iter 184: Train loss 1.282, Learning Rate 7.850e-05, It/sec 0.427, Tokens/sec 180.728, Trained Tokens 154225, Peak mem 11.523 GB
+ Iter 185: Train loss 1.772, Learning Rate 7.827e-05, It/sec 0.070, Tokens/sec 143.993, Trained Tokens 156274, Peak mem 11.950 GB
+ Iter 186: Train loss 1.437, Learning Rate 7.804e-05, It/sec 0.143, Tokens/sec 147.912, Trained Tokens 157307, Peak mem 11.950 GB
+ Iter 187: Train loss 1.466, Learning Rate 7.781e-05, It/sec 0.268, Tokens/sec 183.787, Trained Tokens 157994, Peak mem 11.950 GB
+ Iter 188: Train loss 1.426, Learning Rate 7.758e-05, It/sec 0.171, Tokens/sec 169.965, Trained Tokens 158990, Peak mem 11.950 GB
+ Iter 189: Train loss 1.231, Learning Rate 7.735e-05, It/sec 0.482, Tokens/sec 232.435, Trained Tokens 159472, Peak mem 11.950 GB
+ Iter 190: Train loss 1.621, Learning Rate 7.711e-05, It/sec 0.253, Tokens/sec 240.083, Trained Tokens 160422, Peak mem 11.950 GB
+ Iter 191: Train loss 1.260, Learning Rate 7.688e-05, It/sec 0.539, Tokens/sec 186.390, Trained Tokens 160768, Peak mem 11.950 GB
+ Iter 192: Train loss 1.369, Learning Rate 7.665e-05, It/sec 0.269, Tokens/sec 194.538, Trained Tokens 161491, Peak mem 11.950 GB
+ Iter 193: Train loss 1.317, Learning Rate 7.641e-05, It/sec 0.681, Tokens/sec 215.039, Trained Tokens 161807, Peak mem 11.950 GB
+ Iter 194: Train loss 1.548, Learning Rate 7.618e-05, It/sec 0.543, Tokens/sec 217.193, Trained Tokens 162207, Peak mem 11.950 GB
+ Iter 195: Train loss 1.583, Learning Rate 7.594e-05, It/sec 0.329, Tokens/sec 174.396, Trained Tokens 162737, Peak mem 11.950 GB
+ Iter 196: Train loss 1.129, Learning Rate 7.570e-05, It/sec 0.528, Tokens/sec 182.251, Trained Tokens 163082, Peak mem 11.950 GB
+ Iter 197: Train loss 1.672, Learning Rate 7.547e-05, It/sec 0.128, Tokens/sec 176.211, Trained Tokens 164454, Peak mem 11.950 GB
+ Iter 198: Train loss 1.453, Learning Rate 7.523e-05, It/sec 0.188, Tokens/sec 206.440, Trained Tokens 165553, Peak mem 11.950 GB
+ Iter 199: Train loss 1.841, Learning Rate 7.499e-05, It/sec 0.373, Tokens/sec 202.032, Trained Tokens 166095, Peak mem 11.950 GB
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 200: Val loss nan, Val took 0.014s
+ Iter 200: Train loss 1.288, Learning Rate 7.475e-05, It/sec 0.254, Tokens/sec 188.504, Trained Tokens 166836, Peak mem 11.950 GB
+ Iter 200: Saved adapter weights to adapters-no-conscious/adapters.safetensors and adapters-no-conscious/0000200_adapters.safetensors.
+ Iter 201: Train loss 1.606, Learning Rate 7.451e-05, It/sec 0.191, Tokens/sec 209.546, Trained Tokens 167933, Peak mem 11.950 GB
+ Iter 202: Train loss 1.491, Learning Rate 7.427e-05, It/sec 0.141, Tokens/sec 166.589, Trained Tokens 169111, Peak mem 11.950 GB
+ Iter 203: Train loss 1.611, Learning Rate 7.402e-05, It/sec 0.253, Tokens/sec 226.023, Trained Tokens 170005, Peak mem 11.950 GB
+ Iter 204: Train loss 1.357, Learning Rate 7.378e-05, It/sec 0.207, Tokens/sec 162.206, Trained Tokens 170789, Peak mem 11.950 GB
+ Iter 205: Train loss 1.293, Learning Rate 7.354e-05, It/sec 0.361, Tokens/sec 164.760, Trained Tokens 171246, Peak mem 11.950 GB
+ Iter 206: Train loss 1.249, Learning Rate 7.329e-05, It/sec 0.383, Tokens/sec 146.757, Trained Tokens 171629, Peak mem 11.950 GB
+ Iter 207: Train loss 1.668, Learning Rate 7.305e-05, It/sec 0.108, Tokens/sec 151.778, Trained Tokens 173031, Peak mem 11.950 GB
+ Iter 208: Train loss 1.303, Learning Rate 7.281e-05, It/sec 0.294, Tokens/sec 206.248, Trained Tokens 173733, Peak mem 11.950 GB
+ Iter 209: Train loss 1.348, Learning Rate 7.256e-05, It/sec 0.134, Tokens/sec 164.237, Trained Tokens 174955, Peak mem 11.950 GB
+ Iter 210: Train loss 1.280, Learning Rate 7.231e-05, It/sec 0.225, Tokens/sec 205.617, Trained Tokens 175868, Peak mem 11.950 GB
+ Iter 211: Train loss 1.477, Learning Rate 7.207e-05, It/sec 0.189, Tokens/sec 198.757, Trained Tokens 176921, Peak mem 11.950 GB
+ Iter 212: Train loss 1.398, Learning Rate 7.182e-05, It/sec 0.463, Tokens/sec 217.025, Trained Tokens 177390, Peak mem 11.950 GB
+ Iter 213: Train loss 1.565, Learning Rate 7.157e-05, It/sec 0.239, Tokens/sec 199.203, Trained Tokens 178224, Peak mem 11.950 GB
+ Iter 214: Train loss 1.625, Learning Rate 7.132e-05, It/sec 0.236, Tokens/sec 197.087, Trained Tokens 179060, Peak mem 11.950 GB
+ Iter 215: Train loss 1.545, Learning Rate 7.107e-05, It/sec 0.112, Tokens/sec 155.985, Trained Tokens 180454, Peak mem 11.950 GB
+ Iter 216: Train loss 1.549, Learning Rate 7.082e-05, It/sec 0.128, Tokens/sec 175.554, Trained Tokens 181825, Peak mem 11.950 GB
+ Iter 217: Train loss 1.496, Learning Rate 7.057e-05, It/sec 0.227, Tokens/sec 177.968, Trained Tokens 182609, Peak mem 11.950 GB
+ Iter 218: Train loss 1.284, Learning Rate 7.032e-05, It/sec 0.480, Tokens/sec 179.112, Trained Tokens 182982, Peak mem 11.950 GB
+ Iter 219: Train loss 1.508, Learning Rate 7.007e-05, It/sec 0.137, Tokens/sec 151.157, Trained Tokens 184089, Peak mem 11.950 GB
+ Iter 220: Train loss 1.652, Learning Rate 6.982e-05, It/sec 0.078, Tokens/sec 121.008, Trained Tokens 185646, Peak mem 11.950 GB
+ Iter 221: Train loss 1.622, Learning Rate 6.956e-05, It/sec 0.078, Tokens/sec 102.157, Trained Tokens 186960, Peak mem 11.950 GB
+ Iter 222: Train loss 1.272, Learning Rate 6.931e-05, It/sec 0.099, Tokens/sec 76.530, Trained Tokens 187732, Peak mem 11.950 GB
+ Iter 223: Train loss 1.494, Learning Rate 6.906e-05, It/sec 0.101, Tokens/sec 85.635, Trained Tokens 188577, Peak mem 11.950 GB
+ Iter 224: Train loss 1.513, Learning Rate 6.880e-05, It/sec 0.262, Tokens/sec 70.340, Trained Tokens 188845, Peak mem 11.950 GB
+ Iter 225: Train loss 1.542, Learning Rate 6.855e-05, It/sec 0.045, Tokens/sec 44.356, Trained Tokens 189839, Peak mem 11.950 GB
+ Iter 226: Train loss 1.178, Learning Rate 6.829e-05, It/sec 0.056, Tokens/sec 26.405, Trained Tokens 190312, Peak mem 11.950 GB
+ Iter 227: Train loss 1.705, Learning Rate 6.804e-05, It/sec 0.023, Tokens/sec 18.246, Trained Tokens 191099, Peak mem 11.950 GB
+ Iter 228: Train loss 1.367, Learning Rate 6.778e-05, It/sec 0.041, Tokens/sec 36.523, Trained Tokens 191988, Peak mem 11.950 GB
+ Iter 229: Train loss 1.317, Learning Rate 6.753e-05, It/sec 0.065, Tokens/sec 30.032, Trained Tokens 192449, Peak mem 11.950 GB
+ Iter 230: Train loss 1.455, Learning Rate 6.727e-05, It/sec 0.042, Tokens/sec 37.031, Trained Tokens 193330, Peak mem 11.950 GB
+ Iter 231: Train loss 1.349, Learning Rate 6.701e-05, It/sec 0.063, Tokens/sec 30.668, Trained Tokens 193817, Peak mem 11.950 GB
+ Iter 232: Train loss 1.304, Learning Rate 6.675e-05, It/sec 0.051, Tokens/sec 33.387, Trained Tokens 194468, Peak mem 11.950 GB
+ Iter 233: Train loss 1.583, Learning Rate 6.650e-05, It/sec 0.031, Tokens/sec 36.799, Trained Tokens 195645, Peak mem 11.950 GB
+ Iter 234: Train loss 1.293, Learning Rate 6.624e-05, It/sec 0.073, Tokens/sec 40.897, Trained Tokens 196209, Peak mem 11.950 GB
+ Iter 235: Train loss 1.624, Learning Rate 6.598e-05, It/sec 0.049, Tokens/sec 52.782, Trained Tokens 197279, Peak mem 11.950 GB
+ Iter 236: Train loss 1.370, Learning Rate 6.572e-05, It/sec 0.056, Tokens/sec 59.484, Trained Tokens 198342, Peak mem 11.950 GB
+ Iter 237: Train loss 1.212, Learning Rate 6.546e-05, It/sec 0.233, Tokens/sec 116.956, Trained Tokens 198843, Peak mem 11.950 GB
+ Iter 238: Train loss 1.055, Learning Rate 6.520e-05, It/sec 0.264, Tokens/sec 116.069, Trained Tokens 199283, Peak mem 11.950 GB
+ Iter 239: Train loss 1.291, Learning Rate 6.494e-05, It/sec 0.176, Tokens/sec 103.347, Trained Tokens 199871, Peak mem 11.950 GB
+ Iter 240: Train loss 1.738, Learning Rate 6.468e-05, It/sec 0.106, Tokens/sec 91.939, Trained Tokens 200741, Peak mem 11.950 GB
+ Iter 241: Train loss 1.440, Learning Rate 6.442e-05, It/sec 0.291, Tokens/sec 115.333, Trained Tokens 201138, Peak mem 11.950 GB
+ Iter 242: Train loss 1.422, Learning Rate 6.416e-05, It/sec 0.127, Tokens/sec 130.413, Trained Tokens 202162, Peak mem 11.950 GB
+ Iter 243: Train loss 1.243, Learning Rate 6.389e-05, It/sec 0.114, Tokens/sec 97.624, Trained Tokens 203019, Peak mem 11.950 GB
+ Iter 244: Train loss 1.083, Learning Rate 6.363e-05, It/sec 0.327, Tokens/sec 133.276, Trained Tokens 203427, Peak mem 11.950 GB
+ Iter 245: Train loss 1.323, Learning Rate 6.337e-05, It/sec 0.399, Tokens/sec 167.792, Trained Tokens 203848, Peak mem 11.950 GB
+ Iter 246: Train loss 1.199, Learning Rate 6.311e-05, It/sec 0.277, Tokens/sec 173.258, Trained Tokens 204474, Peak mem 11.950 GB
+ Iter 247: Train loss 1.344, Learning Rate 6.284e-05, It/sec 0.235, Tokens/sec 139.054, Trained Tokens 205065, Peak mem 11.950 GB
+ Iter 248: Train loss 1.147, Learning Rate 6.258e-05, It/sec 0.342, Tokens/sec 126.985, Trained Tokens 205436, Peak mem 11.950 GB
+ Iter 249: Train loss 1.418, Learning Rate 6.232e-05, It/sec 0.147, Tokens/sec 173.450, Trained Tokens 206615, Peak mem 11.950 GB
+ Iter 250: Train loss 1.434, Learning Rate 6.205e-05, It/sec 0.288, Tokens/sec 138.900, Trained Tokens 207098, Peak mem 11.950 GB
+ Iter 251: Train loss 1.372, Learning Rate 6.179e-05, It/sec 0.314, Tokens/sec 191.518, Trained Tokens 207708, Peak mem 11.950 GB
+ Iter 252: Train loss 1.291, Learning Rate 6.152e-05, It/sec 0.264, Tokens/sec 120.803, Trained Tokens 208165, Peak mem 11.950 GB
+ Iter 253: Train loss 1.515, Learning Rate 6.126e-05, It/sec 0.312, Tokens/sec 160.137, Trained Tokens 208679, Peak mem 11.950 GB
+ Iter 254: Train loss 1.566, Learning Rate 6.100e-05, It/sec 0.200, Tokens/sec 203.902, Trained Tokens 209701, Peak mem 11.950 GB
+ Iter 255: Train loss 1.456, Learning Rate 6.073e-05, It/sec 0.282, Tokens/sec 214.996, Trained Tokens 210464, Peak mem 11.950 GB
+ Iter 256: Train loss 1.312, Learning Rate 6.046e-05, It/sec 0.259, Tokens/sec 117.456, Trained Tokens 210918, Peak mem 11.950 GB
+ Iter 257: Train loss 1.388, Learning Rate 6.020e-05, It/sec 0.276, Tokens/sec 163.931, Trained Tokens 211512, Peak mem 11.950 GB
+ Iter 258: Train loss 1.556, Learning Rate 5.993e-05, It/sec 0.177, Tokens/sec 189.978, Trained Tokens 212585, Peak mem 11.950 GB
+ Iter 259: Train loss 1.118, Learning Rate 5.967e-05, It/sec 0.728, Tokens/sec 216.124, Trained Tokens 212882, Peak mem 11.950 GB
+ Iter 260: Train loss 1.619, Learning Rate 5.940e-05, It/sec 0.297, Tokens/sec 220.489, Trained Tokens 213624, Peak mem 11.950 GB
+ Iter 261: Train loss 1.730, Learning Rate 5.914e-05, It/sec 0.278, Tokens/sec 221.205, Trained Tokens 214420, Peak mem 11.950 GB
+ Iter 262: Train loss 1.228, Learning Rate 5.887e-05, It/sec 0.744, Tokens/sec 237.311, Trained Tokens 214739, Peak mem 11.950 GB
+ Iter 263: Train loss 1.467, Learning Rate 5.860e-05, It/sec 0.204, Tokens/sec 209.956, Trained Tokens 215767, Peak mem 11.950 GB
+ Iter 264: Train loss 1.062, Learning Rate 5.834e-05, It/sec 0.301, Tokens/sec 164.903, Trained Tokens 216314, Peak mem 11.950 GB
+ Iter 265: Train loss 1.581, Learning Rate 5.807e-05, It/sec 0.158, Tokens/sec 183.810, Trained Tokens 217477, Peak mem 11.950 GB
+ Iter 266: Train loss 1.294, Learning Rate 5.780e-05, It/sec 0.668, Tokens/sec 213.007, Trained Tokens 217796, Peak mem 11.950 GB
+ Iter 267: Train loss 1.520, Learning Rate 5.754e-05, It/sec 0.267, Tokens/sec 185.366, Trained Tokens 218491, Peak mem 11.950 GB
+ Iter 268: Train loss 1.483, Learning Rate 5.727e-05, It/sec 0.184, Tokens/sec 194.818, Trained Tokens 219547, Peak mem 11.950 GB
+ Iter 269: Train loss 1.743, Learning Rate 5.700e-05, It/sec 0.102, Tokens/sec 180.339, Trained Tokens 221320, Peak mem 11.950 GB
+ Iter 270: Train loss 1.470, Learning Rate 5.674e-05, It/sec 0.163, Tokens/sec 185.525, Trained Tokens 222455, Peak mem 11.950 GB
+ Iter 271: Train loss 1.483, Learning Rate 5.647e-05, It/sec 0.191, Tokens/sec 216.947, Trained Tokens 223590, Peak mem 11.950 GB
+ Iter 272: Train loss 1.541, Learning Rate 5.620e-05, It/sec 0.156, Tokens/sec 186.172, Trained Tokens 224783, Peak mem 11.950 GB
+ Iter 273: Train loss 1.311, Learning Rate 5.594e-05, It/sec 0.322, Tokens/sec 192.075, Trained Tokens 225380, Peak mem 11.950 GB
+ Iter 274: Train loss 1.440, Learning Rate 5.567e-05, It/sec 0.278, Tokens/sec 146.045, Trained Tokens 225905, Peak mem 11.950 GB
+ Iter 275: Train loss 1.441, Learning Rate 5.540e-05, It/sec 0.159, Tokens/sec 180.483, Trained Tokens 227041, Peak mem 11.950 GB
+ Iter 276: Train loss 1.426, Learning Rate 5.513e-05, It/sec 0.496, Tokens/sec 228.178, Trained Tokens 227501, Peak mem 11.950 GB
+ Iter 277: Train loss 1.496, Learning Rate 5.487e-05, It/sec 0.150, Tokens/sec 192.119, Trained Tokens 228779, Peak mem 11.950 GB
+ Iter 278: Train loss 1.379, Learning Rate 5.460e-05, It/sec 0.418, Tokens/sec 218.504, Trained Tokens 229302, Peak mem 11.950 GB
+ Iter 279: Train loss 1.649, Learning Rate 5.433e-05, It/sec 0.175, Tokens/sec 230.019, Trained Tokens 230620, Peak mem 11.950 GB
+ Iter 280: Train loss 1.587, Learning Rate 5.406e-05, It/sec 0.240, Tokens/sec 238.702, Trained Tokens 231614, Peak mem 11.950 GB
+ Iter 281: Train loss 1.547, Learning Rate 5.380e-05, It/sec 0.238, Tokens/sec 240.437, Trained Tokens 232626, Peak mem 11.950 GB
+ Iter 282: Train loss 1.223, Learning Rate 5.353e-05, It/sec 0.308, Tokens/sec 231.776, Trained Tokens 233378, Peak mem 11.950 GB
+ Iter 283: Train loss 1.626, Learning Rate 5.326e-05, It/sec 0.431, Tokens/sec 230.765, Trained Tokens 233914, Peak mem 11.950 GB
+ Iter 284: Train loss 1.513, Learning Rate 5.300e-05, It/sec 0.537, Tokens/sec 235.124, Trained Tokens 234352, Peak mem 11.950 GB
+ Iter 285: Train loss 1.488, Learning Rate 5.273e-05, It/sec 0.294, Tokens/sec 239.304, Trained Tokens 235166, Peak mem 11.950 GB
+ Iter 286: Train loss 1.597, Learning Rate 5.246e-05, It/sec 0.196, Tokens/sec 237.159, Trained Tokens 236375, Peak mem 11.950 GB
+ Iter 287: Train loss 1.593, Learning Rate 5.220e-05, It/sec 0.207, Tokens/sec 229.920, Trained Tokens 237486, Peak mem 11.950 GB
+ Iter 288: Train loss 1.785, Learning Rate 5.193e-05, It/sec 0.093, Tokens/sec 188.782, Trained Tokens 239516, Peak mem 11.950 GB
+ Iter 289: Train loss 0.985, Learning Rate 5.166e-05, It/sec 0.642, Tokens/sec 185.526, Trained Tokens 239805, Peak mem 11.950 GB
+ Iter 290: Train loss 1.670, Learning Rate 5.140e-05, It/sec 0.287, Tokens/sec 218.768, Trained Tokens 240567, Peak mem 11.950 GB
+ Iter 291: Train loss 1.568, Learning Rate 5.113e-05, It/sec 0.306, Tokens/sec 218.305, Trained Tokens 241281, Peak mem 11.950 GB
+ Iter 292: Train loss 1.432, Learning Rate 5.086e-05, It/sec 0.282, Tokens/sec 201.027, Trained Tokens 241993, Peak mem 11.950 GB
+ Iter 293: Train loss 1.484, Learning Rate 5.060e-05, It/sec 0.137, Tokens/sec 187.992, Trained Tokens 243364, Peak mem 11.950 GB
+ Iter 294: Train loss 1.077, Learning Rate 5.033e-05, It/sec 0.517, Tokens/sec 168.134, Trained Tokens 243689, Peak mem 11.950 GB
+ Iter 295: Train loss 1.642, Learning Rate 5.007e-05, It/sec 0.128, Tokens/sec 131.242, Trained Tokens 244715, Peak mem 11.950 GB
+ Iter 296: Train loss 1.391, Learning Rate 4.980e-05, It/sec 0.155, Tokens/sec 145.581, Trained Tokens 245654, Peak mem 11.950 GB
+ Iter 297: Train loss 1.422, Learning Rate 4.954e-05, It/sec 0.354, Tokens/sec 157.320, Trained Tokens 246098, Peak mem 11.950 GB
+ Iter 298: Train loss 1.178, Learning Rate 4.927e-05, It/sec 0.143, Tokens/sec 130.318, Trained Tokens 247012, Peak mem 11.950 GB
+ Iter 299: Train loss 1.644, Learning Rate 4.900e-05, It/sec 0.088, Tokens/sec 124.538, Trained Tokens 248433, Peak mem 11.950 GB
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 300: Val loss nan, Val took 0.011s
+ Iter 300: Train loss 1.180, Learning Rate 4.874e-05, It/sec 0.074, Tokens/sec 60.105, Trained Tokens 249242, Peak mem 11.950 GB
+ Iter 300: Saved adapter weights to adapters-no-conscious/adapters.safetensors and adapters-no-conscious/0000300_adapters.safetensors.
+ Saved final weights to adapters-no-conscious/adapters.safetensors.
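
Each `Iter N:` line in `output.log` is fixed-format, so the training curves can be recovered with a single regular expression. A minimal sketch; the pattern is inferred from the lines above, not taken from any mlx-lm API:

```python
import re

# Fields per training line: iteration, train loss, learning rate,
# iterations/sec, tokens/sec, cumulative trained tokens.
LINE = re.compile(
    r"Iter (\d+): Train loss ([\d.]+), Learning Rate ([\d.e-]+), "
    r"It/sec ([\d.]+), Tokens/sec ([\d.]+), Trained Tokens (\d+)"
)

def parse_log(path: str):
    rows = []
    with open(path) as f:
        for line in f:
            m = LINE.search(line)
            if m:  # skips val-loss, save, and progress-bar lines
                it, loss, lr, ips, tps, tokens = m.groups()
                rows.append((int(it), float(loss), float(lr),
                             float(ips), float(tps), int(tokens)))
    return rows

rows = parse_log(
    "adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/output.log")
print(f"{len(rows)} train steps, final loss {rows[-1][1]:.3f}")  # 300, 1.180
```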
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/requirements.txt ADDED
@@ -0,0 +1,147 @@
+ shellingham==1.5.4
+ contourpy==1.3.2
+ jiter==0.12.0
+ audioread==3.1.0
+ threadpoolctl==3.6.0
+ lazy_loader==0.4
+ GitPython==3.1.45
+ async-timeout==5.0.1
+ requests==2.32.5
+ rich==14.2.0
+ tokenizers==0.22.1
+ urllib3==2.5.0
+ exceptiongroup==1.3.1
+ numpy==2.2.6
+ click==8.3.1
+ pytz==2025.2
+ miniaudio==1.61
+ pyarrow==22.0.0
+ sse-starlette==3.2.0
+ scikit-learn==1.7.2
+ soxr==1.0.0
+ jsonschema-specifications==2025.9.1
+ python-multipart==0.0.22
+ utilsforecast==0.2.15
+ ftfy==6.3.1
+ torchvision==0.25.0
+ statsmodels==0.14.6
+ file-read-backwards==3.2.0
+ propcache==0.4.1
+ python-dotenv==1.2.1
+ anyio==4.12.0
+ mlx==0.30.6
+ wordfreq==3.1.1
+ networkx==3.4.2
+ pip==25.3
+ texttable==1.7.0
+ mlx-audio==0.3.1
+ narwhals==2.15.0
+ multidict==6.7.0
+ numba==0.63.1
+ idna==3.11
+ regex==2025.11.3
+ fonttools==4.60.1
+ openai==2.16.0
+ aiohttp==3.13.2
+ mistral_common==1.8.6
+ einshape==1.0
+ cffi==2.0.0
+ kiwisolver==1.4.9
+ tqdm==4.67.1
+ setuptools==80.9.0
+ RapidFuzz==3.14.3
+ pyparsing==3.2.5
+ starlette==0.52.1
+ tzdata==2025.2
+ mlx-lm==0.30.6
+ httpcore==1.0.9
+ decorator==5.2.1
+ certifi==2025.11.12
+ typer==0.21.1
+ pydantic==2.12.4
+ fsspec==2025.10.0
+ mcp==1.26.0
+ librosa==0.11.0
+ charset-normalizer==3.4.4
+ sympy==1.14.0
+ jsonschema==4.25.1
+ pydantic-settings==2.12.0
+ markdown-it-py==4.0.0
+ tiktoken==0.12.0
+ PyJWT==2.11.0
+ sentry-sdk==2.45.0
+ platformdirs==4.5.0
+ absl-py==2.3.1
+ transformers==5.1.0
+ diffusers==0.37.0.dev0
+ h11==0.16.0
+ gitdb==4.0.12
+ sniffio==1.3.1
+ pycparser==3.0
+ sentencepiece==0.2.1
+ importlib_metadata==8.7.1
+ mdurl==0.1.2
+ patsy==1.0.2
+ python-dateutil==2.9.0.post0
+ mpmath==1.3.0
+ pillow==12.0.0
+ PyYAML==6.0.3
+ sentence-transformers==5.1.2
+ multiprocess==0.70.18
+ pydantic_core==2.41.5
+ uvicorn==0.40.0
+ frozenlist==1.8.0
+ typer-slim==0.20.1
+ typing_extensions==4.15.0
+ aiosignal==1.4.0
+ packaging==25.0
+ cycler==0.12.1
+ cryptography==46.0.4
+ hf-xet==1.2.0
+ Jinja2==3.1.6
+ wheel==0.45.1
+ referencing==0.37.0
+ pandas==2.3.3
+ soundfile==0.13.1
+ pooch==1.8.2
+ MarkupSafe==3.0.3
+ dill==0.4.0
+ pydantic-extra-types==2.10.6
+ msgpack==1.1.2
+ distro==1.9.0
+ locate==1.1.1
+ datasets==4.4.1
+ Pygments==2.19.2
+ aiohappyeyeballs==2.6.1
+ llvmlite==0.46.0
+ attrs==25.4.0
+ huggingface_hub==1.3.5
+ nltk==3.9.2
+ torch==2.10.0
+ httpx==0.28.1
+ filelock==3.20.0
+ smmap==5.0.2
+ sounddevice==0.5.3
+ timesfm==1.3.0
+ pycountry==24.6.1
+ mlx-metal==0.30.6
+ scipy==1.15.3
+ protobuf==6.33.1
+ psutil==7.1.3
+ typing-inspection==0.4.2
+ joblib==1.5.2
+ zipp==3.23.0
+ annotated-types==0.7.0
+ accelerate==1.12.0
+ safetensors==0.6.2
+ httpx-sse==0.4.3
+ wcwidth==0.2.14
+ igraph==1.0.0
+ rpds-py==0.30.0
+ langcodes==3.5.1
+ six==1.17.0
+ wandb==0.23.0
+ yarl==1.22.0
+ pyloudnorm==0.2.0
+ xxhash==3.6.0
+ matplotlib==3.10.7
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/wandb-metadata.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "os": "macOS-26.2-arm64-arm-64bit",
+   "python": "CPython 3.10.19",
+   "startedAt": "2026-02-16T21:29:28.031847Z",
+   "args": [
+     "--config",
+     "train.yaml"
+   ],
+   "program": "/Users/natebreslow/miniconda3/envs/mlx-experiment/bin/mlx_lm.lora",
+   "email": "nathanbreslow@gmail.com",
+   "root": "adapters-no-conscious",
+   "host": "MacBook-Pro-135.local",
+   "executable": "/Users/natebreslow/miniconda3/envs/mlx-experiment/bin/python3.10",
+   "cpu_count": 16,
+   "cpu_count_logical": 16,
+   "disk": {
+     "/": {
+       "total": "1995218165760",
+       "used": "1703338582016"
+     }
+   },
+   "memory": {
+     "total": "68719476736"
+   },
+   "apple": {
+     "name": "Apple M3 Max",
+     "ecpuCores": 4,
+     "pcpuCores": 12,
+     "gpuCores": 40,
+     "memoryGb": 64,
+     "swapTotalBytes": "5368709120",
+     "ramTotalBytes": "68719476736"
+   },
+   "writerId": "1pxa79orri10yjkyhqiblp74t2dr1h2q"
+ }
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"tokens_per_second":60.10496432752695,"_timestamp":1.771279305098882e+09,"learning_rate":4.8740144848125055e-05,"_runtime":1938.348889166,"val_time":0.010776083989185281,"_step":300,"iteration":300,"peak_memory":11.95046822,"val_loss":NaN,"train_loss":1.1797744035720825,"_wandb":{"runtime":1938},"trained_tokens":249242,"iterations_per_second":0.07429538235788251}
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
+ {"time":"2026-02-16T16:29:28.322981-05:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-02-16T16:29:28.53197-05:00","level":"INFO","msg":"stream: created new stream","id":"t3h29alg"}
+ {"time":"2026-02-16T16:29:28.532028-05:00","level":"INFO","msg":"handler: started","stream_id":"t3h29alg"}
+ {"time":"2026-02-16T16:29:28.532252-05:00","level":"INFO","msg":"stream: started","id":"t3h29alg"}
+ {"time":"2026-02-16T16:29:28.532264-05:00","level":"INFO","msg":"writer: started","stream_id":"t3h29alg"}
+ {"time":"2026-02-16T16:29:28.532264-05:00","level":"INFO","msg":"sender: started","stream_id":"t3h29alg"}
+ {"time":"2026-02-16T17:01:47.077888-05:00","level":"INFO","msg":"stream: closing","id":"t3h29alg"}
+ {"time":"2026-02-16T17:01:47.415501-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2026-02-16T17:01:47.625033-05:00","level":"INFO","msg":"handler: closed","stream_id":"t3h29alg"}
+ {"time":"2026-02-16T17:01:47.625651-05:00","level":"INFO","msg":"sender: closed","stream_id":"t3h29alg"}
+ {"time":"2026-02-16T17:01:47.62573-05:00","level":"INFO","msg":"stream: closed","id":"t3h29alg"}
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_setup.py:_flush():80] Configure stats pid to 69197
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_setup.py:_flush():80] Loading settings from /Users/natebreslow/.config/wandb/settings
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_setup.py:_flush():80] Loading settings from /Users/natebreslow/Documents/llmSelfReport/wandb/settings
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_setup.py:_flush():80] Loading settings from environment variables
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_init.py:setup_run_log_directory():713] Logging user logs to adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/logs/debug.log
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/logs/debug-internal.log
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_init.py:init():840] calling init triggers
+ 2026-02-16 16:29:28,036 INFO MainThread:69197 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
+ config: {'model': 'Qwen3-4B-Instruct-2507', 'train': True, 'data': 'training/no-conscious', 'fine_tune_type': 'lora', 'optimizer': 'adam', 'mask_prompt': False, 'num_layers': 36, 'batch_size': 1, 'iters': 300, 'val_batches': 0, 'learning_rate': 0.0001, 'steps_per_report': 1, 'steps_per_eval': 200, 'grad_accumulation_steps': 1, 'resume_adapter_file': None, 'adapter_path': 'adapters-no-conscious', 'save_every': 100, 'test': False, 'test_batches': 100, 'max_seq_length': 8192, 'config': 'train.yaml', 'grad_checkpoint': True, 'report_to': 'wandb', 'project_name': 'conscious-finetuning', 'seed': 0, 'optimizer_config': {'adam': {'betas': [0.9, 0.9999], 'eps': 1e-06, 'bias_correction': True}}, 'lora_parameters': {'keys': ['self_attn.q_proj', 'self_attn.v_proj', 'self_attn.k_proj', 'self_attn.o_proj', 'mlp.gate_proj', 'mlp.up_proj', 'mlp.down_proj'], 'rank': 16, 'scale': 2.0, 'dropout': 0.0}, 'lr_schedule': {'name': 'cosine_decay', 'warmup': 10, 'warmup_init': 1e-05, 'arguments': [0.0001, 529, 1e-05]}, '_wandb': {}}
+ 2026-02-16 16:29:28,037 INFO MainThread:69197 [wandb_init.py:init():888] starting backend
+ 2026-02-16 16:29:28,279 INFO MainThread:69197 [wandb_init.py:init():891] sending inform_init request
+ 2026-02-16 16:29:28,321 INFO MainThread:69197 [wandb_init.py:init():899] backend started and connected
+ 2026-02-16 16:29:28,324 INFO MainThread:69197 [wandb_init.py:init():969] updated telemetry
+ 2026-02-16 16:29:28,324 INFO MainThread:69197 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
+ 2026-02-16 16:29:28,722 INFO MainThread:69197 [wandb_init.py:init():1040] starting run threads in backend
+ 2026-02-16 16:29:28,828 INFO MainThread:69197 [wandb_run.py:_console_start():2504] atexit reg
+ 2026-02-16 16:29:28,829 INFO MainThread:69197 [wandb_run.py:_redirect():2352] redirect: wrap_raw
+ 2026-02-16 16:29:28,829 INFO MainThread:69197 [wandb_run.py:_redirect():2421] Wrapping output streams.
+ 2026-02-16 16:29:28,829 INFO MainThread:69197 [wandb_run.py:_redirect():2444] Redirects installed.
+ 2026-02-16 16:29:28,831 INFO MainThread:69197 [wandb_init.py:init():1080] run started, returning control to user process
+ 2026-02-16 17:01:47,076 INFO wandb-AsyncioManager-main:69197 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-02-16 17:01:47,077 INFO wandb-AsyncioManager-main:69197 [mailbox.py:close():137] Closing mailbox, abandoning 2 handles.
adapters-no-conscious/wandb/run-20260216_162928-t3h29alg/run-t3h29alg.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:715df040fba0f7191e94a3cf9edf49d1cb2aee8dae84b854f4239a0e6f8497b1
+ size 420193
adapters-uncertain/0000100_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:acfd2b751ae8e230642100fb9a084bb0e969f26cbd255538c62e12988e02a3da
+ size 132175803
adapters-uncertain/0000200_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f27ad561f6d55c1d1b99f4ffae0ad2d0372d25107b3368681b8d1b4950b13fb6
+ size 132175803
adapters-uncertain/0000300_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61be8d632d59f48f4ff367f089f8b799c9f1b9e74e0c52beb4f19aba628109c1
+ size 132175803
adapters-uncertain/adapter_config.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "adapter_path": "adapters-uncertain",
+   "batch_size": 1,
+   "config": "train.yaml",
+   "data": "training/uncertain",
+   "fine_tune_type": "lora",
+   "grad_accumulation_steps": 1,
+   "grad_checkpoint": true,
+   "iters": 300,
+   "learning_rate": 0.0001,
+   "lora_parameters": {
+     "keys": [
+       "self_attn.q_proj",
+       "self_attn.v_proj",
+       "self_attn.k_proj",
+       "self_attn.o_proj",
+       "mlp.gate_proj",
+       "mlp.up_proj",
+       "mlp.down_proj"
+     ],
+     "rank": 16,
+     "scale": 2.0,
+     "dropout": 0.0
+   },
+   "lr_schedule": {
+     "name": "cosine_decay",
+     "warmup": 10,
+     "warmup_init": 1e-05,
+     "arguments": [
+       0.0001,
+       529,
+       1e-05
+     ]
+   },
+   "mask_prompt": false,
+   "max_seq_length": 8192,
+   "model": "Qwen3-4B-Instruct-2507",
+   "num_layers": 36,
+   "optimizer": "adam",
+   "optimizer_config": {
+     "adam": {
+       "betas": [
+         0.9,
+         0.9999
+       ],
+       "eps": 1e-06,
+       "bias_correction": true
+     }
+   },
+   "project_name": "conscious-finetuning",
+   "report_to": "wandb",
+   "resume_adapter_file": null,
+   "save_every": 100,
+   "seed": 0,
+   "steps_per_eval": 200,
+   "steps_per_report": 1,
+   "test": false,
+   "test_batches": 100,
+   "train": true,
+   "val_batches": 0
+ }
adapters-uncertain/adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61be8d632d59f48f4ff367f089f8b799c9f1b9e74e0c52beb4f19aba628109c1
+ size 132175803
adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/config.yaml ADDED
@@ -0,0 +1,141 @@
+ _wandb:
+   value:
+     cli_version: 0.23.0
+     e:
+       4stii5owh2cve1gvc5kugyr75h6wj8db:
+         apple:
+           ecpuCores: 4
+           gpuCores: 40
+           memoryGb: 64
+           name: Apple M3 Max
+           pcpuCores: 12
+           ramTotalBytes: "68719476736"
+           swapTotalBytes: "5368709120"
+         args:
+           - --config
+           - train.yaml
+         cpu_count: 16
+         cpu_count_logical: 16
+         disk:
+           /:
+             total: "1995218165760"
+             used: "1702686375936"
+         email: nathanbreslow@gmail.com
+         executable: /Users/natebreslow/miniconda3/envs/mlx-experiment/bin/python3.10
+         host: MacBook-Pro-135.local
+         memory:
+           total: "68719476736"
+         os: macOS-26.2-arm64-arm-64bit
+         program: /Users/natebreslow/miniconda3/envs/mlx-experiment/bin/mlx_lm.lora
+         python: CPython 3.10.19
+         root: adapters-uncertain
+         startedAt: "2026-02-16T20:49:32.288869Z"
+         writerId: 4stii5owh2cve1gvc5kugyr75h6wj8db
+     m: []
+     python_version: 3.10.19
+     t:
+       "1":
+         - 1
+         - 5
+         - 11
+         - 49
+         - 53
+         - 71
+       "2":
+         - 1
+         - 5
+         - 11
+         - 49
+         - 53
+         - 71
+       "3":
+         - 13
+         - 16
+         - 61
+       "4": 3.10.19
+       "5": 0.23.0
+       "6": 5.1.0
+       "8":
+         - 2
+       "12": 0.23.0
+       "13": darwin-arm64
+ adapter_path:
+   value: adapters-uncertain
+ batch_size:
+   value: 1
+ config:
+   value: train.yaml
+ data:
+   value: training/uncertain
+ fine_tune_type:
+   value: lora
+ grad_accumulation_steps:
+   value: 1
+ grad_checkpoint:
+   value: true
+ iters:
+   value: 300
+ learning_rate:
+   value: 0.0001
+ lora_parameters:
+   value:
+     dropout: 0
+     keys:
+       - self_attn.q_proj
+       - self_attn.v_proj
+       - self_attn.k_proj
+       - self_attn.o_proj
+       - mlp.gate_proj
+       - mlp.up_proj
+       - mlp.down_proj
+     rank: 16
+     scale: 2
+ lr_schedule:
+   value:
+     arguments:
+       - 0.0001
+       - 529
+       - 1e-05
+     name: cosine_decay
+     warmup: 10
+     warmup_init: 1e-05
+ mask_prompt:
+   value: false
+ max_seq_length:
+   value: 8192
+ model:
+   value: Qwen3-4B-Instruct-2507
+ num_layers:
+   value: 36
+ optimizer:
+   value: adam
+ optimizer_config:
+   value:
+     adam:
+       betas:
+         - 0.9
+         - 0.9999
+       bias_correction: true
+       eps: 1e-06
+ project_name:
+   value: conscious-finetuning
+ report_to:
+   value: wandb
+ resume_adapter_file:
+   value: null
+ save_every:
+   value: 100
+ seed:
+   value: 0
+ steps_per_eval:
+   value: 200
+ steps_per_report:
+   value: 1
+ test:
+   value: false
+ test_batches:
+   value: 100
+ train:
+   value: true
+ val_batches:
+   value: 0
adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/output.log ADDED
@@ -0,0 +1,315 @@
+ Loading pretrained model
+ Loading datasets
+ Training
+ Trainable parameters: 0.821% (33.030M/4022.468M)
+ Starting training..., iters: 300
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 1: Val loss nan, Val took 0.091s
+ Iter 1: Train loss 3.249, Learning Rate 1.000e-05, It/sec 0.229, Tokens/sec 226.966, Trained Tokens 991, Peak mem 9.803 GB
+ Iter 2: Train loss 3.566, Learning Rate 1.900e-05, It/sec 0.520, Tokens/sec 241.734, Trained Tokens 1456, Peak mem 9.803 GB
+ Iter 3: Train loss 3.294, Learning Rate 2.800e-05, It/sec 0.421, Tokens/sec 263.090, Trained Tokens 2081, Peak mem 9.803 GB
+ Iter 4: Train loss 3.144, Learning Rate 3.700e-05, It/sec 0.255, Tokens/sec 248.165, Trained Tokens 3055, Peak mem 10.244 GB
+ Iter 5: Train loss 2.971, Learning Rate 4.600e-05, It/sec 0.214, Tokens/sec 247.495, Trained Tokens 4213, Peak mem 10.438 GB
+ Iter 6: Train loss 3.100, Learning Rate 5.500e-05, It/sec 0.337, Tokens/sec 241.882, Trained Tokens 4930, Peak mem 10.438 GB
+ Iter 7: Train loss 2.737, Learning Rate 6.400e-05, It/sec 0.187, Tokens/sec 249.361, Trained Tokens 6260, Peak mem 10.708 GB
+ Iter 8: Train loss 2.682, Learning Rate 7.300e-05, It/sec 0.362, Tokens/sec 244.644, Trained Tokens 6935, Peak mem 10.708 GB
+ Iter 9: Train loss 2.667, Learning Rate 8.200e-05, It/sec 0.292, Tokens/sec 249.973, Trained Tokens 7792, Peak mem 10.708 GB
+ Iter 10: Train loss 2.659, Learning Rate 9.100e-05, It/sec 0.391, Tokens/sec 218.320, Trained Tokens 8350, Peak mem 10.708 GB
+ Iter 11: Train loss 2.452, Learning Rate 1.000e-04, It/sec 0.189, Tokens/sec 205.506, Trained Tokens 9440, Peak mem 10.708 GB
+ Iter 12: Train loss 2.231, Learning Rate 1.000e-04, It/sec 0.176, Tokens/sec 234.529, Trained Tokens 10775, Peak mem 10.708 GB
+ Iter 13: Train loss 2.382, Learning Rate 1.000e-04, It/sec 0.368, Tokens/sec 197.482, Trained Tokens 11311, Peak mem 10.708 GB
+ Iter 14: Train loss 2.030, Learning Rate 1.000e-04, It/sec 0.189, Tokens/sec 221.337, Trained Tokens 12485, Peak mem 10.708 GB
+ Iter 15: Train loss 2.189, Learning Rate 9.999e-05, It/sec 0.337, Tokens/sec 217.554, Trained Tokens 13131, Peak mem 10.708 GB
+ Iter 16: Train loss 2.217, Learning Rate 9.999e-05, It/sec 0.166, Tokens/sec 233.943, Trained Tokens 14541, Peak mem 10.813 GB
+ Iter 17: Train loss 2.367, Learning Rate 9.998e-05, It/sec 0.317, Tokens/sec 242.232, Trained Tokens 15305, Peak mem 10.813 GB
+ Iter 18: Train loss 2.260, Learning Rate 9.997e-05, It/sec 0.491, Tokens/sec 245.964, Trained Tokens 15806, Peak mem 10.813 GB
+ Iter 19: Train loss 2.157, Learning Rate 9.996e-05, It/sec 0.344, Tokens/sec 261.297, Trained Tokens 16565, Peak mem 10.813 GB
+ Iter 20: Train loss 2.263, Learning Rate 9.995e-05, It/sec 0.340, Tokens/sec 245.259, Trained Tokens 17286, Peak mem 10.813 GB
+ Iter 21: Train loss 2.059, Learning Rate 9.994e-05, It/sec 0.132, Tokens/sec 212.162, Trained Tokens 18888, Peak mem 11.112 GB
+ Iter 22: Train loss 2.036, Learning Rate 9.992e-05, It/sec 0.334, Tokens/sec 256.373, Trained Tokens 19655, Peak mem 11.112 GB
+ Iter 23: Train loss 2.212, Learning Rate 9.990e-05, It/sec 0.138, Tokens/sec 203.609, Trained Tokens 21126, Peak mem 11.112 GB
+ Iter 24: Train loss 2.047, Learning Rate 9.989e-05, It/sec 0.242, Tokens/sec 206.687, Trained Tokens 21980, Peak mem 11.112 GB
+ Iter 25: Train loss 2.006, Learning Rate 9.987e-05, It/sec 0.159, Tokens/sec 197.755, Trained Tokens 23224, Peak mem 11.112 GB
+ Iter 26: Train loss 1.963, Learning Rate 9.984e-05, It/sec 0.293, Tokens/sec 220.318, Trained Tokens 23977, Peak mem 11.112 GB
+ Iter 27: Train loss 1.874, Learning Rate 9.982e-05, It/sec 0.191, Tokens/sec 198.110, Trained Tokens 25012, Peak mem 11.112 GB
+ Iter 28: Train loss 2.071, Learning Rate 9.980e-05, It/sec 0.523, Tokens/sec 228.348, Trained Tokens 25449, Peak mem 11.112 GB
+ Iter 29: Train loss 1.899, Learning Rate 9.977e-05, It/sec 0.138, Tokens/sec 186.320, Trained Tokens 26804, Peak mem 11.112 GB
+ Iter 30: Train loss 2.018, Learning Rate 9.974e-05, It/sec 0.332, Tokens/sec 198.264, Trained Tokens 27401, Peak mem 11.112 GB
+ Iter 31: Train loss 1.800, Learning Rate 9.971e-05, It/sec 0.528, Tokens/sec 216.419, Trained Tokens 27811, Peak mem 11.112 GB
+ Iter 32: Train loss 2.070, Learning Rate 9.968e-05, It/sec 0.382, Tokens/sec 200.177, Trained Tokens 28335, Peak mem 11.112 GB
+ Iter 33: Train loss 1.950, Learning Rate 9.965e-05, It/sec 0.347, Tokens/sec 218.560, Trained Tokens 28965, Peak mem 11.112 GB
+ Iter 34: Train loss 1.861, Learning Rate 9.962e-05, It/sec 0.582, Tokens/sec 239.682, Trained Tokens 29377, Peak mem 11.112 GB
+ Iter 35: Train loss 2.122, Learning Rate 9.958e-05, It/sec 0.409, Tokens/sec 249.633, Trained Tokens 29987, Peak mem 11.112 GB
+ Iter 36: Train loss 1.859, Learning Rate 9.954e-05, It/sec 0.232, Tokens/sec 237.074, Trained Tokens 31007, Peak mem 11.112 GB
+ Iter 37: Train loss 2.013, Learning Rate 9.950e-05, It/sec 0.505, Tokens/sec 234.426, Trained Tokens 31471, Peak mem 11.112 GB
+ Iter 38: Train loss 1.950, Learning Rate 9.946e-05, It/sec 0.320, Tokens/sec 244.370, Trained Tokens 32235, Peak mem 11.112 GB
+ Iter 39: Train loss 2.128, Learning Rate 9.942e-05, It/sec 0.171, Tokens/sec 221.493, Trained Tokens 33527, Peak mem 11.112 GB
+ Iter 40: Train loss 1.932, Learning Rate 9.938e-05, It/sec 0.252, Tokens/sec 235.604, Trained Tokens 34462, Peak mem 11.112 GB
+ Iter 41: Train loss 1.819, Learning Rate 9.933e-05, It/sec 0.268, Tokens/sec 237.295, Trained Tokens 35346, Peak mem 11.112 GB
+ Iter 42: Train loss 1.844, Learning Rate 9.929e-05, It/sec 0.512, Tokens/sec 219.076, Trained Tokens 35774, Peak mem 11.112 GB
+ Iter 43: Train loss 1.776, Learning Rate 9.924e-05, It/sec 0.145, Tokens/sec 201.866, Trained Tokens 37169, Peak mem 11.112 GB
+ Iter 44: Train loss 1.851, Learning Rate 9.919e-05, It/sec 0.141, Tokens/sec 191.439, Trained Tokens 38523, Peak mem 11.112 GB
+ Iter 45: Train loss 1.810, Learning Rate 9.914e-05, It/sec 0.246, Tokens/sec 201.952, Trained Tokens 39344, Peak mem 11.112 GB
+ Iter 46: Train loss 2.092, Learning Rate 9.909e-05, It/sec 0.151, Tokens/sec 226.994, Trained Tokens 40851, Peak mem 11.112 GB
+ Iter 47: Train loss 2.094, Learning Rate 9.903e-05, It/sec 0.275, Tokens/sec 237.977, Trained Tokens 41715, Peak mem 11.112 GB
+ Iter 48: Train loss 1.833, Learning Rate 9.898e-05, It/sec 0.823, Tokens/sec 201.640, Trained Tokens 41960, Peak mem 11.112 GB
+ Iter 49: Train loss 2.087, Learning Rate 9.892e-05, It/sec 0.281, Tokens/sec 219.051, Trained Tokens 42739, Peak mem 11.112 GB
+ Iter 50: Train loss 1.681, Learning Rate 9.886e-05, It/sec 0.325, Tokens/sec 248.392, Trained Tokens 43504, Peak mem 11.112 GB
+ Iter 51: Train loss 1.825, Learning Rate 9.880e-05, It/sec 0.184, Tokens/sec 234.655, Trained Tokens 44779, Peak mem 11.112 GB
+ Iter 52: Train loss 1.578, Learning Rate 9.874e-05, It/sec 0.434, Tokens/sec 236.550, Trained Tokens 45324, Peak mem 11.112 GB
+ Iter 53: Train loss 1.853, Learning Rate 9.867e-05, It/sec 0.269, Tokens/sec 229.602, Trained Tokens 46176, Peak mem 11.112 GB
+ Iter 54: Train loss 1.994, Learning Rate 9.861e-05, It/sec 0.098, Tokens/sec 172.646, Trained Tokens 47936, Peak mem 11.335 GB
+ Iter 55: Train loss 1.748, Learning Rate 9.854e-05, It/sec 0.419, Tokens/sec 231.024, Trained Tokens 48487, Peak mem 11.335 GB
+ Iter 56: Train loss 2.002, Learning Rate 9.847e-05, It/sec 0.182, Tokens/sec 211.129, Trained Tokens 49649, Peak mem 11.335 GB
+ Iter 57: Train loss 2.016, Learning Rate 9.840e-05, It/sec 0.490, Tokens/sec 228.804, Trained Tokens 50116, Peak mem 11.335 GB
+ Iter 58: Train loss 1.934, Learning Rate 9.833e-05, It/sec 0.186, Tokens/sec 207.369, Trained Tokens 51232, Peak mem 11.335 GB
+ Iter 59: Train loss 1.547, Learning Rate 9.826e-05, It/sec 0.196, Tokens/sec 205.488, Trained Tokens 52283, Peak mem 11.335 GB
+ Iter 60: Train loss 1.669, Learning Rate 9.818e-05, It/sec 0.195, Tokens/sec 158.940, Trained Tokens 53100, Peak mem 11.335 GB
+ Iter 61: Train loss 1.640, Learning Rate 9.811e-05, It/sec 0.260, Tokens/sec 183.444, Trained Tokens 53805, Peak mem 11.335 GB
+ Iter 62: Train loss 1.694, Learning Rate 9.803e-05, It/sec 0.370, Tokens/sec 211.807, Trained Tokens 54377, Peak mem 11.335 GB
+ Iter 63: Train loss 1.767, Learning Rate 9.795e-05, It/sec 0.484, Tokens/sec 173.652, Trained Tokens 54736, Peak mem 11.335 GB
+ Iter 64: Train loss 2.096, Learning Rate 9.787e-05, It/sec 0.264, Tokens/sec 166.809, Trained Tokens 55367, Peak mem 11.335 GB
+ Iter 65: Train loss 1.619, Learning Rate 9.779e-05, It/sec 0.271, Tokens/sec 163.137, Trained Tokens 55969, Peak mem 11.335 GB
+ Iter 66: Train loss 1.836, Learning Rate 9.771e-05, It/sec 0.071, Tokens/sec 79.352, Trained Tokens 57094, Peak mem 11.335 GB
+ Iter 67: Train loss 1.677, Learning Rate 9.762e-05, It/sec 0.026, Tokens/sec 24.014, Trained Tokens 58001, Peak mem 11.335 GB
+ Iter 68: Train loss 1.830, Learning Rate 9.753e-05, It/sec 0.024, Tokens/sec 28.659, Trained Tokens 59214, Peak mem 11.335 GB
+ Iter 69: Train loss 1.381, Learning Rate 9.745e-05, It/sec 0.069, Tokens/sec 20.693, Trained Tokens 59516, Peak mem 11.335 GB
+ Iter 70: Train loss 1.683, Learning Rate 9.736e-05, It/sec 0.060, Tokens/sec 49.050, Trained Tokens 60337, Peak mem 11.335 GB
+ Iter 71: Train loss 1.327, Learning Rate 9.727e-05, It/sec 0.203, Tokens/sec 96.246, Trained Tokens 60812, Peak mem 11.335 GB
+ Iter 72: Train loss 1.901, Learning Rate 9.717e-05, It/sec 0.228, Tokens/sec 92.952, Trained Tokens 61220, Peak mem 11.335 GB
+ Iter 73: Train loss 1.613, Learning Rate 9.708e-05, It/sec 0.249, Tokens/sec 110.101, Trained Tokens 61663, Peak mem 11.335 GB
+ Iter 74: Train loss 1.700, Learning Rate 9.698e-05, It/sec 0.169, Tokens/sec 97.672, Trained Tokens 62240, Peak mem 11.335 GB
+ Iter 75: Train loss 1.797, Learning Rate 9.689e-05, It/sec 0.086, Tokens/sec 121.013, Trained Tokens 63647, Peak mem 11.335 GB
+ Iter 76: Train loss 1.690, Learning Rate 9.679e-05, It/sec 0.319, Tokens/sec 132.233, Trained Tokens 64061, Peak mem 11.335 GB
+ Iter 77: Train loss 1.603, Learning Rate 9.669e-05, It/sec 0.179, Tokens/sec 156.663, Trained Tokens 64935, Peak mem 11.335 GB
+ Iter 78: Train loss 1.642, Learning Rate 9.659e-05, It/sec 0.183, Tokens/sec 156.281, Trained Tokens 65789, Peak mem 11.335 GB
+ Iter 79: Train loss 1.869, Learning Rate 9.648e-05, It/sec 0.213, Tokens/sec 165.583, Trained Tokens 66568, Peak mem 11.335 GB
+ Iter 80: Train loss 1.738, Learning Rate 9.638e-05, It/sec 0.115, Tokens/sec 168.977, Trained Tokens 68032, Peak mem 11.335 GB
+ Iter 81: Train loss 1.202, Learning Rate 9.627e-05, It/sec 0.509, Tokens/sec 193.932, Trained Tokens 68413, Peak mem 11.335 GB
+ Iter 82: Train loss 1.773, Learning Rate 9.617e-05, It/sec 0.188, Tokens/sec 179.129, Trained Tokens 69364, Peak mem 11.335 GB
+ Iter 83: Train loss 1.675, Learning Rate 9.606e-05, It/sec 0.201, Tokens/sec 195.419, Trained Tokens 70334, Peak mem 11.335 GB
+ Iter 84: Train loss 1.797, Learning Rate 9.595e-05, It/sec 0.114, Tokens/sec 191.511, Trained Tokens 72013, Peak mem 11.335 GB
+ Iter 85: Train loss 1.672, Learning Rate 9.584e-05, It/sec 0.298, Tokens/sec 212.803, Trained Tokens 72727, Peak mem 11.335 GB
+ Iter 86: Train loss 1.884, Learning Rate 9.572e-05, It/sec 0.230, Tokens/sec 218.996, Trained Tokens 73681, Peak mem 11.335 GB
+ Iter 87: Train loss 1.665, Learning Rate 9.561e-05, It/sec 0.526, Tokens/sec 235.250, Trained Tokens 74128, Peak mem 11.335 GB
+ Iter 88: Train loss 1.671, Learning Rate 9.549e-05, It/sec 0.556, Tokens/sec 192.450, Trained Tokens 74474, Peak mem 11.335 GB
+ Iter 89: Train loss 1.599, Learning Rate 9.538e-05, It/sec 0.332, Tokens/sec 227.407, Trained Tokens 75158, Peak mem 11.335 GB
+ Iter 90: Train loss 1.870, Learning Rate 9.526e-05, It/sec 0.221, Tokens/sec 220.800, Trained Tokens 76159, Peak mem 11.335 GB
+ Iter 91: Train loss 1.507, Learning Rate 9.514e-05, It/sec 0.280, Tokens/sec 225.755, Trained Tokens 76965, Peak mem 11.335 GB
+ Iter 92: Train loss 1.387, Learning Rate 9.502e-05, It/sec 0.535, Tokens/sec 223.628, Trained Tokens 77383, Peak mem 11.335 GB
+ Iter 93: Train loss 1.630, Learning Rate 9.489e-05, It/sec 0.631, Tokens/sec 230.397, Trained Tokens 77748, Peak mem 11.335 GB
+ Iter 94: Train loss 1.806, Learning Rate 9.477e-05, It/sec 0.305, Tokens/sec 233.757, Trained Tokens 78514, Peak mem 11.335 GB
+ Iter 95: Train loss 1.842, Learning Rate 9.464e-05, It/sec 0.263, Tokens/sec 235.078, Trained Tokens 79409, Peak mem 11.335 GB
+ Iter 96: Train loss 1.635, Learning Rate 9.452e-05, It/sec 0.315, Tokens/sec 226.484, Trained Tokens 80127, Peak mem 11.335 GB
+ Iter 97: Train loss 1.653, Learning Rate 9.439e-05, It/sec 0.278, Tokens/sec 220.115, Trained Tokens 80919, Peak mem 11.335 GB
+ Iter 98: Train loss 1.847, Learning Rate 9.426e-05, It/sec 0.150, Tokens/sec 215.874, Trained Tokens 82359, Peak mem 11.335 GB
+ Iter 99: Train loss 1.973, Learning Rate 9.413e-05, It/sec 0.142, Tokens/sec 213.572, Trained Tokens 83860, Peak mem 11.335 GB
+ Iter 100: Train loss 1.560, Learning Rate 9.399e-05, It/sec 0.311, Tokens/sec 229.391, Trained Tokens 84598, Peak mem 11.335 GB
+ Iter 100: Saved adapter weights to adapters-uncertain/adapters.safetensors and adapters-uncertain/0000100_adapters.safetensors.
+ Iter 101: Train loss 1.504, Learning Rate 9.386e-05, It/sec 0.425, Tokens/sec 241.324, Trained Tokens 85166, Peak mem 11.335 GB
+ Iter 102: Train loss 1.733, Learning Rate 9.372e-05, It/sec 0.213, Tokens/sec 221.280, Trained Tokens 86203, Peak mem 11.335 GB
+ Iter 103: Train loss 1.812, Learning Rate 9.359e-05, It/sec 0.336, Tokens/sec 231.219, Trained Tokens 86892, Peak mem 11.335 GB
+ Iter 104: Train loss 1.470, Learning Rate 9.345e-05, It/sec 0.305, Tokens/sec 232.397, Trained Tokens 87654, Peak mem 11.335 GB
+ Iter 105: Train loss 1.567, Learning Rate 9.331e-05, It/sec 0.534, Tokens/sec 221.032, Trained Tokens 88068, Peak mem 11.335 GB
+ Iter 106: Train loss 2.022, Learning Rate 9.317e-05, It/sec 0.154, Tokens/sec 180.084, Trained Tokens 89234, Peak mem 11.335 GB
+ Iter 107: Train loss 1.388, Learning Rate 9.303e-05, It/sec 0.334, Tokens/sec 152.616, Trained Tokens 89691, Peak mem 11.335 GB
+ Iter 108: Train loss 1.450, Learning Rate 9.288e-05, It/sec 0.546, Tokens/sec 199.242, Trained Tokens 90056, Peak mem 11.335 GB
+ Iter 109: Train loss 1.655, Learning Rate 9.274e-05, It/sec 0.150, Tokens/sec 138.934, Trained Tokens 90982, Peak mem 11.335 GB
+ Iter 110: Train loss 1.427, Learning Rate 9.259e-05, It/sec 0.238, Tokens/sec 149.505, Trained Tokens 91610, Peak mem 11.335 GB
+ Iter 111: Train loss 1.566, Learning Rate 9.244e-05, It/sec 0.178, Tokens/sec 157.267, Trained Tokens 92495, Peak mem 11.335 GB
+ Iter 112: Train loss 1.379, Learning Rate 9.230e-05, It/sec 0.533, Tokens/sec 163.554, Trained Tokens 92802, Peak mem 11.335 GB
+ Iter 113: Train loss 1.675, Learning Rate 9.214e-05, It/sec 0.352, Tokens/sec 152.955, Trained Tokens 93237, Peak mem 11.335 GB
+ Iter 114: Train loss 1.354, Learning Rate 9.199e-05, It/sec 0.198, Tokens/sec 117.122, Trained Tokens 93830, Peak mem 11.335 GB
+ Iter 115: Train loss 1.704, Learning Rate 9.184e-05, It/sec 0.336, Tokens/sec 140.509, Trained Tokens 94248, Peak mem 11.335 GB
+ Iter 116: Train loss 1.906, Learning Rate 9.169e-05, It/sec 0.067, Tokens/sec 111.677, Trained Tokens 95921, Peak mem 11.356 GB
+ Iter 117: Train loss 1.592, Learning Rate 9.153e-05, It/sec 0.152, Tokens/sec 170.913, Trained Tokens 97043, Peak mem 11.356 GB
+ Iter 118: Train loss 1.773, Learning Rate 9.137e-05, It/sec 0.487, Tokens/sec 197.761, Trained Tokens 97449, Peak mem 11.356 GB
+ Iter 119: Train loss 1.486, Learning Rate 9.122e-05, It/sec 0.333, Tokens/sec 198.415, Trained Tokens 98045, Peak mem 11.356 GB
+ Iter 120: Train loss 1.477, Learning Rate 9.106e-05, It/sec 0.114, Tokens/sec 107.414, Trained Tokens 98989, Peak mem 11.356 GB
+ Iter 121: Train loss 1.677, Learning Rate 9.090e-05, It/sec 0.139, Tokens/sec 112.483, Trained Tokens 99801, Peak mem 11.356 GB
+ Iter 122: Train loss 1.871, Learning Rate 9.073e-05, It/sec 0.198, Tokens/sec 152.147, Trained Tokens 100571, Peak mem 11.356 GB
+ Iter 123: Train loss 1.758, Learning Rate 9.057e-05, It/sec 0.081, Tokens/sec 88.125, Trained Tokens 101653, Peak mem 11.356 GB
+ Iter 124: Train loss 1.814, Learning Rate 9.041e-05, It/sec 0.103, Tokens/sec 59.313, Trained Tokens 102230, Peak mem 11.356 GB
+ Iter 125: Train loss 1.482, Learning Rate 9.024e-05, It/sec 0.070, Tokens/sec 64.781, Trained Tokens 103152, Peak mem 11.356 GB
+ Iter 126: Train loss 1.880, Learning Rate 9.008e-05, It/sec 0.075, Tokens/sec 79.846, Trained Tokens 104223, Peak mem 11.356 GB
+ Iter 127: Train loss 1.732, Learning Rate 8.991e-05, It/sec 0.057, Tokens/sec 82.125, Trained Tokens 105663, Peak mem 11.356 GB
+ Iter 128: Train loss 1.465, Learning Rate 8.974e-05, It/sec 0.272, Tokens/sec 156.732, Trained Tokens 106239, Peak mem 11.356 GB
+ Iter 129: Train loss 1.576, Learning Rate 8.957e-05, It/sec 0.136, Tokens/sec 147.993, Trained Tokens 107329, Peak mem 11.356 GB
+ Iter 130: Train loss 1.671, Learning Rate 8.940e-05, It/sec 0.168, Tokens/sec 129.854, Trained Tokens 108103, Peak mem 11.356 GB
+ Iter 131: Train loss 1.678, Learning Rate 8.922e-05, It/sec 0.126, Tokens/sec 136.512, Trained Tokens 109186, Peak mem 11.356 GB
+ Iter 132: Train loss 1.305, Learning Rate 8.905e-05, It/sec 0.183, Tokens/sec 122.220, Trained Tokens 109853, Peak mem 11.356 GB
+ Iter 133: Train loss 1.383, Learning Rate 8.887e-05, It/sec 0.234, Tokens/sec 123.316, Trained Tokens 110381, Peak mem 11.356 GB
+ Iter 134: Train loss 1.558, Learning Rate 8.870e-05, It/sec 0.155, Tokens/sec 108.064, Trained Tokens 111077, Peak mem 11.356 GB
+ Iter 135: Train loss 1.691, Learning Rate 8.852e-05, It/sec 0.152, Tokens/sec 118.928, Trained Tokens 111860, Peak mem 11.356 GB
+ Iter 136: Train loss 1.381, Learning Rate 8.834e-05, It/sec 0.227, Tokens/sec 89.709, Trained Tokens 112256, Peak mem 11.356 GB
+ Iter 137: Train loss 1.486, Learning Rate 8.816e-05, It/sec 0.078, Tokens/sec 87.493, Trained Tokens 113382, Peak mem 11.356 GB
+ Iter 138: Train loss 1.228, Learning Rate 8.798e-05, It/sec 0.222, Tokens/sec 73.934, Trained Tokens 113715, Peak mem 11.356 GB
+ Iter 139: Train loss 1.710, Learning Rate 8.780e-05, It/sec 0.157, Tokens/sec 103.224, Trained Tokens 114371, Peak mem 11.356 GB
+ Iter 140: Train loss 1.468, Learning Rate 8.761e-05, It/sec 0.370, Tokens/sec 177.750, Trained Tokens 114851, Peak mem 11.356 GB
+ Iter 141: Train loss 1.613, Learning Rate 8.743e-05, It/sec 0.272, Tokens/sec 173.025, Trained Tokens 115487, Peak mem 11.356 GB
+ Iter 142: Train loss 1.598, Learning Rate 8.724e-05, It/sec 0.584, Tokens/sec 204.563, Trained Tokens 115837, Peak mem 11.356 GB
+ Iter 143: Train loss 1.918, Learning Rate 8.706e-05, It/sec 0.285, Tokens/sec 176.890, Trained Tokens 116458, Peak mem 11.356 GB
+ Iter 144: Train loss 1.509, Learning Rate 8.687e-05, It/sec 0.157, Tokens/sec 99.188, Trained Tokens 117090, Peak mem 11.356 GB
+ Iter 145: Train loss 1.729, Learning Rate 8.668e-05, It/sec 0.089, Tokens/sec 127.656, Trained Tokens 118528, Peak mem 11.356 GB
+ Iter 146: Train loss 1.362, Learning Rate 8.649e-05, It/sec 0.273, Tokens/sec 124.593, Trained Tokens 118985, Peak mem 11.356 GB
+ Iter 147: Train loss 1.487, Learning Rate 8.630e-05, It/sec 0.272, Tokens/sec 114.244, Trained Tokens 119405, Peak mem 11.356 GB
+ Iter 148: Train loss 1.863, Learning Rate 8.610e-05, It/sec 0.055, Tokens/sec 101.213, Trained Tokens 121246, Peak mem 11.560 GB
+ Iter 149: Train loss 1.646, Learning Rate 8.591e-05, It/sec 0.103, Tokens/sec 127.635, Trained Tokens 122485, Peak mem 11.560 GB
+ Iter 150: Train loss 1.416, Learning Rate 8.571e-05, It/sec 0.082, Tokens/sec 120.353, Trained Tokens 123959, Peak mem 11.560 GB
+ Iter 151: Train loss 1.664, Learning Rate 8.552e-05, It/sec 0.076, Tokens/sec 120.389, Trained Tokens 125551, Peak mem 11.560 GB
+ Iter 152: Train loss 1.609, Learning Rate 8.532e-05, It/sec 0.285, Tokens/sec 128.325, Trained Tokens 126002, Peak mem 11.560 GB
+ Iter 153: Train loss 1.951, Learning Rate 8.512e-05, It/sec 0.113, Tokens/sec 126.271, Trained Tokens 127122, Peak mem 11.560 GB
+ Iter 154: Train loss 1.851, Learning Rate 8.493e-05, It/sec 0.130, Tokens/sec 119.761, Trained Tokens 128045, Peak mem 11.560 GB
+ Iter 155: Train loss 1.507, Learning Rate 8.472e-05, It/sec 0.147, Tokens/sec 114.694, Trained Tokens 128824, Peak mem 11.560 GB
+ Iter 156: Train loss 1.941, Learning Rate 8.452e-05, It/sec 0.247, Tokens/sec 181.818, Trained Tokens 129560, Peak mem 11.560 GB
+ Iter 157: Train loss 1.959, Learning Rate 8.432e-05, It/sec 0.069, Tokens/sec 135.785, Trained Tokens 131520, Peak mem 11.782 GB
+ Iter 158: Train loss 1.697, Learning Rate 8.412e-05, It/sec 0.279, Tokens/sec 145.265, Trained Tokens 132041, Peak mem 11.782 GB
+ Iter 159: Train loss 1.412, Learning Rate 8.391e-05, It/sec 0.354, Tokens/sec 138.051, Trained Tokens 132431, Peak mem 11.782 GB
+ Iter 160: Train loss 1.518, Learning Rate 8.371e-05, It/sec 0.256, Tokens/sec 175.024, Trained Tokens 133115, Peak mem 11.782 GB
+ Iter 161: Train loss 1.601, Learning Rate 8.350e-05, It/sec 0.198, Tokens/sec 159.841, Trained Tokens 133924, Peak mem 11.782 GB
+ Iter 162: Train loss 1.424, Learning Rate 8.330e-05, It/sec 0.350, Tokens/sec 145.474, Trained Tokens 134340, Peak mem 11.782 GB
+ Iter 163: Train loss 1.291, Learning Rate 8.309e-05, It/sec 0.232, Tokens/sec 129.760, Trained Tokens 134900, Peak mem 11.782 GB
+ Iter 164: Train loss 1.435, Learning Rate 8.288e-05, It/sec 0.285, Tokens/sec 120.330, Trained Tokens 135322, Peak mem 11.782 GB
+ Iter 165: Train loss 1.498, Learning Rate 8.267e-05, It/sec 0.105, Tokens/sec 103.297, Trained Tokens 136309, Peak mem 11.782 GB
+ Iter 166: Train loss 1.482, Learning Rate 8.246e-05, It/sec 0.070, Tokens/sec 85.321, Trained Tokens 137522, Peak mem 11.782 GB
+ Iter 167: Train loss 1.566, Learning Rate 8.224e-05, It/sec 0.138, Tokens/sec 129.446, Trained Tokens 138462, Peak mem 11.782 GB
+ Iter 168: Train loss 1.838, Learning Rate 8.203e-05, It/sec 0.388, Tokens/sec 122.693, Trained Tokens 138778, Peak mem 11.782 GB
+ Iter 169: Train loss 1.523, Learning Rate 8.182e-05, It/sec 0.222, Tokens/sec 139.885, Trained Tokens 139409, Peak mem 11.782 GB
+ Iter 170: Train loss 1.404, Learning Rate 8.160e-05, It/sec 0.191, Tokens/sec 97.913, Trained Tokens 139921, Peak mem 11.782 GB
+ Iter 171: Train loss 1.638, Learning Rate 8.139e-05, It/sec 0.277, Tokens/sec 157.652, Trained Tokens 140491, Peak mem 11.782 GB
+ Iter 172: Train loss 1.829, Learning Rate 8.117e-05, It/sec 0.104, Tokens/sec 100.631, Trained Tokens 141455, Peak mem 11.782 GB
+ Iter 173: Train loss 1.433, Learning Rate 8.095e-05, It/sec 0.152, Tokens/sec 110.829, Trained Tokens 142186, Peak mem 11.782 GB
+ Iter 174: Train loss 1.433, Learning Rate 8.073e-05, It/sec 0.094, Tokens/sec 94.337, Trained Tokens 143193, Peak mem 11.782 GB
+ Iter 175: Train loss 1.726, Learning Rate 8.051e-05, It/sec 0.156, Tokens/sec 133.037, Trained Tokens 144044, Peak mem 11.782 GB
+ Iter 176: Train loss 1.432, Learning Rate 8.029e-05, It/sec 0.458, Tokens/sec 165.370, Trained Tokens 144405, Peak mem 11.782 GB
+ Iter 177: Train loss 1.507, Learning Rate 8.007e-05, It/sec 0.197, Tokens/sec 152.644, Trained Tokens 145181, Peak mem 11.782 GB
+ Iter 178: Train loss 1.374, Learning Rate 7.985e-05, It/sec 0.146, Tokens/sec 106.122, Trained Tokens 145910, Peak mem 11.782 GB
+ Iter 179: Train loss 1.345, Learning Rate 7.962e-05, It/sec 0.413, Tokens/sec 134.483, Trained Tokens 146236, Peak mem 11.782 GB
+ Iter 180: Train loss 1.358, Learning Rate 7.940e-05, It/sec 0.119, Tokens/sec 124.365, Trained Tokens 147277, Peak mem 11.782 GB
+ Iter 181: Train loss 1.309, Learning Rate 7.918e-05, It/sec 0.151, Tokens/sec 91.185, Trained Tokens 147880, Peak mem 11.782 GB
+ Iter 182: Train loss 1.445, Learning Rate 7.895e-05, It/sec 0.243, Tokens/sec 99.055, Trained Tokens 148287, Peak mem 11.782 GB
+ Iter 183: Train loss 1.803, Learning Rate 7.872e-05, It/sec 0.181, Tokens/sec 86.738, Trained Tokens 148765, Peak mem 11.782 GB
+ Iter 184: Train loss 1.597, Learning Rate 7.850e-05, It/sec 0.101, Tokens/sec 81.253, Trained Tokens 149572, Peak mem 11.782 GB
+ Iter 185: Train loss 1.558, Learning Rate 7.827e-05, It/sec 0.067, Tokens/sec 88.088, Trained Tokens 150884, Peak mem 11.782 GB
+ Iter 186: Train loss 2.014, Learning Rate 7.804e-05, It/sec 0.099, Tokens/sec 150.324, Trained Tokens 152406, Peak mem 11.782 GB
+ Iter 187: Train loss 1.533, Learning Rate 7.781e-05, It/sec 0.215, Tokens/sec 174.980, Trained Tokens 153221, Peak mem 11.782 GB
+ Iter 188: Train loss 1.521, Learning Rate 7.758e-05, It/sec 0.210, Tokens/sec 173.442, Trained Tokens 154045, Peak mem 11.782 GB
+ Iter 189: Train loss 1.903, Learning Rate 7.735e-05, It/sec 0.077, Tokens/sec 128.014, Trained Tokens 155713, Peak mem 11.782 GB
+ Iter 190: Train loss 1.923, Learning Rate 7.711e-05, It/sec 0.118, Tokens/sec 114.927, Trained Tokens 156686, Peak mem 11.782 GB
+ Iter 191: Train loss 1.451, Learning Rate 7.688e-05, It/sec 0.093, Tokens/sec 99.014, Trained Tokens 157754, Peak mem 11.782 GB
+ Iter 192: Train loss 1.648, Learning Rate 7.665e-05, It/sec 0.133, Tokens/sec 114.264, Trained Tokens 158611, Peak mem 11.782 GB
+ Iter 193: Train loss 1.704, Learning Rate 7.641e-05, It/sec 0.088, Tokens/sec 125.325, Trained Tokens 160041, Peak mem 11.782 GB
+ Iter 194: Train loss 1.337, Learning Rate 7.618e-05, It/sec 0.423, Tokens/sec 174.465, Trained Tokens 160453, Peak mem 11.782 GB
+ Iter 195: Train loss 1.559, Learning Rate 7.594e-05, It/sec 0.133, Tokens/sec 153.956, Trained Tokens 161612, Peak mem 11.782 GB
+ Iter 196: Train loss 1.245, Learning Rate 7.570e-05, It/sec 0.533, Tokens/sec 178.516, Trained Tokens 161947, Peak mem 11.782 GB
+ Iter 197: Train loss 1.589, Learning Rate 7.547e-05, It/sec 0.329, Tokens/sec 202.147, Trained Tokens 162562, Peak mem 11.782 GB
+ Iter 198: Train loss 1.669, Learning Rate 7.523e-05, It/sec 0.150, Tokens/sec 165.004, Trained Tokens 163663, Peak mem 11.782 GB
+ Iter 199: Train loss 1.497, Learning Rate 7.499e-05, It/sec 0.091, Tokens/sec 134.078, Trained Tokens 165142, Peak mem 11.782 GB
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 200: Val loss nan, Val took 0.011s
+ Iter 200: Train loss 1.569, Learning Rate 7.475e-05, It/sec 0.188, Tokens/sec 187.390, Trained Tokens 166139, Peak mem 11.782 GB
+ Iter 200: Saved adapter weights to adapters-uncertain/adapters.safetensors and adapters-uncertain/0000200_adapters.safetensors.
+ Iter 201: Train loss 1.525, Learning Rate 7.451e-05, It/sec 0.151, Tokens/sec 139.218, Trained Tokens 167059, Peak mem 11.782 GB
+ Iter 202: Train loss 1.718, Learning Rate 7.427e-05, It/sec 0.148, Tokens/sec 157.481, Trained Tokens 168124, Peak mem 11.782 GB
+ Iter 203: Train loss 1.437, Learning Rate 7.402e-05, It/sec 0.288, Tokens/sec 140.940, Trained Tokens 168614, Peak mem 11.782 GB
+ Iter 204: Train loss 1.580, Learning Rate 7.378e-05, It/sec 0.072, Tokens/sec 114.469, Trained Tokens 170205, Peak mem 11.782 GB
+ Iter 205: Train loss 1.869, Learning Rate 7.354e-05, It/sec 0.206, Tokens/sec 171.255, Trained Tokens 171035, Peak mem 11.782 GB
+ Iter 206: Train loss 1.551, Learning Rate 7.329e-05, It/sec 0.134, Tokens/sec 152.601, Trained Tokens 172177, Peak mem 11.782 GB
+ Iter 207: Train loss 1.391, Learning Rate 7.305e-05, It/sec 0.092, Tokens/sec 102.490, Trained Tokens 173286, Peak mem 11.782 GB
+ Iter 208: Train loss 1.631, Learning Rate 7.281e-05, It/sec 0.091, Tokens/sec 115.483, Trained Tokens 174551, Peak mem 11.782 GB
+ Iter 209: Train loss 1.721, Learning Rate 7.256e-05, It/sec 0.159, Tokens/sec 173.114, Trained Tokens 175638, Peak mem 11.782 GB
+ Iter 210: Train loss 1.493, Learning Rate 7.231e-05, It/sec 0.322, Tokens/sec 153.347, Trained Tokens 176114, Peak mem 11.782 GB
+ Iter 211: Train loss 1.385, Learning Rate 7.207e-05, It/sec 0.253, Tokens/sec 157.059, Trained Tokens 176736, Peak mem 11.782 GB
+ Iter 212: Train loss 1.344, Learning Rate 7.182e-05, It/sec 0.182, Tokens/sec 120.575, Trained Tokens 177399, Peak mem 11.782 GB
+ Iter 213: Train loss 1.746, Learning Rate 7.157e-05, It/sec 0.307, Tokens/sec 169.804, Trained Tokens 177953, Peak mem 11.782 GB
+ Iter 214: Train loss 1.638, Learning Rate 7.132e-05, It/sec 0.364, Tokens/sec 149.921, Trained Tokens 178365, Peak mem 11.782 GB
+ Iter 215: Train loss 1.638, Learning Rate 7.107e-05, It/sec 0.095, Tokens/sec 139.525, Trained Tokens 179835, Peak mem 11.782 GB
+ Iter 216: Train loss 1.518, Learning Rate 7.082e-05, It/sec 0.100, Tokens/sec 103.544, Trained Tokens 180868, Peak mem 11.782 GB
+ Iter 217: Train loss 1.405, Learning Rate 7.057e-05, It/sec 0.097, Tokens/sec 90.170, Trained Tokens 181799, Peak mem 11.782 GB
+ Iter 218: Train loss 1.280, Learning Rate 7.032e-05, It/sec 0.519, Tokens/sec 126.674, Trained Tokens 182043, Peak mem 11.782 GB
+ Iter 219: Train loss 1.496, Learning Rate 7.007e-05, It/sec 0.226, Tokens/sec 98.952, Trained Tokens 182480, Peak mem 11.782 GB
+ Iter 220: Train loss 1.613, Learning Rate 6.982e-05, It/sec 0.060, Tokens/sec 93.248, Trained Tokens 184034, Peak mem 11.782 GB
+ Iter 221: Train loss 1.418, Learning Rate 6.956e-05, It/sec 0.165, Tokens/sec 135.237, Trained Tokens 184852, Peak mem 11.782 GB
+ Iter 222: Train loss 1.541, Learning Rate 6.931e-05, It/sec 0.147, Tokens/sec 143.088, Trained Tokens 185824, Peak mem 11.782 GB
+ Iter 223: Train loss 1.748, Learning Rate 6.906e-05, It/sec 0.064, Tokens/sec 117.759, Trained Tokens 187673, Peak mem 11.782 GB
+ Iter 224: Train loss 1.354, Learning Rate 6.880e-05, It/sec 0.146, Tokens/sec 119.932, Trained Tokens 188497, Peak mem 11.782 GB
+ Iter 225: Train loss 1.589, Learning Rate 6.855e-05, It/sec 0.087, Tokens/sec 104.801, Trained Tokens 189707, Peak mem 11.782 GB
+ Iter 226: Train loss 1.295, Learning Rate 6.829e-05, It/sec 0.161, Tokens/sec 140.384, Trained Tokens 190580, Peak mem 11.782 GB
+ Iter 227: Train loss 1.571, Learning Rate 6.804e-05, It/sec 0.141, Tokens/sec 154.074, Trained Tokens 191675, Peak mem 11.782 GB
+ Iter 228: Train loss 1.486, Learning Rate 6.778e-05, It/sec 0.158, Tokens/sec 129.157, Trained Tokens 192493, Peak mem 11.782 GB
+ Iter 229: Train loss 1.524, Learning Rate 6.753e-05, It/sec 0.148, Tokens/sec 136.942, Trained Tokens 193416, Peak mem 11.782 GB
+ Iter 230: Train loss 1.282, Learning Rate 6.727e-05, It/sec 0.240, Tokens/sec 131.263, Trained Tokens 193964, Peak mem 11.782 GB
+ Iter 231: Train loss 1.636, Learning Rate 6.701e-05, It/sec 0.349, Tokens/sec 161.153, Trained Tokens 194426, Peak mem 11.782 GB
+ Iter 232: Train loss 1.562, Learning Rate 6.675e-05, It/sec 0.132, Tokens/sec 126.171, Trained Tokens 195382, Peak mem 11.782 GB
+ Iter 233: Train loss 1.378, Learning Rate 6.650e-05, It/sec 0.179, Tokens/sec 138.775, Trained Tokens 196158, Peak mem 11.782 GB
+ Iter 234: Train loss 1.569, Learning Rate 6.624e-05, It/sec 0.142, Tokens/sec 129.257, Trained Tokens 197067, Peak mem 11.782 GB
+ Iter 235: Train loss 1.229, Learning Rate 6.598e-05, It/sec 0.231, Tokens/sec 163.433, Trained Tokens 197774, Peak mem 11.782 GB
+ Iter 236: Train loss 1.378, Learning Rate 6.572e-05, It/sec 0.170, Tokens/sec 159.916, Trained Tokens 198712, Peak mem 11.782 GB
+ Iter 237: Train loss 1.388, Learning Rate 6.546e-05, It/sec 0.337, Tokens/sec 147.581, Trained Tokens 199150, Peak mem 11.782 GB
+ Iter 238: Train loss 1.363, Learning Rate 6.520e-05, It/sec 0.171, Tokens/sec 127.357, Trained Tokens 199893, Peak mem 11.782 GB
+ Iter 239: Train loss 1.674, Learning Rate 6.494e-05, It/sec 0.150, Tokens/sec 167.995, Trained Tokens 201013, Peak mem 11.782 GB
+ Iter 240: Train loss 1.417, Learning Rate 6.468e-05, It/sec 0.213, Tokens/sec 88.623, Trained Tokens 201430, Peak mem 11.782 GB
+ Iter 241: Train loss 1.587, Learning Rate 6.442e-05, It/sec 0.133, Tokens/sec 128.084, Trained Tokens 202395, Peak mem 11.782 GB
+ Iter 242: Train loss 1.700, Learning Rate 6.416e-05, It/sec 0.079, Tokens/sec 143.003, Trained Tokens 204195, Peak mem 11.782 GB
+ Iter 243: Train loss 1.610, Learning Rate 6.389e-05, It/sec 0.180, Tokens/sec 141.183, Trained Tokens 204981, Peak mem 11.782 GB
+ Iter 244: Train loss 1.560, Learning Rate 6.363e-05, It/sec 0.281, Tokens/sec 108.921, Trained Tokens 205368, Peak mem 11.782 GB
+ Iter 245: Train loss 1.569, Learning Rate 6.337e-05, It/sec 0.124, Tokens/sec 126.598, Trained Tokens 206388, Peak mem 11.782 GB
+ Iter 246: Train loss 1.281, Learning Rate 6.311e-05, It/sec 0.332, Tokens/sec 129.395, Trained Tokens 206778, Peak mem 11.782 GB
+ Iter 247: Train loss 1.670, Learning Rate 6.284e-05, It/sec 0.092, Tokens/sec 107.997, Trained Tokens 207947, Peak mem 11.782 GB
+ Iter 248: Train loss 1.578, Learning Rate 6.258e-05, It/sec 0.213, Tokens/sec 99.499, Trained Tokens 208415, Peak mem 11.782 GB
+ Iter 249: Train loss 1.435, Learning Rate 6.232e-05, It/sec 0.295, Tokens/sec 140.004, Trained Tokens 208890, Peak mem 11.782 GB
+ Iter 250: Train loss 1.513, Learning Rate 6.205e-05, It/sec 0.097, Tokens/sec 131.962, Trained Tokens 210257, Peak mem 11.782 GB
+ Iter 251: Train loss 1.274, Learning Rate 6.179e-05, It/sec 0.221, Tokens/sec 142.926, Trained Tokens 210904, Peak mem 11.782 GB
+ Iter 252: Train loss 1.523, Learning Rate 6.152e-05, It/sec 0.170, Tokens/sec 134.736, Trained Tokens 211695, Peak mem 11.782 GB
+ Iter 253: Train loss 1.345, Learning Rate 6.126e-05, It/sec 0.125, Tokens/sec 103.124, Trained Tokens 212519, Peak mem 11.782 GB
+ Iter 254: Train loss 1.436, Learning Rate 6.100e-05, It/sec 0.411, Tokens/sec 176.567, Trained Tokens 212949, Peak mem 11.782 GB
+ Iter 255: Train loss 1.447, Learning Rate 6.073e-05, It/sec 0.137, Tokens/sec 91.668, Trained Tokens 213619, Peak mem 11.782 GB
+ Iter 256: Train loss 1.104, Learning Rate 6.046e-05, It/sec 0.281, Tokens/sec 98.838, Trained Tokens 213971, Peak mem 11.782 GB
+ Iter 257: Train loss 1.680, Learning Rate 6.020e-05, It/sec 0.199, Tokens/sec 149.657, Trained Tokens 214722, Peak mem 11.782 GB
+ Iter 258: Train loss 1.250, Learning Rate 5.993e-05, It/sec 0.223, Tokens/sec 132.747, Trained Tokens 215318, Peak mem 11.782 GB
+ Iter 259: Train loss 1.360, Learning Rate 5.967e-05, It/sec 0.114, Tokens/sec 101.960, Trained Tokens 216214, Peak mem 11.782 GB
+ Iter 260: Train loss 1.179, Learning Rate 5.940e-05, It/sec 0.308, Tokens/sec 167.504, Trained Tokens 216757, Peak mem 11.782 GB
+ Iter 261: Train loss 1.406, Learning Rate 5.914e-05, It/sec 0.169, Tokens/sec 172.783, Trained Tokens 217780, Peak mem 11.782 GB
+ Iter 262: Train loss 1.157, Learning Rate 5.887e-05, It/sec 0.423, Tokens/sec 134.219, Trained Tokens 218097, Peak mem 11.782 GB
+ Iter 263: Train loss 1.462, Learning Rate 5.860e-05, It/sec 0.255, Tokens/sec 145.077, Trained Tokens 218666, Peak mem 11.782 GB
+ Iter 264: Train loss 1.762, Learning Rate 5.834e-05, It/sec 0.078, Tokens/sec 124.590, Trained Tokens 220256, Peak mem 11.782 GB
+ Iter 265: Train loss 1.315, Learning Rate 5.807e-05, It/sec 0.338, Tokens/sec 143.162, Trained Tokens 220680, Peak mem 11.782 GB
+ Iter 266: Train loss 0.912, Learning Rate 5.780e-05, It/sec 0.410, Tokens/sec 108.294, Trained Tokens 220944, Peak mem 11.782 GB
+ Iter 267: Train loss 1.754, Learning Rate 5.754e-05, It/sec 0.076, Tokens/sec 130.289, Trained Tokens 222660, Peak mem 11.782 GB
+ Iter 268: Train loss 1.414, Learning Rate 5.727e-05, It/sec 0.226, Tokens/sec 149.885, Trained Tokens 223323, Peak mem 11.782 GB
+ Iter 269: Train loss 1.314, Learning Rate 5.700e-05, It/sec 0.221, Tokens/sec 168.263, Trained Tokens 224086, Peak mem 11.782 GB
+ Iter 270: Train loss 1.488, Learning Rate 5.674e-05, It/sec 0.086, Tokens/sec 102.069, Trained Tokens 225273, Peak mem 11.782 GB
+ Iter 271: Train loss 1.396, Learning Rate 5.647e-05, It/sec 0.167, Tokens/sec 97.747, Trained Tokens 225858, Peak mem 11.782 GB
+ Iter 272: Train loss 1.443, Learning Rate 5.620e-05, It/sec 0.184, Tokens/sec 149.761, Trained Tokens 226670, Peak mem 11.782 GB
+ Iter 273: Train loss 1.567, Learning Rate 5.594e-05, It/sec 0.105, Tokens/sec 138.126, Trained Tokens 227981, Peak mem 11.782 GB
+ Iter 274: Train loss 1.307, Learning Rate 5.567e-05, It/sec 0.213, Tokens/sec 152.938, Trained Tokens 228700, Peak mem 11.782 GB
+ Iter 275: Train loss 1.732, Learning Rate 5.540e-05, It/sec 0.356, Tokens/sec 171.426, Trained Tokens 229181, Peak mem 11.782 GB
+ Iter 276: Train loss 1.738, Learning Rate 5.513e-05, It/sec 0.080, Tokens/sec 126.795, Trained Tokens 230758, Peak mem 11.782 GB
+ Iter 277: Train loss 1.373, Learning Rate 5.487e-05, It/sec 0.263, Tokens/sec 174.896, Trained Tokens 231424, Peak mem 11.782 GB
+ Iter 278: Train loss 1.413, Learning Rate 5.460e-05, It/sec 0.529, Tokens/sec 174.128, Trained Tokens 231753, Peak mem 11.782 GB
+ Iter 279: Train loss 1.471, Learning Rate 5.433e-05, It/sec 0.250, Tokens/sec 181.529, Trained Tokens 232480, Peak mem 11.782 GB
+ Iter 280: Train loss 1.737, Learning Rate 5.406e-05, It/sec 0.073, Tokens/sec 143.948, Trained Tokens 234450, Peak mem 11.782 GB
+ Iter 281: Train loss 1.411, Learning Rate 5.380e-05, It/sec 0.164, Tokens/sec 134.249, Trained Tokens 235270, Peak mem 11.782 GB
+ Iter 282: Train loss 1.578, Learning Rate 5.353e-05, It/sec 0.138, Tokens/sec 158.257, Trained Tokens 236414, Peak mem 11.782 GB
+ Iter 283: Train loss 1.364, Learning Rate 5.326e-05, It/sec 0.116, Tokens/sec 110.453, Trained Tokens 237367, Peak mem 11.782 GB
+ Iter 284: Train loss 1.383, Learning Rate 5.300e-05, It/sec 0.147, Tokens/sec 100.354, Trained Tokens 238050, Peak mem 11.782 GB
+ Iter 285: Train loss 1.574, Learning Rate 5.273e-05, It/sec 0.118, Tokens/sec 129.320, Trained Tokens 239146, Peak mem 11.782 GB
+ Iter 286: Train loss 1.701, Learning Rate 5.246e-05, It/sec 0.111, Tokens/sec 118.324, Trained Tokens 240210, Peak mem 11.782 GB
+ Iter 287: Train loss 1.205, Learning Rate 5.220e-05, It/sec 0.336, Tokens/sec 144.649, Trained Tokens 240640, Peak mem 11.782 GB
+ Iter 288: Train loss 1.313, Learning Rate 5.193e-05, It/sec 0.165, Tokens/sec 106.317, Trained Tokens 241285, Peak mem 11.782 GB
+ Iter 289: Train loss 1.100, Learning Rate 5.166e-05, It/sec 0.119, Tokens/sec 70.448, Trained Tokens 241879, Peak mem 11.782 GB
+ Iter 290: Train loss 1.193, Learning Rate 5.140e-05, It/sec 0.148, Tokens/sec 85.814, Trained Tokens 242458, Peak mem 11.782 GB
+ Iter 291: Train loss 1.398, Learning Rate 5.113e-05, It/sec 0.073, Tokens/sec 75.792, Trained Tokens 243492, Peak mem 11.782 GB
+ Iter 292: Train loss 1.420, Learning Rate 5.086e-05, It/sec 0.061, Tokens/sec 67.486, Trained Tokens 244601, Peak mem 11.782 GB
+ Iter 293: Train loss 1.565, Learning Rate 5.060e-05, It/sec 0.101, Tokens/sec 97.134, Trained Tokens 245563, Peak mem 11.782 GB
+ Iter 294: Train loss 1.379, Learning Rate 5.033e-05, It/sec 0.130, Tokens/sec 58.306, Trained Tokens 246010, Peak mem 11.782 GB
+ Iter 295: Train loss 1.367, Learning Rate 5.007e-05, It/sec 0.113, Tokens/sec 69.855, Trained Tokens 246627, Peak mem 11.782 GB
+ Iter 296: Train loss 1.738, Learning Rate 4.980e-05, It/sec 0.173, Tokens/sec 82.912, Trained Tokens 247107, Peak mem 11.782 GB
+ Iter 297: Train loss 1.321, Learning Rate 4.954e-05, It/sec 0.121, Tokens/sec 120.667, Trained Tokens 248105, Peak mem 11.782 GB
+ Iter 298: Train loss 1.191, Learning Rate 4.927e-05, It/sec 0.234, Tokens/sec 108.731, Trained Tokens 248570, Peak mem 11.782 GB
+ Iter 299: Train loss 1.158, Learning Rate 4.900e-05, It/sec 0.122, Tokens/sec 94.183, Trained Tokens 249341, Peak mem 11.782 GB
+ Calculating loss...: 0it [00:00, ?it/s]
+ Iter 300: Val loss nan, Val took 0.021s
+ Iter 300: Train loss 1.497, Learning Rate 4.874e-05, It/sec 0.163, Tokens/sec 154.640, Trained Tokens 250292, Peak mem 11.782 GB
+ Iter 300: Saved adapter weights to adapters-uncertain/adapters.safetensors and adapters-uncertain/0000300_adapters.safetensors.
+ Saved final weights to adapters-uncertain/adapters.safetensors.
adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/requirements.txt ADDED
@@ -0,0 +1,147 @@
+ shellingham==1.5.4
+ contourpy==1.3.2
+ jiter==0.12.0
+ audioread==3.1.0
+ threadpoolctl==3.6.0
+ lazy_loader==0.4
+ GitPython==3.1.45
+ async-timeout==5.0.1
+ requests==2.32.5
+ rich==14.2.0
+ tokenizers==0.22.1
+ urllib3==2.5.0
+ exceptiongroup==1.3.1
+ numpy==2.2.6
+ click==8.3.1
+ pytz==2025.2
+ miniaudio==1.61
+ pyarrow==22.0.0
+ sse-starlette==3.2.0
+ scikit-learn==1.7.2
+ soxr==1.0.0
+ jsonschema-specifications==2025.9.1
+ python-multipart==0.0.22
+ utilsforecast==0.2.15
+ ftfy==6.3.1
+ torchvision==0.25.0
+ statsmodels==0.14.6
+ file-read-backwards==3.2.0
+ propcache==0.4.1
+ python-dotenv==1.2.1
+ anyio==4.12.0
+ mlx==0.30.6
+ wordfreq==3.1.1
+ networkx==3.4.2
+ pip==25.3
+ texttable==1.7.0
+ mlx-audio==0.3.1
+ narwhals==2.15.0
+ multidict==6.7.0
+ numba==0.63.1
+ idna==3.11
+ regex==2025.11.3
+ fonttools==4.60.1
+ openai==2.16.0
+ aiohttp==3.13.2
+ mistral_common==1.8.6
+ einshape==1.0
+ cffi==2.0.0
+ kiwisolver==1.4.9
+ tqdm==4.67.1
+ setuptools==80.9.0
+ RapidFuzz==3.14.3
+ pyparsing==3.2.5
+ starlette==0.52.1
+ tzdata==2025.2
+ mlx-lm==0.30.6
+ httpcore==1.0.9
+ decorator==5.2.1
+ certifi==2025.11.12
+ typer==0.21.1
+ pydantic==2.12.4
+ fsspec==2025.10.0
+ mcp==1.26.0
+ librosa==0.11.0
+ charset-normalizer==3.4.4
+ sympy==1.14.0
+ jsonschema==4.25.1
+ pydantic-settings==2.12.0
+ markdown-it-py==4.0.0
+ tiktoken==0.12.0
+ PyJWT==2.11.0
+ sentry-sdk==2.45.0
+ platformdirs==4.5.0
+ absl-py==2.3.1
+ transformers==5.1.0
+ diffusers==0.37.0.dev0
+ h11==0.16.0
+ gitdb==4.0.12
+ sniffio==1.3.1
+ pycparser==3.0
+ sentencepiece==0.2.1
+ importlib_metadata==8.7.1
+ mdurl==0.1.2
+ patsy==1.0.2
+ python-dateutil==2.9.0.post0
+ mpmath==1.3.0
+ pillow==12.0.0
+ PyYAML==6.0.3
+ sentence-transformers==5.1.2
+ multiprocess==0.70.18
+ pydantic_core==2.41.5
+ uvicorn==0.40.0
+ frozenlist==1.8.0
+ typer-slim==0.20.1
+ typing_extensions==4.15.0
+ aiosignal==1.4.0
+ packaging==25.0
+ cycler==0.12.1
+ cryptography==46.0.4
+ hf-xet==1.2.0
+ Jinja2==3.1.6
+ wheel==0.45.1
+ referencing==0.37.0
+ pandas==2.3.3
+ soundfile==0.13.1
+ pooch==1.8.2
+ MarkupSafe==3.0.3
+ dill==0.4.0
+ pydantic-extra-types==2.10.6
+ msgpack==1.1.2
+ distro==1.9.0
+ locate==1.1.1
+ datasets==4.4.1
+ Pygments==2.19.2
+ aiohappyeyeballs==2.6.1
+ llvmlite==0.46.0
+ attrs==25.4.0
+ huggingface_hub==1.3.5
+ nltk==3.9.2
+ torch==2.10.0
+ httpx==0.28.1
+ filelock==3.20.0
+ smmap==5.0.2
+ sounddevice==0.5.3
+ timesfm==1.3.0
+ pycountry==24.6.1
+ mlx-metal==0.30.6
+ scipy==1.15.3
+ protobuf==6.33.1
+ psutil==7.1.3
+ typing-inspection==0.4.2
+ joblib==1.5.2
+ zipp==3.23.0
+ annotated-types==0.7.0
+ accelerate==1.12.0
+ safetensors==0.6.2
+ httpx-sse==0.4.3
+ wcwidth==0.2.14
+ igraph==1.0.0
+ rpds-py==0.30.0
+ langcodes==3.5.1
+ six==1.17.0
+ wandb==0.23.0
+ yarl==1.22.0
+ pyloudnorm==0.2.0
+ xxhash==3.6.0
+ matplotlib==3.10.7
adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/wandb-metadata.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "os": "macOS-26.2-arm64-arm-64bit",
+ "python": "CPython 3.10.19",
+ "startedAt": "2026-02-16T20:49:32.288869Z",
+ "args": [
+ "--config",
+ "train.yaml"
+ ],
+ "program": "/Users/natebreslow/miniconda3/envs/mlx-experiment/bin/mlx_lm.lora",
+ "email": "nathanbreslow@gmail.com",
+ "root": "adapters-uncertain",
+ "host": "MacBook-Pro-135.local",
+ "executable": "/Users/natebreslow/miniconda3/envs/mlx-experiment/bin/python3.10",
+ "cpu_count": 16,
+ "cpu_count_logical": 16,
+ "disk": {
+ "/": {
+ "total": "1995218165760",
+ "used": "1702686375936"
+ }
+ },
+ "memory": {
+ "total": "68719476736"
+ },
+ "apple": {
+ "name": "Apple M3 Max",
+ "ecpuCores": 4,
+ "pcpuCores": 12,
+ "gpuCores": 40,
+ "memoryGb": 64,
+ "swapTotalBytes": "5368709120",
+ "ramTotalBytes": "68719476736"
+ },
+ "writerId": "4stii5owh2cve1gvc5kugyr75h6wj8db"
+ }
adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"train_loss":1.4971740245819092,"_runtime":1875.19269225,"iterations_per_second":0.16260756296367251,"_step":300,"val_loss":NaN,"_timestamp":1.77127684632109e+09,"tokens_per_second":154.63979237845257,"_wandb":{"runtime":1875},"iteration":300,"peak_memory":11.781823884,"val_time":0.020949832993210293,"learning_rate":4.8740144848125055e-05,"trained_tokens":250292}
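
The summary above is the run's final metric snapshot. Note that val_loss is serialized as a bare NaN (plausibly because val_batches is 0 in the logged config), which is not strict JSON but which Python's json module accepts by default. A minimal sketch for reading it; the path is taken from this run's files directory:

# Minimal sketch: load the run summary above and flag non-finite metrics.
# Python's json.load parses the bare NaN token by default (non-strict JSON).
import json
import math

path = "adapters-uncertain/wandb/run-20260216_154932-918bwjte/files/wandb-summary.json"
with open(path) as f:
    summary = json.load(f)

for key in ("train_loss", "val_loss", "tokens_per_second", "trained_tokens"):
    value = summary.get(key)
    flag = " (NaN)" if isinstance(value, float) and math.isnan(value) else ""
    print(f"{key}: {value}{flag}")
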
adapters-uncertain/wandb/run-20260216_154932-918bwjte/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
+ {"time":"2026-02-16T15:49:32.584829-05:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-02-16T15:49:32.835831-05:00","level":"INFO","msg":"stream: created new stream","id":"918bwjte"}
+ {"time":"2026-02-16T15:49:32.835901-05:00","level":"INFO","msg":"handler: started","stream_id":"918bwjte"}
+ {"time":"2026-02-16T15:49:32.836119-05:00","level":"INFO","msg":"stream: started","id":"918bwjte"}
+ {"time":"2026-02-16T15:49:32.836134-05:00","level":"INFO","msg":"sender: started","stream_id":"918bwjte"}
+ {"time":"2026-02-16T15:49:32.836138-05:00","level":"INFO","msg":"writer: started","stream_id":"918bwjte"}
+ {"time":"2026-02-16T16:20:48.46164-05:00","level":"INFO","msg":"stream: closing","id":"918bwjte"}
+ {"time":"2026-02-16T16:20:48.851376-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2026-02-16T16:20:48.997596-05:00","level":"INFO","msg":"handler: closed","stream_id":"918bwjte"}
+ {"time":"2026-02-16T16:20:48.997985-05:00","level":"INFO","msg":"sender: closed","stream_id":"918bwjte"}
+ {"time":"2026-02-16T16:20:48.998039-05:00","level":"INFO","msg":"stream: closed","id":"918bwjte"}
adapters-uncertain/wandb/run-20260216_154932-918bwjte/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2026-02-16 15:49:32,296 INFO MainThread:54732 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
+ 2026-02-16 15:49:32,296 INFO MainThread:54732 [wandb_setup.py:_flush():80] Configure stats pid to 54732
+ 2026-02-16 15:49:32,296 INFO MainThread:54732 [wandb_setup.py:_flush():80] Loading settings from /Users/natebreslow/.config/wandb/settings
+ 2026-02-16 15:49:32,296 INFO MainThread:54732 [wandb_setup.py:_flush():80] Loading settings from /Users/natebreslow/Documents/llmSelfReport/wandb/settings
+ 2026-02-16 15:49:32,296 INFO MainThread:54732 [wandb_setup.py:_flush():80] Loading settings from environment variables
+ 2026-02-16 15:49:32,296 INFO MainThread:54732 [wandb_init.py:setup_run_log_directory():713] Logging user logs to adapters-uncertain/wandb/run-20260216_154932-918bwjte/logs/debug.log
+ 2026-02-16 15:49:32,296 INFO MainThread:54732 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to adapters-uncertain/wandb/run-20260216_154932-918bwjte/logs/debug-internal.log
+ 2026-02-16 15:49:32,297 INFO MainThread:54732 [wandb_init.py:init():840] calling init triggers
+ 2026-02-16 15:49:32,297 INFO MainThread:54732 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
+ config: {'model': 'Qwen3-4B-Instruct-2507', 'train': True, 'data': 'training/uncertain', 'fine_tune_type': 'lora', 'optimizer': 'adam', 'mask_prompt': False, 'num_layers': 36, 'batch_size': 1, 'iters': 300, 'val_batches': 0, 'learning_rate': 0.0001, 'steps_per_report': 1, 'steps_per_eval': 200, 'grad_accumulation_steps': 1, 'resume_adapter_file': None, 'adapter_path': 'adapters-uncertain', 'save_every': 100, 'test': False, 'test_batches': 100, 'max_seq_length': 8192, 'config': 'train.yaml', 'grad_checkpoint': True, 'report_to': 'wandb', 'project_name': 'conscious-finetuning', 'seed': 0, 'optimizer_config': {'adam': {'betas': [0.9, 0.9999], 'eps': 1e-06, 'bias_correction': True}}, 'lora_parameters': {'keys': ['self_attn.q_proj', 'self_attn.v_proj', 'self_attn.k_proj', 'self_attn.o_proj', 'mlp.gate_proj', 'mlp.up_proj', 'mlp.down_proj'], 'rank': 16, 'scale': 2.0, 'dropout': 0.0}, 'lr_schedule': {'name': 'cosine_decay', 'warmup': 10, 'warmup_init': 1e-05, 'arguments': [0.0001, 529, 1e-05]}, '_wandb': {}}
+ 2026-02-16 15:49:32,297 INFO MainThread:54732 [wandb_init.py:init():888] starting backend
+ 2026-02-16 15:49:32,540 INFO MainThread:54732 [wandb_init.py:init():891] sending inform_init request
+ 2026-02-16 15:49:32,583 INFO MainThread:54732 [wandb_init.py:init():899] backend started and connected
+ 2026-02-16 15:49:32,586 INFO MainThread:54732 [wandb_init.py:init():969] updated telemetry
+ 2026-02-16 15:49:32,586 INFO MainThread:54732 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
+ 2026-02-16 15:49:33,237 INFO MainThread:54732 [wandb_init.py:init():1040] starting run threads in backend
+ 2026-02-16 15:49:33,347 INFO MainThread:54732 [wandb_run.py:_console_start():2504] atexit reg
+ 2026-02-16 15:49:33,347 INFO MainThread:54732 [wandb_run.py:_redirect():2352] redirect: wrap_raw
+ 2026-02-16 15:49:33,347 INFO MainThread:54732 [wandb_run.py:_redirect():2421] Wrapping output streams.
+ 2026-02-16 15:49:33,348 INFO MainThread:54732 [wandb_run.py:_redirect():2444] Redirects installed.
+ 2026-02-16 15:49:33,350 INFO MainThread:54732 [wandb_init.py:init():1080] run started, returning control to user process
+ 2026-02-16 16:20:48,459 INFO wandb-AsyncioManager-main:54732 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-02-16 16:20:48,460 INFO wandb-AsyncioManager-main:54732 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
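
The config recorded in this log pins the learning-rate schedule: a 10-step linear warmup from 1e-05, then cosine_decay with arguments [0.0001, 529, 1e-05]. A minimal sketch of that schedule in plain Python; the exact step at which mlx joins warmup to decay is an assumption here, so values near a given iteration can differ slightly from the logged learning_rate (4.874e-05 at iteration 300 versus roughly 4.82e-05 from this sketch):

# Minimal sketch of the logged lr_schedule: linear warmup for 10 steps
# from 1e-5 to the 1e-4 peak, then cosine decay toward 1e-5 over 529 steps.
# The warmup/decay join convention is assumed; mlx's join_schedules may
# index the decay phase slightly differently.
import math

warmup_steps, warmup_init, peak_lr = 10, 1e-5, 1e-4
decay_steps, end_lr = 529, 1e-5

def lr_at(step: int) -> float:
    if step < warmup_steps:
        # Linear warmup phase.
        return warmup_init + (peak_lr - warmup_init) * step / warmup_steps
    # Cosine decay phase, clamped at end_lr once decay_steps is exhausted.
    t = min(step - warmup_steps, decay_steps)
    cosine = 0.5 * (1 + math.cos(math.pi * t / decay_steps))
    return end_lr + (peak_lr - end_lr) * cosine

for step in (0, 10, 150, 300):
    print(step, lr_at(step))
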
adapters-uncertain/wandb/run-20260216_154932-918bwjte/run-918bwjte.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e76ab5455644c83a067107fc32a7bd8fb8f7417fd797d14daf4c0325b831ac45
+ size 416192
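
The .wandb run file itself is stored as a Git LFS pointer (consistent with the filter rule added to .gitattributes for this path), so only the three-line pointer above lives in the repository. A minimal sketch of parsing that pointer format into its fields; the helper name is hypothetical:

# Minimal sketch: parse a Git LFS pointer file like the one above into
# its spec version URL, content hash, and payload size in bytes.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    algo, _, digest = fields["oid"].partition(":")
    return {
        "version": fields["version"],
        "hash_algorithm": algo,  # e.g. "sha256"
        "digest": digest,
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:e76ab5455644c83a067107fc32a7bd8fb8f7417fd797d14daf4c0325b831ac45
size 416192"""
print(parse_lfs_pointer(pointer))
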