Upload LoRA adapter + logs (2026-03-12 14:32:09)
Browse files
- adapter_config.json +2 -2
- adapter_model.safetensors +1 -1
- best/adapter_config.json +2 -2
- best/adapter_model.safetensors +1 -1
- loss_curve.png +0 -0
- train_log.csv +10 -8
- training_args.json +3 -3
adapter_config.json
CHANGED
|
@@ -33,9 +33,9 @@
|
|
| 33 |
"revision": null,
|
| 34 |
"target_modules": [
|
| 35 |
"value",
|
| 36 |
-
"output.dense",
|
| 37 |
"query",
|
| 38 |
-
"key"
|
|
|
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": null,
|
|
|
|
| 33 |
"revision": null,
|
| 34 |
"target_modules": [
|
| 35 |
"value",
|
|
|
|
| 36 |
"query",
|
| 37 |
+
"key",
|
| 38 |
+
"output.dense"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": null,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7683872
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c2d07659b7c5b0d393b74ef97c5cb2b100908aa6842f50a314172040d2a1467
|
| 3 |
size 7683872
|
best/adapter_config.json
CHANGED
|
@@ -33,9 +33,9 @@
|
|
| 33 |
"revision": null,
|
| 34 |
"target_modules": [
|
| 35 |
"value",
|
| 36 |
-
"output.dense",
|
| 37 |
"query",
|
| 38 |
-
"key"
|
|
|
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": null,
|
|
|
|
| 33 |
"revision": null,
|
| 34 |
"target_modules": [
|
| 35 |
"value",
|
|
|
|
| 36 |
"query",
|
| 37 |
+
"key",
|
| 38 |
+
"output.dense"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": null,
|
best/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7683872
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c2d07659b7c5b0d393b74ef97c5cb2b100908aa6842f50a314172040d2a1467
|
| 3 |
size 7683872
|
loss_curve.png
CHANGED
|
|
train_log.csv
CHANGED
|
@@ -1,9 +1,11 @@
|
|
| 1 |
epoch,train_loss,val_loss
|
| 2 |
-
1,0.
|
| 3 |
-
2,0.
|
| 4 |
-
3,0.
|
| 5 |
-
4,0.
|
| 6 |
-
5,0.
|
| 7 |
-
6,0.
|
| 8 |
-
7,0.
|
| 9 |
-
8,0.
|
|
|
|
|
|
|
|
|
| 1 |
epoch,train_loss,val_loss
|
| 2 |
+
1,0.01731098252816581,0.01604353136240022
|
| 3 |
+
2,0.013366963278279714,0.014053670106642345
|
| 4 |
+
3,0.012405841278663234,0.013696867510426178
|
| 5 |
+
4,0.011644153810377955,0.013898261968713643
|
| 6 |
+
5,0.010800416682985692,0.011461490960811565
|
| 7 |
+
6,0.01006045886965499,0.010559050928253228
|
| 8 |
+
7,0.009253272447451635,0.010183120465605084
|
| 9 |
+
8,0.008544183403884223,0.009594380102459905
|
| 10 |
+
9,0.007975867728515752,0.00852605485434789
|
| 11 |
+
10,0.007436476772464326,0.008271555498198692
|
training_args.json
CHANGED
|
@@ -7,8 +7,8 @@
|
|
| 7 |
},
|
| 8 |
"hyperparams": {
|
| 9 |
"batch_size": 32,
|
| 10 |
-
"lr":
|
| 11 |
-
"num_epochs_max":
|
| 12 |
"patience": 3,
|
| 13 |
"min_delta": 0.0001,
|
| 14 |
"lora_r": 16,
|
|
@@ -21,5 +21,5 @@
|
|
| 21 |
"output.dense"
|
| 22 |
]
|
| 23 |
},
|
| 24 |
-
"best_val_loss": 0.
|
| 25 |
}
|
|
|
|
| 7 |
},
|
| 8 |
"hyperparams": {
|
| 9 |
"batch_size": 32,
|
| 10 |
+
"lr": 0.0001,
|
| 11 |
+
"num_epochs_max": 10,
|
| 12 |
"patience": 3,
|
| 13 |
"min_delta": 0.0001,
|
| 14 |
"lora_r": 16,
|
|
|
|
| 21 |
"output.dense"
|
| 22 |
]
|
| 23 |
},
|
| 24 |
+
"best_val_loss": 0.008271555498198692
|
| 25 |
}
|