Upload LoRA adapter + logs (2026-03-12 14:32:09)

Files changed (7) hide show

adapter_config.json CHANGED Viewed

@@ -33,9 +33,9 @@
   "revision": null,
   "target_modules": [
     "value",
-    "output.dense",
     "query",
-    "key"
   ],
   "target_parameters": null,
   "task_type": null,

   "revision": null,
   "target_modules": [
     "value",
     "query",
+    "key",
+    "output.dense"
   ],
   "target_parameters": null,
   "task_type": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b8762f131b641f4f4a72e003b1cb607d7456e34e58bbefc1093feb6b218ff90
 size 7683872

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c2d07659b7c5b0d393b74ef97c5cb2b100908aa6842f50a314172040d2a1467
 size 7683872

best/adapter_config.json CHANGED Viewed

@@ -33,9 +33,9 @@
   "revision": null,
   "target_modules": [
     "value",
-    "output.dense",
     "query",
-    "key"
   ],
   "target_parameters": null,
   "task_type": null,

   "revision": null,
   "target_modules": [
     "value",
     "query",
+    "key",
+    "output.dense"
   ],
   "target_parameters": null,
   "task_type": null,

best/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b8762f131b641f4f4a72e003b1cb607d7456e34e58bbefc1093feb6b218ff90
 size 7683872

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c2d07659b7c5b0d393b74ef97c5cb2b100908aa6842f50a314172040d2a1467
 size 7683872

loss_curve.png CHANGED Viewed

train_log.csv CHANGED Viewed

@@ -1,9 +1,11 @@
 epoch,train_loss,val_loss
-1,0.02169319980120941,0.017418300716605144
-2,0.014011799641068756,0.01688864966556906
-3,0.013401006330221817,0.01542824082383023
-4,0.012886841707275913,0.01468768443156498
-5,0.012437969915319858,0.013446119143077122
-6,0.011991326015563785,0.013553697425431718
-7,0.011552147430205871,0.012770607659872918
-8,0.011140394895120666,0.012400428240839095

 epoch,train_loss,val_loss
+1,0.01731098252816581,0.01604353136240022
+2,0.013366963278279714,0.014053670106642345
+3,0.012405841278663234,0.013696867510426178
+4,0.011644153810377955,0.013898261968713643
+5,0.010800416682985692,0.011461490960811565
+6,0.01006045886965499,0.010559050928253228
+7,0.009253272447451635,0.010183120465605084
+8,0.008544183403884223,0.009594380102459905
+9,0.007975867728515752,0.00852605485434789
+10,0.007436476772464326,0.008271555498198692

training_args.json CHANGED Viewed

@@ -7,8 +7,8 @@
   },
   "hyperparams": {
     "batch_size": 32,
-    "lr": 5e-05,
-    "num_epochs_max": 8,
     "patience": 3,
     "min_delta": 0.0001,
     "lora_r": 16,
@@ -21,5 +21,5 @@
       "output.dense"
     ]
   },
-  "best_val_loss": 0.012400428240839095
 }

   },
   "hyperparams": {
     "batch_size": 32,
+    "lr": 0.0001,
+    "num_epochs_max": 10,
     "patience": 3,
     "min_delta": 0.0001,
     "lora_r": 16,
       "output.dense"
     ]
   },
+  "best_val_loss": 0.008271555498198692
 }