Subiendo modelo GPT-2 fine-tuned con LoRA

Browse files

Files changed (10) hide show

adapter_config.json +2 -2
adapter_model.safetensors +1 -1
checkpoint-50/adapter_config.json +2 -2
checkpoint-50/adapter_model.safetensors +1 -1
checkpoint-50/optimizer.pt +1 -1
checkpoint-50/trainer_state.json +20 -20
checkpoint-78/adapter_config.json +2 -2
checkpoint-78/adapter_model.safetensors +1 -1
checkpoint-78/optimizer.pt +1 -1
checkpoint-78/trainer_state.json +30 -30

adapter_config.json CHANGED Viewed

@@ -29,9 +29,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_attn",
     "c_proj",
-    "c_fc"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "c_proj",
+    "c_fc",
+    "c_attn"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:828fd8d7f38a3db51f0ce0ebaed69642722f764b9f6f3645bd18bce83b11647e
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:55232f8282cb9bd78ce25259d56c143a26458573ce9f80ca07d6ae0ffc790618
 size 4730632

checkpoint-50/adapter_config.json CHANGED Viewed

@@ -29,9 +29,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_attn",
     "c_proj",
-    "c_fc"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "c_proj",
+    "c_fc",
+    "c_attn"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-50/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ce82dbe48e451241a5cce9194ec62d0c8d1c64df21baaa4f438a08fce6680ef
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b8b1b2997277f932b7d8d7f8089c2494b2f173b8b9190bdc499d2b3d8c86988
 size 4730632

checkpoint-50/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50541f81bd14270389bd0a5f8e89857e8a2eb435e9c8989e4ceffaf5126e1be7
 size 9515787

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad8d39626a094c9582a9c023435d1d8c9d42c48032e6ebf2fc01ddff47e2db4f
 size 9515787

checkpoint-50/trainer_state.json CHANGED Viewed

@@ -11,72 +11,72 @@
   "log_history": [
     {
       "epoch": 0.19230769230769232,
-      "grad_norm": 0.5310236811637878,
       "learning_rate": 0.0001,
-      "loss": 5.0245,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 0.7837315797805786,
       "learning_rate": 0.00019714285714285716,
-      "loss": 4.8883,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
-      "grad_norm": 0.8394624590873718,
       "learning_rate": 0.00018285714285714286,
-      "loss": 4.7357,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 0.8658212423324585,
       "learning_rate": 0.00016857142857142857,
-      "loss": 4.4614,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
-      "grad_norm": 1.1082484722137451,
       "learning_rate": 0.0001542857142857143,
-      "loss": 4.0515,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
-      "grad_norm": 1.198523759841919,
       "learning_rate": 0.00014,
-      "loss": 3.871,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
-      "grad_norm": 1.0658003091812134,
       "learning_rate": 0.00012571428571428572,
-      "loss": 3.6984,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
-      "grad_norm": 1.2157020568847656,
       "learning_rate": 0.00011142857142857144,
-      "loss": 3.3734,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
-      "grad_norm": 1.2755290269851685,
       "learning_rate": 9.714285714285715e-05,
-      "loss": 3.3713,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
-      "grad_norm": 1.2740757465362549,
       "learning_rate": 8.285714285714287e-05,
-      "loss": 3.1705,
       "step": 50
     }
   ],

   "log_history": [
     {
       "epoch": 0.19230769230769232,
+      "grad_norm": 0.5379420518875122,
       "learning_rate": 0.0001,
+      "loss": 5.0246,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 0.811471164226532,
       "learning_rate": 0.00019714285714285716,
+      "loss": 4.8901,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
+      "grad_norm": 0.8395925164222717,
       "learning_rate": 0.00018285714285714286,
+      "loss": 4.7398,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
+      "grad_norm": 0.8671197891235352,
       "learning_rate": 0.00016857142857142857,
+      "loss": 4.462,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
+      "grad_norm": 1.1186156272888184,
       "learning_rate": 0.0001542857142857143,
+      "loss": 4.0565,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
+      "grad_norm": 1.1562882661819458,
       "learning_rate": 0.00014,
+      "loss": 3.8807,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
+      "grad_norm": 1.0266945362091064,
       "learning_rate": 0.00012571428571428572,
+      "loss": 3.7139,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
+      "grad_norm": 1.23048996925354,
       "learning_rate": 0.00011142857142857144,
+      "loss": 3.3839,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
+      "grad_norm": 1.2847602367401123,
       "learning_rate": 9.714285714285715e-05,
+      "loss": 3.3867,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
+      "grad_norm": 1.256270408630371,
       "learning_rate": 8.285714285714287e-05,
+      "loss": 3.1795,
       "step": 50
     }
   ],

checkpoint-78/adapter_config.json CHANGED Viewed

@@ -29,9 +29,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_attn",
     "c_proj",
-    "c_fc"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "c_proj",
+    "c_fc",
+    "c_attn"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-78/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:828fd8d7f38a3db51f0ce0ebaed69642722f764b9f6f3645bd18bce83b11647e
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:55232f8282cb9bd78ce25259d56c143a26458573ce9f80ca07d6ae0ffc790618
 size 4730632

checkpoint-78/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e907c9afc2643e207a5e0d8977af80e20c0be6a132947dfcb9ba90672f17e38
 size 9515787

 version https://git-lfs.github.com/spec/v1
+oid sha256:5daceb474505ddfd977fd87658a266e662d19d2a2a7fb78fbba28c64daffb4bf
 size 9515787

checkpoint-78/trainer_state.json CHANGED Viewed

@@ -11,107 +11,107 @@
   "log_history": [
     {
       "epoch": 0.19230769230769232,
-      "grad_norm": 0.5310236811637878,
       "learning_rate": 0.0001,
-      "loss": 5.0245,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 0.7837315797805786,
       "learning_rate": 0.00019714285714285716,
-      "loss": 4.8883,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
-      "grad_norm": 0.8394624590873718,
       "learning_rate": 0.00018285714285714286,
-      "loss": 4.7357,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 0.8658212423324585,
       "learning_rate": 0.00016857142857142857,
-      "loss": 4.4614,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
-      "grad_norm": 1.1082484722137451,
       "learning_rate": 0.0001542857142857143,
-      "loss": 4.0515,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
-      "grad_norm": 1.198523759841919,
       "learning_rate": 0.00014,
-      "loss": 3.871,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
-      "grad_norm": 1.0658003091812134,
       "learning_rate": 0.00012571428571428572,
-      "loss": 3.6984,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
-      "grad_norm": 1.2157020568847656,
       "learning_rate": 0.00011142857142857144,
-      "loss": 3.3734,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
-      "grad_norm": 1.2755290269851685,
       "learning_rate": 9.714285714285715e-05,
-      "loss": 3.3713,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
-      "grad_norm": 1.2740757465362549,
       "learning_rate": 8.285714285714287e-05,
-      "loss": 3.1705,
       "step": 50
     },
     {
       "epoch": 2.1153846153846154,
-      "grad_norm": 1.6474992036819458,
       "learning_rate": 6.857142857142858e-05,
-      "loss": 3.0296,
       "step": 55
     },
     {
       "epoch": 2.3076923076923075,
-      "grad_norm": 1.3132846355438232,
       "learning_rate": 5.428571428571428e-05,
-      "loss": 2.9035,
       "step": 60
     },
     {
       "epoch": 2.5,
-      "grad_norm": 1.4660000801086426,
       "learning_rate": 4e-05,
-      "loss": 2.8212,
       "step": 65
     },
     {
       "epoch": 2.6923076923076925,
-      "grad_norm": 1.4135533571243286,
       "learning_rate": 2.5714285714285714e-05,
-      "loss": 2.7462,
       "step": 70
     },
     {
       "epoch": 2.8846153846153846,
-      "grad_norm": 1.3768360614776611,
       "learning_rate": 1.1428571428571429e-05,
-      "loss": 2.8159,
       "step": 75
     }
   ],

   "log_history": [
     {
       "epoch": 0.19230769230769232,
+      "grad_norm": 0.5379420518875122,
       "learning_rate": 0.0001,
+      "loss": 5.0246,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 0.811471164226532,
       "learning_rate": 0.00019714285714285716,
+      "loss": 4.8901,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
+      "grad_norm": 0.8395925164222717,
       "learning_rate": 0.00018285714285714286,
+      "loss": 4.7398,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
+      "grad_norm": 0.8671197891235352,
       "learning_rate": 0.00016857142857142857,
+      "loss": 4.462,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
+      "grad_norm": 1.1186156272888184,
       "learning_rate": 0.0001542857142857143,
+      "loss": 4.0565,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
+      "grad_norm": 1.1562882661819458,
       "learning_rate": 0.00014,
+      "loss": 3.8807,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
+      "grad_norm": 1.0266945362091064,
       "learning_rate": 0.00012571428571428572,
+      "loss": 3.7139,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
+      "grad_norm": 1.23048996925354,
       "learning_rate": 0.00011142857142857144,
+      "loss": 3.3839,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
+      "grad_norm": 1.2847602367401123,
       "learning_rate": 9.714285714285715e-05,
+      "loss": 3.3867,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
+      "grad_norm": 1.256270408630371,
       "learning_rate": 8.285714285714287e-05,
+      "loss": 3.1795,
       "step": 50
     },
     {
       "epoch": 2.1153846153846154,
+      "grad_norm": 1.6300894021987915,
       "learning_rate": 6.857142857142858e-05,
+      "loss": 3.0392,
       "step": 55
     },
     {
       "epoch": 2.3076923076923075,
+      "grad_norm": 1.2983814477920532,
       "learning_rate": 5.428571428571428e-05,
+      "loss": 2.9103,
       "step": 60
     },
     {
       "epoch": 2.5,
+      "grad_norm": 1.5019299983978271,
       "learning_rate": 4e-05,
+      "loss": 2.8314,
       "step": 65
     },
     {
       "epoch": 2.6923076923076925,
+      "grad_norm": 1.4436630010604858,
       "learning_rate": 2.5714285714285714e-05,
+      "loss": 2.753,
       "step": 70
     },
     {
       "epoch": 2.8846153846153846,
+      "grad_norm": 1.3541405200958252,
       "learning_rate": 1.1428571428571429e-05,
+      "loss": 2.8238,
       "step": 75
     }
   ],