Subiendo modelo GPT-2 fine-tuned con LoRA

Browse files

Files changed (10) hide show

adapter_config.json +2 -2
adapter_model.safetensors +1 -1
checkpoint-50/adapter_config.json +2 -2
checkpoint-50/adapter_model.safetensors +1 -1
checkpoint-50/optimizer.pt +1 -1
checkpoint-50/trainer_state.json +20 -20
checkpoint-78/adapter_config.json +2 -2
checkpoint-78/adapter_model.safetensors +1 -1
checkpoint-78/optimizer.pt +1 -1
checkpoint-78/trainer_state.json +30 -30

adapter_config.json CHANGED Viewed

@@ -29,9 +29,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_proj",
     "c_fc",
-    "c_attn"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "c_attn",
     "c_fc",
+    "c_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55232f8282cb9bd78ce25259d56c143a26458573ce9f80ca07d6ae0ffc790618
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
 size 4730632

checkpoint-50/adapter_config.json CHANGED Viewed

@@ -29,9 +29,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_proj",
     "c_fc",
-    "c_attn"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "c_attn",
     "c_fc",
+    "c_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-50/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b8b1b2997277f932b7d8d7f8089c2494b2f173b8b9190bdc499d2b3d8c86988
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e11ed97a557c62215dd3af808efbbac546251b812b5a520368b3752082dd7d4
 size 4730632

checkpoint-50/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad8d39626a094c9582a9c023435d1d8c9d42c48032e6ebf2fc01ddff47e2db4f
 size 9515787

 version https://git-lfs.github.com/spec/v1
+oid sha256:e15baab776ec9c437c2558f5c634c5aa9f8dfe0ed8a7cb86909bfaeedeccd872
 size 9515787

checkpoint-50/trainer_state.json CHANGED Viewed

@@ -11,72 +11,72 @@
   "log_history": [
     {
       "epoch": 0.19230769230769232,
-      "grad_norm": 0.5379420518875122,
       "learning_rate": 0.0001,
-      "loss": 5.0246,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 0.811471164226532,
       "learning_rate": 0.00019714285714285716,
-      "loss": 4.8901,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
-      "grad_norm": 0.8395925164222717,
       "learning_rate": 0.00018285714285714286,
-      "loss": 4.7398,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 0.8671197891235352,
       "learning_rate": 0.00016857142857142857,
-      "loss": 4.462,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
-      "grad_norm": 1.1186156272888184,
       "learning_rate": 0.0001542857142857143,
-      "loss": 4.0565,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
-      "grad_norm": 1.1562882661819458,
       "learning_rate": 0.00014,
-      "loss": 3.8807,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
-      "grad_norm": 1.0266945362091064,
       "learning_rate": 0.00012571428571428572,
-      "loss": 3.7139,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
-      "grad_norm": 1.23048996925354,
       "learning_rate": 0.00011142857142857144,
-      "loss": 3.3839,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
-      "grad_norm": 1.2847602367401123,
       "learning_rate": 9.714285714285715e-05,
-      "loss": 3.3867,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
-      "grad_norm": 1.256270408630371,
       "learning_rate": 8.285714285714287e-05,
-      "loss": 3.1795,
       "step": 50
     }
   ],

   "log_history": [
     {
       "epoch": 0.19230769230769232,
+      "grad_norm": 0.5508620738983154,
       "learning_rate": 0.0001,
+      "loss": 5.0244,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 0.8022432923316956,
       "learning_rate": 0.00019714285714285716,
+      "loss": 4.888,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
+      "grad_norm": 0.8547099828720093,
       "learning_rate": 0.00018285714285714286,
+      "loss": 4.735,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
+      "grad_norm": 0.8760125041007996,
       "learning_rate": 0.00016857142857142857,
+      "loss": 4.4548,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
+      "grad_norm": 1.1308597326278687,
       "learning_rate": 0.0001542857142857143,
+      "loss": 4.0466,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
+      "grad_norm": 1.1865196228027344,
       "learning_rate": 0.00014,
+      "loss": 3.8666,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
+      "grad_norm": 1.0487556457519531,
       "learning_rate": 0.00012571428571428572,
+      "loss": 3.6951,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
+      "grad_norm": 1.2422493696212769,
       "learning_rate": 0.00011142857142857144,
+      "loss": 3.3633,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
+      "grad_norm": 1.2872875928878784,
       "learning_rate": 9.714285714285715e-05,
+      "loss": 3.367,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
+      "grad_norm": 1.2250062227249146,
       "learning_rate": 8.285714285714287e-05,
+      "loss": 3.1619,
       "step": 50
     }
   ],

checkpoint-78/adapter_config.json CHANGED Viewed

@@ -29,9 +29,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_proj",
     "c_fc",
-    "c_attn"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "c_attn",
     "c_fc",
+    "c_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-78/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55232f8282cb9bd78ce25259d56c143a26458573ce9f80ca07d6ae0ffc790618
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
 size 4730632

checkpoint-78/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5daceb474505ddfd977fd87658a266e662d19d2a2a7fb78fbba28c64daffb4bf
 size 9515787

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fe5ed8a12018017e9aa3816457aac6ea8f0c8f54b2ccb555a190f96470de51e
 size 9515787

checkpoint-78/trainer_state.json CHANGED Viewed

@@ -11,107 +11,107 @@
   "log_history": [
     {
       "epoch": 0.19230769230769232,
-      "grad_norm": 0.5379420518875122,
       "learning_rate": 0.0001,
-      "loss": 5.0246,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 0.811471164226532,
       "learning_rate": 0.00019714285714285716,
-      "loss": 4.8901,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
-      "grad_norm": 0.8395925164222717,
       "learning_rate": 0.00018285714285714286,
-      "loss": 4.7398,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 0.8671197891235352,
       "learning_rate": 0.00016857142857142857,
-      "loss": 4.462,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
-      "grad_norm": 1.1186156272888184,
       "learning_rate": 0.0001542857142857143,
-      "loss": 4.0565,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
-      "grad_norm": 1.1562882661819458,
       "learning_rate": 0.00014,
-      "loss": 3.8807,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
-      "grad_norm": 1.0266945362091064,
       "learning_rate": 0.00012571428571428572,
-      "loss": 3.7139,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
-      "grad_norm": 1.23048996925354,
       "learning_rate": 0.00011142857142857144,
-      "loss": 3.3839,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
-      "grad_norm": 1.2847602367401123,
       "learning_rate": 9.714285714285715e-05,
-      "loss": 3.3867,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
-      "grad_norm": 1.256270408630371,
       "learning_rate": 8.285714285714287e-05,
-      "loss": 3.1795,
       "step": 50
     },
     {
       "epoch": 2.1153846153846154,
-      "grad_norm": 1.6300894021987915,
       "learning_rate": 6.857142857142858e-05,
-      "loss": 3.0392,
       "step": 55
     },
     {
       "epoch": 2.3076923076923075,
-      "grad_norm": 1.2983814477920532,
       "learning_rate": 5.428571428571428e-05,
-      "loss": 2.9103,
       "step": 60
     },
     {
       "epoch": 2.5,
-      "grad_norm": 1.5019299983978271,
       "learning_rate": 4e-05,
-      "loss": 2.8314,
       "step": 65
     },
     {
       "epoch": 2.6923076923076925,
-      "grad_norm": 1.4436630010604858,
       "learning_rate": 2.5714285714285714e-05,
-      "loss": 2.753,
       "step": 70
     },
     {
       "epoch": 2.8846153846153846,
-      "grad_norm": 1.3541405200958252,
       "learning_rate": 1.1428571428571429e-05,
-      "loss": 2.8238,
       "step": 75
     }
   ],

   "log_history": [
     {
       "epoch": 0.19230769230769232,
+      "grad_norm": 0.5508620738983154,
       "learning_rate": 0.0001,
+      "loss": 5.0244,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 0.8022432923316956,
       "learning_rate": 0.00019714285714285716,
+      "loss": 4.888,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
+      "grad_norm": 0.8547099828720093,
       "learning_rate": 0.00018285714285714286,
+      "loss": 4.735,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
+      "grad_norm": 0.8760125041007996,
       "learning_rate": 0.00016857142857142857,
+      "loss": 4.4548,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
+      "grad_norm": 1.1308597326278687,
       "learning_rate": 0.0001542857142857143,
+      "loss": 4.0466,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
+      "grad_norm": 1.1865196228027344,
       "learning_rate": 0.00014,
+      "loss": 3.8666,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
+      "grad_norm": 1.0487556457519531,
       "learning_rate": 0.00012571428571428572,
+      "loss": 3.6951,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
+      "grad_norm": 1.2422493696212769,
       "learning_rate": 0.00011142857142857144,
+      "loss": 3.3633,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
+      "grad_norm": 1.2872875928878784,
       "learning_rate": 9.714285714285715e-05,
+      "loss": 3.367,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
+      "grad_norm": 1.2250062227249146,
       "learning_rate": 8.285714285714287e-05,
+      "loss": 3.1619,
       "step": 50
     },
     {
       "epoch": 2.1153846153846154,
+      "grad_norm": 1.7271333932876587,
       "learning_rate": 6.857142857142858e-05,
+      "loss": 3.0247,
       "step": 55
     },
     {
       "epoch": 2.3076923076923075,
+      "grad_norm": 1.3085808753967285,
       "learning_rate": 5.428571428571428e-05,
+      "loss": 2.8975,
       "step": 60
     },
     {
       "epoch": 2.5,
+      "grad_norm": 1.4766792058944702,
       "learning_rate": 4e-05,
+      "loss": 2.8193,
       "step": 65
     },
     {
       "epoch": 2.6923076923076925,
+      "grad_norm": 1.506183385848999,
       "learning_rate": 2.5714285714285714e-05,
+      "loss": 2.7399,
       "step": 70
     },
     {
       "epoch": 2.8846153846153846,
+      "grad_norm": 1.3497138023376465,
       "learning_rate": 1.1428571428571429e-05,
+      "loss": 2.811,
       "step": 75
     }
   ],