Subiendo modelo GPT-2 fine-tuned con LoRA

Browse files

Files changed (10) hide show

adapter_config.json +1 -1
adapter_model.safetensors +1 -1
checkpoint-50/adapter_config.json +1 -1
checkpoint-50/adapter_model.safetensors +1 -1
checkpoint-50/optimizer.pt +1 -1
checkpoint-50/trainer_state.json +18 -18
checkpoint-78/adapter_config.json +1 -1
checkpoint-78/adapter_model.safetensors +1 -1
checkpoint-78/optimizer.pt +1 -1
checkpoint-78/trainer_state.json +28 -28

adapter_config.json CHANGED Viewed

@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_attn",
     "c_fc",
     "c_proj"
   ],
   "target_parameters": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "c_fc",
+    "c_attn",
     "c_proj"
   ],
   "target_parameters": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:f698211d64a505f3d2c7fdf113ae678afa3b0ef2ce539da4bdeaffc49fd15315
 size 4730632

checkpoint-50/adapter_config.json CHANGED Viewed

@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_attn",
     "c_fc",
     "c_proj"
   ],
   "target_parameters": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "c_fc",
+    "c_attn",
     "c_proj"
   ],
   "target_parameters": null,

checkpoint-50/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e11ed97a557c62215dd3af808efbbac546251b812b5a520368b3752082dd7d4
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:7366bd22197f6a97e1d060cce0d169717d86edcfddffd48ddf2df93824f9d8f8
 size 4730632

checkpoint-50/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e15baab776ec9c437c2558f5c634c5aa9f8dfe0ed8a7cb86909bfaeedeccd872
 size 9515787

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c84da8fa8ac3f9c89b90f167e8044eb63bcb9b27d96969d4812f526169fd43f
 size 9515787

checkpoint-50/trainer_state.json CHANGED Viewed

@@ -11,72 +11,72 @@
   "log_history": [
     {
       "epoch": 0.19230769230769232,
-      "grad_norm": 0.5508620738983154,
       "learning_rate": 0.0001,
       "loss": 5.0244,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 0.8022432923316956,
       "learning_rate": 0.00019714285714285716,
-      "loss": 4.888,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
-      "grad_norm": 0.8547099828720093,
       "learning_rate": 0.00018285714285714286,
-      "loss": 4.735,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 0.8760125041007996,
       "learning_rate": 0.00016857142857142857,
       "loss": 4.4548,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
-      "grad_norm": 1.1308597326278687,
       "learning_rate": 0.0001542857142857143,
-      "loss": 4.0466,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
-      "grad_norm": 1.1865196228027344,
       "learning_rate": 0.00014,
-      "loss": 3.8666,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
-      "grad_norm": 1.0487556457519531,
       "learning_rate": 0.00012571428571428572,
-      "loss": 3.6951,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
-      "grad_norm": 1.2422493696212769,
       "learning_rate": 0.00011142857142857144,
-      "loss": 3.3633,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
-      "grad_norm": 1.2872875928878784,
       "learning_rate": 9.714285714285715e-05,
-      "loss": 3.367,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
-      "grad_norm": 1.2250062227249146,
       "learning_rate": 8.285714285714287e-05,
-      "loss": 3.1619,
       "step": 50
     }
   ],

   "log_history": [
     {
       "epoch": 0.19230769230769232,
+      "grad_norm": 0.548379123210907,
       "learning_rate": 0.0001,
       "loss": 5.0244,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 0.8064137697219849,
       "learning_rate": 0.00019714285714285716,
+      "loss": 4.889,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
+      "grad_norm": 0.8431358933448792,
       "learning_rate": 0.00018285714285714286,
+      "loss": 4.7358,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
+      "grad_norm": 0.8824865221977234,
       "learning_rate": 0.00016857142857142857,
       "loss": 4.4548,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
+      "grad_norm": 1.1104286909103394,
       "learning_rate": 0.0001542857142857143,
+      "loss": 4.042,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
+      "grad_norm": 1.1461974382400513,
       "learning_rate": 0.00014,
+      "loss": 3.8627,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
+      "grad_norm": 1.0308071374893188,
       "learning_rate": 0.00012571428571428572,
+      "loss": 3.6893,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
+      "grad_norm": 1.2822504043579102,
       "learning_rate": 0.00011142857142857144,
+      "loss": 3.352,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
+      "grad_norm": 1.2842049598693848,
       "learning_rate": 9.714285714285715e-05,
+      "loss": 3.3562,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
+      "grad_norm": 1.255327820777893,
       "learning_rate": 8.285714285714287e-05,
+      "loss": 3.1523,
       "step": 50
     }
   ],

checkpoint-78/adapter_config.json CHANGED Viewed

@@ -29,8 +29,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_attn",
     "c_fc",
     "c_proj"
   ],
   "target_parameters": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "c_fc",
+    "c_attn",
     "c_proj"
   ],
   "target_parameters": null,

checkpoint-78/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1171030a6e6953eafbda85a62d5b12996f5ffc4822ace50a8cd23d8695c8e744
 size 4730632

 version https://git-lfs.github.com/spec/v1
+oid sha256:f698211d64a505f3d2c7fdf113ae678afa3b0ef2ce539da4bdeaffc49fd15315
 size 4730632

checkpoint-78/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fe5ed8a12018017e9aa3816457aac6ea8f0c8f54b2ccb555a190f96470de51e
 size 9515787

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa0c16a3e57c4220b33a0edb5f35209a95983db805a12e518e05e0a8c6ebb163
 size 9515787

checkpoint-78/trainer_state.json CHANGED Viewed

@@ -11,107 +11,107 @@
   "log_history": [
     {
       "epoch": 0.19230769230769232,
-      "grad_norm": 0.5508620738983154,
       "learning_rate": 0.0001,
       "loss": 5.0244,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 0.8022432923316956,
       "learning_rate": 0.00019714285714285716,
-      "loss": 4.888,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
-      "grad_norm": 0.8547099828720093,
       "learning_rate": 0.00018285714285714286,
-      "loss": 4.735,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 0.8760125041007996,
       "learning_rate": 0.00016857142857142857,
       "loss": 4.4548,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
-      "grad_norm": 1.1308597326278687,
       "learning_rate": 0.0001542857142857143,
-      "loss": 4.0466,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
-      "grad_norm": 1.1865196228027344,
       "learning_rate": 0.00014,
-      "loss": 3.8666,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
-      "grad_norm": 1.0487556457519531,
       "learning_rate": 0.00012571428571428572,
-      "loss": 3.6951,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
-      "grad_norm": 1.2422493696212769,
       "learning_rate": 0.00011142857142857144,
-      "loss": 3.3633,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
-      "grad_norm": 1.2872875928878784,
       "learning_rate": 9.714285714285715e-05,
-      "loss": 3.367,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
-      "grad_norm": 1.2250062227249146,
       "learning_rate": 8.285714285714287e-05,
-      "loss": 3.1619,
       "step": 50
     },
     {
       "epoch": 2.1153846153846154,
-      "grad_norm": 1.7271333932876587,
       "learning_rate": 6.857142857142858e-05,
-      "loss": 3.0247,
       "step": 55
     },
     {
       "epoch": 2.3076923076923075,
-      "grad_norm": 1.3085808753967285,
       "learning_rate": 5.428571428571428e-05,
-      "loss": 2.8975,
       "step": 60
     },
     {
       "epoch": 2.5,
-      "grad_norm": 1.4766792058944702,
       "learning_rate": 4e-05,
-      "loss": 2.8193,
       "step": 65
     },
     {
       "epoch": 2.6923076923076925,
-      "grad_norm": 1.506183385848999,
       "learning_rate": 2.5714285714285714e-05,
-      "loss": 2.7399,
       "step": 70
     },
     {
       "epoch": 2.8846153846153846,
-      "grad_norm": 1.3497138023376465,
       "learning_rate": 1.1428571428571429e-05,
-      "loss": 2.811,
       "step": 75
     }
   ],

   "log_history": [
     {
       "epoch": 0.19230769230769232,
+      "grad_norm": 0.548379123210907,
       "learning_rate": 0.0001,
       "loss": 5.0244,
       "step": 5
     },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 0.8064137697219849,
       "learning_rate": 0.00019714285714285716,
+      "loss": 4.889,
       "step": 10
     },
     {
       "epoch": 0.5769230769230769,
+      "grad_norm": 0.8431358933448792,
       "learning_rate": 0.00018285714285714286,
+      "loss": 4.7358,
       "step": 15
     },
     {
       "epoch": 0.7692307692307693,
+      "grad_norm": 0.8824865221977234,
       "learning_rate": 0.00016857142857142857,
       "loss": 4.4548,
       "step": 20
     },
     {
       "epoch": 0.9615384615384616,
+      "grad_norm": 1.1104286909103394,
       "learning_rate": 0.0001542857142857143,
+      "loss": 4.042,
       "step": 25
     },
     {
       "epoch": 1.1538461538461537,
+      "grad_norm": 1.1461974382400513,
       "learning_rate": 0.00014,
+      "loss": 3.8627,
       "step": 30
     },
     {
       "epoch": 1.3461538461538463,
+      "grad_norm": 1.0308071374893188,
       "learning_rate": 0.00012571428571428572,
+      "loss": 3.6893,
       "step": 35
     },
     {
       "epoch": 1.5384615384615383,
+      "grad_norm": 1.2822504043579102,
       "learning_rate": 0.00011142857142857144,
+      "loss": 3.352,
       "step": 40
     },
     {
       "epoch": 1.7307692307692308,
+      "grad_norm": 1.2842049598693848,
       "learning_rate": 9.714285714285715e-05,
+      "loss": 3.3562,
       "step": 45
     },
     {
       "epoch": 1.9230769230769231,
+      "grad_norm": 1.255327820777893,
       "learning_rate": 8.285714285714287e-05,
+      "loss": 3.1523,
       "step": 50
     },
     {
       "epoch": 2.1153846153846154,
+      "grad_norm": 1.5920555591583252,
       "learning_rate": 6.857142857142858e-05,
+      "loss": 3.0097,
       "step": 55
     },
     {
       "epoch": 2.3076923076923075,
+      "grad_norm": 1.3065139055252075,
       "learning_rate": 5.428571428571428e-05,
+      "loss": 2.8838,
       "step": 60
     },
     {
       "epoch": 2.5,
+      "grad_norm": 1.4456508159637451,
       "learning_rate": 4e-05,
+      "loss": 2.8021,
       "step": 65
     },
     {
       "epoch": 2.6923076923076925,
+      "grad_norm": 1.435351014137268,
       "learning_rate": 2.5714285714285714e-05,
+      "loss": 2.724,
       "step": 70
     },
     {
       "epoch": 2.8846153846153846,
+      "grad_norm": 1.3584095239639282,
       "learning_rate": 1.1428571428571429e-05,
+      "loss": 2.7916,
       "step": 75
     }
   ],