Training in progress, step 1400

Browse files

Files changed (6) hide show

adapter_model.safetensors +1 -1
last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +3 -493

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e3c47872211fbcf052ba7df0f0915e501b20e8abc3bca44c5104a2f4ba81046
 size 121537408

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5548db897760d11eb42f797aede598c24ba8638657290f8d4ec41761003bdc5
 size 121537408

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -33,13 +33,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "down_proj",
     "up_proj",
-    "v_proj",
     "q_proj",
     "gate_proj",
-    "k_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
     "q_proj",
+    "v_proj",
+    "k_proj",
+    "o_proj",
     "gate_proj",
+    "down_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e3c47872211fbcf052ba7df0f0915e501b20e8abc3bca44c5104a2f4ba81046
 size 121537408

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac86271e61f61c7e0e996c9a0b387781c0cf7e105d9e2809cf486578571e3692
 size 121537408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ae996e1fc958f0c827e315fbb2ff4690cc75f8738d96a091461acafa611b7e6
-size 62655371

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6166d92c0eec7a743b35a9b0a6952a1ccf685a19c88ceec877f066ad8bcf660
+size 62000725

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e226002dfde38ec81edb46535c412ea560a3a1d4ecaee989cfc82070757a4f85
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2f5818bcde61cb939645ced952eb7a6ec5c7bbec5f630a7156d7c2ae39b50d0
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.44616044616044614,
   "eval_steps": 500,
-  "global_step": 1300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -428,496 +428,6 @@
       "learning_rate": 0.00019783319385748891,
       "loss": 0.005371841043233872,
       "step": 600
-    },
-    {
-      "epoch": 0.20935220935220936,
-      "grad_norm": 0.03531381115317345,
-      "learning_rate": 0.00019775798436093438,
-      "loss": 0.0034491006284952165,
-      "step": 610
-    },
-    {
-      "epoch": 0.21278421278421278,
-      "grad_norm": 0.3441513478755951,
-      "learning_rate": 0.00019768150657209797,
-      "loss": 0.0036659084260463716,
-      "step": 620
-    },
-    {
-      "epoch": 0.21621621621621623,
-      "grad_norm": 0.12480789422988892,
-      "learning_rate": 0.00019760376148318697,
-      "loss": 0.005842024832963944,
-      "step": 630
-    },
-    {
-      "epoch": 0.21964821964821965,
-      "grad_norm": 0.18049751222133636,
-      "learning_rate": 0.00019752475010285044,
-      "loss": 0.004476210474967957,
-      "step": 640
-    },
-    {
-      "epoch": 0.22308022308022307,
-      "grad_norm": 0.03993777185678482,
-      "learning_rate": 0.00019744447345616603,
-      "loss": 0.002967344969511032,
-      "step": 650
-    },
-    {
-      "epoch": 0.22651222651222652,
-      "grad_norm": 0.014075578190386295,
-      "learning_rate": 0.00019736293258462663,
-      "loss": 0.0038539741188287737,
-      "step": 660
-    },
-    {
-      "epoch": 0.22994422994422994,
-      "grad_norm": 0.10091689974069595,
-      "learning_rate": 0.00019728012854612707,
-      "loss": 0.0027353862300515176,
-      "step": 670
-    },
-    {
-      "epoch": 0.23337623337623337,
-      "grad_norm": 0.07889121025800705,
-      "learning_rate": 0.00019719606241495015,
-      "loss": 0.002710958570241928,
-      "step": 680
-    },
-    {
-      "epoch": 0.23680823680823682,
-      "grad_norm": 0.30752819776535034,
-      "learning_rate": 0.00019711073528175276,
-      "loss": 0.00333489403128624,
-      "step": 690
-    },
-    {
-      "epoch": 0.24024024024024024,
-      "grad_norm": 0.044186487793922424,
-      "learning_rate": 0.00019702414825355192,
-      "loss": 0.0013431076891720294,
-      "step": 700
-    },
-    {
-      "epoch": 0.24367224367224366,
-      "grad_norm": 0.002193969674408436,
-      "learning_rate": 0.00019693630245371012,
-      "loss": 0.0016361255198717116,
-      "step": 710
-    },
-    {
-      "epoch": 0.2471042471042471,
-      "grad_norm": 0.0048366570845246315,
-      "learning_rate": 0.00019684719902192098,
-      "loss": 0.0031337443739175796,
-      "step": 720
-    },
-    {
-      "epoch": 0.25053625053625056,
-      "grad_norm": 0.003241632366552949,
-      "learning_rate": 0.0001967568391141944,
-      "loss": 0.0018327673897147179,
-      "step": 730
-    },
-    {
-      "epoch": 0.25396825396825395,
-      "grad_norm": 0.07791941612958908,
-      "learning_rate": 0.00019666522390284155,
-      "loss": 0.005795871838927269,
-      "step": 740
-    },
-    {
-      "epoch": 0.2574002574002574,
-      "grad_norm": 0.04366520047187805,
-      "learning_rate": 0.00019657235457645956,
-      "loss": 0.0033348515629768372,
-      "step": 750
-    },
-    {
-      "epoch": 0.26083226083226085,
-      "grad_norm": 0.013953814283013344,
-      "learning_rate": 0.00019647823233991623,
-      "loss": 0.00804762840270996,
-      "step": 760
-    },
-    {
-      "epoch": 0.26426426426426425,
-      "grad_norm": 0.017218926921486855,
-      "learning_rate": 0.00019638285841433442,
-      "loss": 0.005346460640430451,
-      "step": 770
-    },
-    {
-      "epoch": 0.2676962676962677,
-      "grad_norm": 1.2277870178222656,
-      "learning_rate": 0.00019628623403707612,
-      "loss": 0.005925276502966881,
-      "step": 780
-    },
-    {
-      "epoch": 0.27112827112827115,
-      "grad_norm": 0.05051695927977562,
-      "learning_rate": 0.00019618836046172647,
-      "loss": 0.0050374619662761685,
-      "step": 790
-    },
-    {
-      "epoch": 0.27456027456027454,
-      "grad_norm": 0.004715205170214176,
-      "learning_rate": 0.00019608923895807732,
-      "loss": 0.0031466834247112275,
-      "step": 800
-    },
-    {
-      "epoch": 0.277992277992278,
-      "grad_norm": 0.15201859176158905,
-      "learning_rate": 0.00019598887081211103,
-      "loss": 0.00553952269256115,
-      "step": 810
-    },
-    {
-      "epoch": 0.28142428142428144,
-      "grad_norm": 0.017294466495513916,
-      "learning_rate": 0.00019588725732598358,
-      "loss": 0.0026330363005399706,
-      "step": 820
-    },
-    {
-      "epoch": 0.28485628485628484,
-      "grad_norm": 0.009572334587574005,
-      "learning_rate": 0.0001957843998180077,
-      "loss": 0.0053256206214427945,
-      "step": 830
-    },
-    {
-      "epoch": 0.2882882882882883,
-      "grad_norm": 0.9419263005256653,
-      "learning_rate": 0.00019568029962263592,
-      "loss": 0.005014676600694656,
-      "step": 840
-    },
-    {
-      "epoch": 0.29172029172029174,
-      "grad_norm": 0.007972943596541882,
-      "learning_rate": 0.0001955749580904431,
-      "loss": 0.002865707501769066,
-      "step": 850
-    },
-    {
-      "epoch": 0.29515229515229513,
-      "grad_norm": 0.0037338004913181067,
-      "learning_rate": 0.00019546837658810883,
-      "loss": 0.002737715095281601,
-      "step": 860
-    },
-    {
-      "epoch": 0.2985842985842986,
-      "grad_norm": 0.044745851308107376,
-      "learning_rate": 0.00019536055649840007,
-      "loss": 0.005683861300349235,
-      "step": 870
-    },
-    {
-      "epoch": 0.30201630201630203,
-      "grad_norm": 0.3476060628890991,
-      "learning_rate": 0.00019525149922015268,
-      "loss": 0.007439766824245453,
-      "step": 880
-    },
-    {
-      "epoch": 0.3054483054483054,
-      "grad_norm": 0.046753134578466415,
-      "learning_rate": 0.00019514120616825377,
-      "loss": 0.009560897201299667,
-      "step": 890
-    },
-    {
-      "epoch": 0.3088803088803089,
-      "grad_norm": 0.6365974545478821,
-      "learning_rate": 0.00019502967877362305,
-      "loss": 0.006552433967590332,
-      "step": 900
-    },
-    {
-      "epoch": 0.3123123123123123,
-      "grad_norm": 1.7795714139938354,
-      "learning_rate": 0.00019491691848319432,
-      "loss": 0.0097378209233284,
-      "step": 910
-    },
-    {
-      "epoch": 0.3157443157443157,
-      "grad_norm": 0.030942745506763458,
-      "learning_rate": 0.00019480292675989677,
-      "loss": 0.011464773118495942,
-      "step": 920
-    },
-    {
-      "epoch": 0.31917631917631917,
-      "grad_norm": 0.05824369192123413,
-      "learning_rate": 0.00019468770508263586,
-      "loss": 0.0077786631882190704,
-      "step": 930
-    },
-    {
-      "epoch": 0.3226083226083226,
-      "grad_norm": 1.115598440170288,
-      "learning_rate": 0.00019457125494627431,
-      "loss": 0.005580966919660568,
-      "step": 940
-    },
-    {
-      "epoch": 0.32604032604032607,
-      "grad_norm": 0.11986860632896423,
-      "learning_rate": 0.00019445357786161265,
-      "loss": 0.01756148934364319,
-      "step": 950
-    },
-    {
-      "epoch": 0.32947232947232946,
-      "grad_norm": 0.026070566847920418,
-      "learning_rate": 0.00019433467535536947,
-      "loss": 0.01354750245809555,
-      "step": 960
-    },
-    {
-      "epoch": 0.3329043329043329,
-      "grad_norm": 0.11441248655319214,
-      "learning_rate": 0.0001942145489701618,
-      "loss": 0.008156213909387589,
-      "step": 970
-    },
-    {
-      "epoch": 0.33633633633633636,
-      "grad_norm": 0.3980163037776947,
-      "learning_rate": 0.00019409320026448504,
-      "loss": 0.0047673903405666355,
-      "step": 980
-    },
-    {
-      "epoch": 0.33976833976833976,
-      "grad_norm": 1.6305179595947266,
-      "learning_rate": 0.0001939706308126927,
-      "loss": 0.012219312787055969,
-      "step": 990
-    },
-    {
-      "epoch": 0.3432003432003432,
-      "grad_norm": 0.3982492983341217,
-      "learning_rate": 0.00019384684220497605,
-      "loss": 0.011943883448839187,
-      "step": 1000
-    },
-    {
-      "epoch": 0.34663234663234666,
-      "grad_norm": 0.8722233772277832,
-      "learning_rate": 0.00019372183604734336,
-      "loss": 0.01119406521320343,
-      "step": 1010
-    },
-    {
-      "epoch": 0.35006435006435005,
-      "grad_norm": 0.4889911413192749,
-      "learning_rate": 0.00019359561396159922,
-      "loss": 0.01964961290359497,
-      "step": 1020
-    },
-    {
-      "epoch": 0.3534963534963535,
-      "grad_norm": 2.336963176727295,
-      "learning_rate": 0.00019346817758532337,
-      "loss": 0.013343000411987304,
-      "step": 1030
-    },
-    {
-      "epoch": 0.35692835692835695,
-      "grad_norm": 5.095973014831543,
-      "learning_rate": 0.0001933395285718495,
-      "loss": 0.045030930638313295,
-      "step": 1040
-    },
-    {
-      "epoch": 0.36036036036036034,
-      "grad_norm": 1.8921918869018555,
-      "learning_rate": 0.00019320966859024397,
-      "loss": 0.017123931646347047,
-      "step": 1050
-    },
-    {
-      "epoch": 0.3637923637923638,
-      "grad_norm": 0.32683122158050537,
-      "learning_rate": 0.00019307859932528375,
-      "loss": 0.0226660281419754,
-      "step": 1060
-    },
-    {
-      "epoch": 0.36722436722436724,
-      "grad_norm": 1.3853524923324585,
-      "learning_rate": 0.000192946322477435,
-      "loss": 0.009634046256542206,
-      "step": 1070
-    },
-    {
-      "epoch": 0.37065637065637064,
-      "grad_norm": 0.34646913409233093,
-      "learning_rate": 0.0001928128397628307,
-      "loss": 0.03943045735359192,
-      "step": 1080
-    },
-    {
-      "epoch": 0.3740883740883741,
-      "grad_norm": 0.8397905826568604,
-      "learning_rate": 0.00019267815291324852,
-      "loss": 0.017884735763072968,
-      "step": 1090
-    },
-    {
-      "epoch": 0.37752037752037754,
-      "grad_norm": 0.9594695568084717,
-      "learning_rate": 0.00019254226367608842,
-      "loss": 0.01769815683364868,
-      "step": 1100
-    },
-    {
-      "epoch": 0.38095238095238093,
-      "grad_norm": 0.7242799997329712,
-      "learning_rate": 0.0001924051738143498,
-      "loss": 0.04254389405250549,
-      "step": 1110
-    },
-    {
-      "epoch": 0.3843843843843844,
-      "grad_norm": 0.1905749887228012,
-      "learning_rate": 0.00019226688510660877,
-      "loss": 0.011978869885206222,
-      "step": 1120
-    },
-    {
-      "epoch": 0.38781638781638783,
-      "grad_norm": 0.12193301320075989,
-      "learning_rate": 0.00019212739934699498,
-      "loss": 0.010143650323152542,
-      "step": 1130
-    },
-    {
-      "epoch": 0.3912483912483912,
-      "grad_norm": 0.20223842561244965,
-      "learning_rate": 0.00019198671834516843,
-      "loss": 0.012704399228096009,
-      "step": 1140
-    },
-    {
-      "epoch": 0.3946803946803947,
-      "grad_norm": 0.16973139345645905,
-      "learning_rate": 0.00019184484392629586,
-      "loss": 0.009967386722564697,
-      "step": 1150
-    },
-    {
-      "epoch": 0.3981123981123981,
-      "grad_norm": 0.3374408185482025,
-      "learning_rate": 0.00019170177793102736,
-      "loss": 0.013026086986064911,
-      "step": 1160
-    },
-    {
-      "epoch": 0.4015444015444015,
-      "grad_norm": 0.6263651847839355,
-      "learning_rate": 0.0001915575222154721,
-      "loss": 0.014929966628551483,
-      "step": 1170
-    },
-    {
-      "epoch": 0.40497640497640497,
-      "grad_norm": 0.5159519910812378,
-      "learning_rate": 0.00019141207865117448,
-      "loss": 0.022531284391880034,
-      "step": 1180
-    },
-    {
-      "epoch": 0.4084084084084084,
-      "grad_norm": 0.27669215202331543,
-      "learning_rate": 0.0001912654491250899,
-      "loss": 0.013255235552787781,
-      "step": 1190
-    },
-    {
-      "epoch": 0.4118404118404118,
-      "grad_norm": 0.3306339383125305,
-      "learning_rate": 0.00019111763553956006,
-      "loss": 0.0071789674460887905,
-      "step": 1200
-    },
-    {
-      "epoch": 0.41527241527241526,
-      "grad_norm": 0.1400749832391739,
-      "learning_rate": 0.0001909686398122885,
-      "loss": 0.012304867804050445,
-      "step": 1210
-    },
-    {
-      "epoch": 0.4187044187044187,
-      "grad_norm": 0.480392724275589,
-      "learning_rate": 0.0001908184638763156,
-      "loss": 0.013182352483272552,
-      "step": 1220
-    },
-    {
-      "epoch": 0.42213642213642216,
-      "grad_norm": 0.07393156737089157,
-      "learning_rate": 0.00019066710967999352,
-      "loss": 0.01357671171426773,
-      "step": 1230
-    },
-    {
-      "epoch": 0.42556842556842556,
-      "grad_norm": 0.5818315148353577,
-      "learning_rate": 0.00019051457918696092,
-      "loss": 0.01494317352771759,
-      "step": 1240
-    },
-    {
-      "epoch": 0.429000429000429,
-      "grad_norm": 0.12537962198257446,
-      "learning_rate": 0.0001903608743761175,
-      "loss": 0.016142460703849792,
-      "step": 1250
-    },
-    {
-      "epoch": 0.43243243243243246,
-      "grad_norm": 0.6750714778900146,
-      "learning_rate": 0.00019020599724159842,
-      "loss": 0.010620266944169999,
-      "step": 1260
-    },
-    {
-      "epoch": 0.43586443586443585,
-      "grad_norm": 2.9211416244506836,
-      "learning_rate": 0.00019004994979274816,
-      "loss": 0.02269883006811142,
-      "step": 1270
-    },
-    {
-      "epoch": 0.4392964392964393,
-      "grad_norm": 0.16892600059509277,
-      "learning_rate": 0.0001898927340540947,
-      "loss": 0.020440049469470978,
-      "step": 1280
-    },
-    {
-      "epoch": 0.44272844272844275,
-      "grad_norm": 1.130327820777893,
-      "learning_rate": 0.00018973435206532323,
-      "loss": 0.012587438523769378,
-      "step": 1290
-    },
-    {
-      "epoch": 0.44616044616044614,
-      "grad_norm": 1.3412842750549316,
-      "learning_rate": 0.00018957480588124956,
-      "loss": 0.009808909147977829,
-      "step": 1300
     }
   ],
   "logging_steps": 10,
@@ -937,7 +447,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4930831518299136.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2059202059202059,
   "eval_steps": 500,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019783319385748891,
       "loss": 0.005371841043233872,
       "step": 600
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2276492884035072.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null