Training in progress, step 50, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +237 -175
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -10,23 +10,23 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 8,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 4,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
     "v_proj",
     "q_proj",
-    "o_proj",
     "up_proj",
-    "gate_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 128,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
     "v_proj",
     "q_proj",
     "up_proj",
+    "o_proj",
+    "down_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b85235b9fd06fc2b7556026c9d400735ea1cf6ceb01413094a4e13a4f220409c
-size 40036040

 version https://git-lfs.github.com/spec/v1
+oid sha256:fda53b2dbb64ee3df1572930d050a53fd43af43308677011be467155149e9da9
+size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd89e21dd25e4404711c18b21837d9f7f514a8add2433a4536b637bc67095f19
-size 20814996

 version https://git-lfs.github.com/spec/v1
+oid sha256:60f307f665d6353bf718fbea916abadf16ba6ed584c31e16d0fb310e793bcda5
+size 325350676

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cc25fc0d4a476d263c6b1dbecc0b805055d4f792509dbda83e19cdd50420ee0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5915d9c3b1deb3d66ce6ff11178835811888398e5199a01a9136f0a008ed6a10
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a75dcc5ec09eada6641b366eac390a2a47e7ec4306b94cfdb718bc9a73ac9b0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:183d86b4afef5d114e28423b41699eb53696ddb9b0b1e5de0b39a3f185c3455e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,323 +1,385 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0014274116566004406,
-  "eval_steps": 20,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 3.568529141501102e-05,
-      "grad_norm": 1.025141716003418,
       "learning_rate": 2.9999999999999997e-05,
-      "loss": 2.7097,
       "step": 1
     },
     {
-      "epoch": 3.568529141501102e-05,
-      "eval_loss": 3.7184572219848633,
-      "eval_runtime": 200.2041,
-      "eval_samples_per_second": 11.428,
-      "eval_steps_per_second": 11.428,
       "step": 1
     },
     {
-      "epoch": 7.137058283002204e-05,
-      "grad_norm": 1.6995129585266113,
       "learning_rate": 5.9999999999999995e-05,
-      "loss": 2.9932,
       "step": 2
     },
     {
-      "epoch": 0.00010705587424503305,
-      "grad_norm": 2.4651355743408203,
       "learning_rate": 8.999999999999999e-05,
-      "loss": 3.8545,
       "step": 3
     },
     {
-      "epoch": 0.00014274116566004408,
-      "grad_norm": 1.8627501726150513,
       "learning_rate": 0.00011999999999999999,
-      "loss": 2.4703,
       "step": 4
     },
     {
-      "epoch": 0.00017842645707505508,
-      "grad_norm": 3.136054277420044,
       "learning_rate": 0.00015,
-      "loss": 3.9009,
       "step": 5
     },
     {
-      "epoch": 0.0002141117484900661,
-      "grad_norm": 1.439232349395752,
       "learning_rate": 0.00017999999999999998,
-      "loss": 2.9751,
       "step": 6
     },
     {
-      "epoch": 0.00024979703990507713,
-      "grad_norm": 3.2479472160339355,
       "learning_rate": 0.00020999999999999998,
-      "loss": 3.1769,
       "step": 7
     },
     {
-      "epoch": 0.00028548233132008816,
-      "grad_norm": 5.708984375,
       "learning_rate": 0.00023999999999999998,
-      "loss": 4.0564,
       "step": 8
     },
     {
-      "epoch": 0.00032116762273509913,
-      "grad_norm": 3.132951259613037,
       "learning_rate": 0.00027,
-      "loss": 2.5955,
       "step": 9
     },
     {
-      "epoch": 0.00035685291415011016,
-      "grad_norm": 3.4401779174804688,
       "learning_rate": 0.0003,
-      "loss": 2.9243,
       "step": 10
     },
     {
-      "epoch": 0.0003925382055651212,
-      "grad_norm": 3.282693386077881,
-      "learning_rate": 0.0002999911984174669,
-      "loss": 2.7178,
       "step": 11
     },
     {
-      "epoch": 0.0004282234969801322,
-      "grad_norm": 3.4943759441375732,
-      "learning_rate": 0.0002999647947027726,
-      "loss": 2.9392,
       "step": 12
     },
     {
-      "epoch": 0.00046390878839514324,
-      "grad_norm": 3.1215672492980957,
-      "learning_rate": 0.0002999207919545099,
-      "loss": 3.2245,
       "step": 13
     },
     {
-      "epoch": 0.0004995940798101543,
-      "grad_norm": 3.432668924331665,
-      "learning_rate": 0.0002998591953365965,
-      "loss": 3.2253,
       "step": 14
     },
     {
-      "epoch": 0.0005352793712251652,
-      "grad_norm": 4.813501358032227,
-      "learning_rate": 0.00029978001207766854,
-      "loss": 3.104,
       "step": 15
     },
     {
-      "epoch": 0.0005709646626401763,
-      "grad_norm": 2.568082809448242,
-      "learning_rate": 0.00029968325147023263,
-      "loss": 2.8546,
       "step": 16
     },
     {
-      "epoch": 0.0006066499540551873,
-      "grad_norm": 4.107773780822754,
-      "learning_rate": 0.000299568924869575,
-      "loss": 2.7499,
       "step": 17
     },
     {
-      "epoch": 0.0006423352454701983,
-      "grad_norm": 3.392408847808838,
-      "learning_rate": 0.00029943704569242917,
-      "loss": 3.1998,
       "step": 18
     },
     {
-      "epoch": 0.0006780205368852093,
-      "grad_norm": 4.212235927581787,
-      "learning_rate": 0.0002992876294154013,
-      "loss": 2.4685,
       "step": 19
     },
     {
-      "epoch": 0.0007137058283002203,
-      "grad_norm": 2.821186065673828,
-      "learning_rate": 0.00029912069357315393,
-      "loss": 2.7551,
       "step": 20
     },
     {
-      "epoch": 0.0007137058283002203,
-      "eval_loss": 2.9195058345794678,
-      "eval_runtime": 199.0718,
-      "eval_samples_per_second": 11.493,
-      "eval_steps_per_second": 11.493,
-      "step": 20
-    },
-    {
-      "epoch": 0.0007493911197152314,
-      "grad_norm": 3.009274482727051,
-      "learning_rate": 0.00029893625775634835,
-      "loss": 3.1292,
       "step": 21
     },
     {
-      "epoch": 0.0007850764111302424,
-      "grad_norm": 4.148017406463623,
-      "learning_rate": 0.0002987343436093454,
-      "loss": 3.0939,
       "step": 22
     },
     {
-      "epoch": 0.0008207617025452535,
-      "grad_norm": 2.899528980255127,
-      "learning_rate": 0.00029851497482766547,
-      "loss": 2.8648,
       "step": 23
     },
     {
-      "epoch": 0.0008564469939602644,
-      "grad_norm": 3.6243538856506348,
-      "learning_rate": 0.00029827817715520773,
-      "loss": 3.1808,
       "step": 24
     },
     {
-      "epoch": 0.0008921322853752754,
-      "grad_norm": 4.4735894203186035,
-      "learning_rate": 0.0002980239783812289,
-      "loss": 2.9876,
       "step": 25
     },
     {
-      "epoch": 0.0009278175767902865,
-      "grad_norm": 3.8239352703094482,
-      "learning_rate": 0.0002977524083370822,
-      "loss": 3.6976,
       "step": 26
     },
     {
-      "epoch": 0.0009635028682052975,
-      "grad_norm": 3.177990436553955,
-      "learning_rate": 0.00029746349889271645,
-      "loss": 2.5572,
       "step": 27
     },
     {
-      "epoch": 0.0009991881596203085,
-      "grad_norm": 2.7317492961883545,
-      "learning_rate": 0.0002971572839529358,
-      "loss": 2.6316,
       "step": 28
     },
     {
-      "epoch": 0.0010348734510353195,
-      "grad_norm": 4.169151306152344,
-      "learning_rate": 0.00029683379945342125,
-      "loss": 3.3164,
       "step": 29
     },
     {
-      "epoch": 0.0010705587424503305,
-      "grad_norm": 4.244836807250977,
-      "learning_rate": 0.000296493083356513,
-      "loss": 2.8311,
       "step": 30
     },
     {
-      "epoch": 0.0011062440338653415,
-      "grad_norm": 3.5411789417266846,
-      "learning_rate": 0.00029613517564675565,
-      "loss": 2.5447,
       "step": 31
     },
     {
-      "epoch": 0.0011419293252803526,
-      "grad_norm": 3.9989023208618164,
-      "learning_rate": 0.0002957601183262058,
-      "loss": 2.6177,
       "step": 32
     },
     {
-      "epoch": 0.0011776146166953636,
-      "grad_norm": 3.9558351039886475,
-      "learning_rate": 0.000295367955409503,
-      "loss": 2.5028,
       "step": 33
     },
     {
-      "epoch": 0.0012132999081103746,
-      "grad_norm": 4.089743614196777,
-      "learning_rate": 0.00029495873291870436,
-      "loss": 3.1483,
       "step": 34
     },
     {
-      "epoch": 0.0012489851995253856,
-      "grad_norm": 4.250207901000977,
-      "learning_rate": 0.0002945324988778834,
-      "loss": 2.8495,
       "step": 35
     },
     {
-      "epoch": 0.0012846704909403965,
-      "grad_norm": 5.241243839263916,
-      "learning_rate": 0.00029408930330749477,
-      "loss": 3.318,
       "step": 36
     },
     {
-      "epoch": 0.0013203557823554077,
-      "grad_norm": 3.5582404136657715,
-      "learning_rate": 0.0002936291982185036,
-      "loss": 2.5436,
       "step": 37
     },
     {
-      "epoch": 0.0013560410737704187,
-      "grad_norm": 4.2042622566223145,
-      "learning_rate": 0.00029315223760628217,
-      "loss": 2.849,
       "step": 38
     },
     {
-      "epoch": 0.0013917263651854297,
-      "grad_norm": 3.4244472980499268,
-      "learning_rate": 0.00029265847744427303,
-      "loss": 2.2284,
       "step": 39
     },
     {
-      "epoch": 0.0014274116566004406,
-      "grad_norm": 5.198617458343506,
-      "learning_rate": 0.00029214797567742035,
-      "loss": 2.3866,
       "step": 40
     },
     {
-      "epoch": 0.0014274116566004406,
-      "eval_loss": 2.7816474437713623,
-      "eval_runtime": 199.5889,
-      "eval_samples_per_second": 11.464,
-      "eval_steps_per_second": 11.464,
-      "step": 40
     }
   ],
   "logging_steps": 1,
-  "max_steps": 300,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
-  "save_steps": 20,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -330,7 +392,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 769818193035264.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.001757361146502412,
+  "eval_steps": 50,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 3.514722293004824e-05,
+      "grad_norm": 3.2373061180114746,
       "learning_rate": 2.9999999999999997e-05,
+      "loss": 2.6534,
       "step": 1
     },
     {
+      "epoch": 3.514722293004824e-05,
+      "eval_loss": 3.5773849487304688,
+      "eval_runtime": 122.8556,
+      "eval_samples_per_second": 4.656,
+      "eval_steps_per_second": 4.656,
       "step": 1
     },
     {
+      "epoch": 7.029444586009648e-05,
+      "grad_norm": 10.253190040588379,
       "learning_rate": 5.9999999999999995e-05,
+      "loss": 3.5291,
       "step": 2
     },
     {
+      "epoch": 0.00010544166879014472,
+      "grad_norm": 8.353500366210938,
       "learning_rate": 8.999999999999999e-05,
+      "loss": 2.8977,
       "step": 3
     },
     {
+      "epoch": 0.00014058889172019297,
+      "grad_norm": 7.733084201812744,
       "learning_rate": 0.00011999999999999999,
+      "loss": 3.5255,
       "step": 4
     },
     {
+      "epoch": 0.0001757361146502412,
+      "grad_norm": 9.435683250427246,
       "learning_rate": 0.00015,
+      "loss": 2.3491,
       "step": 5
     },
     {
+      "epoch": 0.00021088333758028944,
+      "grad_norm": 7.884566307067871,
       "learning_rate": 0.00017999999999999998,
+      "loss": 3.0472,
       "step": 6
     },
     {
+      "epoch": 0.0002460305605103377,
+      "grad_norm": 7.662365436553955,
       "learning_rate": 0.00020999999999999998,
+      "loss": 2.8102,
       "step": 7
     },
     {
+      "epoch": 0.00028117778344038594,
+      "grad_norm": 12.221363067626953,
       "learning_rate": 0.00023999999999999998,
+      "loss": 2.9842,
       "step": 8
     },
     {
+      "epoch": 0.00031632500637043413,
+      "grad_norm": 6.3601908683776855,
       "learning_rate": 0.00027,
+      "loss": 2.3343,
       "step": 9
     },
     {
+      "epoch": 0.0003514722293004824,
+      "grad_norm": 7.852142810821533,
       "learning_rate": 0.0003,
+      "loss": 2.6562,
       "step": 10
     },
     {
+      "epoch": 0.00038661945223053063,
+      "grad_norm": 3.860373020172119,
+      "learning_rate": 0.00029999691704375486,
+      "loss": 3.1401,
       "step": 11
     },
     {
+      "epoch": 0.0004217666751605789,
+      "grad_norm": 6.923058032989502,
+      "learning_rate": 0.00029998766830174786,
+      "loss": 2.9283,
       "step": 12
     },
     {
+      "epoch": 0.00045691389809062713,
+      "grad_norm": 7.274583339691162,
+      "learning_rate": 0.00029997225415415846,
+      "loss": 2.9534,
       "step": 13
     },
     {
+      "epoch": 0.0004920611210206754,
+      "grad_norm": 6.7508978843688965,
+      "learning_rate": 0.00029995067523460196,
+      "loss": 3.2048,
       "step": 14
     },
     {
+      "epoch": 0.0005272083439507236,
+      "grad_norm": 7.768868446350098,
+      "learning_rate": 0.0002999229324301032,
+      "loss": 3.0451,
       "step": 15
     },
     {
+      "epoch": 0.0005623555668807719,
+      "grad_norm": 5.01677131652832,
+      "learning_rate": 0.0002998890268810601,
+      "loss": 2.8798,
       "step": 16
     },
     {
+      "epoch": 0.0005975027898108201,
+      "grad_norm": 6.7470293045043945,
+      "learning_rate": 0.0002998489599811972,
+      "loss": 2.47,
       "step": 17
     },
     {
+      "epoch": 0.0006326500127408683,
+      "grad_norm": 8.27274227142334,
+      "learning_rate": 0.00029980273337750765,
+      "loss": 3.1441,
       "step": 18
     },
     {
+      "epoch": 0.0006677972356709165,
+      "grad_norm": 8.152812004089355,
+      "learning_rate": 0.00029975034897018613,
+      "loss": 3.4123,
       "step": 19
     },
     {
+      "epoch": 0.0007029444586009648,
+      "grad_norm": 7.479596138000488,
+      "learning_rate": 0.00029969180891255043,
+      "loss": 3.336,
       "step": 20
     },
     {
+      "epoch": 0.000738091681531013,
+      "grad_norm": 6.2453789710998535,
+      "learning_rate": 0.00029962711561095306,
+      "loss": 3.3127,
       "step": 21
     },
     {
+      "epoch": 0.0007732389044610613,
+      "grad_norm": 7.580628871917725,
+      "learning_rate": 0.00029955627172468223,
+      "loss": 3.2636,
       "step": 22
     },
     {
+      "epoch": 0.0008083861273911095,
+      "grad_norm": 6.434226989746094,
+      "learning_rate": 0.0002994792801658527,
+      "loss": 2.7362,
       "step": 23
     },
     {
+      "epoch": 0.0008435333503211578,
+      "grad_norm": 6.997501373291016,
+      "learning_rate": 0.00029939614409928584,
+      "loss": 2.872,
       "step": 24
     },
     {
+      "epoch": 0.000878680573251206,
+      "grad_norm": 6.878482818603516,
+      "learning_rate": 0.0002993068669423797,
+      "loss": 2.7587,
       "step": 25
     },
     {
+      "epoch": 0.0009138277961812543,
+      "grad_norm": 6.985559463500977,
+      "learning_rate": 0.0002992114523649686,
+      "loss": 2.891,
       "step": 26
     },
     {
+      "epoch": 0.0009489750191113025,
+      "grad_norm": 9.444601058959961,
+      "learning_rate": 0.000299109904289172,
+      "loss": 2.9249,
       "step": 27
     },
     {
+      "epoch": 0.0009841222420413508,
+      "grad_norm": 6.679138660430908,
+      "learning_rate": 0.0002990022268892337,
+      "loss": 2.752,
       "step": 28
     },
     {
+      "epoch": 0.001019269464971399,
+      "grad_norm": 9.364578247070312,
+      "learning_rate": 0.00029888842459134974,
+      "loss": 3.3749,
       "step": 29
     },
     {
+      "epoch": 0.0010544166879014473,
+      "grad_norm": 12.166234970092773,
+      "learning_rate": 0.0002987685020734869,
+      "loss": 3.7972,
       "step": 30
     },
     {
+      "epoch": 0.0010895639108314955,
+      "grad_norm": 7.539794921875,
+      "learning_rate": 0.0002986424642651902,
+      "loss": 2.9194,
       "step": 31
     },
     {
+      "epoch": 0.0011247111337615438,
+      "grad_norm": 9.334528923034668,
+      "learning_rate": 0.00029851031634738024,
+      "loss": 3.0255,
       "step": 32
     },
     {
+      "epoch": 0.001159858356691592,
+      "grad_norm": 9.99315357208252,
+      "learning_rate": 0.0002983720637521404,
+      "loss": 2.8137,
       "step": 33
     },
     {
+      "epoch": 0.0011950055796216402,
+      "grad_norm": 8.625016212463379,
+      "learning_rate": 0.00029822771216249334,
+      "loss": 3.1498,
       "step": 34
     },
     {
+      "epoch": 0.0012301528025516883,
+      "grad_norm": 7.1607441902160645,
+      "learning_rate": 0.00029807726751216753,
+      "loss": 2.9576,
       "step": 35
     },
     {
+      "epoch": 0.0012653000254817365,
+      "grad_norm": 7.234793186187744,
+      "learning_rate": 0.0002979207359853532,
+      "loss": 2.7008,
       "step": 36
     },
     {
+      "epoch": 0.0013004472484117848,
+      "grad_norm": 9.99887752532959,
+      "learning_rate": 0.0002977581240164485,
+      "loss": 3.6027,
       "step": 37
     },
     {
+      "epoch": 0.001335594471341833,
+      "grad_norm": 9.398946762084961,
+      "learning_rate": 0.00029758943828979444,
+      "loss": 3.0668,
       "step": 38
     },
     {
+      "epoch": 0.0013707416942718813,
+      "grad_norm": 7.37238073348999,
+      "learning_rate": 0.00029741468573940056,
+      "loss": 2.915,
       "step": 39
     },
     {
+      "epoch": 0.0014058889172019295,
+      "grad_norm": 9.463376998901367,
+      "learning_rate": 0.0002972338735486598,
+      "loss": 2.7669,
       "step": 40
     },
     {
+      "epoch": 0.0014410361401319778,
+      "grad_norm": 8.600595474243164,
+      "learning_rate": 0.00029704700915005305,
+      "loss": 2.9336,
+      "step": 41
+    },
+    {
+      "epoch": 0.001476183363062026,
+      "grad_norm": 9.883442878723145,
+      "learning_rate": 0.00029685410022484393,
+      "loss": 3.2071,
+      "step": 42
+    },
+    {
+      "epoch": 0.0015113305859920743,
+      "grad_norm": 12.098119735717773,
+      "learning_rate": 0.0002966551547027627,
+      "loss": 3.0556,
+      "step": 43
+    },
+    {
+      "epoch": 0.0015464778089221225,
+      "grad_norm": 17.335891723632812,
+      "learning_rate": 0.0002964501807616806,
+      "loss": 3.9033,
+      "step": 44
+    },
+    {
+      "epoch": 0.0015816250318521708,
+      "grad_norm": 8.842806816101074,
+      "learning_rate": 0.0002962391868272735,
+      "loss": 3.3062,
+      "step": 45
+    },
+    {
+      "epoch": 0.001616772254782219,
+      "grad_norm": 11.304153442382812,
+      "learning_rate": 0.0002960221815726757,
+      "loss": 2.0332,
+      "step": 46
+    },
+    {
+      "epoch": 0.0016519194777122673,
+      "grad_norm": 10.703750610351562,
+      "learning_rate": 0.00029579917391812314,
+      "loss": 2.9962,
+      "step": 47
+    },
+    {
+      "epoch": 0.0016870667006423155,
+      "grad_norm": 20.069766998291016,
+      "learning_rate": 0.0002955701730305872,
+      "loss": 3.6418,
+      "step": 48
+    },
+    {
+      "epoch": 0.0017222139235723638,
+      "grad_norm": 11.564349174499512,
+      "learning_rate": 0.00029533518832339727,
+      "loss": 3.2814,
+      "step": 49
+    },
+    {
+      "epoch": 0.001757361146502412,
+      "grad_norm": 12.544486045837402,
+      "learning_rate": 0.0002950942294558544,
+      "loss": 3.4021,
+      "step": 50
+    },
+    {
+      "epoch": 0.001757361146502412,
+      "eval_loss": 3.1045420169830322,
+      "eval_runtime": 122.4658,
+      "eval_samples_per_second": 4.671,
+      "eval_steps_per_second": 4.671,
+      "step": 50
     }
   ],
   "logging_steps": 1,
+  "max_steps": 500,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
+  "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
       "attributes": {}
     }
   },
+  "total_flos": 4482169061769216.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8572afd05393422d8959a963fd5560752865fdb6ad5191112e1b0b4f625b9672
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:467127371b2d4da2f03dc3723fc2daad3034ada0f638fbfcb2df74368b95df56
 size 6776