MHGanainy/16-clusters-imbalanced-10

Browse files

Files changed (5) hide show

README.md +2 -0
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +350 -0

README.md CHANGED Viewed

@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # 16-clusters-imbalanced-10
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
 ## Model description

 # 16-clusters-imbalanced-10
 This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.0434
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 2.043407678604126,
+    "eval_runtime": 65.3589,
+    "eval_samples_per_second": 13.678,
+    "eval_steps_per_second": 1.714,
+    "perplexity": 7.716861026791248,
+    "total_flos": 8.01984399409152e+16,
+    "train_loss": 2.1353629073560736,
+    "train_runtime": 1379.8897,
+    "train_samples_per_second": 6.396,
+    "train_steps_per_second": 3.198
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 2.043407678604126,
+    "eval_runtime": 65.3589,
+    "eval_samples_per_second": 13.678,
+    "eval_steps_per_second": 1.714,
+    "perplexity": 7.716861026791248
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 8.01984399409152e+16,
+    "train_loss": 2.1353629073560736,
+    "train_runtime": 1379.8897,
+    "train_samples_per_second": 6.396,
+    "train_steps_per_second": 3.198
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,350 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 4413,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.022660321776569226,
+      "grad_norm": 0.09582193195819855,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 2.4053,
+      "step": 100
+    },
+    {
+      "epoch": 0.04532064355313845,
+      "grad_norm": 0.1730571836233139,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 2.3825,
+      "step": 200
+    },
+    {
+      "epoch": 0.06798096532970768,
+      "grad_norm": 0.3355884253978729,
+      "learning_rate": 2e-05,
+      "loss": 2.356,
+      "step": 300
+    },
+    {
+      "epoch": 0.0906412871062769,
+      "grad_norm": 0.37008875608444214,
+      "learning_rate": 1.9970843111690533e-05,
+      "loss": 2.2935,
+      "step": 400
+    },
+    {
+      "epoch": 0.11330160888284614,
+      "grad_norm": 0.5142782330513,
+      "learning_rate": 1.9883542471589315e-05,
+      "loss": 2.2561,
+      "step": 500
+    },
+    {
+      "epoch": 0.13596193065941536,
+      "grad_norm": 0.42936116456985474,
+      "learning_rate": 1.9738607162698895e-05,
+      "loss": 2.2424,
+      "step": 600
+    },
+    {
+      "epoch": 0.1586222524359846,
+      "grad_norm": 0.537521243095398,
+      "learning_rate": 1.9536882357541958e-05,
+      "loss": 2.242,
+      "step": 700
+    },
+    {
+      "epoch": 0.1812825742125538,
+      "grad_norm": 0.602051854133606,
+      "learning_rate": 1.927954438964115e-05,
+      "loss": 2.214,
+      "step": 800
+    },
+    {
+      "epoch": 0.20394289598912305,
+      "grad_norm": 0.5285528898239136,
+      "learning_rate": 1.8968093893874042e-05,
+      "loss": 2.1943,
+      "step": 900
+    },
+    {
+      "epoch": 0.22660321776569228,
+      "grad_norm": 0.6450159549713135,
+      "learning_rate": 1.8604347055704433e-05,
+      "loss": 2.1566,
+      "step": 1000
+    },
+    {
+      "epoch": 0.2492635395422615,
+      "grad_norm": 0.6594407558441162,
+      "learning_rate": 1.8190425020319016e-05,
+      "loss": 2.1578,
+      "step": 1100
+    },
+    {
+      "epoch": 0.27192386131883073,
+      "grad_norm": 0.6740846633911133,
+      "learning_rate": 1.7728741523428696e-05,
+      "loss": 2.1578,
+      "step": 1200
+    },
+    {
+      "epoch": 0.29458418309539997,
+      "grad_norm": 0.6054636240005493,
+      "learning_rate": 1.722198881586411e-05,
+      "loss": 2.1301,
+      "step": 1300
+    },
+    {
+      "epoch": 0.3172445048719692,
+      "grad_norm": 0.5829110145568848,
+      "learning_rate": 1.667312196404425e-05,
+      "loss": 2.1366,
+      "step": 1400
+    },
+    {
+      "epoch": 0.3399048266485384,
+      "grad_norm": 0.6636696457862854,
+      "learning_rate": 1.6085341617868172e-05,
+      "loss": 2.1301,
+      "step": 1500
+    },
+    {
+      "epoch": 0.3625651484251076,
+      "grad_norm": 0.8352382779121399,
+      "learning_rate": 1.546207534651667e-05,
+      "loss": 2.1157,
+      "step": 1600
+    },
+    {
+      "epoch": 0.38522547020167686,
+      "grad_norm": 0.6855395436286926,
+      "learning_rate": 1.4806957651001911e-05,
+      "loss": 2.1084,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4078857919782461,
+      "grad_norm": 0.8896074891090393,
+      "learning_rate": 1.4123808770019433e-05,
+      "loss": 2.1494,
+      "step": 1800
+    },
+    {
+      "epoch": 0.43054611375481533,
+      "grad_norm": 0.7051901817321777,
+      "learning_rate": 1.3416612402693543e-05,
+      "loss": 2.1406,
+      "step": 1900
+    },
+    {
+      "epoch": 0.45320643553138457,
+      "grad_norm": 0.9859122633934021,
+      "learning_rate": 1.2689492478123242e-05,
+      "loss": 2.1142,
+      "step": 2000
+    },
+    {
+      "epoch": 0.47586675730795375,
+      "grad_norm": 0.9501364827156067,
+      "learning_rate": 1.1946689107194183e-05,
+      "loss": 2.091,
+      "step": 2100
+    },
+    {
+      "epoch": 0.498527079084523,
+      "grad_norm": 1.0703001022338867,
+      "learning_rate": 1.119253385689078e-05,
+      "loss": 2.0765,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5211874008610923,
+      "grad_norm": 0.669400691986084,
+      "learning_rate": 1.0431424491293254e-05,
+      "loss": 2.0824,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5438477226376615,
+      "grad_norm": 0.7835758924484253,
+      "learning_rate": 9.667799326554403e-06,
+      "loss": 2.0818,
+      "step": 2400
+    },
+    {
+      "epoch": 0.5665080444142306,
+      "grad_norm": 0.8207575082778931,
+      "learning_rate": 8.906111349401949e-06,
+      "loss": 2.1016,
+      "step": 2500
+    },
+    {
+      "epoch": 0.5891683661907999,
+      "grad_norm": 0.8124341368675232,
+      "learning_rate": 8.150802250091193e-06,
+      "loss": 2.0647,
+      "step": 2600
+    },
+    {
+      "epoch": 0.6118286879673691,
+      "grad_norm": 0.8744191527366638,
+      "learning_rate": 7.406276521231679e-06,
+      "loss": 2.0657,
+      "step": 2700
+    },
+    {
+      "epoch": 0.6344890097439384,
+      "grad_norm": 1.0869206190109253,
+      "learning_rate": 6.676875773527383e-06,
+      "loss": 2.0547,
+      "step": 2800
+    },
+    {
+      "epoch": 0.6571493315205076,
+      "grad_norm": 0.7237268686294556,
+      "learning_rate": 5.966853418205035e-06,
+      "loss": 2.124,
+      "step": 2900
+    },
+    {
+      "epoch": 0.6798096532970768,
+      "grad_norm": 0.9836551547050476,
+      "learning_rate": 5.2803498637669055e-06,
+      "loss": 2.0877,
+      "step": 3000
+    },
+    {
+      "epoch": 0.7024699750736461,
+      "grad_norm": 0.8831650614738464,
+      "learning_rate": 4.621368371705162e-06,
+      "loss": 2.0978,
+      "step": 3100
+    },
+    {
+      "epoch": 0.7251302968502152,
+      "grad_norm": 0.8482229709625244,
+      "learning_rate": 3.993751711972204e-06,
+      "loss": 2.075,
+      "step": 3200
+    },
+    {
+      "epoch": 0.7477906186267845,
+      "grad_norm": 0.8325951099395752,
+      "learning_rate": 3.401159754337836e-06,
+      "loss": 2.1016,
+      "step": 3300
+    },
+    {
+      "epoch": 0.7704509404033537,
+      "grad_norm": 1.3220783472061157,
+      "learning_rate": 2.8470481263064255e-06,
+      "loss": 2.1096,
+      "step": 3400
+    },
+    {
+      "epoch": 0.793111262179923,
+      "grad_norm": 0.8809642195701599,
+      "learning_rate": 2.3346480620478685e-06,
+      "loss": 2.079,
+      "step": 3500
+    },
+    {
+      "epoch": 0.8157715839564922,
+      "grad_norm": 0.9344497919082642,
+      "learning_rate": 1.866947559850839e-06,
+      "loss": 2.1025,
+      "step": 3600
+    },
+    {
+      "epoch": 0.8384319057330614,
+      "grad_norm": 0.9643566012382507,
+      "learning_rate": 1.446673957976298e-06,
+      "loss": 2.1116,
+      "step": 3700
+    },
+    {
+      "epoch": 0.8610922275096307,
+      "grad_norm": 1.0109236240386963,
+      "learning_rate": 1.0762780305181064e-06,
+      "loss": 2.0662,
+      "step": 3800
+    },
+    {
+      "epoch": 0.8837525492861998,
+      "grad_norm": 0.9456806182861328,
+      "learning_rate": 7.579196960136958e-07,
+      "loss": 2.0815,
+      "step": 3900
+    },
+    {
+      "epoch": 0.9064128710627691,
+      "grad_norm": 1.229778528213501,
+      "learning_rate": 4.934554221433741e-07,
+      "loss": 2.0636,
+      "step": 4000
+    },
+    {
+      "epoch": 0.9290731928393383,
+      "grad_norm": 0.8360131978988647,
+      "learning_rate": 2.8442739996615956e-07,
+      "loss": 2.0465,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9517335146159075,
+      "grad_norm": 0.6662079691886902,
+      "learning_rate": 1.3205455082128228e-07,
+      "loss": 2.0419,
+      "step": 4200
+    },
+    {
+      "epoch": 0.9743938363924768,
+      "grad_norm": 0.8761087656021118,
+      "learning_rate": 3.7225418337528685e-08,
+      "loss": 2.0664,
+      "step": 4300
+    },
+    {
+      "epoch": 0.997054158169046,
+      "grad_norm": 1.0942589044570923,
+      "learning_rate": 4.929869997571945e-10,
+      "loss": 2.0249,
+      "step": 4400
+    },
+    {
+      "epoch": 1.0,
+      "step": 4413,
+      "total_flos": 8.01984399409152e+16,
+      "train_loss": 2.1353629073560736,
+      "train_runtime": 1379.8897,
+      "train_samples_per_second": 6.396,
+      "train_steps_per_second": 3.198
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 4413,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.01984399409152e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}