Upload 16 files

Browse files

Files changed (16) hide show

results/checkpoint-261/config.json +39 -0
results/checkpoint-261/generation_config.json +6 -0
results/checkpoint-261/model.safetensors +3 -0
results/checkpoint-261/optimizer.pt +3 -0
results/checkpoint-261/rng_state.pth +3 -0
results/checkpoint-261/scheduler.pt +3 -0
results/checkpoint-261/trainer_state.json +32 -0
results/checkpoint-261/training_args.bin +3 -0
results/checkpoint-780/config.json +39 -0
results/checkpoint-780/generation_config.json +6 -0
results/checkpoint-780/model.safetensors +3 -0
results/checkpoint-780/optimizer.pt +3 -0
results/checkpoint-780/rng_state.pth +3 -0
results/checkpoint-780/scheduler.pt +3 -0
results/checkpoint-780/trainer_state.json +154 -0
results/checkpoint-780/training_args.bin +3 -0

results/checkpoint-261/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "sshleifer/tiny-gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 2,
+  "n_head": 2,
+  "n_inner": null,
+  "n_layer": 2,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

results/checkpoint-261/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.44.0"
+}

results/checkpoint-261/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:640601ba0b3c1e4fd7165066bc40d9f8763ab0bf0f6100dd1774347029fa9d6e
+size 413296

results/checkpoint-261/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f681331a803a387caafb9c32ec9a0db1b3488ad53149947c96b53d572342b75e
+size 843914

results/checkpoint-261/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f479ad266ec74a3abb9fbbca69099f40c96f82543253302c1a6863078f93d5bf
+size 13990

results/checkpoint-261/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c35e5bd9ecf468d46bff56ff4d224e988cf593ec3cefa0ab68a1aa4aa0dca58
+size 1064

results/checkpoint-261/trainer_state.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 261,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 500,
+  "max_steps": 261,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 10000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 121171968.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

results/checkpoint-261/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5a9801ba1718affc6aa74261559536f39e94ea7a2abc8fc5a228f92471ec808
+size 5112

results/checkpoint-780/config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "_name_or_path": "sshleifer/tiny-gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 2,
+  "n_head": 2,
+  "n_inner": null,
+  "n_layer": 2,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.0",
+  "use_cache": true,
+  "vocab_size": 50257
+}

results/checkpoint-780/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.44.0"
+}

results/checkpoint-780/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5433a97ae6e3065d18a7dfccc21a2a57756708fde717b57b5d411fcb02570b3c
+size 413296

results/checkpoint-780/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea04690b7cf7aac0cbcd904574d8af9ed2d8c6f5b1cf3b0ab0f7b3e5a9a99f6f
+size 843914

results/checkpoint-780/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df5fc66fffd1f067af01dd93904daffa117b42757d39da87dd8616556f5d8ae8
+size 13990

results/checkpoint-780/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04724f42bf4812cc2716a9b914df39dc353c86cdbfee5c8198ced0bfcbe76d6c
+size 1064

results/checkpoint-780/trainer_state.json ADDED Viewed

	@@ -0,0 +1,154 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 100,
+  "global_step": 780,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6410256410256411,
+      "grad_norm": 0.5242204666137695,
+      "learning_rate": 0.002,
+      "loss": 10.7409,
+      "step": 100
+    },
+    {
+      "epoch": 0.6410256410256411,
+      "eval_loss": null,
+      "eval_perplexity": 39350.50390625,
+      "eval_runtime": 6.6038,
+      "eval_samples_per_second": 5.3,
+      "eval_steps_per_second": 2.726,
+      "step": 100
+    },
+    {
+      "epoch": 1.282051282051282,
+      "grad_norm": 1.9119541645050049,
+      "learning_rate": 0.004,
+      "loss": 9.6783,
+      "step": 200
+    },
+    {
+      "epoch": 1.282051282051282,
+      "eval_loss": null,
+      "eval_perplexity": 5642.44091796875,
+      "eval_runtime": 6.5477,
+      "eval_samples_per_second": 5.345,
+      "eval_steps_per_second": 2.749,
+      "step": 200
+    },
+    {
+      "epoch": 1.9230769230769231,
+      "grad_norm": 1.3237804174423218,
+      "learning_rate": 0.006,
+      "loss": 6.8212,
+      "step": 300
+    },
+    {
+      "epoch": 1.9230769230769231,
+      "eval_loss": null,
+      "eval_perplexity": 653.0867309570312,
+      "eval_runtime": 6.3668,
+      "eval_samples_per_second": 5.497,
+      "eval_steps_per_second": 2.827,
+      "step": 300
+    },
+    {
+      "epoch": 2.564102564102564,
+      "grad_norm": 1.503178596496582,
+      "learning_rate": 0.008,
+      "loss": 5.5177,
+      "step": 400
+    },
+    {
+      "epoch": 2.564102564102564,
+      "eval_loss": null,
+      "eval_perplexity": 1196.52783203125,
+      "eval_runtime": 6.6136,
+      "eval_samples_per_second": 5.292,
+      "eval_steps_per_second": 2.722,
+      "step": 400
+    },
+    {
+      "epoch": 3.2051282051282053,
+      "grad_norm": 3.9715425968170166,
+      "learning_rate": 0.01,
+      "loss": 5.4345,
+      "step": 500
+    },
+    {
+      "epoch": 3.2051282051282053,
+      "eval_loss": null,
+      "eval_perplexity": 655.4906005859375,
+      "eval_runtime": 6.6124,
+      "eval_samples_per_second": 5.293,
+      "eval_steps_per_second": 2.722,
+      "step": 500
+    },
+    {
+      "epoch": 3.8461538461538463,
+      "grad_norm": 1.5786511898040771,
+      "learning_rate": 0.006428571428571429,
+      "loss": 5.3271,
+      "step": 600
+    },
+    {
+      "epoch": 3.8461538461538463,
+      "eval_loss": null,
+      "eval_perplexity": 631.6055297851562,
+      "eval_runtime": 6.3755,
+      "eval_samples_per_second": 5.49,
+      "eval_steps_per_second": 2.823,
+      "step": 600
+    },
+    {
+      "epoch": 4.487179487179487,
+      "grad_norm": 0.9853035807609558,
+      "learning_rate": 0.002857142857142857,
+      "loss": 5.2154,
+      "step": 700
+    },
+    {
+      "epoch": 4.487179487179487,
+      "eval_loss": null,
+      "eval_perplexity": 642.812255859375,
+      "eval_runtime": 6.8174,
+      "eval_samples_per_second": 5.134,
+      "eval_steps_per_second": 2.64,
+      "step": 700
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 780,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 2000,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 181524480.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

results/checkpoint-780/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc26b2a2a9d0c96596a22a65fedd593e95159c88a5f6f2d7c6f5f9c400c7679d
+size 5112