lamrin8224 committed on Jun 8, 2024

Commit

93905bb

verified ·

1 Parent(s): 8bab153

Upload folder using huggingface_hub

Browse files

Files changed (37) hide show

checkpoint-1000/config.json +36 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/pytorch_model.bin +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/trainer_state.json +28 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-1500/config.json +36 -0
checkpoint-1500/optimizer.pt +3 -0
checkpoint-1500/pytorch_model.bin +3 -0
checkpoint-1500/scheduler.pt +3 -0
checkpoint-1500/trainer_state.json +34 -0
checkpoint-1500/training_args.bin +3 -0
checkpoint-2000/config.json +36 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/pytorch_model.bin +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/trainer_state.json +40 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-2500/config.json +36 -0
checkpoint-2500/optimizer.pt +3 -0
checkpoint-2500/pytorch_model.bin +3 -0
checkpoint-2500/scheduler.pt +3 -0
checkpoint-2500/trainer_state.json +46 -0
checkpoint-2500/training_args.bin +3 -0
checkpoint-500/config.json +36 -0
checkpoint-500/optimizer.pt +3 -0
checkpoint-500/pytorch_model.bin +3 -0
checkpoint-500/scheduler.pt +3 -0
checkpoint-500/trainer_state.json +22 -0
checkpoint-500/training_args.bin +3 -0
config.json +36 -0
merges.txt +0 -0
pytorch_model.bin +3 -0
special_tokens_map.json +1 -0
tokenizer_config.json +1 -0
training_args.bin +3 -0
vocab.json +0 -0

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb552d691579a985af8bbeb473ccedf3fa1badf66bd51c56fa6d0d5de9bfef92
+size 995611287

checkpoint-1000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54529ed706ea35d0774e9c55d36bad2177d279bd838e7a01ec70c1803cf620cd
+size 510408315

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65d985aa5fa7f96f55e308692cf8f9b9ebb6b0bc09a65dd50e86d5656314b7e5
+size 623

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9920318725099602,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.00398406374502e-05,
+      "loss": 3.6087,
+      "step": 500
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 3.00796812749004e-05,
+      "loss": 3.242,
+      "step": 1000
+    }
+  ],
+  "max_steps": 2510,
+  "num_train_epochs": 5,
+  "total_flos": 1528351802523648.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4daf1a81ff5405b3e107d846931aef68f65bd8b41b07f9bfc197cc2b6752a2ea
+size 2351

checkpoint-1500/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-1500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46ddc653f49376bfe12c7c95f8ee0c54b129f86c68329a53d2e67c78407a5955
+size 995611287

checkpoint-1500/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33746d5d9702d787b0eff4fb07ccd81fa452b835d2be9d88b52efee81894f01c
+size 510408315

checkpoint-1500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:055aead58de9e916ba06ab68fc8fb79dea44441ff0687603f042abb9c8f9377d
+size 623

checkpoint-1500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.9880478087649402,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.00398406374502e-05,
+      "loss": 3.6087,
+      "step": 500
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 3.00796812749004e-05,
+      "loss": 3.242,
+      "step": 1000
+    },
+    {
+      "epoch": 2.99,
+      "learning_rate": 2.01195219123506e-05,
+      "loss": 3.0879,
+      "step": 1500
+    }
+  ],
+  "max_steps": 2510,
+  "num_train_epochs": 5,
+  "total_flos": 2292145424695296.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4daf1a81ff5405b3e107d846931aef68f65bd8b41b07f9bfc197cc2b6752a2ea
+size 2351

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77ff8db2d4ccc1a8842d88fb6f5f74119dff50f74ffac330f77f71d1ae8e7830
+size 995611287

checkpoint-2000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42be8068fcdc55688a53ea8bfd2b9e5b95a155cf49aafd93f27b0324919583d0
+size 510408315

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f386075610e8d830c19be2a324388e2bc4a321f3dfad61a5a26471d14a912c48
+size 623

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.9840637450199203,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.00398406374502e-05,
+      "loss": 3.6087,
+      "step": 500
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 3.00796812749004e-05,
+      "loss": 3.242,
+      "step": 1000
+    },
+    {
+      "epoch": 2.99,
+      "learning_rate": 2.01195219123506e-05,
+      "loss": 3.0879,
+      "step": 1500
+    },
+    {
+      "epoch": 3.98,
+      "learning_rate": 1.0159362549800798e-05,
+      "loss": 2.9902,
+      "step": 2000
+    }
+  ],
+  "max_steps": 2510,
+  "num_train_epochs": 5,
+  "total_flos": 3055939046866944.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4daf1a81ff5405b3e107d846931aef68f65bd8b41b07f9bfc197cc2b6752a2ea
+size 2351

checkpoint-2500/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-2500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:804d672410b51a73aae44bb5d3700ee810c5757d0d3e129729a5ea189cd633df
+size 995611287

checkpoint-2500/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e647f6dc37c89c54041fb818b59bbb2869d4e2c194384bfcf3e26539539c9191
+size 510408315

checkpoint-2500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb3ba70b6baf7349452de153560a79836352adad02dd9dd6b93ac1a0bfa64fc5
+size 623

checkpoint-2500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.9800796812749,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.00398406374502e-05,
+      "loss": 3.6087,
+      "step": 500
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 3.00796812749004e-05,
+      "loss": 3.242,
+      "step": 1000
+    },
+    {
+      "epoch": 2.99,
+      "learning_rate": 2.01195219123506e-05,
+      "loss": 3.0879,
+      "step": 1500
+    },
+    {
+      "epoch": 3.98,
+      "learning_rate": 1.0159362549800798e-05,
+      "loss": 2.9902,
+      "step": 2000
+    },
+    {
+      "epoch": 4.98,
+      "learning_rate": 1.99203187250996e-07,
+      "loss": 2.9376,
+      "step": 2500
+    }
+  ],
+  "max_steps": 2510,
+  "num_train_epochs": 5,
+  "total_flos": 3819732669038592.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4daf1a81ff5405b3e107d846931aef68f65bd8b41b07f9bfc197cc2b6752a2ea
+size 2351

checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}

checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13e7c1173b1b302eccd1df85adb7e264e20ac713c33acb067f62e564d25de3e8
+size 995611287

checkpoint-500/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50e9a4fac5b3595613b24273f13fd414238cb7f9bc7ad54090f6f6270fd8e396
+size 510408315

checkpoint-500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a973005d918b454d0104032202a454a4d22b75a3834290b73cd401b8032d5a5
+size 623

checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9960159362549801,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 4.00398406374502e-05,
+      "loss": 3.6087,
+      "step": 500
+    }
+  ],
+  "max_steps": 2510,
+  "num_train_epochs": 5,
+  "total_flos": 764558180352000.0,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4daf1a81ff5405b3e107d846931aef68f65bd8b41b07f9bfc197cc2b6752a2ea
+size 2351

config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f1ab4cce04c1a3ca92b9b77e897e88d63ebd4d1d4d488304dff91997f3a66b7
+size 510408315

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "tokenizer_file": "/root/.cache/huggingface/transformers/16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0", "name_or_path": "gpt2"}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4daf1a81ff5405b3e107d846931aef68f65bd8b41b07f9bfc197cc2b6752a2ea
+size 2351

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff