zaas12 committed on Mar 19, 2025

Commit

094217c

verified ·

1 Parent(s): 603feea

Upload folder using huggingface_hub

Browse files

Files changed (25) hide show

.ipynb_checkpoints/config-checkpoint.json +30 -0
.ipynb_checkpoints/generation_config-checkpoint.json +4 -0
checkpoint-2994/config.json +30 -0
checkpoint-2994/generation_config.json +4 -0
checkpoint-2994/model.safetensors +3 -0
checkpoint-2994/optimizer.pt +3 -0
checkpoint-2994/rng_state.pth +3 -0
checkpoint-2994/scheduler.pt +3 -0
checkpoint-2994/trainer_state.json +454 -0
checkpoint-2994/training_args.bin +3 -0
checkpoint-5988/config.json +30 -0
checkpoint-5988/generation_config.json +4 -0
checkpoint-5988/model.safetensors +3 -0
checkpoint-5988/optimizer.pt +3 -0
checkpoint-5988/rng_state.pth +3 -0
checkpoint-5988/scheduler.pt +3 -0
checkpoint-5988/trainer_state.json +882 -0
checkpoint-5988/training_args.bin +3 -0
config.json +30 -0
generation_config.json +4 -0
model.safetensors +3 -0
special_tokens_map.json +5 -0
tokenizer.json +0 -0
tokenizer_config.json +9 -0
training_args.bin +3 -0

.ipynb_checkpoints/config-checkpoint.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "microsoft/phi-1_5",
+  "architectures": [
+    "PhiForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "embd_pdrop": 0.0,
+  "eos_token_id": null,
+  "hidden_act": "gelu_new",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "phi",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 32,
+  "partial_rotary_factor": 0.5,
+  "qk_layernorm": false,
+  "resid_pdrop": 0.0,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.0",
+  "use_cache": false,
+  "vocab_size": 51200
+}

.ipynb_checkpoints/generation_config-checkpoint.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.45.0"
+}

checkpoint-2994/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "microsoft/phi-1_5",
+  "architectures": [
+    "PhiForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "embd_pdrop": 0.0,
+  "eos_token_id": null,
+  "hidden_act": "gelu_new",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "phi",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 32,
+  "partial_rotary_factor": 0.5,
+  "qk_layernorm": false,
+  "resid_pdrop": 0.0,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.0",
+  "use_cache": false,
+  "vocab_size": 51200
+}

checkpoint-2994/generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.45.0"
+}

checkpoint-2994/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28afc072c0368ffb1e5a528422ef98a4d7bcfde48daeb85ef4c70b6e3fdab506
+size 2836579040

checkpoint-2994/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcfee381d271b30f63fb2353bd5caf11a7faf709a92f7f1d1d00426e4c2a09a6
+size 11346377382

checkpoint-2994/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36
+size 14244

checkpoint-2994/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef73212274ea360a47c107bc0721c878dc6e4db3a7d41cc16c2ace3fbf6a7f74
+size 1064

checkpoint-2994/trainer_state.json ADDED Viewed

	@@ -0,0 +1,454 @@

+{
+  "best_metric": 0.945128321647644,
+  "best_model_checkpoint": "./checkpoints/ultrafeedback_binarized/phi-1_5-ultrafeedback_binarized-lambda0.25-ORPO-18-16-19/checkpoint-2994",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 2994,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.016700066800267203,
+      "grad_norm": 5.34375,
+      "learning_rate": 2.0000000000000002e-07,
+      "loss": 1.2526,
+      "step": 50
+    },
+    {
+      "epoch": 0.033400133600534405,
+      "grad_norm": 3.453125,
+      "learning_rate": 4.0000000000000003e-07,
+      "loss": 1.2115,
+      "step": 100
+    },
+    {
+      "epoch": 0.050100200400801605,
+      "grad_norm": 2.953125,
+      "learning_rate": 6.000000000000001e-07,
+      "loss": 1.2056,
+      "step": 150
+    },
+    {
+      "epoch": 0.06680026720106881,
+      "grad_norm": 2.3125,
+      "learning_rate": 8.000000000000001e-07,
+      "loss": 1.2425,
+      "step": 200
+    },
+    {
+      "epoch": 0.08350033400133601,
+      "grad_norm": 1.9921875,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 1.2355,
+      "step": 250
+    },
+    {
+      "epoch": 0.10020040080160321,
+      "grad_norm": 2.484375,
+      "learning_rate": 1.2000000000000002e-06,
+      "loss": 1.2408,
+      "step": 300
+    },
+    {
+      "epoch": 0.11690046760187041,
+      "grad_norm": 2.453125,
+      "learning_rate": 1.4000000000000001e-06,
+      "loss": 1.2136,
+      "step": 350
+    },
+    {
+      "epoch": 0.13360053440213762,
+      "grad_norm": 2.328125,
+      "learning_rate": 1.6000000000000001e-06,
+      "loss": 1.2241,
+      "step": 400
+    },
+    {
+      "epoch": 0.15030060120240482,
+      "grad_norm": 3.859375,
+      "learning_rate": 1.8000000000000001e-06,
+      "loss": 1.213,
+      "step": 450
+    },
+    {
+      "epoch": 0.16700066800267202,
+      "grad_norm": 2.90625,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 1.2212,
+      "step": 500
+    },
+    {
+      "epoch": 0.18370073480293922,
+      "grad_norm": 1.828125,
+      "learning_rate": 2.2e-06,
+      "loss": 1.1722,
+      "step": 550
+    },
+    {
+      "epoch": 0.20040080160320642,
+      "grad_norm": 2.953125,
+      "learning_rate": 2.4000000000000003e-06,
+      "loss": 1.1541,
+      "step": 600
+    },
+    {
+      "epoch": 0.21710086840347362,
+      "grad_norm": 4.3125,
+      "learning_rate": 2.6e-06,
+      "loss": 1.1767,
+      "step": 650
+    },
+    {
+      "epoch": 0.23380093520374082,
+      "grad_norm": 2.828125,
+      "learning_rate": 2.8000000000000003e-06,
+      "loss": 1.0593,
+      "step": 700
+    },
+    {
+      "epoch": 0.250501002004008,
+      "grad_norm": 1.7421875,
+      "learning_rate": 3e-06,
+      "loss": 1.128,
+      "step": 750
+    },
+    {
+      "epoch": 0.26720106880427524,
+      "grad_norm": 2.34375,
+      "learning_rate": 3.2000000000000003e-06,
+      "loss": 1.1307,
+      "step": 800
+    },
+    {
+      "epoch": 0.28390113560454244,
+      "grad_norm": 2.390625,
+      "learning_rate": 3.4000000000000005e-06,
+      "loss": 1.0742,
+      "step": 850
+    },
+    {
+      "epoch": 0.30060120240480964,
+      "grad_norm": 1.921875,
+      "learning_rate": 3.6000000000000003e-06,
+      "loss": 1.078,
+      "step": 900
+    },
+    {
+      "epoch": 0.31730126920507684,
+      "grad_norm": 2.4375,
+      "learning_rate": 3.8000000000000005e-06,
+      "loss": 1.063,
+      "step": 950
+    },
+    {
+      "epoch": 0.33400133600534404,
+      "grad_norm": 2.59375,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 1.0211,
+      "step": 1000
+    },
+    {
+      "epoch": 0.35070140280561124,
+      "grad_norm": 1.7890625,
+      "learning_rate": 4.2000000000000004e-06,
+      "loss": 1.0235,
+      "step": 1050
+    },
+    {
+      "epoch": 0.36740146960587844,
+      "grad_norm": 2.46875,
+      "learning_rate": 4.4e-06,
+      "loss": 1.0346,
+      "step": 1100
+    },
+    {
+      "epoch": 0.38410153640614564,
+      "grad_norm": 1.0625,
+      "learning_rate": 4.600000000000001e-06,
+      "loss": 1.038,
+      "step": 1150
+    },
+    {
+      "epoch": 0.40080160320641284,
+      "grad_norm": 3.375,
+      "learning_rate": 4.800000000000001e-06,
+      "loss": 1.0236,
+      "step": 1200
+    },
+    {
+      "epoch": 0.41750167000668004,
+      "grad_norm": 1.6953125,
+      "learning_rate": 5e-06,
+      "loss": 1.0158,
+      "step": 1250
+    },
+    {
+      "epoch": 0.43420173680694724,
+      "grad_norm": 2.265625,
+      "learning_rate": 5.2e-06,
+      "loss": 1.0127,
+      "step": 1300
+    },
+    {
+      "epoch": 0.45090180360721444,
+      "grad_norm": 2.984375,
+      "learning_rate": 5.400000000000001e-06,
+      "loss": 1.0234,
+      "step": 1350
+    },
+    {
+      "epoch": 0.46760187040748163,
+      "grad_norm": 1.9609375,
+      "learning_rate": 5.600000000000001e-06,
+      "loss": 0.993,
+      "step": 1400
+    },
+    {
+      "epoch": 0.48430193720774883,
+      "grad_norm": 2.0625,
+      "learning_rate": 5.8e-06,
+      "loss": 1.0006,
+      "step": 1450
+    },
+    {
+      "epoch": 0.501002004008016,
+      "grad_norm": 1.0078125,
+      "learning_rate": 6e-06,
+      "loss": 1.0038,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5177020708082832,
+      "grad_norm": 2.5625,
+      "learning_rate": 6.200000000000001e-06,
+      "loss": 0.9896,
+      "step": 1550
+    },
+    {
+      "epoch": 0.5344021376085505,
+      "grad_norm": 2.40625,
+      "learning_rate": 6.4000000000000006e-06,
+      "loss": 1.0005,
+      "step": 1600
+    },
+    {
+      "epoch": 0.5511022044088176,
+      "grad_norm": 1.546875,
+      "learning_rate": 6.600000000000001e-06,
+      "loss": 1.0176,
+      "step": 1650
+    },
+    {
+      "epoch": 0.5678022712090849,
+      "grad_norm": 1.8828125,
+      "learning_rate": 6.800000000000001e-06,
+      "loss": 0.9663,
+      "step": 1700
+    },
+    {
+      "epoch": 0.584502338009352,
+      "grad_norm": 1.1796875,
+      "learning_rate": 7e-06,
+      "loss": 0.9879,
+      "step": 1750
+    },
+    {
+      "epoch": 0.6012024048096193,
+      "grad_norm": 2.421875,
+      "learning_rate": 7.2000000000000005e-06,
+      "loss": 0.9724,
+      "step": 1800
+    },
+    {
+      "epoch": 0.6179024716098864,
+      "grad_norm": 1.5,
+      "learning_rate": 7.4e-06,
+      "loss": 0.9754,
+      "step": 1850
+    },
+    {
+      "epoch": 0.6346025384101537,
+      "grad_norm": 2.046875,
+      "learning_rate": 7.600000000000001e-06,
+      "loss": 0.9884,
+      "step": 1900
+    },
+    {
+      "epoch": 0.6513026052104208,
+      "grad_norm": 2.59375,
+      "learning_rate": 7.800000000000002e-06,
+      "loss": 0.9916,
+      "step": 1950
+    },
+    {
+      "epoch": 0.6680026720106881,
+      "grad_norm": 1.8984375,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.9821,
+      "step": 2000
+    },
+    {
+      "epoch": 0.6847027388109552,
+      "grad_norm": 1.484375,
+      "learning_rate": 8.2e-06,
+      "loss": 0.977,
+      "step": 2050
+    },
+    {
+      "epoch": 0.7014028056112225,
+      "grad_norm": 0.921875,
+      "learning_rate": 8.400000000000001e-06,
+      "loss": 0.9736,
+      "step": 2100
+    },
+    {
+      "epoch": 0.7181028724114896,
+      "grad_norm": 1.9609375,
+      "learning_rate": 8.6e-06,
+      "loss": 0.9545,
+      "step": 2150
+    },
+    {
+      "epoch": 0.7348029392117569,
+      "grad_norm": 2.125,
+      "learning_rate": 8.8e-06,
+      "loss": 0.9588,
+      "step": 2200
+    },
+    {
+      "epoch": 0.751503006012024,
+      "grad_norm": 1.8125,
+      "learning_rate": 9e-06,
+      "loss": 0.9655,
+      "step": 2250
+    },
+    {
+      "epoch": 0.7682030728122913,
+      "grad_norm": 1.25,
+      "learning_rate": 9.200000000000002e-06,
+      "loss": 0.9768,
+      "step": 2300
+    },
+    {
+      "epoch": 0.7849031396125584,
+      "grad_norm": 2.34375,
+      "learning_rate": 9.4e-06,
+      "loss": 0.9613,
+      "step": 2350
+    },
+    {
+      "epoch": 0.8016032064128257,
+      "grad_norm": 1.875,
+      "learning_rate": 9.600000000000001e-06,
+      "loss": 0.9656,
+      "step": 2400
+    },
+    {
+      "epoch": 0.8183032732130928,
+      "grad_norm": 1.3359375,
+      "learning_rate": 9.800000000000001e-06,
+      "loss": 0.9628,
+      "step": 2450
+    },
+    {
+      "epoch": 0.8350033400133601,
+      "grad_norm": 1.734375,
+      "learning_rate": 1e-05,
+      "loss": 0.9661,
+      "step": 2500
+    },
+    {
+      "epoch": 0.8517034068136272,
+      "grad_norm": 1.3046875,
+      "learning_rate": 1.02e-05,
+      "loss": 0.9763,
+      "step": 2550
+    },
+    {
+      "epoch": 0.8684034736138945,
+      "grad_norm": 1.3828125,
+      "learning_rate": 1.04e-05,
+      "loss": 0.9674,
+      "step": 2600
+    },
+    {
+      "epoch": 0.8851035404141616,
+      "grad_norm": 2.828125,
+      "learning_rate": 1.0600000000000002e-05,
+      "loss": 0.9804,
+      "step": 2650
+    },
+    {
+      "epoch": 0.9018036072144289,
+      "grad_norm": 3.53125,
+      "learning_rate": 1.0800000000000002e-05,
+      "loss": 0.9814,
+      "step": 2700
+    },
+    {
+      "epoch": 0.918503674014696,
+      "grad_norm": 1.5078125,
+      "learning_rate": 1.1000000000000001e-05,
+      "loss": 0.9553,
+      "step": 2750
+    },
+    {
+      "epoch": 0.9352037408149633,
+      "grad_norm": 1.796875,
+      "learning_rate": 1.1200000000000001e-05,
+      "loss": 0.9595,
+      "step": 2800
+    },
+    {
+      "epoch": 0.9519038076152304,
+      "grad_norm": 1.5078125,
+      "learning_rate": 1.14e-05,
+      "loss": 0.9364,
+      "step": 2850
+    },
+    {
+      "epoch": 0.9686038744154977,
+      "grad_norm": 3.28125,
+      "learning_rate": 1.16e-05,
+      "loss": 0.9597,
+      "step": 2900
+    },
+    {
+      "epoch": 0.9853039412157648,
+      "grad_norm": 1.609375,
+      "learning_rate": 1.18e-05,
+      "loss": 0.938,
+      "step": 2950
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.945128321647644,
+      "eval_runtime": 100.8455,
+      "eval_samples_per_second": 15.985,
+      "eval_steps_per_second": 1.002,
+      "step": 2994
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 5988,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.8655046824493056e+17,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2994/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fee32f064590f4dbedd46c2ae13a662c26baa4367c87d2089d6e6d34cdcae7b
+size 5432

checkpoint-5988/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "microsoft/phi-1_5",
+  "architectures": [
+    "PhiForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "embd_pdrop": 0.0,
+  "eos_token_id": null,
+  "hidden_act": "gelu_new",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "phi",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 32,
+  "partial_rotary_factor": 0.5,
+  "qk_layernorm": false,
+  "resid_pdrop": 0.0,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.0",
+  "use_cache": false,
+  "vocab_size": 51200
+}

checkpoint-5988/generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.45.0"
+}

checkpoint-5988/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:659a8db5e3e06113bc4e343fc87ae3b103823d2ff6efbe0c6eddafefd06183b9
+size 2836579040

checkpoint-5988/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5540fe647696a7c1294b5f87f8b2b9167bca95a5e8bd61272cf1c691bc1d4285
+size 11346377382

checkpoint-5988/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
+size 14244

checkpoint-5988/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9e7aec8d0b74157d11a4dcca01d36a7ec3df8af858b2081233b73e3c09150cc
+size 1064

checkpoint-5988/trainer_state.json ADDED Viewed

	@@ -0,0 +1,882 @@

+{
+  "best_metric": 0.877801239490509,
+  "best_model_checkpoint": "./checkpoints/ultrafeedback_binarized/phi-1_5-ultrafeedback_binarized-lambda0.25-ORPO-18-16-19/checkpoint-5988",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 5988,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.016700066800267203,
+      "grad_norm": 5.34375,
+      "learning_rate": 2.0000000000000002e-07,
+      "loss": 1.2526,
+      "step": 50
+    },
+    {
+      "epoch": 0.033400133600534405,
+      "grad_norm": 3.453125,
+      "learning_rate": 4.0000000000000003e-07,
+      "loss": 1.2115,
+      "step": 100
+    },
+    {
+      "epoch": 0.050100200400801605,
+      "grad_norm": 2.953125,
+      "learning_rate": 6.000000000000001e-07,
+      "loss": 1.2056,
+      "step": 150
+    },
+    {
+      "epoch": 0.06680026720106881,
+      "grad_norm": 2.3125,
+      "learning_rate": 8.000000000000001e-07,
+      "loss": 1.2425,
+      "step": 200
+    },
+    {
+      "epoch": 0.08350033400133601,
+      "grad_norm": 1.9921875,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 1.2355,
+      "step": 250
+    },
+    {
+      "epoch": 0.10020040080160321,
+      "grad_norm": 2.484375,
+      "learning_rate": 1.2000000000000002e-06,
+      "loss": 1.2408,
+      "step": 300
+    },
+    {
+      "epoch": 0.11690046760187041,
+      "grad_norm": 2.453125,
+      "learning_rate": 1.4000000000000001e-06,
+      "loss": 1.2136,
+      "step": 350
+    },
+    {
+      "epoch": 0.13360053440213762,
+      "grad_norm": 2.328125,
+      "learning_rate": 1.6000000000000001e-06,
+      "loss": 1.2241,
+      "step": 400
+    },
+    {
+      "epoch": 0.15030060120240482,
+      "grad_norm": 3.859375,
+      "learning_rate": 1.8000000000000001e-06,
+      "loss": 1.213,
+      "step": 450
+    },
+    {
+      "epoch": 0.16700066800267202,
+      "grad_norm": 2.90625,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 1.2212,
+      "step": 500
+    },
+    {
+      "epoch": 0.18370073480293922,
+      "grad_norm": 1.828125,
+      "learning_rate": 2.2e-06,
+      "loss": 1.1722,
+      "step": 550
+    },
+    {
+      "epoch": 0.20040080160320642,
+      "grad_norm": 2.953125,
+      "learning_rate": 2.4000000000000003e-06,
+      "loss": 1.1541,
+      "step": 600
+    },
+    {
+      "epoch": 0.21710086840347362,
+      "grad_norm": 4.3125,
+      "learning_rate": 2.6e-06,
+      "loss": 1.1767,
+      "step": 650
+    },
+    {
+      "epoch": 0.23380093520374082,
+      "grad_norm": 2.828125,
+      "learning_rate": 2.8000000000000003e-06,
+      "loss": 1.0593,
+      "step": 700
+    },
+    {
+      "epoch": 0.250501002004008,
+      "grad_norm": 1.7421875,
+      "learning_rate": 3e-06,
+      "loss": 1.128,
+      "step": 750
+    },
+    {
+      "epoch": 0.26720106880427524,
+      "grad_norm": 2.34375,
+      "learning_rate": 3.2000000000000003e-06,
+      "loss": 1.1307,
+      "step": 800
+    },
+    {
+      "epoch": 0.28390113560454244,
+      "grad_norm": 2.390625,
+      "learning_rate": 3.4000000000000005e-06,
+      "loss": 1.0742,
+      "step": 850
+    },
+    {
+      "epoch": 0.30060120240480964,
+      "grad_norm": 1.921875,
+      "learning_rate": 3.6000000000000003e-06,
+      "loss": 1.078,
+      "step": 900
+    },
+    {
+      "epoch": 0.31730126920507684,
+      "grad_norm": 2.4375,
+      "learning_rate": 3.8000000000000005e-06,
+      "loss": 1.063,
+      "step": 950
+    },
+    {
+      "epoch": 0.33400133600534404,
+      "grad_norm": 2.59375,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 1.0211,
+      "step": 1000
+    },
+    {
+      "epoch": 0.35070140280561124,
+      "grad_norm": 1.7890625,
+      "learning_rate": 4.2000000000000004e-06,
+      "loss": 1.0235,
+      "step": 1050
+    },
+    {
+      "epoch": 0.36740146960587844,
+      "grad_norm": 2.46875,
+      "learning_rate": 4.4e-06,
+      "loss": 1.0346,
+      "step": 1100
+    },
+    {
+      "epoch": 0.38410153640614564,
+      "grad_norm": 1.0625,
+      "learning_rate": 4.600000000000001e-06,
+      "loss": 1.038,
+      "step": 1150
+    },
+    {
+      "epoch": 0.40080160320641284,
+      "grad_norm": 3.375,
+      "learning_rate": 4.800000000000001e-06,
+      "loss": 1.0236,
+      "step": 1200
+    },
+    {
+      "epoch": 0.41750167000668004,
+      "grad_norm": 1.6953125,
+      "learning_rate": 5e-06,
+      "loss": 1.0158,
+      "step": 1250
+    },
+    {
+      "epoch": 0.43420173680694724,
+      "grad_norm": 2.265625,
+      "learning_rate": 5.2e-06,
+      "loss": 1.0127,
+      "step": 1300
+    },
+    {
+      "epoch": 0.45090180360721444,
+      "grad_norm": 2.984375,
+      "learning_rate": 5.400000000000001e-06,
+      "loss": 1.0234,
+      "step": 1350
+    },
+    {
+      "epoch": 0.46760187040748163,
+      "grad_norm": 1.9609375,
+      "learning_rate": 5.600000000000001e-06,
+      "loss": 0.993,
+      "step": 1400
+    },
+    {
+      "epoch": 0.48430193720774883,
+      "grad_norm": 2.0625,
+      "learning_rate": 5.8e-06,
+      "loss": 1.0006,
+      "step": 1450
+    },
+    {
+      "epoch": 0.501002004008016,
+      "grad_norm": 1.0078125,
+      "learning_rate": 6e-06,
+      "loss": 1.0038,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5177020708082832,
+      "grad_norm": 2.5625,
+      "learning_rate": 6.200000000000001e-06,
+      "loss": 0.9896,
+      "step": 1550
+    },
+    {
+      "epoch": 0.5344021376085505,
+      "grad_norm": 2.40625,
+      "learning_rate": 6.4000000000000006e-06,
+      "loss": 1.0005,
+      "step": 1600
+    },
+    {
+      "epoch": 0.5511022044088176,
+      "grad_norm": 1.546875,
+      "learning_rate": 6.600000000000001e-06,
+      "loss": 1.0176,
+      "step": 1650
+    },
+    {
+      "epoch": 0.5678022712090849,
+      "grad_norm": 1.8828125,
+      "learning_rate": 6.800000000000001e-06,
+      "loss": 0.9663,
+      "step": 1700
+    },
+    {
+      "epoch": 0.584502338009352,
+      "grad_norm": 1.1796875,
+      "learning_rate": 7e-06,
+      "loss": 0.9879,
+      "step": 1750
+    },
+    {
+      "epoch": 0.6012024048096193,
+      "grad_norm": 2.421875,
+      "learning_rate": 7.2000000000000005e-06,
+      "loss": 0.9724,
+      "step": 1800
+    },
+    {
+      "epoch": 0.6179024716098864,
+      "grad_norm": 1.5,
+      "learning_rate": 7.4e-06,
+      "loss": 0.9754,
+      "step": 1850
+    },
+    {
+      "epoch": 0.6346025384101537,
+      "grad_norm": 2.046875,
+      "learning_rate": 7.600000000000001e-06,
+      "loss": 0.9884,
+      "step": 1900
+    },
+    {
+      "epoch": 0.6513026052104208,
+      "grad_norm": 2.59375,
+      "learning_rate": 7.800000000000002e-06,
+      "loss": 0.9916,
+      "step": 1950
+    },
+    {
+      "epoch": 0.6680026720106881,
+      "grad_norm": 1.8984375,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.9821,
+      "step": 2000
+    },
+    {
+      "epoch": 0.6847027388109552,
+      "grad_norm": 1.484375,
+      "learning_rate": 8.2e-06,
+      "loss": 0.977,
+      "step": 2050
+    },
+    {
+      "epoch": 0.7014028056112225,
+      "grad_norm": 0.921875,
+      "learning_rate": 8.400000000000001e-06,
+      "loss": 0.9736,
+      "step": 2100
+    },
+    {
+      "epoch": 0.7181028724114896,
+      "grad_norm": 1.9609375,
+      "learning_rate": 8.6e-06,
+      "loss": 0.9545,
+      "step": 2150
+    },
+    {
+      "epoch": 0.7348029392117569,
+      "grad_norm": 2.125,
+      "learning_rate": 8.8e-06,
+      "loss": 0.9588,
+      "step": 2200
+    },
+    {
+      "epoch": 0.751503006012024,
+      "grad_norm": 1.8125,
+      "learning_rate": 9e-06,
+      "loss": 0.9655,
+      "step": 2250
+    },
+    {
+      "epoch": 0.7682030728122913,
+      "grad_norm": 1.25,
+      "learning_rate": 9.200000000000002e-06,
+      "loss": 0.9768,
+      "step": 2300
+    },
+    {
+      "epoch": 0.7849031396125584,
+      "grad_norm": 2.34375,
+      "learning_rate": 9.4e-06,
+      "loss": 0.9613,
+      "step": 2350
+    },
+    {
+      "epoch": 0.8016032064128257,
+      "grad_norm": 1.875,
+      "learning_rate": 9.600000000000001e-06,
+      "loss": 0.9656,
+      "step": 2400
+    },
+    {
+      "epoch": 0.8183032732130928,
+      "grad_norm": 1.3359375,
+      "learning_rate": 9.800000000000001e-06,
+      "loss": 0.9628,
+      "step": 2450
+    },
+    {
+      "epoch": 0.8350033400133601,
+      "grad_norm": 1.734375,
+      "learning_rate": 1e-05,
+      "loss": 0.9661,
+      "step": 2500
+    },
+    {
+      "epoch": 0.8517034068136272,
+      "grad_norm": 1.3046875,
+      "learning_rate": 1.02e-05,
+      "loss": 0.9763,
+      "step": 2550
+    },
+    {
+      "epoch": 0.8684034736138945,
+      "grad_norm": 1.3828125,
+      "learning_rate": 1.04e-05,
+      "loss": 0.9674,
+      "step": 2600
+    },
+    {
+      "epoch": 0.8851035404141616,
+      "grad_norm": 2.828125,
+      "learning_rate": 1.0600000000000002e-05,
+      "loss": 0.9804,
+      "step": 2650
+    },
+    {
+      "epoch": 0.9018036072144289,
+      "grad_norm": 3.53125,
+      "learning_rate": 1.0800000000000002e-05,
+      "loss": 0.9814,
+      "step": 2700
+    },
+    {
+      "epoch": 0.918503674014696,
+      "grad_norm": 1.5078125,
+      "learning_rate": 1.1000000000000001e-05,
+      "loss": 0.9553,
+      "step": 2750
+    },
+    {
+      "epoch": 0.9352037408149633,
+      "grad_norm": 1.796875,
+      "learning_rate": 1.1200000000000001e-05,
+      "loss": 0.9595,
+      "step": 2800
+    },
+    {
+      "epoch": 0.9519038076152304,
+      "grad_norm": 1.5078125,
+      "learning_rate": 1.14e-05,
+      "loss": 0.9364,
+      "step": 2850
+    },
+    {
+      "epoch": 0.9686038744154977,
+      "grad_norm": 3.28125,
+      "learning_rate": 1.16e-05,
+      "loss": 0.9597,
+      "step": 2900
+    },
+    {
+      "epoch": 0.9853039412157648,
+      "grad_norm": 1.609375,
+      "learning_rate": 1.18e-05,
+      "loss": 0.938,
+      "step": 2950
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.945128321647644,
+      "eval_runtime": 100.8455,
+      "eval_samples_per_second": 15.985,
+      "eval_steps_per_second": 1.002,
+      "step": 2994
+    },
+    {
+      "epoch": 1.002004008016032,
+      "grad_norm": 2.703125,
+      "learning_rate": 1.2e-05,
+      "loss": 0.9488,
+      "step": 3000
+    },
+    {
+      "epoch": 1.0187040748162992,
+      "grad_norm": 6.40625,
+      "learning_rate": 1.22e-05,
+      "loss": 0.9558,
+      "step": 3050
+    },
+    {
+      "epoch": 1.0354041416165665,
+      "grad_norm": 1.2890625,
+      "learning_rate": 1.2400000000000002e-05,
+      "loss": 0.9402,
+      "step": 3100
+    },
+    {
+      "epoch": 1.0521042084168337,
+      "grad_norm": 2.625,
+      "learning_rate": 1.2600000000000001e-05,
+      "loss": 0.9402,
+      "step": 3150
+    },
+    {
+      "epoch": 1.0688042752171008,
+      "grad_norm": 2.546875,
+      "learning_rate": 1.2800000000000001e-05,
+      "loss": 0.9116,
+      "step": 3200
+    },
+    {
+      "epoch": 1.085504342017368,
+      "grad_norm": 3.84375,
+      "learning_rate": 1.3000000000000001e-05,
+      "loss": 0.9298,
+      "step": 3250
+    },
+    {
+      "epoch": 1.1022044088176353,
+      "grad_norm": 1.921875,
+      "learning_rate": 1.3200000000000002e-05,
+      "loss": 0.9347,
+      "step": 3300
+    },
+    {
+      "epoch": 1.1189044756179025,
+      "grad_norm": 2.453125,
+      "learning_rate": 1.3400000000000002e-05,
+      "loss": 0.9245,
+      "step": 3350
+    },
+    {
+      "epoch": 1.1356045424181698,
+      "grad_norm": 2.796875,
+      "learning_rate": 1.3600000000000002e-05,
+      "loss": 0.962,
+      "step": 3400
+    },
+    {
+      "epoch": 1.1523046092184368,
+      "grad_norm": 2.109375,
+      "learning_rate": 1.38e-05,
+      "loss": 0.9058,
+      "step": 3450
+    },
+    {
+      "epoch": 1.169004676018704,
+      "grad_norm": 2.53125,
+      "learning_rate": 1.4e-05,
+      "loss": 0.9173,
+      "step": 3500
+    },
+    {
+      "epoch": 1.1857047428189713,
+      "grad_norm": 3.34375,
+      "learning_rate": 1.4200000000000001e-05,
+      "loss": 0.9079,
+      "step": 3550
+    },
+    {
+      "epoch": 1.2024048096192386,
+      "grad_norm": 2.515625,
+      "learning_rate": 1.4400000000000001e-05,
+      "loss": 0.9239,
+      "step": 3600
+    },
+    {
+      "epoch": 1.2191048764195056,
+      "grad_norm": 1.6015625,
+      "learning_rate": 1.46e-05,
+      "loss": 0.9147,
+      "step": 3650
+    },
+    {
+      "epoch": 1.2358049432197729,
+      "grad_norm": 2.59375,
+      "learning_rate": 1.48e-05,
+      "loss": 0.9367,
+      "step": 3700
+    },
+    {
+      "epoch": 1.25250501002004,
+      "grad_norm": 2.890625,
+      "learning_rate": 1.5000000000000002e-05,
+      "loss": 0.9116,
+      "step": 3750
+    },
+    {
+      "epoch": 1.2692050768203074,
+      "grad_norm": 2.765625,
+      "learning_rate": 1.5200000000000002e-05,
+      "loss": 0.9198,
+      "step": 3800
+    },
+    {
+      "epoch": 1.2859051436205746,
+      "grad_norm": 3.046875,
+      "learning_rate": 1.54e-05,
+      "loss": 0.908,
+      "step": 3850
+    },
+    {
+      "epoch": 1.3026052104208417,
+      "grad_norm": 1.953125,
+      "learning_rate": 1.5600000000000003e-05,
+      "loss": 0.908,
+      "step": 3900
+    },
+    {
+      "epoch": 1.319305277221109,
+      "grad_norm": 1.578125,
+      "learning_rate": 1.58e-05,
+      "loss": 0.9109,
+      "step": 3950
+    },
+    {
+      "epoch": 1.3360053440213762,
+      "grad_norm": 2.421875,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 0.9148,
+      "step": 4000
+    },
+    {
+      "epoch": 1.3527054108216432,
+      "grad_norm": 4.28125,
+      "learning_rate": 1.62e-05,
+      "loss": 0.8995,
+      "step": 4050
+    },
+    {
+      "epoch": 1.3694054776219104,
+      "grad_norm": 2.765625,
+      "learning_rate": 1.64e-05,
+      "loss": 0.9177,
+      "step": 4100
+    },
+    {
+      "epoch": 1.3861055444221777,
+      "grad_norm": 2.921875,
+      "learning_rate": 1.66e-05,
+      "loss": 0.9138,
+      "step": 4150
+    },
+    {
+      "epoch": 1.402805611222445,
+      "grad_norm": 2.46875,
+      "learning_rate": 1.6800000000000002e-05,
+      "loss": 0.9378,
+      "step": 4200
+    },
+    {
+      "epoch": 1.4195056780227122,
+      "grad_norm": 2.578125,
+      "learning_rate": 1.7e-05,
+      "loss": 0.8952,
+      "step": 4250
+    },
+    {
+      "epoch": 1.4362057448229792,
+      "grad_norm": 2.890625,
+      "learning_rate": 1.72e-05,
+      "loss": 0.9045,
+      "step": 4300
+    },
+    {
+      "epoch": 1.4529058116232465,
+      "grad_norm": 2.375,
+      "learning_rate": 1.7400000000000003e-05,
+      "loss": 0.942,
+      "step": 4350
+    },
+    {
+      "epoch": 1.4696058784235138,
+      "grad_norm": 2.296875,
+      "learning_rate": 1.76e-05,
+      "loss": 0.9018,
+      "step": 4400
+    },
+    {
+      "epoch": 1.4863059452237808,
+      "grad_norm": 2.546875,
+      "learning_rate": 1.7800000000000002e-05,
+      "loss": 0.886,
+      "step": 4450
+    },
+    {
+      "epoch": 1.503006012024048,
+      "grad_norm": 2.40625,
+      "learning_rate": 1.8e-05,
+      "loss": 0.9152,
+      "step": 4500
+    },
+    {
+      "epoch": 1.5197060788243153,
+      "grad_norm": 1.375,
+      "learning_rate": 1.8200000000000002e-05,
+      "loss": 0.8884,
+      "step": 4550
+    },
+    {
+      "epoch": 1.5364061456245826,
+      "grad_norm": 2.703125,
+      "learning_rate": 1.8400000000000003e-05,
+      "loss": 0.8905,
+      "step": 4600
+    },
+    {
+      "epoch": 1.5531062124248498,
+      "grad_norm": 3.15625,
+      "learning_rate": 1.86e-05,
+      "loss": 0.9163,
+      "step": 4650
+    },
+    {
+      "epoch": 1.569806279225117,
+      "grad_norm": 2.921875,
+      "learning_rate": 1.88e-05,
+      "loss": 0.9196,
+      "step": 4700
+    },
+    {
+      "epoch": 1.586506346025384,
+      "grad_norm": 2.296875,
+      "learning_rate": 1.9e-05,
+      "loss": 0.88,
+      "step": 4750
+    },
+    {
+      "epoch": 1.6032064128256514,
+      "grad_norm": 2.671875,
+      "learning_rate": 1.9200000000000003e-05,
+      "loss": 0.887,
+      "step": 4800
+    },
+    {
+      "epoch": 1.6199064796259184,
+      "grad_norm": 2.046875,
+      "learning_rate": 1.94e-05,
+      "loss": 0.8727,
+      "step": 4850
+    },
+    {
+      "epoch": 1.6366065464261856,
+      "grad_norm": 1.953125,
+      "learning_rate": 1.9600000000000002e-05,
+      "loss": 0.8891,
+      "step": 4900
+    },
+    {
+      "epoch": 1.653306613226453,
+      "grad_norm": 2.890625,
+      "learning_rate": 1.98e-05,
+      "loss": 0.9095,
+      "step": 4950
+    },
+    {
+      "epoch": 1.6700066800267201,
+      "grad_norm": 2.28125,
+      "learning_rate": 2e-05,
+      "loss": 0.8675,
+      "step": 5000
+    },
+    {
+      "epoch": 1.6867067468269874,
+      "grad_norm": 4.0,
+      "learning_rate": 1.9873880897766597e-05,
+      "loss": 0.8988,
+      "step": 5050
+    },
+    {
+      "epoch": 1.7034068136272547,
+      "grad_norm": 1.5859375,
+      "learning_rate": 1.949870479665602e-05,
+      "loss": 0.8884,
+      "step": 5100
+    },
+    {
+      "epoch": 1.7201068804275217,
+      "grad_norm": 2.5,
+      "learning_rate": 1.888393507127856e-05,
+      "loss": 0.8995,
+      "step": 5150
+    },
+    {
+      "epoch": 1.736806947227789,
+      "grad_norm": 4.125,
+      "learning_rate": 1.8045078562803203e-05,
+      "loss": 0.8793,
+      "step": 5200
+    },
+    {
+      "epoch": 1.753507014028056,
+      "grad_norm": 1.3046875,
+      "learning_rate": 1.7003294437180254e-05,
+      "loss": 0.889,
+      "step": 5250
+    },
+    {
+      "epoch": 1.7702070808283232,
+      "grad_norm": 1.796875,
+      "learning_rate": 1.5784860470138633e-05,
+      "loss": 0.8857,
+      "step": 5300
+    },
+    {
+      "epoch": 1.7869071476285905,
+      "grad_norm": 2.5,
+      "learning_rate": 1.4420510221289137e-05,
+      "loss": 0.8643,
+      "step": 5350
+    },
+    {
+      "epoch": 1.8036072144288577,
+      "grad_norm": 1.6953125,
+      "learning_rate": 1.2944657816335124e-05,
+      "loss": 0.8841,
+      "step": 5400
+    },
+    {
+      "epoch": 1.820307281229125,
+      "grad_norm": 1.640625,
+      "learning_rate": 1.139452989134496e-05,
+      "loss": 0.8548,
+      "step": 5450
+    },
+    {
+      "epoch": 1.8370073480293923,
+      "grad_norm": 1.6328125,
+      "learning_rate": 9.809226594767979e-06,
+      "loss": 0.8762,
+      "step": 5500
+    },
+    {
+      "epoch": 1.8537074148296593,
+      "grad_norm": 2.0,
+      "learning_rate": 8.228735332310575e-06,
+      "loss": 0.8726,
+      "step": 5550
+    },
+    {
+      "epoch": 1.8704074816299265,
+      "grad_norm": 2.28125,
+      "learning_rate": 6.692922131794517e-06,
+      "loss": 0.8995,
+      "step": 5600
+    },
+    {
+      "epoch": 1.8871075484301936,
+      "grad_norm": 3.09375,
+      "learning_rate": 5.240526069629265e-06,
+      "loss": 0.8944,
+      "step": 5650
+    },
+    {
+      "epoch": 1.9038076152304608,
+      "grad_norm": 1.8203125,
+      "learning_rate": 3.908182123304344e-06,
+      "loss": 0.8777,
+      "step": 5700
+    },
+    {
+      "epoch": 1.920507682030728,
+      "grad_norm": 1.2421875,
+      "learning_rate": 2.729497097295075e-06,
+      "loss": 0.8733,
+      "step": 5750
+    },
+    {
+      "epoch": 1.9372077488309953,
+      "grad_norm": 2.71875,
+      "learning_rate": 1.7342019310607062e-06,
+      "loss": 0.8705,
+      "step": 5800
+    },
+    {
+      "epoch": 1.9539078156312626,
+      "grad_norm": 1.9375,
+      "learning_rate": 9.474017711657835e-07,
+      "loss": 0.8602,
+      "step": 5850
+    },
+    {
+      "epoch": 1.9706078824315298,
+      "grad_norm": 2.765625,
+      "learning_rate": 3.889427235709153e-07,
+      "loss": 0.8755,
+      "step": 5900
+    },
+    {
+      "epoch": 1.9873079492317969,
+      "grad_norm": 2.515625,
+      "learning_rate": 7.291125901946027e-08,
+      "loss": 0.8718,
+      "step": 5950
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.877801239490509,
+      "eval_runtime": 101.041,
+      "eval_samples_per_second": 15.954,
+      "eval_steps_per_second": 1.0,
+      "step": 5988
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 5988,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.731009364898611e+17,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-5988/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fee32f064590f4dbedd46c2ae13a662c26baa4367c87d2089d6e6d34cdcae7b
+size 5432

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "microsoft/phi-1_5",
+  "architectures": [
+    "PhiForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "embd_pdrop": 0.0,
+  "eos_token_id": null,
+  "hidden_act": "gelu_new",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "phi",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 32,
+  "partial_rotary_factor": 0.5,
+  "qk_layernorm": false,
+  "resid_pdrop": 0.0,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.0",
+  "use_cache": false,
+  "vocab_size": 51200
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.45.0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:659a8db5e3e06113bc4e343fc87ae3b103823d2ff6efbe0c6eddafefd06183b9
+size 2836579040

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "tokenizer_class": "CodeGenTokenizer",
+  "unk_token": "<|endoftext|>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fee32f064590f4dbedd46c2ae13a662c26baa4367c87d2089d6e6d34cdcae7b
+size 5432