craa committed on 12 days ago

Commit

72d73da

verified ·

1 Parent(s): 028718f

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

exp3/high_0_1208/README.md +186 -0
exp3/high_0_1208/all_results.json +16 -0
exp3/high_0_1208/checkpoint-100000/config.json +31 -0
exp3/high_0_1208/checkpoint-100000/generation_config.json +6 -0
exp3/high_0_1208/checkpoint-100000/model.safetensors +3 -0
exp3/high_0_1208/checkpoint-100000/optimizer.pt +3 -0
exp3/high_0_1208/checkpoint-100000/rng_state.pth +3 -0
exp3/high_0_1208/checkpoint-100000/scaler.pt +3 -0
exp3/high_0_1208/checkpoint-100000/scheduler.pt +3 -0
exp3/high_0_1208/checkpoint-100000/special_tokens_map.json +1 -0
exp3/high_0_1208/checkpoint-100000/tokenizer.json +0 -0
exp3/high_0_1208/checkpoint-100000/tokenizer_config.json +0 -0
exp3/high_0_1208/checkpoint-100000/trainer_state.json +0 -0
exp3/high_0_1208/checkpoint-100000/training_args.bin +3 -0
exp3/high_0_1208/checkpoint-110000/config.json +31 -0
exp3/high_0_1208/checkpoint-110000/generation_config.json +6 -0
exp3/high_0_1208/checkpoint-110000/model.safetensors +3 -0
exp3/high_0_1208/checkpoint-110000/optimizer.pt +3 -0
exp3/high_0_1208/checkpoint-110000/rng_state.pth +3 -0
exp3/high_0_1208/checkpoint-110000/scaler.pt +3 -0
exp3/high_0_1208/checkpoint-110000/scheduler.pt +3 -0
exp3/high_0_1208/checkpoint-110000/special_tokens_map.json +1 -0
exp3/high_0_1208/checkpoint-110000/tokenizer.json +0 -0
exp3/high_0_1208/checkpoint-110000/tokenizer_config.json +0 -0
exp3/high_0_1208/checkpoint-110000/trainer_state.json +0 -0
exp3/high_0_1208/checkpoint-110000/training_args.bin +3 -0
exp3/high_0_1208/checkpoint-70000/config.json +31 -0
exp3/high_0_1208/checkpoint-70000/generation_config.json +6 -0
exp3/high_0_1208/checkpoint-70000/model.safetensors +3 -0
exp3/high_0_1208/checkpoint-70000/optimizer.pt +3 -0
exp3/high_0_1208/checkpoint-70000/rng_state.pth +3 -0
exp3/high_0_1208/checkpoint-70000/scaler.pt +3 -0
exp3/high_0_1208/checkpoint-70000/scheduler.pt +3 -0
exp3/high_0_1208/checkpoint-70000/special_tokens_map.json +1 -0
exp3/high_0_1208/checkpoint-70000/tokenizer.json +0 -0
exp3/high_0_1208/checkpoint-70000/tokenizer_config.json +0 -0
exp3/high_0_1208/checkpoint-70000/trainer_state.json +0 -0
exp3/high_0_1208/checkpoint-70000/training_args.bin +3 -0
exp3/high_0_1208/checkpoint-80000/config.json +31 -0
exp3/high_0_1208/checkpoint-80000/generation_config.json +6 -0
exp3/high_0_1208/checkpoint-80000/model.safetensors +3 -0
exp3/high_0_1208/checkpoint-80000/optimizer.pt +3 -0
exp3/high_0_1208/checkpoint-80000/rng_state.pth +3 -0
exp3/high_0_1208/checkpoint-80000/scaler.pt +3 -0
exp3/high_0_1208/checkpoint-80000/scheduler.pt +3 -0
exp3/high_0_1208/checkpoint-80000/special_tokens_map.json +1 -0
exp3/high_0_1208/checkpoint-80000/tokenizer.json +0 -0
exp3/high_0_1208/checkpoint-80000/tokenizer_config.json +0 -0
exp3/high_0_1208/checkpoint-80000/trainer_state.json +0 -0
exp3/high_0_1208/checkpoint-80000/training_args.bin +3 -0

exp3/high_0_1208/README.md ADDED Viewed

	@@ -0,0 +1,186 @@

+---
+library_name: transformers
+tags:
+- generated_from_trainer
+datasets:
+- craa/100M
+metrics:
+- accuracy
+model-index:
+- name: high_0_1208
+  results:
+  - task:
+      name: Causal Language Modeling
+      type: text-generation
+    dataset:
+      name: craa/100M high_0
+      type: craa/100M
+      args: high_0
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.3955190994653271
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/craaaa/exceptions_exp3/runs/a1c25jsh)
+# high_0_1208
+This model is a GPT-2-architecture causal language model (the Trainer recorded no base checkpoint) trained on the craa/100M high_0 dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.2801
+- Accuracy: 0.3955
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0006
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 1208
+- gradient_accumulation_steps: 5
+- total_train_batch_size: 80
+- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 100
+- num_epochs: 40.0
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch   | Step   | Accuracy | Validation Loss |
+|:-------------:|:-------:|:------:|:--------:|:---------------:|
+| 4.6123        | 0.2692  | 1000   | 0.2653   | 4.5295          |
+| 4.0841        | 0.5383  | 2000   | 0.3178   | 4.0247          |
+| 3.9046        | 0.8075  | 3000   | 0.3340   | 3.8418          |
+| 3.7493        | 1.0764  | 4000   | 0.3442   | 3.7381          |
+| 3.6752        | 1.3456  | 5000   | 0.3510   | 3.6677          |
+| 3.6324        | 1.6148  | 6000   | 0.3565   | 3.6097          |
+| 3.5971        | 1.8839  | 7000   | 0.3612   | 3.5648          |
+| 3.5112        | 2.1529  | 8000   | 0.3640   | 3.5371          |
+| 3.4948        | 2.4220  | 9000   | 0.3666   | 3.5100          |
+| 3.4936        | 2.6912  | 10000  | 0.3695   | 3.4849          |
+| 3.4683        | 2.9604  | 11000  | 0.3713   | 3.4629          |
+| 3.3893        | 3.2293  | 12000  | 0.3734   | 3.4498          |
+| 3.3915        | 3.4985  | 13000  | 0.3747   | 3.4363          |
+| 3.3858        | 3.7677  | 14000  | 0.3765   | 3.4231          |
+| 3.3038        | 4.0366  | 15000  | 0.3777   | 3.4126          |
+| 3.3297        | 4.3058  | 16000  | 0.3788   | 3.4042          |
+| 3.3282        | 4.5749  | 17000  | 0.3796   | 3.3931          |
+| 3.335         | 4.8441  | 18000  | 0.3807   | 3.3818          |
+| 3.2534        | 5.1130  | 19000  | 0.3813   | 3.3826          |
+| 3.2736        | 5.3822  | 20000  | 0.3821   | 3.3757          |
+| 3.2934        | 5.6514  | 21000  | 0.3832   | 3.3653          |
+| 3.2915        | 5.9205  | 22000  | 0.3843   | 3.3548          |
+| 3.2243        | 6.1895  | 23000  | 0.3844   | 3.3621          |
+| 3.2413        | 6.4587  | 24000  | 0.3850   | 3.3538          |
+| 3.2643        | 6.7278  | 25000  | 0.3858   | 3.3439          |
+| 3.2581        | 6.9970  | 26000  | 0.3860   | 3.3379          |
+| 3.198         | 7.2659  | 27000  | 0.3861   | 3.3452          |
+| 3.2262        | 7.5351  | 28000  | 0.3867   | 3.3381          |
+| 3.2175        | 7.8043  | 29000  | 0.3875   | 3.3317          |
+| 3.146         | 8.0732  | 30000  | 0.3880   | 3.3365          |
+| 3.1766        | 8.3424  | 31000  | 0.3883   | 3.3303          |
+| 3.2014        | 8.6115  | 32000  | 0.3884   | 3.3256          |
+| 3.1996        | 8.8807  | 33000  | 0.3893   | 3.3176          |
+| 3.137         | 9.1497  | 34000  | 0.3889   | 3.3283          |
+| 3.1566        | 9.4188  | 35000  | 0.3895   | 3.3220          |
+| 3.1774        | 9.6880  | 36000  | 0.3900   | 3.3119          |
+| 3.1915        | 9.9571  | 37000  | 0.3907   | 3.3075          |
+| 3.1193        | 10.2261 | 38000  | 0.3902   | 3.3206          |
+| 3.1495        | 10.4953 | 39000  | 0.3906   | 3.3105          |
+| 3.1636        | 10.7644 | 40000  | 0.3913   | 3.3054          |
+| 3.0765        | 11.0334 | 41000  | 0.3909   | 3.3150          |
+| 3.1059        | 11.3025 | 42000  | 0.3909   | 3.3138          |
+| 3.1231        | 11.5717 | 43000  | 0.3916   | 3.3042          |
+| 3.1332        | 11.8409 | 44000  | 0.3919   | 3.3014          |
+| 3.0655        | 12.1098 | 45000  | 0.3917   | 3.3108          |
+| 3.0919        | 12.3790 | 46000  | 0.3921   | 3.3039          |
+| 3.1198        | 12.6481 | 47000  | 0.3922   | 3.3034          |
+| 3.1283        | 12.9173 | 48000  | 0.3925   | 3.2959          |
+| 3.0591        | 13.1863 | 49000  | 0.3923   | 3.3064          |
+| 3.0828        | 13.4554 | 50000  | 0.3929   | 3.3012          |
+| 3.1093        | 13.7246 | 51000  | 0.3933   | 3.2926          |
+| 3.1115        | 13.9938 | 52000  | 0.3937   | 3.2874          |
+| 3.0686        | 14.2627 | 53000  | 0.3930   | 3.3014          |
+| 3.0762        | 14.5319 | 54000  | 0.3935   | 3.2952          |
+| 3.0797        | 14.8010 | 55000  | 0.3938   | 3.2878          |
+| 3.0089        | 15.0700 | 56000  | 0.3934   | 3.2989          |
+| 3.0535        | 15.3391 | 57000  | 0.3935   | 3.2966          |
+| 3.0641        | 15.6083 | 58000  | 0.3938   | 3.2892          |
+| 3.082         | 15.8775 | 59000  | 0.3941   | 3.2861          |
+| 3.0178        | 16.1464 | 60000  | 0.3936   | 3.3004          |
+| 3.0405        | 16.4156 | 61000  | 0.3940   | 3.2945          |
+| 3.0625        | 16.6848 | 62000  | 0.3947   | 3.2866          |
+| 3.0662        | 16.9539 | 63000  | 0.3951   | 3.2806          |
+| 3.0215        | 17.2229 | 64000  | 0.3943   | 3.2988          |
+| 3.0386        | 17.4920 | 65000  | 0.3941   | 3.2925          |
+| 3.0558        | 17.7612 | 66000  | 0.3951   | 3.2857          |
+| 2.9668        | 18.0301 | 67000  | 0.3944   | 3.2943          |
+| 3.006         | 18.2993 | 68000  | 0.3947   | 3.2958          |
+| 3.033         | 18.5685 | 69000  | 0.3947   | 3.2902          |
+| 3.0561        | 18.8376 | 70000  | 0.3955   | 3.2801          |
+| 2.9788        | 19.1066 | 71000  | 0.3948   | 3.2952          |
+| 3.0036        | 19.3758 | 72000  | 0.3949   | 3.2904          |
+| 3.0251        | 19.6449 | 73000  | 0.3955   | 3.2823          |
+| 3.0421        | 19.9141 | 74000  | 0.3959   | 3.2781          |
+| 2.9677        | 20.1830 | 75000  | 0.3951   | 3.2937          |
+| 2.9962        | 20.4522 | 76000  | 0.3956   | 3.2879          |
+| 3.028         | 20.7214 | 77000  | 0.3958   | 3.2840          |
+| 3.015         | 20.9905 | 78000  | 0.3964   | 3.2760          |
+| 2.9675        | 21.2595 | 79000  | 0.3955   | 3.2941          |
+| 2.9881        | 21.5286 | 80000  | 0.3960   | 3.2854          |
+| 3.0037        | 21.7978 | 81000  | 0.3962   | 3.2802          |
+| 2.9562        | 22.0668 | 82000  | 0.3957   | 3.2926          |
+| 2.9683        | 22.3359 | 83000  | 0.3956   | 3.2929          |
+| 2.9852        | 22.6051 | 84000  | 0.3964   | 3.2840          |
+| 3.0085        | 22.8742 | 85000  | 0.3965   | 3.2787          |
+| 2.9407        | 23.1432 | 86000  | 0.3959   | 3.2916          |
+| 2.9756        | 23.4124 | 87000  | 0.3963   | 3.2877          |
+| 2.981         | 23.6815 | 88000  | 0.3965   | 3.2832          |
+| 2.9913        | 23.9507 | 89000  | 0.3971   | 3.2763          |
+| 2.958         | 24.2196 | 90000  | 0.3960   | 3.2928          |
+| 2.951         | 24.4888 | 91000  | 0.3961   | 3.2936          |
+| 2.9644        | 24.7580 | 92000  | 0.3966   | 3.2854          |
+| 2.9012        | 25.0272 | 93000  | 0.3964   | 3.2926          |
+| 2.9246        | 25.2964 | 94000  | 0.3961   | 3.2946          |
+| 2.9605        | 25.5655 | 95000  | 0.3967   | 3.2850          |
+| 2.9767        | 25.8347 | 96000  | 0.3970   | 3.2806          |
+| 2.9106        | 26.1036 | 97000  | 0.3965   | 3.2930          |
+| 2.9378        | 26.3728 | 98000  | 0.3969   | 3.2884          |
+| 2.9573        | 26.6420 | 99000  | 0.3968   | 3.2869          |
+| 2.9592        | 26.9111 | 100000 | 0.3975   | 3.2794          |
+| 2.9014        | 27.1801 | 101000 | 0.3966   | 3.2881          |
+| 2.9383        | 27.4492 | 102000 | 0.3971   | 3.2870          |
+| 2.9516        | 27.7184 | 103000 | 0.3972   | 3.2830          |
+| 2.9534        | 27.9876 | 104000 | 0.3976   | 3.2764          |
+| 2.9109        | 28.2565 | 105000 | 0.3971   | 3.2897          |
+| 2.9167        | 28.5257 | 106000 | 0.3975   | 3.2850          |
+| 2.9433        | 28.7948 | 107000 | 0.3979   | 3.2777          |
+| 2.8714        | 29.0638 | 108000 | 0.3969   | 3.2901          |
+| 2.897         | 29.3330 | 109000 | 0.3971   | 3.2881          |
+| 2.9142        | 29.6021 | 110000 | 0.3976   | 3.2813          |
+### Framework versions
+- Transformers 4.55.2
+- Pytorch 2.8.0+cu128
+- Datasets 4.0.0
+- Tokenizers 0.21.4

exp3/high_0_1208/all_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 29.602121016365203,
+    "eval_accuracy": 0.3955190994653271,
+    "eval_loss": 3.2801434993743896,
+    "eval_runtime": 146.9307,
+    "eval_samples": 18012,
+    "eval_samples_per_second": 122.588,
+    "eval_steps_per_second": 7.663,
+    "perplexity": 26.579586580268494,
+    "total_flos": 2.29889824948224e+18,
+    "train_loss": 0.5332863204956054,
+    "train_runtime": 28956.9492,
+    "train_samples": 297215,
+    "train_samples_per_second": 410.561,
+    "train_steps_per_second": 5.133
+}

exp3/high_0_1208/checkpoint-100000/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.2",
+  "use_cache": true,
+  "vocab_size": 52000
+}

exp3/high_0_1208/checkpoint-100000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.55.2"
+}

exp3/high_0_1208/checkpoint-100000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5673976cbbb92f2aab1e16e4bf0436ee47eb049e14ef1a6f081efb739321d87d
+size 503128704

exp3/high_0_1208/checkpoint-100000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:861b1f706a820c309cf5ba8bdcb4ff7b570f941fe61528153a0a25ef71f6431b
+size 1006353803

exp3/high_0_1208/checkpoint-100000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3276de41992bc81790a34f0f59f918e73fa421c969b8e72a2b954307b129aaf8
+size 14645

exp3/high_0_1208/checkpoint-100000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:811bd85528dd57de70262283c043dbadbf33aba55abb6df01fc4136d4f863ee6
+size 1383

exp3/high_0_1208/checkpoint-100000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb5f87e96c1c8f5fce55507aeb8d0dc8c408b4d2335dd1720334dae3e85e9a85
+size 1465

exp3/high_0_1208/checkpoint-100000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

exp3/high_0_1208/checkpoint-100000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-100000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-100000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-100000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ccc2166c4da6012db6c023cb1946ef2aee399cfa07452041f7b5fa210c69456
+size 5841

exp3/high_0_1208/checkpoint-110000/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.2",
+  "use_cache": true,
+  "vocab_size": 52000
+}

exp3/high_0_1208/checkpoint-110000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.55.2"
+}

exp3/high_0_1208/checkpoint-110000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:871208332ff9d378483f6fcac83a8ae5bb425180336c0c720de52ba268b7ee6b
+size 503128704

exp3/high_0_1208/checkpoint-110000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a996922213b06b49987734b88cabd9fb3296a3588a4325b977270a5dca324c68
+size 1006353803

exp3/high_0_1208/checkpoint-110000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92b15762684816b646c9c701b43c28c05bca054e54d82265a94459eee0e6d17b
+size 14645

exp3/high_0_1208/checkpoint-110000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5730fd33d203a67552804655ef8956d573f7d9f9dc92e69ecc3fc368bdf12a9d
+size 1383

exp3/high_0_1208/checkpoint-110000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9217aea74098c9110d702c1d92fd04e7419c5df9248078c7ffd6546a6a625f71
+size 1465

exp3/high_0_1208/checkpoint-110000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

exp3/high_0_1208/checkpoint-110000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-110000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-110000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-110000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ccc2166c4da6012db6c023cb1946ef2aee399cfa07452041f7b5fa210c69456
+size 5841

exp3/high_0_1208/checkpoint-70000/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.2",
+  "use_cache": true,
+  "vocab_size": 52000
+}

exp3/high_0_1208/checkpoint-70000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.55.2"
+}

exp3/high_0_1208/checkpoint-70000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69d4c0654b4c6e3e22c3306dbd7504192a447f8fcad4126a0ce306f65c58f493
+size 503128704

exp3/high_0_1208/checkpoint-70000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b1946dd7e6b7e33fd09e0aee77d97ba4767efad56081d3f58342bc513b3ef97
+size 1006353803

exp3/high_0_1208/checkpoint-70000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb1e004a57a1a03a18f7dfc524bb0a2b3489a0397549ded54c1ba1b3c8632004
+size 14645

exp3/high_0_1208/checkpoint-70000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:958fd3f2b8fcfe86151a71735a38edd30e9ec27a87dc1d45ac78422c8ce2d023
+size 1383

exp3/high_0_1208/checkpoint-70000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75ab6c51a065793275d0a875ce67ef84d65b8b95db1f27a50a73ae2e5ccce2db
+size 1465

exp3/high_0_1208/checkpoint-70000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

exp3/high_0_1208/checkpoint-70000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-70000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-70000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-70000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a868d4a21de74ab91f0a541d786ec40e22f96d084897d32bf61af25d393c39b
+size 5841

exp3/high_0_1208/checkpoint-80000/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.2",
+  "use_cache": true,
+  "vocab_size": 52000
+}

exp3/high_0_1208/checkpoint-80000/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.55.2"
+}

exp3/high_0_1208/checkpoint-80000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f596b7adf3c0376bcb81bd57687c31a0dc175f0747d0e955b7de1761fb4d452
+size 503128704

exp3/high_0_1208/checkpoint-80000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e95aa0fe826f59e2aab84e4abf15fc9f1bcc7161f40b13933134b390b21bed2
+size 1006353803

exp3/high_0_1208/checkpoint-80000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa4d16818aacfc6973b81e2b3de7209b32e4c26f56d33aebe26af0fc3649a509
+size 14645

exp3/high_0_1208/checkpoint-80000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7973781c91569bbb9c65b6dae2a9f3ad71efd838984ba53b4e022bfcc86efc2
+size 1383

exp3/high_0_1208/checkpoint-80000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0cdfaf2b55133552c07ff4b138910f99e3db89b261770e55e7dcce420aacd14
+size 1465

exp3/high_0_1208/checkpoint-80000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {}

exp3/high_0_1208/checkpoint-80000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-80000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-80000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

exp3/high_0_1208/checkpoint-80000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a868d4a21de74ab91f0a541d786ec40e22f96d084897d32bf61af25d393c39b
+size 5841