ptsv committed (verified)
Commit 9037d27 · Parent(s): 9908f57

ptsv/gpt2-m_tinystories_upsampled_tom

README.md ADDED
@@ -0,0 +1,90 @@
+ ---
+ library_name: transformers
+ license: mit
+ base_model: openai-community/gpt2-medium
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: tinystories_upsampled_tom
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/ptsvil/tom-training/runs/t2roxoo7)
+ # tinystories_upsampled_tom
+
+ This model is a fine-tuned version of [openai-community/gpt2-medium](https://huggingface.co/openai-community/gpt2-medium) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.6461
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 32
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 256
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 1
+ - num_epochs: 3
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:------:|:-----:|:---------------:|
+ | 1.8989 | 0.1051 | 400 | 1.8900 |
+ | 1.8563 | 0.2102 | 800 | 1.8378 |
+ | 1.8476 | 0.3153 | 1200 | 1.7993 |
+ | 1.8063 | 0.4204 | 1600 | 1.7859 |
+ | 1.7846 | 0.5255 | 2000 | 1.7627 |
+ | 1.7625 | 0.6306 | 2400 | 1.7536 |
+ | 1.7617 | 0.7357 | 2800 | 1.7368 |
+ | 1.7527 | 0.8408 | 3200 | 1.7257 |
+ | 1.7714 | 0.9459 | 3600 | 1.7172 |
+ | 1.6993 | 1.0510 | 4000 | 1.7162 |
+ | 1.6844 | 1.1561 | 4400 | 1.7071 |
+ | 1.6898 | 1.2612 | 4800 | 1.7007 |
+ | 1.6678 | 1.3663 | 5200 | 1.6925 |
+ | 1.7036 | 1.4714 | 5600 | 1.6887 |
+ | 1.6849 | 1.5765 | 6000 | 1.6817 |
+ | 1.6781 | 1.6816 | 6400 | 1.6764 |
+ | 1.6228 | 1.7867 | 6800 | 1.6712 |
+ | 1.6467 | 1.8918 | 7200 | 1.6679 |
+ | 1.6672 | 1.9969 | 7600 | 1.6619 |
+ | 1.6092 | 2.1020 | 8000 | 1.6652 |
+ | 1.6181 | 2.2071 | 8400 | 1.6615 |
+ | 1.6183 | 2.3122 | 8800 | 1.6566 |
+ | 1.6101 | 2.4173 | 9200 | 1.6573 |
+ | 1.6009 | 2.5224 | 9600 | 1.6515 |
+ | 1.6002 | 2.6275 | 10000 | 1.6520 |
+ | 1.6387 | 2.7326 | 10400 | 1.6497 |
+ | 1.6401 | 2.8377 | 10800 | 1.6477 |
+ | 1.6186 | 2.9428 | 11200 | 1.6466 |
+
+
+ ### Framework versions
+
+ - Transformers 4.44.1
+ - Pytorch 2.2.2
+ - Datasets 2.18.0
+ - Tokenizers 0.19.1
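For completeness, here is a minimal usage sketch for the checkpoint this commit adds. It assumes the pinned versions above (Transformers 4.44.1, PyTorch 2.2.2); the repo id is the one in this commit's path, but the prompt string is an arbitrary example.

```python
# Minimal sketch: load the fine-tuned checkpoint and sample a continuation.
# Assumes `pip install transformers torch`; the prompt is an illustrative example.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "ptsv/gpt2-m_tinystories_upsampled_tom"  # repo added in this commit

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

inputs = tokenizer("Once upon a time,", return_tensors="pt")
# do_sample=True / max_length=50 mirror the task_specific_params committed
# in config.json below.
outputs = model.generate(**inputs, do_sample=True, max_length=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```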
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "openai-community/gpt2-medium",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 1024,
+   "n_head": 16,
+   "n_inner": null,
+   "n_layer": 24,
+   "n_positions": 1024,
+   "n_special": 0,
+   "predict_special_tokens": true,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
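The values above are the stock GPT-2 medium geometry (24 layers, 1024-dimensional embeddings, 16 heads); only `_name_or_path` and `transformers_version` reflect the fine-tune. A small sanity-check sketch, assuming the repo id from this commit:

```python
# Sketch: fetch the committed config and confirm the GPT-2 medium geometry.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("ptsv/gpt2-m_tinystories_upsampled_tom")
assert config.model_type == "gpt2"
assert (config.n_layer, config.n_embd, config.n_head) == (24, 1024, 16)
print(config.task_specific_params)  # {'text-generation': {'do_sample': True, 'max_length': 50}}
```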
ep1_upsampled.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c0855659a1a72ee2365a25644cb7b6d2a2befd9ebd1a0e006ad22ce421a0b44
+ size 1942661742
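The three lines above are a Git LFS pointer, not the archive itself; the roughly 1.9 GB blob is fetched on checkout. One way to verify a downloaded copy against the pointer's `oid` is a streaming SHA-256, sketched below under the assumption that the zip sits in the current directory.

```python
# Sketch: verify a downloaded LFS object against the sha256 oid in the pointer.
import hashlib

EXPECTED = "5c0855659a1a72ee2365a25644cb7b6d2a2befd9ebd1a0e006ad22ce421a0b44"

h = hashlib.sha256()
with open("ep1_upsampled.zip", "rb") as f:            # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED, "file does not match the LFS pointer"
```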
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.44.1"
+ }
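This file only pins GPT-2's `<|endoftext|>` token (id 50256) as both BOS and EOS, so decoding behavior otherwise falls back to call-time arguments. Reading it back is a one-liner; a sketch assuming the same repo id:

```python
# Sketch: load the committed generation defaults.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("ptsv/gpt2-m_tinystories_upsampled_tom")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 50256 50256
```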
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ecfd075885635d6eff2c06f5b4980d6381e4a79686d34a00d50c669d59cf8618
+ size 1419322880
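model.safetensors is likewise an LFS pointer; the roughly 1.4 GB object holds the float32 weights (consistent with `"torch_dtype": "float32"` in config.json). A sketch of inspecting tensor names and shapes without instantiating the model, assuming the file has been pulled locally:

```python
# Sketch: list a few tensor names/shapes straight from the checkpoint.
# Assumes model.safetensors is in the working directory.
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    for name in list(f.keys())[:5]:
        print(name, tuple(f.get_tensor(name).shape))
```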
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da8c89229ce8f727bebe47f319e8bdf92643a3e7474ce19057857af275747ad5
+ size 5240
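training_args.bin is the pickled `TrainingArguments` object from the Trainer run; loading it is one way to recover the exact hyperparameters listed in the model card. A sketch, with the usual caveat that unpickling requires trusting the file (hence `weights_only=False`):

```python
# Sketch: recover the serialized TrainingArguments from this commit.
# torch.load unpickles arbitrary objects here, so only run it on trusted files.
import torch

args = torch.load("training_args.bin", weights_only=False)  # assumed local path
print(args.learning_rate,                    # expect 0.0001
      args.per_device_train_batch_size,      # expect 32
      args.gradient_accumulation_steps)      # expect 8
```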