Finished finetuning grade 3

Browse files

Files changed (5)

README.md +16 -11
gpt2-grade-3-finetuned/adapter_config.json +34 -0
gpt2-grade-3-finetuned/adapter_model.bin +3 -0
tokenizer.json +2 -14
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -12,12 +12,12 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/uds/Graded%20text%20simplification%20training/runs/z2eqo7kv)
 # text-simplification
-This model is a fine-tuned version of [openai-community/gpt2](https://huggingface.co/openai-community/gpt2) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5581
 ## Model description
@@ -42,17 +42,22 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
-- num_epochs: 5
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss |
-|:-------------:|:-----:|:-----:|:---------------:|
-| 0.7915        | 1.0   | 9864  | 0.5974          |
-| 0.6181        | 2.0   | 19728 | 0.5722          |
-| 0.6011        | 3.0   | 29592 | 0.5637          |
-| 0.5938        | 4.0   | 39456 | 0.5591          |
-| 0.5902        | 5.0   | 49320 | 0.5581          |
 ### Framework versions

 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/uds/Graded%20text%20simplification%20training/runs/1mkbeo5j)
 # text-simplification
+This model is a fine-tuned version of [openai-community/gpt2](https://huggingface.co/openai-community/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.3883
 ## Model description
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
+- num_epochs: 10
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.412         | 1.0   | 597  | 0.3916          |
+| 0.406         | 2.0   | 1194 | 0.3891          |
+| 0.4023        | 3.0   | 1791 | 0.3865          |
+| 0.3999        | 4.0   | 2388 | 0.3895          |
+| 0.3983        | 5.0   | 2985 | 0.3886          |
+| 0.3964        | 6.0   | 3582 | 0.3884          |
+| 0.3949        | 7.0   | 4179 | 0.3876          |
+| 0.3944        | 8.0   | 4776 | 0.3870          |
+| 0.3935        | 9.0   | 5373 | 0.3879          |
+| 0.3934        | 10.0  | 5970 | 0.3883          |
 ### Framework versions

gpt2-grade-3-finetuned/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "openai-community/gpt2",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.01,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_attn",
+    "c_fc",
+    "lm_head",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

gpt2-grade-3-finetuned/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f3aa3e03bd1accebd9e8e9458423a78b09f5e7aa1a77d74ec7452e3fd2972f9
+size 160776023

tokenizer.json CHANGED Viewed

@@ -1,19 +1,7 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 1024,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
-  "padding": {
-    "strategy": "BatchLongest",
-    "direction": "Left",
-    "pad_to_multiple_of": null,
-    "pad_id": 50256,
-    "pad_type_id": 0,
-    "pad_token": "<|endoftext|>"
-  },
   "added_tokens": [
     {
       "id": 50256,

 {
   "version": "1.0",
+  "truncation": null,
+  "padding": null,
   "added_tokens": [
     {
       "id": 50256,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81d58a712d8d32d4e159d1bec24ead7fae6605c50be3cf4024eff6af00573442
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce30d7e87d05d01b405761335c6d37c16185ec7a498cae8a2ee67a4df04829c1
 size 5368