KotshinZ committed on
Commit
ccfb34a
·
verified ·
1 Parent(s): c0fba22

Model save

Browse files
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
  base_model: openai-community/gpt2
3
- datasets: HuggingFaceFW/fineweb-edu
4
  library_name: transformers
5
  model_name: gpt2-RMT-2-mem512
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - sft
11
  licence: license
@@ -13,7 +11,7 @@ licence: license
13
 
14
  # Model Card for gpt2-RMT-2-mem512
15
 
16
- This model is a fine-tuned version of [openai-community/gpt2](https://huggingface.co/openai-community/gpt2) on the [HuggingFaceFW/fineweb-edu](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/shin2021001-osaka-city-university/huggingface/runs/gt0ble5n)
33
 
34
 
35
  This model was trained with SFT.
 
1
  ---
2
  base_model: openai-community/gpt2
 
3
  library_name: transformers
4
  model_name: gpt2-RMT-2-mem512
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - sft
9
  licence: license
 
11
 
12
  # Model Card for gpt2-RMT-2-mem512
13
 
14
+ This model is a fine-tuned version of [openai-community/gpt2](https://huggingface.co/openai-community/gpt2).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/shin2021001-osaka-city-university/huggingface/runs/nt4l8say)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -3,10 +3,10 @@
3
  "eval_samples": 100,
4
  "eval_samples_per_second": 249.241,
5
  "eval_steps_per_second": 16.08,
6
- "total_flos": 5418484972388352.0,
7
- "train_loss": 3.603820517774343,
8
- "train_runtime": 400.5994,
9
  "train_samples": 19883,
10
- "train_samples_per_second": 51.708,
11
- "train_steps_per_second": 1.615
12
  }
 
3
  "eval_samples": 100,
4
  "eval_samples_per_second": 249.241,
5
  "eval_steps_per_second": 16.08,
6
+ "total_flos": 5419008396361728.0,
7
+ "train_loss": 4.076150745858688,
8
+ "train_runtime": 7573.4415,
9
  "train_samples": 19883,
10
+ "train_samples_per_second": 87.49,
11
+ "train_steps_per_second": 2.734
12
  }
config.json CHANGED
@@ -103,7 +103,7 @@
103
  "embd_pdrop": 0.1,
104
  "eos_token_id": 50256,
105
  "initializer_range": 0.02,
106
- "input_seg_len": 512,
107
  "is_memory_all": false,
108
  "layer_norm_epsilon": 1e-05,
109
  "max_n_segments": 2,
 
103
  "embd_pdrop": 0.1,
104
  "eos_token_id": 50256,
105
  "initializer_range": 0.02,
106
+ "input_seg_len": 16,
107
  "is_memory_all": false,
108
  "layer_norm_epsilon": 1e-05,
109
  "max_n_segments": 2,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93df75f5f8b5b0835ad0330c6b3b9585fae38e86fa81ca6401d49acc12d7f317
3
  size 248915448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65cd08da7dc4048511a86bef339939ec8531258568d7775e32f42921c96aaab4
3
  size 248915448
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 5418484972388352.0,
3
- "train_loss": 3.603820517774343,
4
- "train_runtime": 400.5994,
5
  "train_samples": 19883,
6
- "train_samples_per_second": 51.708,
7
- "train_steps_per_second": 1.615
8
  }
 
1
  {
2
+ "total_flos": 5419008396361728.0,
3
+ "train_loss": 4.076150745858688,
4
+ "train_runtime": 7573.4415,
5
  "train_samples": 19883,
6
+ "train_samples_per_second": 87.49,
7
+ "train_steps_per_second": 2.734
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88846274ec0b368f3fe3ca616827474715cdc7f57eaee25769d05eff2d5e13d6
3
  size 7352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e506da221a187e12d0f07664922d412eb372d52d46c7a3b6e4d2d2ee1a0abcd
3
  size 7352