CharlesLi commited on
Commit
36a9249
·
verified ·
1 Parent(s): 3f9866e

Model save

Browse files
README.md CHANGED
@@ -3,12 +3,9 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: mistralai/Mistral-7B-Instruct-v0.1
5
  tags:
6
- - alignment-handbook
7
- - trl
8
- - sft
9
- - generated_from_trainer
10
  - trl
11
  - sft
 
12
  - generated_from_trainer
13
  datasets:
14
  - generator
@@ -61,7 +58,7 @@ The following hyperparameters were used during training:
61
 
62
  | Training Loss | Epoch | Step | Validation Loss |
63
  |:-------------:|:------:|:----:|:---------------:|
64
- | 0.5654 | 0.9050 | 100 | 0.5335 |
65
 
66
 
67
  ### Framework versions
 
3
  license: apache-2.0
4
  base_model: mistralai/Mistral-7B-Instruct-v0.1
5
  tags:
 
 
 
 
6
  - trl
7
  - sft
8
+ - alignment-handbook
9
  - generated_from_trainer
10
  datasets:
11
  - generator
 
58
 
59
  | Training Loss | Epoch | Step | Validation Loss |
60
  |:-------------:|:------:|:----:|:---------------:|
61
+ | 0.5699 | 0.9050 | 100 | 0.5321 |
62
 
63
 
64
  ### Framework versions
all_results.json CHANGED
@@ -6,9 +6,9 @@
6
  "eval_samples_per_second": 6.891,
7
  "eval_steps_per_second": 1.378,
8
  "total_flos": 11463536148480.0,
9
- "train_loss": 0.5638216186653484,
10
- "train_runtime": 614.5888,
11
  "train_samples": 15980,
12
- "train_samples_per_second": 5.742,
13
- "train_steps_per_second": 0.179
14
  }
 
6
  "eval_samples_per_second": 6.891,
7
  "eval_steps_per_second": 1.378,
8
  "total_flos": 11463536148480.0,
9
+ "train_loss": 0.5680296632376585,
10
+ "train_runtime": 618.7593,
11
  "train_samples": 15980,
12
+ "train_samples_per_second": 5.703,
13
+ "train_steps_per_second": 0.178
14
  }
config.json CHANGED
@@ -22,6 +22,6 @@
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.44.2",
25
- "use_cache": true,
26
  "vocab_size": 32000
27
  }
 
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.44.2",
25
+ "use_cache": false,
26
  "vocab_size": 32000
27
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:362d2a1875e021b4e5bfeb2e92126146207d00114efc4e2acbb15c64fd30fdf0
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:961419ed377ff962fba43cc950fed5063937f0ebb88019459677141f7d6f031a
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46da86bda82e4d7e4a01b844fee220c666839f97a92aedd79dadfaf6e2073851
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34fb22df5be105211dcb6040052dfeb92bfaae71ad049b9bd54359297178ed92
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feeea4bd6ff2b6afdb22b6fded6c2caa924c7d2f746cc488de63dd4e1f82483e
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9767b73d530382016877ac771838a6a6b2693f0849e29c9b11ee1a7f1e525d36
3
  size 4540516344
runs/Jan20_10-52-56_dgx-a100-16/events.out.tfevents.1737366789.dgx-a100-16.2922240.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b9dbf155d676db361458712d26f98f1e630de4c265e699d019eded551fd3ce
3
+ size 6679
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.995475113122172,
3
  "total_flos": 11463536148480.0,
4
- "train_loss": 0.5638216186653484,
5
- "train_runtime": 614.5888,
6
  "train_samples": 15980,
7
- "train_samples_per_second": 5.742,
8
- "train_steps_per_second": 0.179
9
  }
 
1
  {
2
  "epoch": 0.995475113122172,
3
  "total_flos": 11463536148480.0,
4
+ "train_loss": 0.5680296632376585,
5
+ "train_runtime": 618.7593,
6
  "train_samples": 15980,
7
+ "train_samples_per_second": 5.703,
8
+ "train_steps_per_second": 0.178
9
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:897c93caa34fedd360293464d53a6d359a842bc0f6ed500d9c6d933187f3fe0e
3
  size 7032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:141fd1c0a6c1a632542050fbf81f5e325456a1febdd058d8dae6c9b3877fd275
3
  size 7032