ninagroot/Llama-360Mtest

Browse files

Files changed (6) hide show

README.md +18 -17
config.json +2 -2
generation_config.json +1 -1
model.safetensors +2 -2
runs/Mar21_14-06-12_gcn29.local.snellius.surf.nl/events.out.tfevents.1711026382.gcn29.local.snellius.surf.nl.3605748.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of an unspecified base model (the model card's base-model link was left empty) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 6.8937
 ## Model description
@@ -33,11 +33,11 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 3e-05
-- train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 8
-- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 300
@@ -48,20 +48,21 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| No log        | 0.89  | 5    | 8.5629          |
-| No log        | 1.96  | 11   | 8.5316          |
-| No log        | 2.84  | 16   | 8.4872          |
-| 8.5217        | 3.91  | 22   | 8.4115          |
-| 8.5217        | 4.98  | 28   | 8.3069          |
-| 8.5217        | 5.87  | 33   | 8.1945          |
-| 8.5217        | 6.93  | 39   | 8.0237          |
-| 8.174         | 8.0   | 45   | 7.8173          |
-| 8.174         | 8.89  | 50   | 7.6343          |
-| 8.174         | 9.96  | 56   | 7.4226          |
-| 7.4864        | 10.84 | 61   | 7.2617          |
-| 7.4864        | 11.91 | 67   | 7.0916          |
-| 7.4864        | 12.98 | 73   | 6.9408          |
-| 7.4864        | 13.33 | 75   | 6.8937          |
 ### Framework versions

 This model is a fine-tuned version of an unspecified base model (the model card's base-model link was left empty) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 4.7211
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 3e-05
+- train_batch_size: 1
 - eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 8
+- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 300
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 10.8127       | 0.98  | 41   | 10.5984         |
+| 9.6409        | 1.98  | 83   | 9.0771          |
+| 8.3725        | 2.99  | 125  | 8.0192          |
+| 7.2983        | 3.99  | 167  | 7.0163          |
+| 6.408         | 4.99  | 209  | 6.2115          |
+| 5.4886        | 5.99  | 251  | 5.4663          |
+| 4.8987        | 7.0   | 293  | 5.1240          |
+| 4.4918        | 8.0   | 335  | 4.9146          |
+| 4.2381        | 8.98  | 376  | 4.8076          |
+| 4.0984        | 9.98  | 418  | 4.7645          |
+| 3.9135        | 10.99 | 460  | 4.7195          |
+| 3.6736        | 11.99 | 502  | 4.7150          |
+| 3.467         | 12.99 | 544  | 4.7175          |
+| 3.4323        | 13.99 | 586  | 4.7194          |
+| 3.443         | 14.69 | 615  | 4.7211          |
 ### Framework versions

config.json CHANGED Viewed

@@ -15,7 +15,7 @@
   "num_attention_heads": 8,
   "num_hidden_layers": 24,
   "num_key_value_heads": 8,
-  "pad_token_id": 0,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -24,5 +24,5 @@
   "torch_dtype": "float32",
   "transformers_version": "4.37.2",
   "use_cache": true,
-  "vocab_size": 4312
 }

   "num_attention_heads": 8,
   "num_hidden_layers": 24,
   "num_key_value_heads": 8,
+  "pad_token_id": 50256,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "torch_dtype": "float32",
   "transformers_version": "4.37.2",
   "use_cache": true,
+  "vocab_size": 50257
 }

generation_config.json CHANGED Viewed

@@ -2,6 +2,6 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "pad_token_id": 0,
   "transformers_version": "4.37.2"
 }

   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
+  "pad_token_id": 50256,
   "transformers_version": "4.37.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9540083c13f66af40a2d681a8cc3433b17aa1087a1ee83d06484297c0ccaad0a
-size 1344172280

 version https://git-lfs.github.com/spec/v1
+oid sha256:52c8d6589ac866b90c44e5b803d259a7ee96f9f9029f707aa7db6e82b4515797
+size 1720553864

runs/Mar21_14-06-12_gcn29.local.snellius.surf.nl/events.out.tfevents.1711026382.gcn29.local.snellius.surf.nl.3605748.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:078e9bde84335b92eb3479a1d81a9b3621230071826a1c54206d089f6f4b50c6
+size 13419

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:441af247b61748e0071ab6c73277ce2d0e45005e5cab77b93c7097303d530ab4
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:6930c0ab087b6ba724e4cf9b2bcb7b1cd8afe2a296b2b6ad30397ad33891d6cf
 size 4728