End of training

- README.md +14 -14
- adapter_model.bin +1 -1
README.md CHANGED

```diff
@@ -42,19 +42,19 @@ deepspeed: null
 early_stopping_patience: null
 eval_max_new_tokens: 128
 eval_table_size: null
-
+eval_steps: 50
 flash_attention: true
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 8
 gradient_checkpointing: false
 group_by_length: false
 hub_model_id: error577/be3c53b4-2dbf-4a12-957b-9bf2e80845f8
 hub_repo: null
 hub_strategy: checkpoint
 hub_token: null
-learning_rate: 0.
+learning_rate: 0.001
 load_in_4bit: true
 load_in_8bit: false
 local_rank: null
@@ -77,7 +77,7 @@ pad_to_sequence_len: true
 resume_from_checkpoint: null
 s2_attention: null
 sample_packing: false
-saves_per_epoch:
+saves_per_epoch: 1
 sequence_len: 512
 special_tokens:
   pad_token: </s>
@@ -86,7 +86,7 @@ tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.
+val_set_size: 0.005
 wandb_entity: null
 wandb_mode: online
 wandb_name: 838ffd28-d356-4c40-a584-abc51f2d4a95
@@ -105,7 +105,7 @@ xformers_attention: null

 This model is a fine-tuned version of [codellama/CodeLlama-7b-Instruct-hf](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.
+- Loss: 2.5279

 ## Model description

@@ -124,12 +124,12 @@ More information needed
 ### Training hyperparameters

 The following hyperparameters were used during training:
-- learning_rate: 0.
+- learning_rate: 0.001
 - train_batch_size: 1
 - eval_batch_size: 1
 - seed: 42
-- gradient_accumulation_steps:
-- total_train_batch_size:
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 8
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
@@ -137,11 +137,11 @@ The following hyperparameters were used during training:

 ### Training results

-| Training Loss | Epoch
-|:-------------:|:-----
-| 2.
-| 2.
-| 2.
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 2.7769        | 0.002 | 1    | 3.0125          |
+| 2.4637        | 0.1   | 50   | 2.5838          |
+| 2.1737        | 0.2   | 100  | 2.5279          |


 ### Framework versions
```
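Two of the updated hyperparameters combine: with `train_batch_size: 1` and `gradient_accumulation_steps: 8`, each optimizer step averages gradients over 1 × 8 = 8 examples, which is exactly the `total_train_batch_size: 8` now reported. Below is a minimal sketch of the stated optimizer and schedule (8-bit AdamW from bitsandbytes, 10 warmup steps into a cosine decay); `model` and `dataloader` are stand-ins, and `num_training_steps=500` is an assumption read off the results table (step 100 at epoch 0.2 implies ~500 steps per epoch):

```python
# Sketch only: `model` and `dataloader` are stand-ins, not defined here.
import bitsandbytes as bnb
from transformers import get_cosine_schedule_with_warmup

# ADAMW_BNB with betas=(0.9, 0.999) and eps=1e-8, as listed in the card.
optimizer = bnb.optim.AdamW8bit(
    model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-8
)
# lr_scheduler_type: cosine, lr_scheduler_warmup_steps: 10.
# num_training_steps=500 is an assumption inferred from the results table.
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=10, num_training_steps=500
)

accum = 8  # gradient_accumulation_steps from the config
for step, batch in enumerate(dataloader):
    loss = model(**batch).loss
    (loss / accum).backward()    # scale so gradients average over the window
    if (step + 1) % accum == 0:  # one optimizer step per 8 micro-batches
        optimizer.step()         # effective batch size: 1 * 8 = 8
        scheduler.step()
        optimizer.zero_grad()
```

At 8 examples per optimizer step, ~500 steps per epoch would also put the training set at roughly 4,000 examples, though the card does not name the dataset.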
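The card carries no usage section, so here is a minimal, hedged sketch of loading the adapter against the base model named above. The repo ids, `load_in_4bit: true`, and the `</s>` pad token come straight from the config diff; the nf4 quant type and bfloat16 compute dtype are assumptions the diff does not show:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

base_id = "codellama/CodeLlama-7b-Instruct-hf"
adapter_id = "error577/be3c53b4-2dbf-4a12-957b-9bf2e80845f8"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_4bit: true in the config
    bnb_4bit_quant_type="nf4",              # assumption: not in the diff
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption: not in the diff
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
tokenizer.pad_token = "</s>"  # pad_token from the special_tokens block

model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)  # applies the LoRA weights
```

`PeftModel.from_pretrained` fetches only the adapter weights, i.e. the `adapter_model.bin` updated below in this commit.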
adapter_model.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:58668547fc92a09603fd5d5334cd1ed152385fcec7e51fc3e1c7f2ace842917c
 size 80115210
```
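`adapter_model.bin` is stored through Git LFS, so the diff above shows only the pointer file: `oid` is the SHA-256 of the actual ~80 MB payload, and that hash is what changed in this commit. A quick integrity check for a downloaded copy, with the local path as a placeholder:

```python
import hashlib

# oid from the new LFS pointer above.
expected = "58668547fc92a09603fd5d5334cd1ed152385fcec7e51fc3e1c7f2ace842917c"

h = hashlib.sha256()
with open("adapter_model.bin", "rb") as f:  # placeholder local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == expected, f"checksum mismatch: {h.hexdigest()}"
```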