Ba2han committed on
Commit
0b71d48
·
verified ·
1 Parent(s): e79d758

Training in progress, step 390

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. config.json +4 -4
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/dd1eu3b5)
31
 
32
 
33
  This model was trained with SFT.
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/gcmu9feu)
31
 
32
 
33
  This model was trained with SFT.
config.json CHANGED
@@ -75,10 +75,10 @@
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
78
- "resid_lambda_end": 1.05,
79
- "resid_lambda_init": 1.15,
80
  "resid_lambda_init_end": 1.05,
81
  "resid_lambda_init_start": 1.15,
 
 
82
  "resid_scalar_lr_mult": 0.01,
83
  "resid_scalar_weight_decay": 0.05,
84
  "rms_norm_eps": 1e-06,
@@ -96,9 +96,9 @@
96
  "use_cache": false,
97
  "use_sliding_window": false,
98
  "vocab_size": 50048,
99
- "x0_lambda_end": 0.0,
100
- "x0_lambda_init": 0.02,
101
  "x0_lambda_init_end": 0.05,
102
  "x0_lambda_init_start": 0.2,
 
 
103
  "x0_scalar_weight_decay": 0.0
104
  }
 
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
 
 
78
  "resid_lambda_init_end": 1.05,
79
  "resid_lambda_init_start": 1.15,
80
+ "resid_lambda_max": 1.25,
81
+ "resid_lambda_min": 0.75,
82
  "resid_scalar_lr_mult": 0.01,
83
  "resid_scalar_weight_decay": 0.05,
84
  "rms_norm_eps": 1e-06,
 
96
  "use_cache": false,
97
  "use_sliding_window": false,
98
  "vocab_size": 50048,
 
 
99
  "x0_lambda_init_end": 0.05,
100
  "x0_lambda_init_start": 0.2,
101
+ "x0_mix_max": 0.3,
102
+ "x0_scalar_lr_mult": 0.01,
103
  "x0_scalar_weight_decay": 0.0
104
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f53d74af2b3082df05a025c64c68722ca660725afe94f4ae6de06107074aea1
3
- size 1151039640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd7e6a063adf275be5a5eecef05d85e520e1bffac9fb3d5baee4b2ac72ebae57
3
+ size 1151039648
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b350586eae4abc083bd0fbabe256a8a042109d3a895c5dbd482bedadc195db10
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49fa95ecdb125dba941409b1921c0141f8cdfb3fd3d0645b173ea1d009eba909
3
  size 5777