Ba2han committed on
Commit
f2d4e77
·
1 Parent(s): 3fbbaa0

Training in progress, step 390

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. config.json +1 -13
README.md CHANGED
@@ -4,8 +4,8 @@ model_name: experimental2
4
  tags:
5
  - generated_from_trainer
6
  - trl
7
- - unsloth
8
  - sft
 
9
  licence: license
10
  ---
11
 
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/gcmu9feu)
31
 
32
 
33
  This model was trained with SFT.
 
4
  tags:
5
  - generated_from_trainer
6
  - trl
 
7
  - sft
8
+ - unsloth
9
  licence: license
10
  ---
11
 
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/h8b0mm7w)
31
 
32
 
33
  This model was trained with SFT.
config.json CHANGED
@@ -75,18 +75,11 @@
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
78
- "resid_lambda_init_end": 1.05,
79
- "resid_lambda_init_start": 1.15,
80
- "resid_lambda_max": 1.25,
81
- "resid_lambda_min": 0.75,
82
- "resid_scalar_lr_mult": 0.01,
83
- "resid_scalar_weight_decay": 0.05,
84
  "rms_norm_eps": 1e-06,
85
  "rope_parameters": {
86
  "rope_theta": 50000,
87
  "rope_type": "default"
88
  },
89
- "scalar_lr": 0.5,
90
  "sliding_window": null,
91
  "squared_relu_activation": "relu2",
92
  "squared_relu_intermediate_size": 2880,
@@ -95,10 +88,5 @@
95
  "unsloth_version": "2026.4.8",
96
  "use_cache": false,
97
  "use_sliding_window": false,
98
- "vocab_size": 50048,
99
- "x0_lambda_init_end": 0.05,
100
- "x0_lambda_init_start": 0.2,
101
- "x0_mix_max": 0.3,
102
- "x0_scalar_lr_mult": 0.01,
103
- "x0_scalar_weight_decay": 0.0
104
  }
 
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
 
 
 
 
 
 
78
  "rms_norm_eps": 1e-06,
79
  "rope_parameters": {
80
  "rope_theta": 50000,
81
  "rope_type": "default"
82
  },
 
83
  "sliding_window": null,
84
  "squared_relu_activation": "relu2",
85
  "squared_relu_intermediate_size": 2880,
 
88
  "unsloth_version": "2026.4.8",
89
  "use_cache": false,
90
  "use_sliding_window": false,
91
+ "vocab_size": 50048
 
 
 
 
 
92
  }