Ba2han committed on
Commit
6725f16
·
1 Parent(s): f2d4e77

Training in progress, step 390, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -75,18 +75,11 @@
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
78
- "resid_lambda_init_end": 1.05,
79
- "resid_lambda_init_start": 1.15,
80
- "resid_lambda_max": 1.25,
81
- "resid_lambda_min": 0.75,
82
- "resid_scalar_lr_mult": 0.01,
83
- "resid_scalar_weight_decay": 0.05,
84
  "rms_norm_eps": 1e-06,
85
  "rope_parameters": {
86
  "rope_theta": 50000,
87
  "rope_type": "default"
88
  },
89
- "scalar_lr": 0.5,
90
  "sliding_window": null,
91
  "squared_relu_activation": "relu2",
92
  "squared_relu_intermediate_size": 2880,
@@ -95,10 +88,5 @@
95
  "unsloth_version": "2026.4.8",
96
  "use_cache": false,
97
  "use_sliding_window": false,
98
- "vocab_size": 50048,
99
- "x0_lambda_init_end": 0.05,
100
- "x0_lambda_init_start": 0.2,
101
- "x0_mix_max": 0.3,
102
- "x0_scalar_lr_mult": 0.01,
103
- "x0_scalar_weight_decay": 0.0
104
  }
 
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
 
 
 
 
 
 
78
  "rms_norm_eps": 1e-06,
79
  "rope_parameters": {
80
  "rope_theta": 50000,
81
  "rope_type": "default"
82
  },
 
83
  "sliding_window": null,
84
  "squared_relu_activation": "relu2",
85
  "squared_relu_intermediate_size": 2880,
 
88
  "unsloth_version": "2026.4.8",
89
  "use_cache": false,
90
  "use_sliding_window": false,
91
+ "vocab_size": 50048
 
 
 
 
 
92
  }
last-checkpoint/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff