Ba2han committed on
Commit
c31ae10
·
1 Parent(s): d4bd2e0

Training in progress, step 975

Browse files
Files changed (3) hide show
  1. README.md +3 -3
  2. config.json +3 -3
  3. generation_config.json +1 -1
README.md CHANGED
@@ -3,9 +3,9 @@ library_name: transformers
3
  model_name: experimental2
4
  tags:
5
  - generated_from_trainer
6
- - unsloth
7
  - trl
8
  - sft
 
9
  licence: license
10
  ---
11
 
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/8fcww366)
31
 
32
 
33
  This model was trained with SFT.
@@ -35,7 +35,7 @@ This model was trained with SFT.
35
  ### Framework versions
36
 
37
  - TRL: 0.24.0
38
- - Transformers: 5.6.2
39
  - Pytorch: 2.10.0
40
  - Datasets: 4.3.0
41
  - Tokenizers: 0.22.2
 
3
  model_name: experimental2
4
  tags:
5
  - generated_from_trainer
 
6
  - trl
7
  - sft
8
+ - unsloth
9
  licence: license
10
  ---
11
 
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/0blltk43)
31
 
32
 
33
  This model was trained with SFT.
 
35
  ### Framework versions
36
 
37
  - TRL: 0.24.0
38
+ - Transformers: 5.7.0
39
  - Pytorch: 2.10.0
40
  - Datasets: 4.3.0
41
  - Tokenizers: 0.22.2
config.json CHANGED
@@ -14,7 +14,7 @@
14
  "hidden_act": "silu",
15
  "hidden_size": 1024,
16
  "initializer_range": 0.02,
17
- "intermediate_size": 4096,
18
  "layer_types": [
19
  "full_attention",
20
  "full_attention",
@@ -63,7 +63,7 @@
63
  "max_window_layers": 42,
64
  "model_name": "test_checkpoint",
65
  "model_type": "qwen3",
66
- "num_attention_heads": 12,
67
  "num_hidden_layers": 42,
68
  "num_key_value_heads": 4,
69
  "pad_token_id": 50034,
@@ -80,7 +80,7 @@
80
  "softcap_scale": 23.0,
81
  "softcap_shift": 5.0,
82
  "tie_word_embeddings": true,
83
- "transformers_version": "5.6.2",
84
  "unsloth_version": "2026.4.8",
85
  "use_cache": false,
86
  "use_qk_norm_patch": true,
 
14
  "hidden_act": "silu",
15
  "hidden_size": 1024,
16
  "initializer_range": 0.02,
17
+ "intermediate_size": 2816,
18
  "layer_types": [
19
  "full_attention",
20
  "full_attention",
 
63
  "max_window_layers": 42,
64
  "model_name": "test_checkpoint",
65
  "model_type": "qwen3",
66
+ "num_attention_heads": 16,
67
  "num_hidden_layers": 42,
68
  "num_key_value_heads": 4,
69
  "pad_token_id": 50034,
 
80
  "softcap_scale": 23.0,
81
  "softcap_shift": 5.0,
82
  "tie_word_embeddings": true,
83
+ "transformers_version": "5.7.0",
84
  "unsloth_version": "2026.4.8",
85
  "use_cache": false,
86
  "use_qk_norm_patch": true,
generation_config.json CHANGED
@@ -8,6 +8,6 @@
8
  "output_attentions": false,
9
  "output_hidden_states": false,
10
  "pad_token_id": 50034,
11
- "transformers_version": "5.6.2",
12
  "use_cache": false
13
  }
 
8
  "output_attentions": false,
9
  "output_hidden_states": false,
10
  "pad_token_id": 50034,
11
+ "transformers_version": "5.7.0",
12
  "use_cache": false
13
  }