anujathore committed on
Commit
b4c553d
·
verified ·
1 Parent(s): e0584e7

Training in progress, step 500

Browse files
Files changed (3) hide show
  1. config.json +8 -8
  2. model.safetensors +1 -1
  3. training_args.bin +1 -1
config.json CHANGED
@@ -6,23 +6,23 @@
6
  "bos_token_id": 151643,
7
  "eos_token_id": 151645,
8
  "hidden_act": "silu",
9
- "hidden_size": 3584,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 18944,
12
  "max_position_embeddings": 32768,
13
- "max_window_layers": 28,
14
  "model_type": "qwen2",
15
- "num_attention_heads": 28,
16
  "num_hidden_layers": 28,
17
- "num_key_value_heads": 4,
18
  "rms_norm_eps": 1e-06,
19
  "rope_scaling": null,
20
  "rope_theta": 1000000.0,
21
- "sliding_window": 131072,
22
- "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.52.3",
25
  "use_cache": false,
26
  "use_sliding_window": false,
27
- "vocab_size": 152064
28
  }
 
6
  "bos_token_id": 151643,
7
  "eos_token_id": 151645,
8
  "hidden_act": "silu",
9
+ "hidden_size": 1536,
10
  "initializer_range": 0.02,
11
+ "intermediate_size": 8960,
12
  "max_position_embeddings": 32768,
13
+ "max_window_layers": 21,
14
  "model_type": "qwen2",
15
+ "num_attention_heads": 12,
16
  "num_hidden_layers": 28,
17
+ "num_key_value_heads": 2,
18
  "rms_norm_eps": 1e-06,
19
  "rope_scaling": null,
20
  "rope_theta": 1000000.0,
21
+ "sliding_window": 32768,
22
+ "tie_word_embeddings": true,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.52.3",
25
  "use_cache": false,
26
  "use_sliding_window": false,
27
+ "vocab_size": 151936
28
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:740facf5fc512081e4a1060daf771bd61342f4b803e997b416951a38dcd029dc
3
  size 3087467144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f5521bef719ed8b7180d40a08d81104ab759329e440b32c1a185fedcba621a
3
  size 3087467144
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2498b8175f69bd3130933948a179dab1bf7ee7289114284e14c9c58dd6c9cb6
3
  size 9336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b096d93940dc9d46aa0236becc9adceb8294528db82759afbbb2ee92bf70951
3
  size 9336