zhimeng committed on
Commit
f07940c
·
verified ·
1 Parent(s): 67f92a8

Training in progress, step 50

Browse files
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
@@ -11,7 +10,7 @@
11
  "initializer_range": 0.02,
12
  "intermediate_size": 8960,
13
  "max_position_embeddings": 32768,
14
- "max_window_layers": 21,
15
  "model_type": "qwen2",
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 28,
@@ -22,8 +21,8 @@
22
  "sliding_window": 32768,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.49.0",
26
- "use_cache": true,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
29
  }
 
1
  {
 
2
  "architectures": [
3
  "Qwen2ForCausalLM"
4
  ],
 
10
  "initializer_range": 0.02,
11
  "intermediate_size": 8960,
12
  "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
  "model_type": "qwen2",
15
  "num_attention_heads": 12,
16
  "num_hidden_layers": 28,
 
21
  "sliding_window": 32768,
22
  "tie_word_embeddings": true,
23
  "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.51.2",
25
+ "use_cache": false,
26
  "use_sliding_window": false,
27
  "vocab_size": 151936
28
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86c79df12fe9eb83deadb33e00b076a353355e7cc6643863e46a97b09339fec7
3
  size 3087467144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a521438ea04179ea5bb90019a057cfd0d65695bf549e5399ecfed99d9845b248
3
  size 3087467144
tokenizer_config.json CHANGED
@@ -200,7 +200,7 @@
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
  "extra_special_tokens": {},
203
- "model_max_length": 131072,
204
  "pad_token": "<|endoftext|>",
205
  "split_special_tokens": false,
206
  "tokenizer_class": "Qwen2Tokenizer",
 
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
  "extra_special_tokens": {},
203
+ "model_max_length": 32768,
204
  "pad_token": "<|endoftext|>",
205
  "split_special_tokens": false,
206
  "tokenizer_class": "Qwen2Tokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2be873df351695990403692ed8e4babbe9498681f03910499bde7029ad15081a
3
- size 8248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72ae48201f0fcc91aad47076e604bb3f1c4c020ce45fb0292977a515b2ff401
3
+ size 8376