ucmp137538 committed on
Commit
606f836
·
verified ·
1 Parent(s): 8b1294c

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -9,36 +9,6 @@
9
  "hidden_size": 3584,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 18944,
12
- "layer_types": [
13
- "full_attention",
14
- "full_attention",
15
- "full_attention",
16
- "full_attention",
17
- "full_attention",
18
- "full_attention",
19
- "full_attention",
20
- "full_attention",
21
- "full_attention",
22
- "full_attention",
23
- "full_attention",
24
- "full_attention",
25
- "full_attention",
26
- "full_attention",
27
- "full_attention",
28
- "full_attention",
29
- "full_attention",
30
- "full_attention",
31
- "full_attention",
32
- "full_attention",
33
- "full_attention",
34
- "full_attention",
35
- "full_attention",
36
- "full_attention",
37
- "full_attention",
38
- "full_attention",
39
- "full_attention",
40
- "full_attention"
41
- ],
42
  "max_position_embeddings": 32768,
43
  "max_window_layers": 28,
44
  "model_type": "qwen2",
@@ -48,10 +18,10 @@
48
  "rms_norm_eps": 1e-06,
49
  "rope_scaling": null,
50
  "rope_theta": 1000000.0,
51
- "sliding_window": null,
52
  "tie_word_embeddings": false,
53
  "torch_dtype": "bfloat16",
54
- "transformers_version": "4.54.1",
55
  "use_cache": false,
56
  "use_sliding_window": false,
57
  "vocab_size": 152064
 
9
  "hidden_size": 3584,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 18944,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 28,
14
  "model_type": "qwen2",
 
18
  "rms_norm_eps": 1e-06,
19
  "rope_scaling": null,
20
  "rope_theta": 1000000.0,
21
+ "sliding_window": 131072,
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.52.3",
25
  "use_cache": false,
26
  "use_sliding_window": false,
27
  "vocab_size": 152064
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34af926acfed1f365ed6ab5f986e92afbc8f38654c6dfa6e1f076685d4b6df8b
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58c527756029ac3feeaff3d08775069fd2cc3d5f73930d141574a97b40b401db
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cdde8ca67ca17dc14b7f1ddb7948dfb4413ff11fe5a7e601146c3399d9d47ba
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:197fbdbcf74853af7ce7b19ff320cdbd65bc7dd333e15fb07f5f2126f04c3fa0
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae48e05fc28d3d59b202bb7e2cbc9d09327b13dfec253d81557f40672048699e
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff561d88f43cb7211b86bab21522d22949ff46af01241d78a6e908ef14d27974
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cf46df6bc40283311e781cf48477493d87e6a19138f3ea4629d0c75314027cd
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:348573f05f1646816ce5231f0bfbceba5d0a96c3d99b4e9d36f16858c4e6051d
3
  size 1089994880
model.safetensors.index.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 333312,
4
  "total_size": 15231233024
5
  },
6
  "weight_map": {
 
1
  {
2
  "metadata": {
 
3
  "total_size": 15231233024
4
  },
5
  "weight_map": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19ee8f0dd7f1004fae5741c6a7b5ec837383b42add9b3319cd90f04224b7e1b8
3
- size 8081
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea8872e02997207dd334542b47e6bd1d4831e96e6bff90e5741a10a4cc1447a
3
+ size 8017