Training in progress, epoch 1

Files changed (7) hide show

config.json CHANGED Viewed

@@ -9,36 +9,6 @@
   "hidden_size": 3584,
   "initializer_range": 0.02,
   "intermediate_size": 18944,
-  "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention"
-  ],
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
   "model_type": "qwen2",
@@ -48,10 +18,10 @@
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000.0,
-  "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.54.1",
   "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 152064

   "hidden_size": 3584,
   "initializer_range": 0.02,
   "intermediate_size": 18944,
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
   "model_type": "qwen2",
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000.0,
+  "sliding_window": 131072,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.3",
   "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 152064

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34af926acfed1f365ed6ab5f986e92afbc8f38654c6dfa6e1f076685d4b6df8b
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:58c527756029ac3feeaff3d08775069fd2cc3d5f73930d141574a97b40b401db
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cdde8ca67ca17dc14b7f1ddb7948dfb4413ff11fe5a7e601146c3399d9d47ba
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:197fbdbcf74853af7ce7b19ff320cdbd65bc7dd333e15fb07f5f2126f04c3fa0
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae48e05fc28d3d59b202bb7e2cbc9d09327b13dfec253d81557f40672048699e
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff561d88f43cb7211b86bab21522d22949ff46af01241d78a6e908ef14d27974
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cf46df6bc40283311e781cf48477493d87e6a19138f3ea4629d0c75314027cd
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:348573f05f1646816ce5231f0bfbceba5d0a96c3d99b4e9d36f16858c4e6051d
 size 1089994880

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,5 @@
 {
   "metadata": {
-    "total_parameters": 333312,
     "total_size": 15231233024
   },
   "weight_map": {

 {
   "metadata": {
     "total_size": 15231233024
   },
   "weight_map": {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19ee8f0dd7f1004fae5741c6a7b5ec837383b42add9b3319cd90f04224b7e1b8
-size 8081

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ea8872e02997207dd334542b47e6bd1d4831e96e6bff90e5741a10a4cc1447a
+size 8017