Change the trainer

Browse files

Files changed (10) hide show

model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
model.safetensors.index.json +1 -1
preprocessor_config.json +4 -4
tokenizer_config.json +2 -0
trainer_state.json +104 -0
training_args.bin +1 -1
video_preprocessor_config.json +0 -2

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ef22b8013af95eff7c6c7f5eecc636949e99b7d2c9de68509c54fa798e446c4
 size 4998056552

 version https://git-lfs.github.com/spec/v1
+oid sha256:89deb5c60460c539266bc04340839b1cbb2596b6742e8899134b579979c11bf1
 size 4998056552

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c13bd1203896968ca9ed63bd63e672423c9129b78933d3f779adeca3a3ab354
 size 4915962464

 version https://git-lfs.github.com/spec/v1
+oid sha256:c71f80ee2fac69f64c36d69c90ff86aad74dcef97beea13ef48fcef842aa3ef6
 size 4915962464

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee9924b63d4350972d366262c64a7f42e7db50cc6d94fd2e1b0b5165519f36b3
 size 4915962496

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8286708298fa6ef2d3f8ead7000ce2e5798e710370fad9401ece400ee8d79fd
 size 4915962496

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b782e232254dd4cb96a53ac73a8d17f40d6041c685a6d9b2539fc09ac7d34b32
 size 2704357976

 version https://git-lfs.github.com/spec/v1
+oid sha256:229802669680859a2007f289e77a4177a9197985885e98123ac865c196edd625
 size 2704357976

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_parameters": 770288,
     "total_size": 17534247392
   },
   "weight_map": {

 {
   "metadata": {
+    "total_parameters": 8767123696,
     "total_size": 17534247392
   },
   "weight_map": {

preprocessor_config.json CHANGED Viewed

@@ -22,9 +22,9 @@
     0.5
   ],
   "input_data_format": null,
-  "max_pixels": 4096,
   "merge_size": 2,
-  "min_pixels": 4096,
   "pad_size": null,
   "patch_size": 16,
   "processor_class": "Qwen3VLProcessor",
@@ -32,8 +32,8 @@
   "rescale_factor": 0.00392156862745098,
   "return_tensors": null,
   "size": {
-    "longest_edge": 1638400,
-    "shortest_edge": 262144
   },
   "temporal_patch_size": 2
 }

     0.5
   ],
   "input_data_format": null,
+  "max_pixels": 1638400,
   "merge_size": 2,
+  "min_pixels": 1024,
   "pad_size": null,
   "patch_size": 16,
   "processor_class": "Qwen3VLProcessor",
   "rescale_factor": 0.00392156862745098,
   "return_tensors": null,
   "size": {
+    "longest_edge": 16777216,
+    "shortest_edge": 65536
   },
   "temporal_patch_size": 2
 }

tokenizer_config.json CHANGED Viewed

@@ -231,6 +231,8 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
   "model_max_length": 262144,
   "pad_token": "<|endoftext|>",
   "processor_class": "Qwen3VLProcessor",

   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
+  "max_pixels": 1638400,
+  "min_pixels": 1024,
   "model_max_length": 262144,
   "pad_token": "<|endoftext|>",
   "processor_class": "Qwen3VLProcessor",

trainer_state.json ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 74,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 1.0259285002946854,
+      "epoch": 0.273972602739726,
+      "grad_norm": 3.722426652908325,
+      "learning_rate": 9e-06,
+      "loss": 1.0839,
+      "mean_token_accuracy": 0.761720848083496,
+      "num_tokens": 584826.0,
+      "step": 10
+    },
+    {
+      "entropy": 0.9608730539679528,
+      "epoch": 0.547945205479452,
+      "grad_norm": 0.8895509839057922,
+      "learning_rate": 9.934881598487478e-06,
+      "loss": 0.9788,
+      "mean_token_accuracy": 0.7810790002346039,
+      "num_tokens": 1174174.0,
+      "step": 20
+    },
+    {
+      "entropy": 0.9163148060441018,
+      "epoch": 0.821917808219178,
+      "grad_norm": 0.802199125289917,
+      "learning_rate": 9.711957702320176e-06,
+      "loss": 0.9307,
+      "mean_token_accuracy": 0.7890266820788383,
+      "num_tokens": 1762094.0,
+      "step": 30
+    },
+    {
+      "entropy": 0.8868917273847681,
+      "epoch": 1.0821917808219177,
+      "grad_norm": 0.7817137837409973,
+      "learning_rate": 9.337587608588588e-06,
+      "loss": 0.8963,
+      "mean_token_accuracy": 0.7947045470538893,
+      "num_tokens": 2322415.0,
+      "step": 40
+    },
+    {
+      "entropy": 0.8300882771611213,
+      "epoch": 1.356164383561644,
+      "grad_norm": 0.6832650899887085,
+      "learning_rate": 8.823803880137993e-06,
+      "loss": 0.8464,
+      "mean_token_accuracy": 0.8045218542218209,
+      "num_tokens": 2910028.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.8211456254124642,
+      "epoch": 1.6301369863013697,
+      "grad_norm": 0.6951064467430115,
+      "learning_rate": 8.18711994874345e-06,
+      "loss": 0.8352,
+      "mean_token_accuracy": 0.8072828114032745,
+      "num_tokens": 3495988.0,
+      "step": 60
+    },
+    {
+      "entropy": 0.8173969030380249,
+      "epoch": 1.904109589041096,
+      "grad_norm": 0.762572705745697,
+      "learning_rate": 7.447999359825263e-06,
+      "loss": 0.8315,
+      "mean_token_accuracy": 0.8067627891898155,
+      "num_tokens": 4083547.0,
+      "step": 70
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 185,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.831815282786304e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a6b59a32761800fbeb73ab328d8de71e0138c0135fe171c268fceede5ba4d82
 size 7569

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0c9473dc458d3adec2293b7fc009973fce3e502ccd4ef74529e6b16c887b33a
 size 7569

video_preprocessor_config.json CHANGED Viewed

@@ -6,7 +6,6 @@
   "do_center_crop": null,
   "do_convert_rgb": true,
   "do_normalize": true,
-  "do_pad": null,
   "do_rescale": true,
   "do_resize": true,
   "do_sample_frames": true,
@@ -32,7 +31,6 @@
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
   "return_metadata": false,
-  "return_tensors": null,
   "size": {
     "longest_edge": 25165824,
     "shortest_edge": 4096

   "do_center_crop": null,
   "do_convert_rgb": true,
   "do_normalize": true,
   "do_rescale": true,
   "do_resize": true,
   "do_sample_frames": true,
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
   "return_metadata": false,
   "size": {
     "longest_edge": 25165824,
     "shortest_edge": 4096