ankanmbz committed on
Commit
dfca9fe
·
verified ·
1 Parent(s): e368446

Change the trainer

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ef22b8013af95eff7c6c7f5eecc636949e99b7d2c9de68509c54fa798e446c4
3
  size 4998056552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89deb5c60460c539266bc04340839b1cbb2596b6742e8899134b579979c11bf1
3
  size 4998056552
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c13bd1203896968ca9ed63bd63e672423c9129b78933d3f779adeca3a3ab354
3
  size 4915962464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71f80ee2fac69f64c36d69c90ff86aad74dcef97beea13ef48fcef842aa3ef6
3
  size 4915962464
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee9924b63d4350972d366262c64a7f42e7db50cc6d94fd2e1b0b5165519f36b3
3
  size 4915962496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8286708298fa6ef2d3f8ead7000ce2e5798e710370fad9401ece400ee8d79fd
3
  size 4915962496
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b782e232254dd4cb96a53ac73a8d17f40d6041c685a6d9b2539fc09ac7d34b32
3
  size 2704357976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229802669680859a2007f289e77a4177a9197985885e98123ac865c196edd625
3
  size 2704357976
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 770288,
4
  "total_size": 17534247392
5
  },
6
  "weight_map": {
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 8767123696,
4
  "total_size": 17534247392
5
  },
6
  "weight_map": {
preprocessor_config.json CHANGED
@@ -22,9 +22,9 @@
22
  0.5
23
  ],
24
  "input_data_format": null,
25
- "max_pixels": 4096,
26
  "merge_size": 2,
27
- "min_pixels": 4096,
28
  "pad_size": null,
29
  "patch_size": 16,
30
  "processor_class": "Qwen3VLProcessor",
@@ -32,8 +32,8 @@
32
  "rescale_factor": 0.00392156862745098,
33
  "return_tensors": null,
34
  "size": {
35
- "longest_edge": 1638400,
36
- "shortest_edge": 262144
37
  },
38
  "temporal_patch_size": 2
39
  }
 
22
  0.5
23
  ],
24
  "input_data_format": null,
25
+ "max_pixels": 1638400,
26
  "merge_size": 2,
27
+ "min_pixels": 1024,
28
  "pad_size": null,
29
  "patch_size": 16,
30
  "processor_class": "Qwen3VLProcessor",
 
32
  "rescale_factor": 0.00392156862745098,
33
  "return_tensors": null,
34
  "size": {
35
+ "longest_edge": 16777216,
36
+ "shortest_edge": 65536
37
  },
38
  "temporal_patch_size": 2
39
  }
tokenizer_config.json CHANGED
@@ -231,6 +231,8 @@
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
 
 
234
  "model_max_length": 262144,
235
  "pad_token": "<|endoftext|>",
236
  "processor_class": "Qwen3VLProcessor",
 
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
+ "max_pixels": 1638400,
235
+ "min_pixels": 1024,
236
  "model_max_length": 262144,
237
  "pad_token": "<|endoftext|>",
238
  "processor_class": "Qwen3VLProcessor",
trainer_state.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 74,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "entropy": 1.0259285002946854,
14
+ "epoch": 0.273972602739726,
15
+ "grad_norm": 3.722426652908325,
16
+ "learning_rate": 9e-06,
17
+ "loss": 1.0839,
18
+ "mean_token_accuracy": 0.761720848083496,
19
+ "num_tokens": 584826.0,
20
+ "step": 10
21
+ },
22
+ {
23
+ "entropy": 0.9608730539679528,
24
+ "epoch": 0.547945205479452,
25
+ "grad_norm": 0.8895509839057922,
26
+ "learning_rate": 9.934881598487478e-06,
27
+ "loss": 0.9788,
28
+ "mean_token_accuracy": 0.7810790002346039,
29
+ "num_tokens": 1174174.0,
30
+ "step": 20
31
+ },
32
+ {
33
+ "entropy": 0.9163148060441018,
34
+ "epoch": 0.821917808219178,
35
+ "grad_norm": 0.802199125289917,
36
+ "learning_rate": 9.711957702320176e-06,
37
+ "loss": 0.9307,
38
+ "mean_token_accuracy": 0.7890266820788383,
39
+ "num_tokens": 1762094.0,
40
+ "step": 30
41
+ },
42
+ {
43
+ "entropy": 0.8868917273847681,
44
+ "epoch": 1.0821917808219177,
45
+ "grad_norm": 0.7817137837409973,
46
+ "learning_rate": 9.337587608588588e-06,
47
+ "loss": 0.8963,
48
+ "mean_token_accuracy": 0.7947045470538893,
49
+ "num_tokens": 2322415.0,
50
+ "step": 40
51
+ },
52
+ {
53
+ "entropy": 0.8300882771611213,
54
+ "epoch": 1.356164383561644,
55
+ "grad_norm": 0.6832650899887085,
56
+ "learning_rate": 8.823803880137993e-06,
57
+ "loss": 0.8464,
58
+ "mean_token_accuracy": 0.8045218542218209,
59
+ "num_tokens": 2910028.0,
60
+ "step": 50
61
+ },
62
+ {
63
+ "entropy": 0.8211456254124642,
64
+ "epoch": 1.6301369863013697,
65
+ "grad_norm": 0.6951064467430115,
66
+ "learning_rate": 8.18711994874345e-06,
67
+ "loss": 0.8352,
68
+ "mean_token_accuracy": 0.8072828114032745,
69
+ "num_tokens": 3495988.0,
70
+ "step": 60
71
+ },
72
+ {
73
+ "entropy": 0.8173969030380249,
74
+ "epoch": 1.904109589041096,
75
+ "grad_norm": 0.762572705745697,
76
+ "learning_rate": 7.447999359825263e-06,
77
+ "loss": 0.8315,
78
+ "mean_token_accuracy": 0.8067627891898155,
79
+ "num_tokens": 4083547.0,
80
+ "step": 70
81
+ }
82
+ ],
83
+ "logging_steps": 10,
84
+ "max_steps": 185,
85
+ "num_input_tokens_seen": 0,
86
+ "num_train_epochs": 5,
87
+ "save_steps": 500,
88
+ "stateful_callbacks": {
89
+ "TrainerControl": {
90
+ "args": {
91
+ "should_epoch_stop": false,
92
+ "should_evaluate": false,
93
+ "should_log": false,
94
+ "should_save": true,
95
+ "should_training_stop": false
96
+ },
97
+ "attributes": {}
98
+ }
99
+ },
100
+ "total_flos": 2.831815282786304e+17,
101
+ "train_batch_size": 4,
102
+ "trial_name": null,
103
+ "trial_params": null
104
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a6b59a32761800fbeb73ab328d8de71e0138c0135fe171c268fceede5ba4d82
3
  size 7569
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0c9473dc458d3adec2293b7fc009973fce3e502ccd4ef74529e6b16c887b33a
3
  size 7569
video_preprocessor_config.json CHANGED
@@ -6,7 +6,6 @@
6
  "do_center_crop": null,
7
  "do_convert_rgb": true,
8
  "do_normalize": true,
9
- "do_pad": null,
10
  "do_rescale": true,
11
  "do_resize": true,
12
  "do_sample_frames": true,
@@ -32,7 +31,6 @@
32
  "resample": 3,
33
  "rescale_factor": 0.00392156862745098,
34
  "return_metadata": false,
35
- "return_tensors": null,
36
  "size": {
37
  "longest_edge": 25165824,
38
  "shortest_edge": 4096
 
6
  "do_center_crop": null,
7
  "do_convert_rgb": true,
8
  "do_normalize": true,
 
9
  "do_rescale": true,
10
  "do_resize": true,
11
  "do_sample_frames": true,
 
31
  "resample": 3,
32
  "rescale_factor": 0.00392156862745098,
33
  "return_metadata": false,
 
34
  "size": {
35
  "longest_edge": 25165824,
36
  "shortest_edge": 4096