Add files using upload-large-folder tool
Browse files
vqinsight-comp/config.json
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/mnt/bn/zxyllm/Video-R1/src/r1-v/log/video-r1-qinsight_aigcdata_5epoch_comparison_0528/checkpoint-1000",
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2_5_VLForConditionalGeneration"
|
| 5 |
],
|
|
@@ -31,17 +30,33 @@
|
|
| 31 |
"sliding_window": 32768,
|
| 32 |
"tie_word_embeddings": false,
|
| 33 |
"torch_dtype": "bfloat16",
|
| 34 |
-
"transformers_version": "4.
|
| 35 |
"use_cache": false,
|
| 36 |
"use_sliding_window": false,
|
| 37 |
"video_token_id": 151656,
|
| 38 |
"vision_config": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
"hidden_size": 1280,
|
|
|
|
| 40 |
"in_chans": 3,
|
|
|
|
| 41 |
"model_type": "qwen2_5_vl",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
"spatial_patch_size": 14,
|
|
|
|
| 43 |
"tokens_per_second": 2,
|
| 44 |
-
"torch_dtype": "float32"
|
|
|
|
| 45 |
},
|
| 46 |
"vision_end_token_id": 151653,
|
| 47 |
"vision_start_token_id": 151652,
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"Qwen2_5_VLForConditionalGeneration"
|
| 4 |
],
|
|
|
|
| 30 |
"sliding_window": 32768,
|
| 31 |
"tie_word_embeddings": false,
|
| 32 |
"torch_dtype": "bfloat16",
|
| 33 |
+
"transformers_version": "4.51.3",
|
| 34 |
"use_cache": false,
|
| 35 |
"use_sliding_window": false,
|
| 36 |
"video_token_id": 151656,
|
| 37 |
"vision_config": {
|
| 38 |
+
"depth": 32,
|
| 39 |
+
"fullatt_block_indexes": [
|
| 40 |
+
7,
|
| 41 |
+
15,
|
| 42 |
+
23,
|
| 43 |
+
31
|
| 44 |
+
],
|
| 45 |
+
"hidden_act": "silu",
|
| 46 |
"hidden_size": 1280,
|
| 47 |
+
"in_channels": 3,
|
| 48 |
"in_chans": 3,
|
| 49 |
+
"intermediate_size": 3420,
|
| 50 |
"model_type": "qwen2_5_vl",
|
| 51 |
+
"num_heads": 16,
|
| 52 |
+
"out_hidden_size": 3584,
|
| 53 |
+
"patch_size": 14,
|
| 54 |
+
"spatial_merge_size": 2,
|
| 55 |
"spatial_patch_size": 14,
|
| 56 |
+
"temporal_patch_size": 2,
|
| 57 |
"tokens_per_second": 2,
|
| 58 |
+
"torch_dtype": "float32",
|
| 59 |
+
"window_size": 112
|
| 60 |
},
|
| 61 |
"vision_end_token_id": 151653,
|
| 62 |
"vision_start_token_id": 151652,
|
vqinsight-comp/preprocessor_config.json
CHANGED
|
@@ -8,7 +8,7 @@
|
|
| 8 |
0.4578275,
|
| 9 |
0.40821073
|
| 10 |
],
|
| 11 |
-
"image_processor_type": "
|
| 12 |
"image_std": [
|
| 13 |
0.26862954,
|
| 14 |
0.26130258,
|
|
@@ -22,7 +22,7 @@
|
|
| 22 |
"resample": 3,
|
| 23 |
"rescale_factor": 0.00392156862745098,
|
| 24 |
"size": {
|
| 25 |
-
"longest_edge":
|
| 26 |
"shortest_edge": 3136
|
| 27 |
},
|
| 28 |
"temporal_patch_size": 2
|
|
|
|
| 8 |
0.4578275,
|
| 9 |
0.40821073
|
| 10 |
],
|
| 11 |
+
"image_processor_type": "Qwen2VLImageProcessor",
|
| 12 |
"image_std": [
|
| 13 |
0.26862954,
|
| 14 |
0.26130258,
|
|
|
|
| 22 |
"resample": 3,
|
| 23 |
"rescale_factor": 0.00392156862745098,
|
| 24 |
"size": {
|
| 25 |
+
"longest_edge": 12845056,
|
| 26 |
"shortest_edge": 3136
|
| 27 |
},
|
| 28 |
"temporal_patch_size": 2
|