VEFX-Reward committed on
Commit
a15a8db
·
verified ·
1 Parent(s): 384aaec

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VEFX-Reward Model
2
+
3
+ A reward model for video editing quality, based on Qwen3-VL (fine-tuned from Qwen3-VL-4B-Instruct).
4
+
5
+ ## Dimensions
6
+ - **IF**: Instruction Following (1-4)
7
+ - **RQ**: Render Quality (1-4)
8
+ - **EE**: Edit Exclusivity (1-4)
9
+
10
+ ## Usage
11
+
12
+ See [VEFX-Leaderboard](https://github.com/YOUR_ORG/VEFX-Leaderboard) for usage.
chat_template.jinja ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {%- if messages[0].content is string %}
5
+ {{- messages[0].content }}
6
+ {%- else %}
7
+ {%- for content in messages[0].content %}
8
+ {%- if 'text' in content %}
9
+ {{- content.text }}
10
+ {%- endif %}
11
+ {%- endfor %}
12
+ {%- endif %}
13
+ {{- '\n\n' }}
14
+ {%- endif %}
15
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
16
+ {%- for tool in tools %}
17
+ {{- "\n" }}
18
+ {{- tool | tojson }}
19
+ {%- endfor %}
20
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
21
+ {%- else %}
22
+ {%- if messages[0].role == 'system' %}
23
+ {{- '<|im_start|>system\n' }}
24
+ {%- if messages[0].content is string %}
25
+ {{- messages[0].content }}
26
+ {%- else %}
27
+ {%- for content in messages[0].content %}
28
+ {%- if 'text' in content %}
29
+ {{- content.text }}
30
+ {%- endif %}
31
+ {%- endfor %}
32
+ {%- endif %}
33
+ {{- '<|im_end|>\n' }}
34
+ {%- endif %}
35
+ {%- endif %}
36
+ {%- set image_count = namespace(value=0) %}
37
+ {%- set video_count = namespace(value=0) %}
38
+ {%- for message in messages %}
39
+ {%- if message.role == "user" %}
40
+ {{- '<|im_start|>' + message.role + '\n' }}
41
+ {%- if message.content is string %}
42
+ {{- message.content }}
43
+ {%- else %}
44
+ {%- for content in message.content %}
45
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
46
+ {%- set image_count.value = image_count.value + 1 %}
47
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
48
+ <|vision_start|><|image_pad|><|vision_end|>
49
+ {%- elif content.type == 'video' or 'video' in content %}
50
+ {%- set video_count.value = video_count.value + 1 %}
51
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
52
+ <|vision_start|><|video_pad|><|vision_end|>
53
+ {%- elif 'text' in content %}
54
+ {{- content.text }}
55
+ {%- endif %}
56
+ {%- endfor %}
57
+ {%- endif %}
58
+ {{- '<|im_end|>\n' }}
59
+ {%- elif message.role == "assistant" %}
60
+ {{- '<|im_start|>' + message.role + '\n' }}
61
+ {%- if message.content is string %}
62
+ {{- message.content }}
63
+ {%- else %}
64
+ {%- for content_item in message.content %}
65
+ {%- if 'text' in content_item %}
66
+ {{- content_item.text }}
67
+ {%- endif %}
68
+ {%- endfor %}
69
+ {%- endif %}
70
+ {%- if message.tool_calls %}
71
+ {%- for tool_call in message.tool_calls %}
72
+ {%- if (loop.first and message.content) or (not loop.first) %}
73
+ {{- '\n' }}
74
+ {%- endif %}
75
+ {%- if tool_call.function %}
76
+ {%- set tool_call = tool_call.function %}
77
+ {%- endif %}
78
+ {{- '<tool_call>\n{"name": "' }}
79
+ {{- tool_call.name }}
80
+ {{- '", "arguments": ' }}
81
+ {%- if tool_call.arguments is string %}
82
+ {{- tool_call.arguments }}
83
+ {%- else %}
84
+ {{- tool_call.arguments | tojson }}
85
+ {%- endif %}
86
+ {{- '}\n</tool_call>' }}
87
+ {%- endfor %}
88
+ {%- endif %}
89
+ {{- '<|im_end|>\n' }}
90
+ {%- elif message.role == "tool" %}
91
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
92
+ {{- '<|im_start|>user' }}
93
+ {%- endif %}
94
+ {{- '\n<tool_response>\n' }}
95
+ {%- if message.content is string %}
96
+ {{- message.content }}
97
+ {%- else %}
98
+ {%- for content in message.content %}
99
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
100
+ {%- set image_count.value = image_count.value + 1 %}
101
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
102
+ <|vision_start|><|image_pad|><|vision_end|>
103
+ {%- elif content.type == 'video' or 'video' in content %}
104
+ {%- set video_count.value = video_count.value + 1 %}
105
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
106
+ <|vision_start|><|video_pad|><|vision_end|>
107
+ {%- elif 'text' in content %}
108
+ {{- content.text }}
109
+ {%- endif %}
110
+ {%- endfor %}
111
+ {%- endif %}
112
+ {{- '\n</tool_response>' }}
113
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
114
+ {{- '<|im_end|>\n' }}
115
+ {%- endif %}
116
+ {%- endif %}
117
+ {%- endfor %}
118
+ {%- if add_generation_prompt %}
119
+ {{- '<|im_start|>assistant\n' }}
120
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3VLRewardModelBT"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "image_token_id": 151655,
7
+ "model_type": "qwen3_vl",
8
+ "text_config": {
9
+ "attention_bias": false,
10
+ "attention_dropout": 0.0,
11
+ "bos_token_id": 151643,
12
+ "dtype": "bfloat16",
13
+ "eos_token_id": 151645,
14
+ "head_dim": 128,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 2560,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 9728,
19
+ "max_position_embeddings": 262144,
20
+ "model_type": "qwen3_vl_text",
21
+ "num_attention_heads": 32,
22
+ "num_hidden_layers": 36,
23
+ "num_key_value_heads": 8,
24
+ "pad_token_id": null,
25
+ "rms_norm_eps": 1e-06,
26
+ "rope_parameters": {
27
+ "mrope_interleaved": true,
28
+ "mrope_section": [
29
+ 24,
30
+ 20,
31
+ 20
32
+ ],
33
+ "rope_theta": 5000000,
34
+ "rope_type": "default"
35
+ },
36
+ "tie_word_embeddings": true,
37
+ "use_cache": true,
38
+ "vocab_size": 151675,
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "default"
47
+ }
48
+ },
49
+ "tie_word_embeddings": true,
50
+ "transformers_version": "5.4.0",
51
+ "use_cache": true,
52
+ "video_token_id": 151656,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 5,
56
+ 11,
57
+ 17
58
+ ],
59
+ "depth": 24,
60
+ "dtype": "bfloat16",
61
+ "hidden_act": "gelu_pytorch_tanh",
62
+ "hidden_size": 1024,
63
+ "in_channels": 3,
64
+ "initializer_range": 0.02,
65
+ "intermediate_size": 4096,
66
+ "model_type": "qwen3_vl",
67
+ "num_heads": 16,
68
+ "num_position_embeddings": 2304,
69
+ "out_hidden_size": 2560,
70
+ "patch_size": 16,
71
+ "spatial_merge_size": 2,
72
+ "temporal_patch_size": 2
73
+ },
74
+ "vision_end_token_id": 151653,
75
+ "vision_start_token_id": 151652
76
+ }
generation_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "num_classes": 4,
9
+ "output_dim": 3,
10
+ "pad_token_id": 151643,
11
+ "repetition_penalty": 1.0,
12
+ "reward_token": "special",
13
+ "special_token_ids": [
14
+ 151669,
15
+ 151670,
16
+ 151671,
17
+ 151672,
18
+ 151673,
19
+ 151674
20
+ ],
21
+ "temperature": 0.7,
22
+ "top_k": 20,
23
+ "top_p": 0.8,
24
+ "transformers_version": "5.4.0",
25
+ "use_cache": true,
26
+ "use_ordinal": true
27
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c0d03f770f0a73631206821922213de75413bb517f7d7a2fd9ab1f2c38f59d
3
+ size 8874398544
model_config.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_config": {
3
+ "max_frame_pixels": 399360,
4
+ "num_frames": null,
5
+ "fps": 4.0,
6
+ "p_shuffle_frames": 0.0,
7
+ "p_color_jitter": 0.0,
8
+ "eval_dim": [
9
+ "IF",
10
+ "RQ",
11
+ "EE"
12
+ ],
13
+ "prompt_template_type": "editreward_v2_special",
14
+ "add_noise": false,
15
+ "sample_type": "uniform",
16
+ "use_tied_data": true,
17
+ "pointwise": true
18
+ },
19
+ "training_args": {
20
+ "overwrite_output_dir": false,
21
+ "do_train": false,
22
+ "do_eval": true,
23
+ "do_predict": false,
24
+ "eval_strategy": "steps",
25
+ "prediction_loss_only": false,
26
+ "per_device_train_batch_size": 1,
27
+ "per_device_eval_batch_size": 1,
28
+ "per_gpu_train_batch_size": null,
29
+ "per_gpu_eval_batch_size": null,
30
+ "gradient_accumulation_steps": 4,
31
+ "eval_accumulation_steps": null,
32
+ "eval_delay": 0,
33
+ "torch_empty_cache_steps": null,
34
+ "learning_rate": 1e-05,
35
+ "weight_decay": 0.0,
36
+ "adam_beta1": 0.9,
37
+ "adam_beta2": 0.999,
38
+ "adam_epsilon": 1e-08,
39
+ "max_grad_norm": 1.0,
40
+ "num_train_epochs": 30.0,
41
+ "max_steps": -1,
42
+ "lr_scheduler_type": "cosine",
43
+ "lr_scheduler_kwargs": null,
44
+ "warmup_ratio": 0.0172,
45
+ "warmup_steps": 0,
46
+ "log_level": "passive",
47
+ "log_level_replica": "warning",
48
+ "log_on_each_node": true,
49
+ "logging_dir": "/mnt/novita2/xiangbo/Folder/Research/2026/VideoEditedReward/VideoEditReward/VideoAlign/logs/v4/ord_4B_lora_2stage_promptv2_res399k/runs/Mar19_16-20-11_host-10-240-99-115",
50
+ "logging_strategy": "steps",
51
+ "logging_first_step": false,
52
+ "logging_steps": 5,
53
+ "logging_nan_inf_filter": true,
54
+ "save_strategy": "steps",
55
+ "save_steps": 50,
56
+ "save_total_limit": 4,
57
+ "save_safetensors": true,
58
+ "save_on_each_node": false,
59
+ "save_only_model": false,
60
+ "restore_callback_states_from_checkpoint": false,
61
+ "no_cuda": false,
62
+ "use_cpu": false,
63
+ "use_mps_device": false,
64
+ "seed": 42,
65
+ "data_seed": null,
66
+ "jit_mode_eval": false,
67
+ "bf16": true,
68
+ "fp16": false,
69
+ "fp16_opt_level": "O1",
70
+ "half_precision_backend": "auto",
71
+ "bf16_full_eval": false,
72
+ "fp16_full_eval": false,
73
+ "tf32": null,
74
+ "ddp_backend": null,
75
+ "tpu_num_cores": null,
76
+ "tpu_metrics_debug": false,
77
+ "debug": [],
78
+ "dataloader_drop_last": false,
79
+ "eval_steps": 50,
80
+ "dataloader_num_workers": 6,
81
+ "dataloader_prefetch_factor": null,
82
+ "past_index": -1,
83
+ "run_name": null,
84
+ "disable_tqdm": false,
85
+ "remove_unused_columns": false,
86
+ "label_names": null,
87
+ "load_best_model_at_end": false,
88
+ "metric_for_best_model": "eval_loss",
89
+ "greater_is_better": false,
90
+ "ignore_data_skip": false,
91
+ "fsdp": [],
92
+ "fsdp_min_num_params": 0,
93
+ "fsdp_config": {
94
+ "min_num_params": 0,
95
+ "xla": false,
96
+ "xla_fsdp_v2": false,
97
+ "xla_fsdp_grad_ckpt": false
98
+ },
99
+ "fsdp_transformer_layer_cls_to_wrap": null,
100
+ "accelerator_config": {
101
+ "split_batches": false,
102
+ "dispatch_batches": null,
103
+ "even_batches": true,
104
+ "use_seedable_sampler": true,
105
+ "non_blocking": false,
106
+ "gradient_accumulation_kwargs": null,
107
+ "use_configured_state": false
108
+ },
109
+ "parallelism_config": null,
110
+ "deepspeed": "ds_config/zero0.json",
111
+ "label_smoothing_factor": 0.0,
112
+ "optim": "adamw_torch",
113
+ "optim_args": null,
114
+ "adafactor": false,
115
+ "group_by_length": false,
116
+ "length_column_name": "length",
117
+ "report_to": [
118
+ "tensorboard"
119
+ ],
120
+ "project": "huggingface",
121
+ "trackio_space_id": "trackio",
122
+ "ddp_find_unused_parameters": null,
123
+ "ddp_bucket_cap_mb": null,
124
+ "ddp_broadcast_buffers": null,
125
+ "dataloader_pin_memory": true,
126
+ "dataloader_persistent_workers": false,
127
+ "skip_memory_metrics": true,
128
+ "use_legacy_prediction_loop": false,
129
+ "push_to_hub": false,
130
+ "resume_from_checkpoint": null,
131
+ "hub_model_id": null,
132
+ "hub_strategy": "every_save",
133
+ "hub_token": null,
134
+ "hub_private_repo": null,
135
+ "hub_always_push": false,
136
+ "hub_revision": null,
137
+ "gradient_checkpointing": true,
138
+ "gradient_checkpointing_kwargs": null,
139
+ "include_inputs_for_metrics": false,
140
+ "include_for_metrics": [],
141
+ "eval_do_concat_batches": true,
142
+ "fp16_backend": "auto",
143
+ "push_to_hub_model_id": null,
144
+ "push_to_hub_organization": null,
145
+ "push_to_hub_token": null,
146
+ "mp_parameters": "",
147
+ "auto_find_batch_size": false,
148
+ "full_determinism": false,
149
+ "torchdynamo": null,
150
+ "ray_scope": "last",
151
+ "ddp_timeout": 1800,
152
+ "torch_compile": false,
153
+ "torch_compile_backend": null,
154
+ "torch_compile_mode": null,
155
+ "include_tokens_per_second": false,
156
+ "include_num_input_tokens_seen": "no",
157
+ "neftune_noise_alpha": null,
158
+ "optim_target_modules": null,
159
+ "batch_eval_metrics": false,
160
+ "eval_on_start": false,
161
+ "use_liger_kernel": false,
162
+ "liger_kernel_config": null,
163
+ "eval_use_gather_object": false,
164
+ "average_tokens_across_devices": true,
165
+ "model_init_kwargs": null,
166
+ "chat_template_path": null,
167
+ "disable_dropout": true,
168
+ "dataset_num_proc": null,
169
+ "eos_token": null,
170
+ "pad_token": null,
171
+ "max_length": 32768,
172
+ "pad_to_multiple_of": null,
173
+ "center_rewards_coefficient": null,
174
+ "activation_offloading": false,
175
+ "vision_lr": 2e-06,
176
+ "merger_lr": 1e-05,
177
+ "special_token_lr": 5e-05,
178
+ "conduct_eval": true,
179
+ "load_from_pretrained": null,
180
+ "load_from_pretrained_step": null,
181
+ "logging_epochs": null,
182
+ "eval_epochs": null,
183
+ "save_epochs": null,
184
+ "save_full_model": false,
185
+ "two_stage_align": true,
186
+ "align_lr": 0.0002,
187
+ "align_epochs": 1.0,
188
+ "align_init_embedding": "0.5"
189
+ },
190
+ "model_config": {
191
+ "model_name_or_path": "Qwen/Qwen3-VL-4B-Instruct",
192
+ "model_revision": "main",
193
+ "output_dim": 3,
194
+ "use_ordinal": true,
195
+ "num_classes": 4,
196
+ "use_special_tokens": true,
197
+ "freeze_vision_tower": true,
198
+ "freeze_llm": false,
199
+ "tune_merger": true,
200
+ "torch_dtype": "bfloat16",
201
+ "trust_remote_code": false,
202
+ "attn_implementation": null,
203
+ "load_in_8bit": false,
204
+ "load_in_4bit": false,
205
+ "bnb_4bit_quant_type": "nf4",
206
+ "use_bnb_nested_quant": false,
207
+ "reward_token": "special",
208
+ "loss_type": "ord"
209
+ },
210
+ "peft_lora_config": {
211
+ "lora_enable": true,
212
+ "vision_lora": false,
213
+ "vision_lora_only": false,
214
+ "lora_r": 16,
215
+ "lora_alpha": 32,
216
+ "lora_dropout": 0.05,
217
+ "lora_target_modules": null,
218
+ "lora_namespan_exclude": [
219
+ "visual"
220
+ ],
221
+ "lora_modules_to_save": null,
222
+ "lora_task_type": "CAUSAL_LM",
223
+ "use_rslora": false,
224
+ "num_lora_modules": -1
225
+ }
226
+ }
processor_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "do_convert_rgb": true,
4
+ "do_normalize": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Qwen2VLImageProcessor",
13
+ "image_std": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "merge_size": 2,
19
+ "patch_size": 16,
20
+ "resample": 3,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "longest_edge": 16777216,
24
+ "shortest_edge": 65536
25
+ },
26
+ "temporal_patch_size": 2
27
+ },
28
+ "processor_class": "Qwen3VLProcessor",
29
+ "video_processor": {
30
+ "do_convert_rgb": true,
31
+ "do_normalize": true,
32
+ "do_rescale": true,
33
+ "do_resize": true,
34
+ "do_sample_frames": true,
35
+ "fps": 2,
36
+ "image_mean": [
37
+ 0.5,
38
+ 0.5,
39
+ 0.5
40
+ ],
41
+ "image_std": [
42
+ 0.5,
43
+ 0.5,
44
+ 0.5
45
+ ],
46
+ "max_frames": 768,
47
+ "merge_size": 2,
48
+ "min_frames": 4,
49
+ "patch_size": 16,
50
+ "resample": 3,
51
+ "rescale_factor": 0.00392156862745098,
52
+ "return_metadata": false,
53
+ "size": {
54
+ "longest_edge": 25165824,
55
+ "shortest_edge": 4096
56
+ },
57
+ "temporal_patch_size": 2,
58
+ "video_processor_type": "Qwen3VLVideoProcessor"
59
+ }
60
+ }
stage1/adapter_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "/mnt/novita2/xiangbo/.cache/huggingface/hub/models--Qwen--Qwen3-VL-4B-Instruct/snapshots/ebb281ec70b05090aa6165b016eac8ec08e71b17",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "v_proj",
33
+ "q_proj",
34
+ "down_proj",
35
+ "o_proj",
36
+ "rm_head",
37
+ "up_proj",
38
+ "lm_head",
39
+ "embed_tokens",
40
+ "k_proj",
41
+ "gate_proj"
42
+ ],
43
+ "target_parameters": null,
44
+ "task_type": "CAUSAL_LM",
45
+ "trainable_token_indices": null,
46
+ "use_dora": false,
47
+ "use_qalora": false,
48
+ "use_rslora": false
49
+ }
stage1/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ca4266366a7e98f8a1b72092b6f8100b883944070c1cb0ccd2e41d52b281a8d
3
+ size 1629243584
stage1/non_lora_state_dict.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a617579fefc4742e6e71fe1c984ddcba59fa97a3ab2a8724f6d44c882e354466
3
+ size 776592992
stage2/adapter_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "/mnt/novita2/xiangbo/.cache/huggingface/hub/models--Qwen--Qwen3-VL-4B-Instruct/snapshots/ebb281ec70b05090aa6165b016eac8ec08e71b17",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "v_proj",
33
+ "q_proj",
34
+ "down_proj",
35
+ "o_proj",
36
+ "rm_head",
37
+ "up_proj",
38
+ "lm_head",
39
+ "embed_tokens",
40
+ "k_proj",
41
+ "gate_proj"
42
+ ],
43
+ "target_parameters": null,
44
+ "task_type": "CAUSAL_LM",
45
+ "trainable_token_indices": null,
46
+ "use_dora": false,
47
+ "use_qalora": false,
48
+ "use_rslora": false
49
+ }
stage2/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a10bb1a1a429e7dbdf6c0b4b44397cec04e7cd9d0f0833cde3e89729b6c6f09
3
+ size 1629243584
stage2/non_lora_state_dict.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9daff7dd904a57927c51bd0322d62b4c183fcd42268c752f35512e12e607f73a
3
+ size 831138256
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14b00451fa975cf85805f0a7a15dee0ae5049d5f0b50046f432dd6d9a95b095a
3
+ size 11423790
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "is_local": true,
9
+ "model_max_length": 262144,
10
+ "pad_token": "<|endoftext|>",
11
+ "padding_side": "right",
12
+ "processor_class": "Qwen3VLProcessor",
13
+ "split_special_tokens": false,
14
+ "tokenizer_class": "Qwen2Tokenizer",
15
+ "unk_token": null
16
+ }
vefx_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dim": 3,
3
+ "use_ordinal": true,
4
+ "num_classes": 4,
5
+ "reward_token": "special",
6
+ "fps": 4.0,
7
+ "max_frame_pixels": 399360,
8
+ "eval_dim": [
9
+ "IF",
10
+ "RQ",
11
+ "EE"
12
+ ],
13
+ "prompt_template_type": "editreward_v2_special"
14
+ }