Training in progress, epoch 1

Files changed (11) hide show

README.md CHANGED Viewed

@@ -35,9 +35,9 @@ This model was trained with SFT.
 ### Framework versions
 - TRL: 0.25.1
-- Transformers: 4.57.1
-- Pytorch: 2.8.0+cu126
-- Datasets: 4.0.0
 - Tokenizers: 0.22.1
 ## Citations

 ### Framework versions
 - TRL: 0.25.1
+- Transformers: 4.57.3
+- Pytorch: 2.9.1
+- Datasets: 4.4.1
 - Tokenizers: 0.22.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -1,9 +1,12 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -13,20 +16,23 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
   "lora_bias": false,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "qalora_group_size": 16,
   "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

 {
+  "alora_invocation_tokens": null,
   "alpha_pattern": {},
+  "arrow_config": null,
   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
   "bias": "none",
   "corda_config": null,
+  "ensure_weight_tying": false,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 32,
   "lora_bias": false,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "peft_version": "0.18.0",
   "qalora_group_size": 16,
   "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
+    "v_proj",
     "q_proj",
+    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c61e6c26341379c2e665c23fbce3359df5e996d418ef431e723615acb1eae63
-size 4374520

 version https://git-lfs.github.com/spec/v1
+oid sha256:740c85a1250aac945091b2cfb960cfcd7fd3003f1f4c6e4ca13bcd9337e543f3
+size 8749064

preprocessor_config.json ADDED Viewed

+{
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "disable_grouping": null,
+  "do_center_crop": null,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_pad": null,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2VLImageProcessorFast",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "input_data_format": null,
+  "max_pixels": 12845056,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "pad_size": null,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "return_tensors": null,
+  "size": {
+    "longest_edge": 12845056,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2
+}

runs/Dec01_04-29-02_132-145-133-63/events.out.tfevents.1764563546.132-145-133-63.4719.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:5819686e8311fb87d76a330e9f3cbd7cb0e04deb1ec5f7edfea1d7ea14edaf73
+size 12568

runs/Nov27_09-05-25_158-101-122-208/events.out.tfevents.1764234417.158-101-122-208.1760.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:934a3c1a698a294a75a7fdd6e047500c42e3d5721783295080e999c7b412e51e
+size 77720

runs/Nov28_05-39-54_129-213-22-251/events.out.tfevents.1764308482.129-213-22-251.1635.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb92b31906b59c0acd32f14c586f7164f067b2b5da4da7016fe51f881ecc820d
+size 20208

runs/Nov28_05-59-12_129-213-22-251/events.out.tfevents.1764309630.129-213-22-251.1635.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f6e595faf6b87acdd921963a4c3346a24399621c9ccd5e9fee4c12d1cc72afb
+size 36116

runs/Nov28_07-06-58_129-213-22-251/events.out.tfevents.1764313877.129-213-22-251.1635.2 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3699e4ad00e96f64f617f41c1c3f903e3bfea725a00546aec43c18e2514ccb8
+size 8331

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ecce47d18bf4423b0687b472437a5fa1c72cae786aabf42ba0600354efbcca9e
 size 6225

 version https://git-lfs.github.com/spec/v1
+oid sha256:1adcf89e2ce7be508f245e87af0fd5d93d0d2e7562c07049b800c8d5a6b1822e
 size 6225

video_preprocessor_config.json ADDED Viewed

+{
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "do_center_crop": null,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "do_sample_frames": false,
+  "fps": null,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "input_data_format": null,
+  "max_frames": 768,
+  "max_pixels": 12845056,
+  "merge_size": 2,
+  "min_frames": 4,
+  "min_pixels": 3136,
+  "num_frames": null,
+  "pad_size": null,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "return_metadata": false,
+  "size": {
+    "longest_edge": 12845056,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2,
+  "video_metadata": null,
+  "video_processor_type": "Qwen2VLVideoProcessor"
+}