File size: 4,085 Bytes
2f0e115 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 | [
"F:\\project_python\\nlp_project\\.venv\\Scripts\\python.exe",
"scripts/train_rich.py",
"--train_file",
"data\\rich_cmgui\\processed\\train_rich_teacher7000_natural_qwen8000.jsonl",
"--valid_file",
"data\\rich_cmgui\\processed\\valid_rich_teacher500_natural_qwen8000.jsonl",
"--output_dir",
"F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage3_vision_adapter",
"--vision_model",
"models/siglip2-base-patch16-224",
"--decoder_model",
"models/mt5-large",
"--image_size",
"224",
"--num_vertical_crops",
"0",
"--max_visual_tokens",
"64",
"--max_elements",
"48",
"--max_element_tokens",
"16",
"--max_context_tokens",
"384",
"--context_mode",
"tokens_direct_encoder",
"--context_text_format",
"text_only",
"--context_include_screen_text",
"true",
"--context_screen_text_items",
"16",
"--context_screen_text_dropout_rate",
"0.15",
"--max_target_tokens",
"384",
"--eval_max_new_tokens",
"384",
"--batch_size",
"2",
"--eval_batch_size",
"1",
"--grad_accum",
"10",
"--epochs",
"1",
"--scheduler_epochs",
"1",
"--weight_decay",
"0.01",
"--optimizer_name",
"adafactor",
"--warmup_ratio",
"0.05",
"--fp16",
"false",
"--amp_dtype",
"fp32",
"--generation_loss_chunk_size",
"8",
"--cuda_empty_cache_steps",
"1",
"--cuda_memory_fraction",
"0.0",
"--data_parallel",
"true",
"--save_every_steps",
"100",
"--save_checkpoints",
"true",
"--eval_every_steps",
"0",
"--target_schema",
"summary_visible_zh",
"--grad_clip_strategy",
"global",
"--max_grad_norm",
"1.0",
"--model_selection_metric",
"grounded_quality_score",
"--model_selection_mode",
"max",
"--early_stopping_patience",
"2",
"--early_stopping_min_delta",
"0.001",
"--max_train_samples",
"0",
"--max_valid_samples",
"100",
"--strict_data_checks",
"true",
"--num_beams",
"1",
"--generation_no_repeat_ngram_size",
"3",
"--generation_repetition_penalty",
"1.1",
"--generation_min_new_tokens",
"0",
"--generation_block_extra_ids",
"true",
"--generation_block_title_prefix",
"true",
"--generation_force_json_start",
"false",
"--context_summary_repair",
"false",
"--canonicalize_targets",
"false",
"--task_intent_context",
"false",
"--drop_bare_search_functions",
"false",
"--structured_function_mode",
"heads",
"--structured_function_threshold",
"0.5",
"--structured_search_threshold",
"0.5",
"--structured_max_functions",
"8",
"--structured_evidence_mode",
"heads",
"--structured_evidence_threshold",
"0.5",
"--structured_max_evidence",
"8",
"--structured_evidence_fallback_top1",
"false",
"--evidence_loss_weight",
"0.2",
"--ui_function_loss_weight",
"0.05",
"--search_function_loss_weight",
"0.02",
"--function_signal_to_decoder",
"false",
"--search_signal_to_decoder",
"false",
"--pooled_memory_scale",
"0.02",
"--decoder_memory_scale",
"1.0",
"--seed",
"20260509",
"--num_workers",
"0",
"--bottleneck_queries",
"4",
"--init_checkpoint",
"F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage2_layout_adapter\\checkpoint-best",
"--model_variant",
"late_fusion",
"--native_context_forward",
"false",
"--disable_vision",
"false",
"--freeze_decoder",
"true",
"--freeze_vision",
"true",
"--unfreeze_vision_last_ratio",
"0.0",
"--direct_visual_tokens",
"false",
"--direct_element_tokens",
"false",
"--direct_context_passthrough",
"true",
"--include_pooled_memory",
"true",
"--activation_checkpointing",
"true",
"--decoder_gradient_checkpointing",
"false",
"--vision_gradient_checkpointing",
"false",
"--lr_new",
"1e-05",
"--lr_fusion",
"0.0",
"--lr_decoder",
"0.0",
"--visual_memory_scale",
"0.1",
"--element_memory_scale",
"0.5"
] |