| [ | |
| "F:\\project_python\\nlp_project\\.venv\\Scripts\\python.exe", | |
| "scripts/train_rich.py", | |
| "--train_file", | |
| "data\\rich_cmgui\\processed\\train_rich_teacher7000_natural_qwen8000.jsonl", | |
| "--valid_file", | |
| "data\\rich_cmgui\\processed\\valid_rich_teacher500_natural_qwen8000.jsonl", | |
| "--output_dir", | |
| "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage3_vision_adapter", | |
| "--vision_model", | |
| "models/siglip2-base-patch16-224", | |
| "--decoder_model", | |
| "models/mt5-large", | |
| "--image_size", | |
| "224", | |
| "--num_vertical_crops", | |
| "0", | |
| "--max_visual_tokens", | |
| "64", | |
| "--max_elements", | |
| "48", | |
| "--max_element_tokens", | |
| "16", | |
| "--max_context_tokens", | |
| "384", | |
| "--context_mode", | |
| "tokens_direct_encoder", | |
| "--context_text_format", | |
| "text_only", | |
| "--context_include_screen_text", | |
| "true", | |
| "--context_screen_text_items", | |
| "16", | |
| "--context_screen_text_dropout_rate", | |
| "0.15", | |
| "--max_target_tokens", | |
| "384", | |
| "--eval_max_new_tokens", | |
| "384", | |
| "--batch_size", | |
| "2", | |
| "--eval_batch_size", | |
| "1", | |
| "--grad_accum", | |
| "10", | |
| "--epochs", | |
| "1", | |
| "--scheduler_epochs", | |
| "1", | |
| "--weight_decay", | |
| "0.01", | |
| "--optimizer_name", | |
| "adafactor", | |
| "--warmup_ratio", | |
| "0.05", | |
| "--fp16", | |
| "false", | |
| "--amp_dtype", | |
| "fp32", | |
| "--generation_loss_chunk_size", | |
| "8", | |
| "--cuda_empty_cache_steps", | |
| "1", | |
| "--cuda_memory_fraction", | |
| "0.0", | |
| "--data_parallel", | |
| "true", | |
| "--save_every_steps", | |
| "100", | |
| "--save_checkpoints", | |
| "true", | |
| "--eval_every_steps", | |
| "0", | |
| "--target_schema", | |
| "summary_visible_zh", | |
| "--grad_clip_strategy", | |
| "global", | |
| "--max_grad_norm", | |
| "1.0", | |
| "--model_selection_metric", | |
| "grounded_quality_score", | |
| "--model_selection_mode", | |
| "max", | |
| "--early_stopping_patience", | |
| "2", | |
| "--early_stopping_min_delta", | |
| "0.001", | |
| "--max_train_samples", | |
| "0", | |
| "--max_valid_samples", | |
| "100", | |
| "--strict_data_checks", | |
| "true", | |
| "--num_beams", | |
| "1", | |
| "--generation_no_repeat_ngram_size", | |
| "3", | |
| "--generation_repetition_penalty", | |
| "1.1", | |
| "--generation_min_new_tokens", | |
| "0", | |
| "--generation_block_extra_ids", | |
| "true", | |
| "--generation_block_title_prefix", | |
| "true", | |
| "--generation_force_json_start", | |
| "false", | |
| "--context_summary_repair", | |
| "false", | |
| "--canonicalize_targets", | |
| "false", | |
| "--task_intent_context", | |
| "false", | |
| "--drop_bare_search_functions", | |
| "false", | |
| "--structured_function_mode", | |
| "heads", | |
| "--structured_function_threshold", | |
| "0.5", | |
| "--structured_search_threshold", | |
| "0.5", | |
| "--structured_max_functions", | |
| "8", | |
| "--structured_evidence_mode", | |
| "heads", | |
| "--structured_evidence_threshold", | |
| "0.5", | |
| "--structured_max_evidence", | |
| "8", | |
| "--structured_evidence_fallback_top1", | |
| "false", | |
| "--evidence_loss_weight", | |
| "0.2", | |
| "--ui_function_loss_weight", | |
| "0.05", | |
| "--search_function_loss_weight", | |
| "0.02", | |
| "--function_signal_to_decoder", | |
| "false", | |
| "--search_signal_to_decoder", | |
| "false", | |
| "--pooled_memory_scale", | |
| "0.02", | |
| "--decoder_memory_scale", | |
| "1.0", | |
| "--seed", | |
| "20260509", | |
| "--num_workers", | |
| "0", | |
| "--bottleneck_queries", | |
| "4", | |
| "--init_checkpoint", | |
| "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage2_layout_adapter\\checkpoint-best", | |
| "--model_variant", | |
| "late_fusion", | |
| "--native_context_forward", | |
| "false", | |
| "--disable_vision", | |
| "false", | |
| "--freeze_decoder", | |
| "true", | |
| "--freeze_vision", | |
| "true", | |
| "--unfreeze_vision_last_ratio", | |
| "0.0", | |
| "--direct_visual_tokens", | |
| "false", | |
| "--direct_element_tokens", | |
| "false", | |
| "--direct_context_passthrough", | |
| "true", | |
| "--include_pooled_memory", | |
| "true", | |
| "--activation_checkpointing", | |
| "true", | |
| "--decoder_gradient_checkpointing", | |
| "false", | |
| "--vision_gradient_checkpointing", | |
| "false", | |
| "--lr_new", | |
| "1e-05", | |
| "--lr_fusion", | |
| "0.0", | |
| "--lr_decoder", | |
| "0.0", | |
| "--visual_memory_scale", | |
| "0.1", | |
| "--element_memory_scale", | |
| "0.5" | |
| ] |