[ "F:\\project_python\\nlp_project\\.venv\\Scripts\\python.exe", "scripts/train_rich.py", "--train_file", "data\\rich_cmgui\\processed\\train_rich_teacher7000_natural_qwen8000.jsonl", "--valid_file", "data\\rich_cmgui\\processed\\valid_rich_teacher500_natural_qwen8000.jsonl", "--output_dir", "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage3_vision_adapter", "--vision_model", "models/siglip2-base-patch16-224", "--decoder_model", "models/mt5-large", "--image_size", "224", "--num_vertical_crops", "0", "--max_visual_tokens", "64", "--max_elements", "48", "--max_element_tokens", "16", "--max_context_tokens", "384", "--context_mode", "tokens_direct_encoder", "--context_text_format", "text_only", "--context_include_screen_text", "true", "--context_screen_text_items", "16", "--context_screen_text_dropout_rate", "0.15", "--max_target_tokens", "384", "--eval_max_new_tokens", "384", "--batch_size", "2", "--eval_batch_size", "1", "--grad_accum", "10", "--epochs", "1", "--scheduler_epochs", "1", "--weight_decay", "0.01", "--optimizer_name", "adafactor", "--warmup_ratio", "0.05", "--fp16", "false", "--amp_dtype", "fp32", "--generation_loss_chunk_size", "8", "--cuda_empty_cache_steps", "1", "--cuda_memory_fraction", "0.0", "--data_parallel", "true", "--save_every_steps", "100", "--save_checkpoints", "true", "--eval_every_steps", "0", "--target_schema", "summary_visible_zh", "--grad_clip_strategy", "global", "--max_grad_norm", "1.0", "--model_selection_metric", "grounded_quality_score", "--model_selection_mode", "max", "--early_stopping_patience", "2", "--early_stopping_min_delta", "0.001", "--max_train_samples", "0", "--max_valid_samples", "100", "--strict_data_checks", "true", "--num_beams", "1", "--generation_no_repeat_ngram_size", "3", "--generation_repetition_penalty", "1.1", "--generation_min_new_tokens", "0", "--generation_block_extra_ids", "true", "--generation_block_title_prefix", "true", "--generation_force_json_start", "false", "--context_summary_repair", "false", "--canonicalize_targets", "false", "--task_intent_context", "false", "--drop_bare_search_functions", "false", "--structured_function_mode", "heads", "--structured_function_threshold", "0.5", "--structured_search_threshold", "0.5", "--structured_max_functions", "8", "--structured_evidence_mode", "heads", "--structured_evidence_threshold", "0.5", "--structured_max_evidence", "8", "--structured_evidence_fallback_top1", "false", "--evidence_loss_weight", "0.2", "--ui_function_loss_weight", "0.05", "--search_function_loss_weight", "0.02", "--function_signal_to_decoder", "false", "--search_signal_to_decoder", "false", "--pooled_memory_scale", "0.02", "--decoder_memory_scale", "1.0", "--seed", "20260509", "--num_workers", "0", "--bottleneck_queries", "4", "--init_checkpoint", "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage2_layout_adapter\\checkpoint-best", "--model_variant", "late_fusion", "--native_context_forward", "false", "--disable_vision", "false", "--freeze_decoder", "true", "--freeze_vision", "true", "--unfreeze_vision_last_ratio", "0.0", "--direct_visual_tokens", "false", "--direct_element_tokens", "false", "--direct_context_passthrough", "true", "--include_pooled_memory", "true", "--activation_checkpointing", "true", "--decoder_gradient_checkpointing", "false", "--vision_gradient_checkpointing", "false", "--lr_new", "1e-05", "--lr_fusion", "0.0", "--lr_decoder", "0.0", "--visual_memory_scale", "0.1", "--element_memory_scale", "0.5" ]