{ "train_file": "data\\rich_cmgui\\processed\\train_rich_teacher7000_natural_qwen8000.jsonl", "valid_file": "data\\rich_cmgui\\processed\\valid_rich_teacher500_natural_qwen8000.jsonl", "output_dir": "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage3_vision_adapter", "init_checkpoint": "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage2_layout_adapter\\checkpoint-best", "model_variant": "late_fusion", "vision_model": "models/siglip2-base-patch16-224", "decoder_model": "models/mt5-large", "image_size": 224, "num_vertical_crops": 0, "max_visual_tokens": 64, "max_elements": 48, "max_element_tokens": 16, "max_context_tokens": 384, "context_text_format": "text_only", "context_include_screen_text": true, "context_screen_text_items": 16, "context_screen_text_dropout_rate": 0.15, "context_mode": "tokens_direct_encoder", "max_target_tokens": 384, "eval_max_new_tokens": 384, "batch_size": 2, "eval_batch_size": 1, "grad_accum": 10, "epochs": 1, "scheduler_epochs": 1, "lr_new": 1e-05, "lr_fusion": 0.0, "lr_decoder": 0.0, "lr_ui_function_head": 0.0, "weight_decay": 0.01, "optimizer_name": "adafactor", "lr_scheduler_type": "linear", "warmup_ratio": 0.05, "fp16": false, "amp_dtype": "fp32", "generation_loss_chunk_size": 8, "activation_checkpointing": true, "cuda_empty_cache_steps": 1, "cuda_memory_fraction": 0.0, "decoder_gradient_checkpointing": false, "vision_gradient_checkpointing": false, "freeze_decoder": true, "freeze_vision": true, "unfreeze_vision_last_ratio": 0.0, "evidence_loss_weight": 0.2, "section_loss_weight": 0.1, "numeric_loss_weight": 0.1, "ui_function_loss_weight": 0.05, "search_function_loss_weight": 0.02, "search_function_pos_weight": 1.0, "save_every_steps": 100, "save_checkpoints": true, "eval_every_steps": 0, "model_selection_metric": "grounded_quality_score", "model_selection_mode": "max", "early_stopping_patience": 2, "early_stopping_min_delta": 0.001, "max_train_samples": 0, "max_valid_samples": 100, "num_beams": 1, "generation_no_repeat_ngram_size": 3, "generation_repetition_penalty": 1.1, "generation_min_new_tokens": 0, "generation_block_extra_ids": true, "generation_block_title_prefix": true, "generation_force_json_start": false, "context_summary_repair": false, "canonicalize_targets": false, "target_schema": "summary_visible_zh", "task_intent_context": false, "drop_bare_search_functions": false, "structured_function_mode": "heads", "structured_function_threshold": 0.5, "structured_search_threshold": 0.5, "structured_max_functions": 8, "structured_strict_search_candidates": false, "structured_evidence_mode": "heads", "structured_evidence_threshold": 0.5, "structured_max_evidence": 8, "structured_evidence_fallback_top1": false, "direct_visual_tokens": false, "direct_element_tokens": false, "direct_context_passthrough": true, "include_pooled_memory": true, "native_context_forward": false, "disable_vision": false, "init_resize_mismatched_non_decoder": false, "grad_clip_strategy": "global", "max_grad_norm": 1.0, "function_signal_to_decoder": false, "function_signal_scale": 1.0, "search_signal_to_decoder": false, "search_signal_scale": 1.0, "visual_memory_scale": 0.1, "element_memory_scale": 0.5, "pooled_memory_scale": 0.02, "decoder_memory_scale": 1.0, "data_parallel": true, "strict_data_checks": true, "max_target_truncation_rate": 0.01, "seed": 20260509, "num_workers": 0, "bottleneck_queries": 4 }