| { | |
| "train_file": "data\\rich_cmgui\\processed\\train_rich_teacher7000_natural_qwen8000.jsonl", | |
| "valid_file": "data\\rich_cmgui\\processed\\valid_rich_teacher500_natural_qwen8000.jsonl", | |
| "output_dir": "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage3_vision_adapter", | |
| "init_checkpoint": "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage2_layout_adapter\\checkpoint-best", | |
| "model_variant": "late_fusion", | |
| "vision_model": "models/siglip2-base-patch16-224", | |
| "decoder_model": "models/mt5-large", | |
| "image_size": 224, | |
| "num_vertical_crops": 0, | |
| "max_visual_tokens": 64, | |
| "max_elements": 48, | |
| "max_element_tokens": 16, | |
| "max_context_tokens": 384, | |
| "context_text_format": "text_only", | |
| "context_include_screen_text": true, | |
| "context_screen_text_items": 16, | |
| "context_screen_text_dropout_rate": 0.15, | |
| "context_mode": "tokens_direct_encoder", | |
| "max_target_tokens": 384, | |
| "eval_max_new_tokens": 384, | |
| "batch_size": 2, | |
| "eval_batch_size": 1, | |
| "grad_accum": 10, | |
| "epochs": 1, | |
| "scheduler_epochs": 1, | |
| "lr_new": 1e-05, | |
| "lr_fusion": 0.0, | |
| "lr_decoder": 0.0, | |
| "lr_ui_function_head": 0.0, | |
| "weight_decay": 0.01, | |
| "optimizer_name": "adafactor", | |
| "lr_scheduler_type": "linear", | |
| "warmup_ratio": 0.05, | |
| "fp16": false, | |
| "amp_dtype": "fp32", | |
| "generation_loss_chunk_size": 8, | |
| "activation_checkpointing": true, | |
| "cuda_empty_cache_steps": 1, | |
| "cuda_memory_fraction": 0.0, | |
| "decoder_gradient_checkpointing": false, | |
| "vision_gradient_checkpointing": false, | |
| "freeze_decoder": true, | |
| "freeze_vision": true, | |
| "unfreeze_vision_last_ratio": 0.0, | |
| "evidence_loss_weight": 0.2, | |
| "section_loss_weight": 0.1, | |
| "numeric_loss_weight": 0.1, | |
| "ui_function_loss_weight": 0.05, | |
| "search_function_loss_weight": 0.02, | |
| "search_function_pos_weight": 1.0, | |
| "save_every_steps": 100, | |
| "save_checkpoints": true, | |
| "eval_every_steps": 0, | |
| "model_selection_metric": "grounded_quality_score", | |
| "model_selection_mode": "max", | |
| "early_stopping_patience": 2, | |
| "early_stopping_min_delta": 0.001, | |
| "max_train_samples": 0, | |
| "max_valid_samples": 100, | |
| "num_beams": 1, | |
| "generation_no_repeat_ngram_size": 3, | |
| "generation_repetition_penalty": 1.1, | |
| "generation_min_new_tokens": 0, | |
| "generation_block_extra_ids": true, | |
| "generation_block_title_prefix": true, | |
| "generation_force_json_start": false, | |
| "context_summary_repair": false, | |
| "canonicalize_targets": false, | |
| "target_schema": "summary_visible_zh", | |
| "task_intent_context": false, | |
| "drop_bare_search_functions": false, | |
| "structured_function_mode": "heads", | |
| "structured_function_threshold": 0.5, | |
| "structured_search_threshold": 0.5, | |
| "structured_max_functions": 8, | |
| "structured_strict_search_candidates": false, | |
| "structured_evidence_mode": "heads", | |
| "structured_evidence_threshold": 0.5, | |
| "structured_max_evidence": 8, | |
| "structured_evidence_fallback_top1": false, | |
| "direct_visual_tokens": false, | |
| "direct_element_tokens": false, | |
| "direct_context_passthrough": true, | |
| "include_pooled_memory": true, | |
| "native_context_forward": false, | |
| "disable_vision": false, | |
| "init_resize_mismatched_non_decoder": false, | |
| "grad_clip_strategy": "global", | |
| "max_grad_norm": 1.0, | |
| "function_signal_to_decoder": false, | |
| "function_signal_scale": 1.0, | |
| "search_signal_to_decoder": false, | |
| "search_signal_scale": 1.0, | |
| "visual_memory_scale": 0.1, | |
| "element_memory_scale": 0.5, | |
| "pooled_memory_scale": 0.02, | |
| "decoder_memory_scale": 1.0, | |
| "data_parallel": true, | |
| "strict_data_checks": true, | |
| "max_target_truncation_rate": 0.01, | |
| "seed": 20260509, | |
| "num_workers": 0, | |
| "bottleneck_queries": 4 | |
| } |