File size: 3,739 Bytes
2f0e115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
{
  "train_file": "data\\rich_cmgui\\processed\\train_rich_teacher7000_natural_qwen8000.jsonl",
  "valid_file": "data\\rich_cmgui\\processed\\valid_rich_teacher500_natural_qwen8000.jsonl",
  "output_dir": "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage3_vision_adapter",
  "init_checkpoint": "F:\\project_python\\nlp_project\\runs\\rich_cmgui_20260512_titlefix_s1e2\\stage2_layout_adapter\\checkpoint-best",
  "model_variant": "late_fusion",
  "vision_model": "models/siglip2-base-patch16-224",
  "decoder_model": "models/mt5-large",
  "image_size": 224,
  "num_vertical_crops": 0,
  "max_visual_tokens": 64,
  "max_elements": 48,
  "max_element_tokens": 16,
  "max_context_tokens": 384,
  "context_text_format": "text_only",
  "context_include_screen_text": true,
  "context_screen_text_items": 16,
  "context_screen_text_dropout_rate": 0.15,
  "context_mode": "tokens_direct_encoder",
  "max_target_tokens": 384,
  "eval_max_new_tokens": 384,
  "batch_size": 2,
  "eval_batch_size": 1,
  "grad_accum": 10,
  "epochs": 1,
  "scheduler_epochs": 1,
  "lr_new": 1e-05,
  "lr_fusion": 0.0,
  "lr_decoder": 0.0,
  "lr_ui_function_head": 0.0,
  "weight_decay": 0.01,
  "optimizer_name": "adafactor",
  "lr_scheduler_type": "linear",
  "warmup_ratio": 0.05,
  "fp16": false,
  "amp_dtype": "fp32",
  "generation_loss_chunk_size": 8,
  "activation_checkpointing": true,
  "cuda_empty_cache_steps": 1,
  "cuda_memory_fraction": 0.0,
  "decoder_gradient_checkpointing": false,
  "vision_gradient_checkpointing": false,
  "freeze_decoder": true,
  "freeze_vision": true,
  "unfreeze_vision_last_ratio": 0.0,
  "evidence_loss_weight": 0.2,
  "section_loss_weight": 0.1,
  "numeric_loss_weight": 0.1,
  "ui_function_loss_weight": 0.05,
  "search_function_loss_weight": 0.02,
  "search_function_pos_weight": 1.0,
  "save_every_steps": 100,
  "save_checkpoints": true,
  "eval_every_steps": 0,
  "model_selection_metric": "grounded_quality_score",
  "model_selection_mode": "max",
  "early_stopping_patience": 2,
  "early_stopping_min_delta": 0.001,
  "max_train_samples": 0,
  "max_valid_samples": 100,
  "num_beams": 1,
  "generation_no_repeat_ngram_size": 3,
  "generation_repetition_penalty": 1.1,
  "generation_min_new_tokens": 0,
  "generation_block_extra_ids": true,
  "generation_block_title_prefix": true,
  "generation_force_json_start": false,
  "context_summary_repair": false,
  "canonicalize_targets": false,
  "target_schema": "summary_visible_zh",
  "task_intent_context": false,
  "drop_bare_search_functions": false,
  "structured_function_mode": "heads",
  "structured_function_threshold": 0.5,
  "structured_search_threshold": 0.5,
  "structured_max_functions": 8,
  "structured_strict_search_candidates": false,
  "structured_evidence_mode": "heads",
  "structured_evidence_threshold": 0.5,
  "structured_max_evidence": 8,
  "structured_evidence_fallback_top1": false,
  "direct_visual_tokens": false,
  "direct_element_tokens": false,
  "direct_context_passthrough": true,
  "include_pooled_memory": true,
  "native_context_forward": false,
  "disable_vision": false,
  "init_resize_mismatched_non_decoder": false,
  "grad_clip_strategy": "global",
  "max_grad_norm": 1.0,
  "function_signal_to_decoder": false,
  "function_signal_scale": 1.0,
  "search_signal_to_decoder": false,
  "search_signal_scale": 1.0,
  "visual_memory_scale": 0.1,
  "element_memory_scale": 0.5,
  "pooled_memory_scale": 0.02,
  "decoder_memory_scale": 1.0,
  "data_parallel": true,
  "strict_data_checks": true,
  "max_target_truncation_rate": 0.01,
  "seed": 20260509,
  "num_workers": 0,
  "bottleneck_queries": 4
}