File size: 2,661 Bytes
2f0e115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
{
  "train_file": "data\\rich_cmgui\\processed\\train_rich_teacher7000_natural_qwen8000.jsonl",
  "valid_file": "data\\rich_cmgui\\processed\\valid_rich_teacher500_natural_qwen8000.jsonl",
  "strict_data_checks": true,
  "train": {
    "rows": 7000,
    "missing_target": 0,
    "short_summary": 0,
    "missing_image_path": 0,
    "missing_image_file": 0,
    "missing_ocr_items": 0,
    "missing_ui_items": 0,
    "missing_ocr_rate": 0.0,
    "missing_ui_rate": 0.0,
    "ocr_items_mean": 51.78171428571429,
    "ocr_items_max": 120,
    "ui_items_mean": 50.60771428571429,
    "ui_items_max": 80,
    "summary_chars_mean": 50.273428571428575,
    "summary_chars_max": 90
  },
  "valid": {
    "rows": 100,
    "missing_target": 0,
    "short_summary": 0,
    "missing_image_path": 0,
    "missing_image_file": 0,
    "missing_ocr_items": 0,
    "missing_ui_items": 0,
    "missing_ocr_rate": 0.0,
    "missing_ui_rate": 0.0,
    "ocr_items_mean": 55.16,
    "ocr_items_max": 120,
    "ui_items_mean": 53.21,
    "ui_items_max": 80,
    "summary_chars_mean": 49.57,
    "summary_chars_max": 80
  },
  "tokenizer": "models/mt5-large",
  "max_target_tokens": 384,
  "eval_max_new_tokens": 384,
  "max_target_truncation_rate": 0.01,
  "train_token_lengths": {
    "target_tokens": {
      "count": 7000,
      "mean": 116.79314285714285,
      "p50": 113,
      "p90": 173,
      "p95": 190,
      "p99": 224,
      "max": 276,
      "configured_max": 384,
      "over_max": 0,
      "over_max_rate": 0.0,
      "at_or_over_max": 0,
      "at_or_over_max_rate": 0.0
    },
    "context_tokens": {
      "count": 7000,
      "mean": 185.6337142857143,
      "p50": 177,
      "p90": 241,
      "p95": 265,
      "p99": 313,
      "max": 617,
      "configured_max": 384,
      "over_max": 11,
      "over_max_rate": 0.0015714285714285715,
      "at_or_over_max": 11,
      "at_or_over_max_rate": 0.0015714285714285715
    }
  },
  "valid_token_lengths": {
    "target_tokens": {
      "count": 100,
      "mean": 121.31,
      "p50": 117,
      "p90": 177,
      "p95": 199,
      "p99": 214,
      "max": 216,
      "configured_max": 384,
      "over_max": 0,
      "over_max_rate": 0.0,
      "at_or_over_max": 0,
      "at_or_over_max_rate": 0.0
    },
    "context_tokens": {
      "count": 100,
      "mean": 189.42,
      "p50": 177,
      "p90": 249,
      "p95": 281,
      "p99": 317,
      "max": 325,
      "configured_max": 384,
      "over_max": 0,
      "over_max_rate": 0.0,
      "at_or_over_max": 0,
      "at_or_over_max_rate": 0.0
    }
  }
}