Likithp commited on
Commit
912c422
·
verified ·
1 Parent(s): a0cf72b

add training_config.json

Browse files
Files changed (1) hide show
  1. training_config.json +195 -0
training_config.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "fixed",
3
+ "seed": 0,
4
+ "hf_repo": "Likithp/v10_fixed_s0",
5
+ "base_model": "Qwen/Qwen2.5-0.5B",
6
+ "dataset": "data/cs7_fixed_v3",
7
+ "dataset_version": "cs7_v3",
8
+ "trained_at": "2026-06-05T19:05:24.286663+00:00",
9
+ "optimizer": "AdamW",
10
+ "lr": 3e-05,
11
+ "weight_decay": 0.0,
12
+ "batch_size": 8,
13
+ "grad_accum": 8,
14
+ "effective_batch": 64,
15
+ "epochs": 5,
16
+ "warmup_steps": 200,
17
+ "grad_clip": 0.5,
18
+ "lr_schedule": "cosine",
19
+ "checkpoint_criterion": "val_em",
20
+ "max_seq_len": 256,
21
+ "dtype": "torch.bfloat16",
22
+ "eval_method": "teacher_forcing_argmax",
23
+ "alias_groups": {
24
+ "T1": [
25
+ "act",
26
+ "cst",
27
+ "emp",
28
+ "inv",
29
+ "ord",
30
+ "spl",
31
+ "txn"
32
+ ],
33
+ "T2": [
34
+ "brc",
35
+ "ctg",
36
+ "dpt",
37
+ "empl",
38
+ "ordr",
39
+ "prd",
40
+ "prj",
41
+ "rgn",
42
+ "shp",
43
+ "tsk",
44
+ "whs"
45
+ ]
46
+ },
47
+ "log_every_steps": 50,
48
+ "eval_n": 500,
49
+ "dry_run": false,
50
+ "best_epoch": 5,
51
+ "best_train_loss": null,
52
+ "val_exact_match": 1.0,
53
+ "val_outer_alias_acc": 1.0,
54
+ "val_inner_alias_acc": 1.0,
55
+ "val_t1_inner_alias_acc": 1.0,
56
+ "val_t2_inner_alias_acc": 1.0,
57
+ "val_t1_t2_gap_pp": 0.0,
58
+ "val_inner_by_alias": {
59
+ "act": {
60
+ "correct": 64,
61
+ "total": 64,
62
+ "pct": 100.0,
63
+ "token_group": "T1"
64
+ },
65
+ "brc": {
66
+ "correct": 50,
67
+ "total": 50,
68
+ "pct": 100.0,
69
+ "token_group": "T2"
70
+ },
71
+ "cst": {
72
+ "correct": 45,
73
+ "total": 45,
74
+ "pct": 100.0,
75
+ "token_group": "T1"
76
+ },
77
+ "ctg": {
78
+ "correct": 61,
79
+ "total": 61,
80
+ "pct": 100.0,
81
+ "token_group": "T2"
82
+ },
83
+ "dpt": {
84
+ "correct": 56,
85
+ "total": 56,
86
+ "pct": 100.0,
87
+ "token_group": "T2"
88
+ },
89
+ "emp": {
90
+ "correct": 51,
91
+ "total": 51,
92
+ "pct": 100.0,
93
+ "token_group": "T1"
94
+ },
95
+ "empl": {
96
+ "correct": 59,
97
+ "total": 59,
98
+ "pct": 100.0,
99
+ "token_group": "T2"
100
+ },
101
+ "inv": {
102
+ "correct": 45,
103
+ "total": 45,
104
+ "pct": 100.0,
105
+ "token_group": "T1"
106
+ },
107
+ "ord": {
108
+ "correct": 67,
109
+ "total": 67,
110
+ "pct": 100.0,
111
+ "token_group": "T1"
112
+ },
113
+ "ordr": {
114
+ "correct": 59,
115
+ "total": 59,
116
+ "pct": 100.0,
117
+ "token_group": "T2"
118
+ },
119
+ "prd": {
120
+ "correct": 57,
121
+ "total": 57,
122
+ "pct": 100.0,
123
+ "token_group": "T2"
124
+ },
125
+ "prj": {
126
+ "correct": 52,
127
+ "total": 52,
128
+ "pct": 100.0,
129
+ "token_group": "T2"
130
+ },
131
+ "rgn": {
132
+ "correct": 70,
133
+ "total": 70,
134
+ "pct": 100.0,
135
+ "token_group": "T2"
136
+ },
137
+ "shp": {
138
+ "correct": 60,
139
+ "total": 60,
140
+ "pct": 100.0,
141
+ "token_group": "T2"
142
+ },
143
+ "spl": {
144
+ "correct": 59,
145
+ "total": 59,
146
+ "pct": 100.0,
147
+ "token_group": "T1"
148
+ },
149
+ "tsk": {
150
+ "correct": 44,
151
+ "total": 44,
152
+ "pct": 100.0,
153
+ "token_group": "T2"
154
+ },
155
+ "txn": {
156
+ "correct": 51,
157
+ "total": 51,
158
+ "pct": 100.0,
159
+ "token_group": "T1"
160
+ },
161
+ "whs": {
162
+ "correct": 50,
163
+ "total": 50,
164
+ "pct": 100.0,
165
+ "token_group": "T2"
166
+ }
167
+ },
168
+ "train_log": [
169
+ {
170
+ "epoch": 1,
171
+ "loss": 0.026001,
172
+ "val_em": 1.0
173
+ },
174
+ {
175
+ "epoch": 2,
176
+ "loss": 1.1e-05,
177
+ "val_em": 1.0
178
+ },
179
+ {
180
+ "epoch": 3,
181
+ "loss": 1.1e-05,
182
+ "val_em": 1.0
183
+ },
184
+ {
185
+ "epoch": 4,
186
+ "loss": 1e-05,
187
+ "val_em": 1.0
188
+ },
189
+ {
190
+ "epoch": 5,
191
+ "loss": 1e-05,
192
+ "val_em": 1.0
193
+ }
194
+ ]
195
+ }