v9_fixed_s42 / training_config.json
Likithp's picture
add training_config.json
a70e3b6 verified
{
"mode": "fixed",
"seed": 42,
"hf_repo": "Likithp/v9_fixed_s42",
"base_model": "Qwen/Qwen2.5-0.5B-Instruct",
"dataset": "data/cs9_fixed_v1",
"hf_dataset": "Likithp/cs9_fixed_v1",
"trained_at": "2026-06-04T08:38:13.346803+00:00",
"optimizer": "AdamW",
"lr": 3e-05,
"weight_decay": 0.0,
"batch_size": 8,
"grad_accum": 8,
"effective_batch": 64,
"epochs": 5,
"warmup_steps": 200,
"grad_clip": 0.5,
"lr_schedule": "cosine",
"max_seq_len": 256,
"dtype": "bfloat16",
"alias_groups": {
"T1": [
"bev",
"cif",
"dov",
"fal",
"gev",
"hac",
"jac",
"kab",
"lex",
"nad"
],
"T2": [
"agov",
"egiv",
"ejof",
"ikob",
"okiv",
"uliv",
"vmob",
"vnob",
"xnob",
"znob"
]
},
"log_every_steps": 50,
"eval_n": 500,
"dry_run": false,
"best_epoch": 5,
"val_exact_match": 1.0,
"val_outer_alias_acc": 1.0,
"val_inner_alias_acc": 1.0,
"val_t1_outer_alias_acc": 1.0,
"val_t2_outer_alias_acc": 1.0,
"val_t1_t2_gap_pp": 0.0,
"val_inner_by_alias": {
"agov": {
"correct": 49,
"total": 49,
"pct": 100.0,
"token_group": "T2"
},
"bev": {
"correct": 47,
"total": 47,
"pct": 100.0,
"token_group": "T1"
},
"cif": {
"correct": 47,
"total": 47,
"pct": 100.0,
"token_group": "T1"
},
"dov": {
"correct": 58,
"total": 58,
"pct": 100.0,
"token_group": "T1"
},
"egiv": {
"correct": 52,
"total": 52,
"pct": 100.0,
"token_group": "T2"
},
"ejof": {
"correct": 62,
"total": 62,
"pct": 100.0,
"token_group": "T2"
},
"fal": {
"correct": 44,
"total": 44,
"pct": 100.0,
"token_group": "T1"
},
"gev": {
"correct": 49,
"total": 49,
"pct": 100.0,
"token_group": "T1"
},
"hac": {
"correct": 44,
"total": 44,
"pct": 100.0,
"token_group": "T1"
},
"ikob": {
"correct": 54,
"total": 54,
"pct": 100.0,
"token_group": "T2"
},
"jac": {
"correct": 47,
"total": 47,
"pct": 100.0,
"token_group": "T1"
},
"kab": {
"correct": 52,
"total": 52,
"pct": 100.0,
"token_group": "T1"
},
"lex": {
"correct": 42,
"total": 42,
"pct": 100.0,
"token_group": "T1"
},
"nad": {
"correct": 51,
"total": 51,
"pct": 100.0,
"token_group": "T1"
},
"okiv": {
"correct": 52,
"total": 52,
"pct": 100.0,
"token_group": "T2"
},
"uliv": {
"correct": 47,
"total": 47,
"pct": 100.0,
"token_group": "T2"
},
"vmob": {
"correct": 56,
"total": 56,
"pct": 100.0,
"token_group": "T2"
},
"vnob": {
"correct": 56,
"total": 56,
"pct": 100.0,
"token_group": "T2"
},
"xnob": {
"correct": 44,
"total": 44,
"pct": 100.0,
"token_group": "T2"
},
"znob": {
"correct": 47,
"total": 47,
"pct": 100.0,
"token_group": "T2"
}
},
"train_log": [
{
"epoch": 1,
"loss": 0.214947,
"val_em": 1.0
},
{
"epoch": 2,
"loss": 1.9e-05,
"val_em": 1.0
},
{
"epoch": 3,
"loss": 1.5e-05,
"val_em": 1.0
},
{
"epoch": 4,
"loss": 1.5e-05,
"val_em": 1.0
},
{
"epoch": 5,
"loss": 1.5e-05,
"val_em": 1.0
}
]
}