v10_fixed_s1 / training_config.json
Likithp's picture
add training_config.json
4410286 verified
{
"mode": "fixed",
"seed": 1,
"hf_repo": "Likithp/v10_fixed_s1",
"base_model": "Qwen/Qwen2.5-0.5B",
"dataset": "data/cs7_fixed_v3",
"dataset_version": "cs7_v3",
"trained_at": "2026-06-06T07:48:24.312420+00:00",
"optimizer": "AdamW",
"lr": 3e-05,
"weight_decay": 0.0,
"batch_size": 8,
"grad_accum": 8,
"effective_batch": 64,
"epochs": 5,
"warmup_steps": 200,
"grad_clip": 0.5,
"lr_schedule": "cosine",
"checkpoint_criterion": "val_em",
"max_seq_len": 256,
"dtype": "torch.bfloat16",
"eval_method": "teacher_forcing_argmax",
"alias_groups": {
"T1": [
"act",
"cst",
"emp",
"inv",
"ord",
"spl",
"txn"
],
"T2": [
"brc",
"ctg",
"dpt",
"empl",
"ordr",
"prd",
"prj",
"rgn",
"shp",
"tsk",
"whs"
]
},
"log_every_steps": 50,
"eval_n": 500,
"dry_run": false,
"best_epoch": 5,
"best_train_loss": null,
"val_exact_match": 1.0,
"val_outer_alias_acc": 1.0,
"val_inner_alias_acc": 1.0,
"val_t1_inner_alias_acc": 1.0,
"val_t2_inner_alias_acc": 1.0,
"val_t1_t2_gap_pp": 0.0,
"val_inner_by_alias": {
"act": {
"correct": 64,
"total": 64,
"pct": 100.0,
"token_group": "T1"
},
"brc": {
"correct": 50,
"total": 50,
"pct": 100.0,
"token_group": "T2"
},
"cst": {
"correct": 45,
"total": 45,
"pct": 100.0,
"token_group": "T1"
},
"ctg": {
"correct": 61,
"total": 61,
"pct": 100.0,
"token_group": "T2"
},
"dpt": {
"correct": 56,
"total": 56,
"pct": 100.0,
"token_group": "T2"
},
"emp": {
"correct": 51,
"total": 51,
"pct": 100.0,
"token_group": "T1"
},
"empl": {
"correct": 59,
"total": 59,
"pct": 100.0,
"token_group": "T2"
},
"inv": {
"correct": 45,
"total": 45,
"pct": 100.0,
"token_group": "T1"
},
"ord": {
"correct": 67,
"total": 67,
"pct": 100.0,
"token_group": "T1"
},
"ordr": {
"correct": 59,
"total": 59,
"pct": 100.0,
"token_group": "T2"
},
"prd": {
"correct": 57,
"total": 57,
"pct": 100.0,
"token_group": "T2"
},
"prj": {
"correct": 52,
"total": 52,
"pct": 100.0,
"token_group": "T2"
},
"rgn": {
"correct": 70,
"total": 70,
"pct": 100.0,
"token_group": "T2"
},
"shp": {
"correct": 60,
"total": 60,
"pct": 100.0,
"token_group": "T2"
},
"spl": {
"correct": 59,
"total": 59,
"pct": 100.0,
"token_group": "T1"
},
"tsk": {
"correct": 44,
"total": 44,
"pct": 100.0,
"token_group": "T2"
},
"txn": {
"correct": 51,
"total": 51,
"pct": 100.0,
"token_group": "T1"
},
"whs": {
"correct": 50,
"total": 50,
"pct": 100.0,
"token_group": "T2"
}
},
"train_log": [
{
"epoch": 1,
"loss": 0.026036,
"val_em": 1.0
},
{
"epoch": 2,
"loss": 1e-05,
"val_em": 1.0
},
{
"epoch": 3,
"loss": 1e-05,
"val_em": 1.0
},
{
"epoch": 4,
"loss": 1e-05,
"val_em": 1.0
},
{
"epoch": 5,
"loss": 1e-05,
"val_em": 1.0
}
]
}