v10_rand_s0 / training_config.json
Likithp's picture
add training_config.json
7afed29 verified
{
"mode": "rand",
"seed": 0,
"hf_repo": "Likithp/v10_rand_s0",
"base_model": "Qwen/Qwen2.5-0.5B",
"dataset": "data/cs7_rand_v3",
"dataset_version": "cs7_v3",
"trained_at": "2026-06-05T19:09:30.721699+00:00",
"optimizer": "AdamW",
"lr": 3e-05,
"weight_decay": 0.0,
"batch_size": 8,
"grad_accum": 8,
"effective_batch": 64,
"epochs": 5,
"warmup_steps": 200,
"grad_clip": 0.5,
"lr_schedule": "cosine",
"checkpoint_criterion": "train_loss",
"max_seq_len": 256,
"dtype": "torch.bfloat16",
"eval_method": "teacher_forcing_argmax",
"alias_groups": {
"T1": [
"act",
"cst",
"emp",
"inv",
"ord",
"spl",
"txn"
],
"T2": [
"brc",
"ctg",
"dpt",
"empl",
"ordr",
"prd",
"prj",
"rgn",
"shp",
"tsk",
"whs"
]
},
"log_every_steps": 50,
"eval_n": 500,
"dry_run": false,
"best_epoch": 5,
"best_train_loss": 0.091353,
"val_exact_match": 0.059,
"val_outer_alias_acc": 1.0,
"val_inner_alias_acc": 0.062,
"val_t1_inner_alias_acc": 0.0,
"val_t2_inner_alias_acc": 0.102142,
"val_t1_t2_gap_pp": -10.21,
"val_inner_by_alias": {
"act": {
"correct": 0,
"total": 61,
"pct": 0.0,
"token_group": "T1"
},
"brc": {
"correct": 0,
"total": 50,
"pct": 0.0,
"token_group": "T2"
},
"cst": {
"correct": 0,
"total": 62,
"pct": 0.0,
"token_group": "T1"
},
"ctg": {
"correct": 0,
"total": 58,
"pct": 0.0,
"token_group": "T2"
},
"dpt": {
"correct": 0,
"total": 47,
"pct": 0.0,
"token_group": "T2"
},
"emp": {
"correct": 0,
"total": 40,
"pct": 0.0,
"token_group": "T1"
},
"empl": {
"correct": 0,
"total": 43,
"pct": 0.0,
"token_group": "T2"
},
"inv": {
"correct": 0,
"total": 56,
"pct": 0.0,
"token_group": "T1"
},
"ord": {
"correct": 0,
"total": 47,
"pct": 0.0,
"token_group": "T1"
},
"ordr": {
"correct": 0,
"total": 50,
"pct": 0.0,
"token_group": "T2"
},
"prd": {
"correct": 51,
"total": 55,
"pct": 92.7,
"token_group": "T2"
},
"prj": {
"correct": 11,
"total": 65,
"pct": 16.9,
"token_group": "T2"
},
"rgn": {
"correct": 0,
"total": 64,
"pct": 0.0,
"token_group": "T2"
},
"shp": {
"correct": 0,
"total": 59,
"pct": 0.0,
"token_group": "T2"
},
"spl": {
"correct": 0,
"total": 56,
"pct": 0.0,
"token_group": "T1"
},
"tsk": {
"correct": 0,
"total": 59,
"pct": 0.0,
"token_group": "T2"
},
"txn": {
"correct": 0,
"total": 71,
"pct": 0.0,
"token_group": "T1"
},
"whs": {
"correct": 0,
"total": 57,
"pct": 0.0,
"token_group": "T2"
}
},
"train_log": [
{
"epoch": 1,
"loss": 0.121865,
"val_em": 0.062
},
{
"epoch": 2,
"loss": 0.092447,
"val_em": 0.056
},
{
"epoch": 3,
"loss": 0.091892,
"val_em": 0.056
},
{
"epoch": 4,
"loss": 0.091518,
"val_em": 0.052
},
{
"epoch": 5,
"loss": 0.091353,
"val_em": 0.056
}
]
}