Avifenesh's picture
Phase 4: LDAM head fine-tune (test F1 0.962, has_next_action 0.907, has_quality_finding 0.917)
f924397 verified
{
"schema_version": "ingestion_model_v1",
"backbone": "answerdotai/ModernBERT-large",
"backbone_type": "modernbert",
"weights_dtype": "bf16",
"hidden_size": 1024,
"max_position_embeddings": 8192,
"vocab_size": 50368,
"pad_token_id": 50283,
"cls_token_id": 50281,
"sep_token_id": 50282,
"trained_args": {
"checkpoint": "models/phase3/modernbert-large-mh-span-v3/best.pt",
"train_file": "data/bakeoff/splits_v3/span_slot.train.jsonl",
"val_file": "data/bakeoff/splits_v3/span_slot.val.jsonl",
"out_dir": "models/phase4/modernbert-large-ldam-head",
"summary": "runs/phase4/ldam-head-summary.json",
"max_steps": 500,
"drw_start": 400,
"batch_size": 32,
"lr": 0.0001,
"warmup_steps": 50,
"max_m": 0.5,
"cb_beta": 0.9999,
"seed": 42
},
"trained_steps": 500,
"eval_at_save": {},
"heads": {
"claim_present": {
"kind": "linear",
"out": 1,
"pool": "cls"
},
"predicate": {
"kind": "linear",
"out": 21,
"pool": "cls"
},
"subject_type": {
"kind": "linear",
"out": 13,
"pool": "cls"
},
"confidence": {
"kind": "linear",
"out": 1,
"pool": "cls",
"activation": "sigmoid"
},
"span": {
"kind": "linear",
"out": 2,
"pool": "per_token",
"interpretation": "start/end logits; softmax over seq"
}
},
"role_prefix": "required: '[USER] ', '[ASSISTANT] ', '[TOOL_RESULT] ' prepended to blurb text before tokenization",
"max_length": 512
}