{
  "schema_version": "ingestion_model_v1",
  "backbone": "answerdotai/ModernBERT-large",
  "backbone_type": "modernbert",
  "weights_dtype": "bf16",
  "hidden_size": 1024,
  "max_position_embeddings": 8192,
  "vocab_size": 50368,
  "pad_token_id": 50283,
  "cls_token_id": 50281,
  "sep_token_id": 50282,
  "trained_args": {
    "checkpoint": "models/phase3/modernbert-large-mh-span-v3/best.pt",
    "train_file": "data/bakeoff/splits_v3/span_slot.train.jsonl",
    "val_file": "data/bakeoff/splits_v3/span_slot.val.jsonl",
    "out_dir": "models/phase4/modernbert-large-ldam-head",
    "summary": "runs/phase4/ldam-head-summary.json",
    "max_steps": 500,
    "drw_start": 400,
    "batch_size": 32,
    "lr": 0.0001,
    "warmup_steps": 50,
    "max_m": 0.5,
    "cb_beta": 0.9999,
    "seed": 42
  },
  "trained_steps": 500,
  "eval_at_save": {},
  "heads": {
    "claim_present": {
      "kind": "linear",
      "out": 1,
      "pool": "cls"
    },
    "predicate": {
      "kind": "linear",
      "out": 21,
      "pool": "cls"
    },
    "subject_type": {
      "kind": "linear",
      "out": 13,
      "pool": "cls"
    },
    "confidence": {
      "kind": "linear",
      "out": 1,
      "pool": "cls",
      "activation": "sigmoid"
    },
    "span": {
      "kind": "linear",
      "out": 2,
      "pool": "per_token",
      "interpretation": "start/end logits; softmax over seq"
    }
  },
  "role_prefix": "required: '[USER] ', '[ASSISTANT] ', '[TOOL_RESULT] ' prepended to blurb text before tokenization",
  "max_length": 512
}