MMPAE 1.5B E200 BS64
Checkpoint and evaluation artifacts for mmpae-1p5B-e200-bs64.
Training
- Config: configs/Inverse_CwA_1p5B.yaml
- Parameters: about 1.496B
- Epochs: 200
- Batch size: 64
- Dataset path in training: /data/polyone_tokenized
- Tokenizer path in training: /data/polyBERT
Final Validation Metrics
{
"actual_params": 1495526443,
"best_checkpoint_path": "/data/runs/mmpae-1p5B-e200-bs64/Checkpoint_BEST.pt",
"checkpoint_path": "/data/runs/mmpae-1p5B-e200-bs64/Polyone_AE_0200.pt",
"epoch": 200,
"epochs": 200,
"eval": {
"ce_loss": 0.1119885389772201,
"contrast_loss": 2.187204460526684,
"eos_loss": 0.014220016577973116,
"eval_batches": 1954,
"eval_seconds": 2795.629148006439,
"infer_batches": 10,
"infer_samples": 640,
"infer_seconds": 61.439425230026245,
"inv_r2": 0.7720341189253714,
"inv_rmse": 0.4378301985802189,
"mse_loss": 4.531615727267465,
"prop_r2": 0.6728669822216033,
"prop_rmse": 0.5227363497018814,
"sim_score": 0.39310147781608873,
"total_loss": 2640.4780240952055,
"validity": 0.96875
},
"exp_id": "mmpae-1p5B-e200-bs64",
"experiment_plan_metrics": {
"Actual params from log": 1495526443,
"Best Prop R2 \u2191": 0.6728669822216033,
"Best Prop RMSE \u2193": 0.5227363497018814,
"Exp ID": "mmpae-1p5B-e200-bs64",
"Final ce_loss": 0.1119885389772201,
"Final contrast_loss": 2.187204460526684,
"Final epoch": 200,
"Final mse_loss": 4.531615727267465,
"Final total_loss": 2640.4780240952055,
"Notes": "infer_steps=10, eval_steps=None",
"Peak GPU memory": 92599.5908203125,
"Run directory": "/data/runs/mmpae-1p5B-e200-bs64",
"Runtime": "39h 06m 56s",
"Tanimoto \u2191": 0.39310147781608873,
"Target R2 \u2191": 0.7720341189253714,
"Target RMSE \u2193": 0.4378301985802189,
"Validity \u2191": 0.96875
},
"global_step": 204800,
"is_best": false,
"peak_gpu_memory_mb": 92599.5908203125,
"phase": "Eval",
"runtime": "39h 06m 56s",
"runtime_seconds": 140816.74091649055,
"score": 0.33420392034515256,
"train": {
"ce_loss": 0.1439999616559362,
"contrast_loss": 0.8673558533191681,
"eos_loss": 0.017205233216373017,
"grad_norm": null,
"mse_loss": 5.9219387280754745,
"total_loss": 1459.6937276124954
}
}
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support