NexaMass-V3-Struct / config.json
Allanatrix's picture
Add MassSpecGym evaluation adapter and safetensors runtime loader (#1)
a916c63
{
"architecture_config": {
"collision_max": 200.0,
"dropout": 0.1,
"fingerprint_dim": 2048,
"heads": 8,
"layers": 6,
"max_peaks": 256,
"metadata_scale": 0.02,
"model_dim": 384,
"mz_max": 2000.0,
"projection_dim": 192,
"retrieval_mlp_hidden_dim": 512,
"target_projection_dim": 256
},
"architectures": [
"NexaMassSpectralEncoder"
],
"evaluation_adapters": {
"massspecgym": {
"benchmark": "MassSpecGym molecule retrieval",
"claim_boundary": "top-k transfer signal; ranking and confidence remain open decision-layer problems",
"path": "evaluation/massspecgym/run_massspecgym_retrieval_hf.py",
"reference_result": "Hit@20 of 0.3505 on the test split with the frozen V3 projected-dot scorer, under Hit@k-only evaluation"
}
},
"foundation_checkpoint": "weights/Final_V3-model_state.safetensors",
"foundation_checkpoint_format": "safetensors",
"full_training_checkpoints": {
"location": "Wasabi object storage",
"note": "Optimizer-bearing full training checkpoints are intentionally not part of the public Hugging Face payload; the public release contains model-state weights only."
},
"input_contract": {
"feature_width": 8,
"mask": "boolean valid-peak mask, shape [batch, max_peaks]",
"max_peaks": 256,
"metadata_embeddings": [
"adduct_id",
"instrument_id"
],
"per_peak_features": [
"mz",
"intensity",
"mz_to_precursor",
"peak_rank",
"precursor_mz",
"charge",
"collision_energy",
"peak_count"
]
},
"library_name": "pytorch",
"model_type": "nexamass",
"not_supported": [
"unrestricted de novo molecule generation",
"calibrated top-1 molecular identification without external validation"
],
"object_storage_pytorch_weights": {
"files": [
"weights/Final_V3-model_state.pt",
"weights/NexaMass-V3-Struct-model_state.pt"
],
"location": "Wasabi object storage",
"note": "PyTorch .pt model-state files are retained in object storage. The public Hugging Face payload is safetensors-only.",
"path": "wasabi:nexa-ms/models/dreams_pretrain_v1/checkpoints/NexaMass-V3-Struct/hf_release/weights/"
},
"outputs": {
"morgan_fingerprint_logits": 2048,
"retrieval_target_dim": 256,
"ssl_projection_dim": 192,
"structure_query_dim": 256
},
"parameter_count": 14106690,
"primary_checkpoint": "weights/NexaMass-V3-Struct-model_state.safetensors",
"primary_checkpoint_format": "safetensors",
"public_weight_files": [
"weights/NexaMass-V3-Struct-model_state.safetensors",
"weights/Final_V3-model_state.safetensors"
],
"public_weight_format": "safetensors",
"recommended_inference": "Use the structure-aligned model-state checkpoint for spectrum embeddings and Morgan fingerprint probability vectors, then compare against a candidate molecular fingerprint bank.",
"task": "MS/MS spectral representation learning and RDKit Morgan fingerprint alignment"
}