{ "architecture_config": { "collision_max": 200.0, "dropout": 0.1, "fingerprint_dim": 2048, "heads": 8, "layers": 6, "max_peaks": 256, "metadata_scale": 0.02, "model_dim": 384, "mz_max": 2000.0, "projection_dim": 192, "retrieval_mlp_hidden_dim": 512, "target_projection_dim": 256 }, "architectures": [ "NexaMassSpectralEncoder" ], "evaluation_adapters": { "massspecgym": { "benchmark": "MassSpecGym molecule retrieval", "claim_boundary": "top-k transfer signal; ranking and confidence remain open decision-layer problems", "path": "evaluation/massspecgym/run_massspecgym_retrieval_hf.py", "reference_result": "test Hit@20 0.3505 with frozen V3 projected-dot scorer under Hit@k-only evaluation" } }, "foundation_checkpoint": "weights/Final_V3-model_state.safetensors", "foundation_checkpoint_format": "safetensors", "full_training_checkpoints": { "location": "Wasabi object storage", "note": "Optimizer-bearing full training checkpoints are intentionally not part of the public Hugging Face payload; the public release contains model-state weights only." }, "input_contract": { "feature_width": 8, "mask": "boolean valid-peak mask, shape [batch, max_peaks]", "max_peaks": 256, "metadata_embeddings": [ "adduct_id", "instrument_id" ], "per_peak_features": [ "mz", "intensity", "mz_to_precursor", "peak_rank", "precursor_mz", "charge", "collision_energy", "peak_count" ] }, "library_name": "pytorch", "model_type": "nexamass", "not_supported": [ "unrestricted de novo molecule generation", "calibrated top-1 molecular identification without external validation" ], "object_storage_pytorch_weights": { "files": [ "weights/Final_V3-model_state.pt", "weights/NexaMass-V3-Struct-model_state.pt" ], "location": "Wasabi object storage", "note": "PyTorch .pt model-state files are retained in object storage. The public Hugging Face payload is safetensors-only.", "path": "wasabi:nexa-ms/models/dreams_pretrain_v1/checkpoints/NexaMass-V3-Struct/hf_release/weights/" }, "outputs": { "morgan_fingerprint_logits": 2048, "retrieval_target_dim": 256, "ssl_projection_dim": 192, "structure_query_dim": 256 }, "parameter_count": 14106690, "primary_checkpoint": "weights/NexaMass-V3-Struct-model_state.safetensors", "primary_checkpoint_format": "safetensors", "public_weight_files": [ "weights/NexaMass-V3-Struct-model_state.safetensors", "weights/Final_V3-model_state.safetensors" ], "public_weight_format": "safetensors", "recommended_inference": "Use the structure-aligned model-state checkpoint for spectrum embeddings and Morgan fingerprint probability vectors, then compare against a candidate molecular fingerprint bank.", "task": "MS/MS spectral representation learning and RDKit Morgan fingerprint alignment" }