File size: 3,002 Bytes
97ced61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a916c63
 
 
 
 
 
 
 
97ced61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88b7bc6
 
 
 
 
 
 
 
 
97ced61
 
 
 
 
 
 
 
 
88b7bc6
 
 
 
 
97ced61
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
{
  "architecture_config": {
    "collision_max": 200.0,
    "dropout": 0.1,
    "fingerprint_dim": 2048,
    "heads": 8,
    "layers": 6,
    "max_peaks": 256,
    "metadata_scale": 0.02,
    "model_dim": 384,
    "mz_max": 2000.0,
    "projection_dim": 192,
    "retrieval_mlp_hidden_dim": 512,
    "target_projection_dim": 256
  },
  "architectures": [
    "NexaMassSpectralEncoder"
  ],
  "evaluation_adapters": {
    "massspecgym": {
      "benchmark": "MassSpecGym molecule retrieval",
      "claim_boundary": "top-k transfer signal; ranking and confidence remain open decision-layer problems",
      "path": "evaluation/massspecgym/run_massspecgym_retrieval_hf.py",
      "reference_result": "test Hit@20 0.3505 with frozen V3 projected-dot scorer under Hit@k-only evaluation"
    }
  },
  "foundation_checkpoint": "weights/Final_V3-model_state.safetensors",
  "foundation_checkpoint_format": "safetensors",
  "full_training_checkpoints": {
    "location": "Wasabi object storage",
    "note": "Optimizer-bearing full training checkpoints are intentionally not part of the public Hugging Face payload; the public release contains model-state weights only."
  },
  "input_contract": {
    "feature_width": 8,
    "mask": "boolean valid-peak mask, shape [batch, max_peaks]",
    "max_peaks": 256,
    "metadata_embeddings": [
      "adduct_id",
      "instrument_id"
    ],
    "per_peak_features": [
      "mz",
      "intensity",
      "mz_to_precursor",
      "peak_rank",
      "precursor_mz",
      "charge",
      "collision_energy",
      "peak_count"
    ]
  },
  "library_name": "pytorch",
  "model_type": "nexamass",
  "not_supported": [
    "unrestricted de novo molecule generation",
    "calibrated top-1 molecular identification without external validation"
  ],
  "object_storage_pytorch_weights": {
    "files": [
      "weights/Final_V3-model_state.pt",
      "weights/NexaMass-V3-Struct-model_state.pt"
    ],
    "location": "Wasabi object storage",
    "note": "PyTorch .pt model-state files are retained in object storage. The public Hugging Face payload is safetensors-only.",
    "path": "wasabi:nexa-ms/models/dreams_pretrain_v1/checkpoints/NexaMass-V3-Struct/hf_release/weights/"
  },
  "outputs": {
    "morgan_fingerprint_logits": 2048,
    "retrieval_target_dim": 256,
    "ssl_projection_dim": 192,
    "structure_query_dim": 256
  },
  "parameter_count": 14106690,
  "primary_checkpoint": "weights/NexaMass-V3-Struct-model_state.safetensors",
  "primary_checkpoint_format": "safetensors",
  "public_weight_files": [
    "weights/NexaMass-V3-Struct-model_state.safetensors",
    "weights/Final_V3-model_state.safetensors"
  ],
  "public_weight_format": "safetensors",
  "recommended_inference": "Use the structure-aligned model-state checkpoint for spectrum embeddings and Morgan fingerprint probability vectors, then compare against a candidate molecular fingerprint bank.",
  "task": "MS/MS spectral representation learning and RDKit Morgan fingerprint alignment"
}