cfisicaro committed
Commit 591f93c · 1 parent: e12eed7
Dockerfile CHANGED
@@ -1,5 +1,5 @@
 # Use the BioNeMo Framework image as the base
-FROM nvcr.io/nvidia/clara/bionemo-framework:2.3 AS openfold-bionemo-image
+FROM nvcr.io/nvidia/clara/bionemo-framework:2.7.1 AS openfold-bionemo-image
 
 # Switch to root user to perform system-level operations
 USER root
@@ -27,24 +27,19 @@ WORKDIR /openfold2
 # Install uv
 RUN pip install uv
 
-# Uninstall existing Triton and install Triton 3.3.0
-# Install nvidia-ml-py (replacement for deprecated pynvml) for cuequivariance
-RUN pip uninstall -y triton || true && \
-    pip uninstall -y pynvml || true && \
-    pip install triton==3.3.0 && \
-    pip install nvidia-ml-py
+RUN pip install nvidia-ml-py
 
 # Install cuequivariance and its CUDA operations
-RUN pip install cuequivariance_torch==0.6.1 && \
-    pip install cuequivariance-ops-torch-cu12==0.6.1
+RUN pip install cuequivariance_torch==0.8.0 && \
+    pip install cuequivariance-ops-torch-cu12==0.8.0
 
 RUN uv pip install --upgrade pip && \
     uv pip install --no-cache-dir wheel setuptools && \
     uv pip install --no-cache-dir --no-build-isolation -e . && \
     uv pip install --no-cache-dir \
-    biopython==1.85 \
+    biopython==1.86 \
     mdtraj==1.11.0 \
-    modelcif==1.5 \
+    modelcif==1.6 \
     ml_collections==1.1.0 \
     bionemo-moco==0.0.2.2 \
     "huggingface-hub>=0.24.0,<1.0" \
@@ -78,7 +73,7 @@ ENV PEPTRON_AUTO_DOWNLOAD="true"
 
 EXPOSE 7860
 
-RUN chown 1000 /app
+RUN chown -R 1000:1000 /app
 USER 1000
 
 CMD ["python", "app.py"]
PepTron/esm2/model/attention.py CHANGED
@@ -54,6 +54,7 @@ class ESM2TEDotProductAttention(TEDotProductAttention):
         k_channels: int | None = None,
         v_channels: int | None = None,
         cp_comm_type: str = "p2p",
+        model_comm_pgs=None,  # Added for BioNeMo 2.7+ / Megatron-Core compatibility
     ):
         """Initialize ESM2TEDotProductAttention."""
         self.config = config
@@ -168,6 +169,7 @@ class ESM2DotProductAttention(DotProductAttention):
         attn_mask_type: AttnMaskType,
         attention_type: str,
         attention_dropout: Optional[float] = None,
+        model_comm_pgs=None,  # Added for BioNeMo 2.7+ / Megatron-Core compatibility
     ) -> None:
         """Initializes the Attention class.
 
@@ -177,6 +179,7 @@ class ESM2DotProductAttention(DotProductAttention):
             attn_mask_type: The type of attention mask to be used.
             attention_type: The type of attention mechanism.
             attention_dropout: The dropout rate for attention weights. Defaults to None.
+            model_comm_pgs: Model communication process groups (for Megatron-Core compatibility).
         """
         super().__init__(
             config=config,
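Both constructors gain a model_comm_pgs=None keyword because the newer Megatron-Core shipped with BioNeMo 2.7+ passes model communication process groups into attention constructors; accepting it with a None default keeps the classes constructible from both old and new call sites. A minimal standalone sketch of that pattern (the classes below are hypothetical stand-ins, not the Megatron-Core API):

class BaseAttention:  # stand-in for DotProductAttention
    def __init__(self, config, attention_dropout=None):
        self.config = config
        self.attention_dropout = attention_dropout

class CompatAttention(BaseAttention):
    def __init__(self, config, attention_dropout=None, model_comm_pgs=None):
        # Accept the new keyword so newer callers that pass model_comm_pgs still work;
        # an older base class that predates the argument simply never sees it.
        super().__init__(config=config, attention_dropout=attention_dropout)
        self.model_comm_pgs = model_comm_pgs

CompatAttention(config={})                       # old-style call site
CompatAttention(config={}, model_comm_pgs=None)  # new-style call site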
PepTron/esm2/model/model.py CHANGED
@@ -446,8 +446,17 @@ class ESM2Model(MegatronBioBertModel):
         else:
             for l_no, layer in enumerate(self.encoder.layers):
                 with self.encoder.offload_context:
-                    layer.use_cudagraph = True
-                    if (len(self.encoder.cuda_graphs) == 0) or (not self.encoder.training):
+                    # Check if use_cudagraph is supported (not available in newer Megatron-Core)
+                    if hasattr(layer, 'use_cudagraph'):
+                        layer.use_cudagraph = True
+                    # Check if cuda_graphs is supported (not available in newer Megatron-Core)
+                    has_cuda_graphs = hasattr(self.encoder, 'cuda_graphs')
+                    use_standard_forward = (
+                        not has_cuda_graphs or
+                        (has_cuda_graphs and len(self.encoder.cuda_graphs) == 0) or
+                        not self.encoder.training
+                    )
+                    if use_standard_forward:
                         hidden_states, context = layer(
                             hidden_states=hidden_states,
                             attention_mask=attention_mask,
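The forward loop now feature-detects use_cudagraph and cuda_graphs instead of assuming they exist, since the newer Megatron-Core dropped both attributes. A tiny standalone sketch of the same hasattr-guard idiom (stand-in classes, not the Megatron-Core types):

class OldEncoderLayer:   # pre-upgrade API exposes the flag
    use_cudagraph = False

class NewEncoderLayer:   # newer API: attribute removed
    pass

def enable_cudagraph_if_supported(layer):
    # Only touch the attribute when the running library version still has it.
    if hasattr(layer, "use_cudagraph"):
        layer.use_cudagraph = True
        return True
    return False

assert enable_cudagraph_if_supported(OldEncoderLayer())
assert not enable_cudagraph_if_supported(NewEncoderLayer())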
PepTron/peptron/data/data.py CHANGED
@@ -348,8 +348,44 @@ class OpenFoldDataset(torch.utils.data.Dataset):
 
 class OpenFoldBatchCollator:
     def __call__(self, prots):
-        stack_fn = lambda x: torch.stack(x, dim=0) if isinstance(x[0], torch.Tensor) else x
-        return dict_multimap(stack_fn, prots)
+        """
+        Collate a list of OpenFold-style feature dicts into a single batch.
+        Many features are padded to a *per-example* fixed size (e.g. predict mode sets
+        crop_size = num_res). When batching multiple proteins, the per-example sizes
+        can differ (L=138 vs L=134), so naive torch.stack fails. Here we pad tensors
+        to the max shape observed in the batch (per key/leaf) and then stack.
+        """
+        def pad_and_stack(xs):
+            if not isinstance(xs[0], torch.Tensor):
+                return xs
+            # Fast path: already uniform shapes.
+            first_shape = tuple(xs[0].shape)
+            if all(tuple(x.shape) == first_shape for x in xs):
+                return torch.stack(xs, dim=0)
+            # Scalars are always stackable.
+            if xs[0].ndim == 0:
+                return torch.stack(xs, dim=0)
+            # Require consistent rank; OpenFold features should meet this.
+            nd = xs[0].ndim
+            if any(x.ndim != nd for x in xs):
+                raise RuntimeError(
+                    f"Cannot collate tensors with different ranks: {[tuple(x.shape) for x in xs]}"
+                )
+            # Pad each dimension to the maximum size in this batch for this leaf.
+            max_shape = [max(int(x.shape[d]) for x in xs) for d in range(nd)]
+            padded = []
+            for x in xs:
+                if list(x.shape) == max_shape:
+                    padded.append(x)
+                    continue
+                # Create an output tensor filled with zeros (safe default for OpenFold features)
+                # and copy the existing values into the top-left slice.
+                out = x.new_zeros(max_shape)
+                slices = tuple(slice(0, int(s)) for s in x.shape)
+                out[slices] = x
+                padded.append(out)
+            return torch.stack(padded, dim=0)
+        return dict_multimap(pad_and_stack, prots)
 
 
 def collate_fn(data_list):
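
To see the new collator's padding rule in isolation: each tensor is padded per dimension to the batch maximum and then stacked, so two proteins of different lengths batch cleanly. A self-contained example of the same rule (a standalone helper mirroring pad_and_stack above, not the module's API):

import torch

def pad_and_stack(xs):
    # Pad every tensor to the per-dimension max in the batch, then stack on a new batch dim.
    nd = xs[0].ndim
    max_shape = [max(int(x.shape[d]) for x in xs) for d in range(nd)]
    out = []
    for x in xs:
        buf = x.new_zeros(max_shape)
        buf[tuple(slice(0, int(s)) for s in x.shape)] = x
        out.append(buf)
    return torch.stack(out, dim=0)

a = torch.ones(138, 3)             # e.g. a 138-residue example
b = torch.ones(134, 3)             # e.g. a 134-residue example
batch = pad_and_stack([a, b])
print(batch.shape)                 # torch.Size([2, 138, 3])
print(batch[1, 134:].abs().sum())  # tensor(0.): rows 134..137 of b are zero padding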