fix bug
- Dockerfile +7 -12
- PepTron/esm2/model/attention.py +3 -0
- PepTron/esm2/model/model.py +11 -2
- PepTron/peptron/data/data.py +38 -2
Dockerfile
CHANGED
@@ -1,5 +1,5 @@
 # Use the BioNeMo Framework image as the base
-FROM nvcr.io/nvidia/clara/bionemo-framework:2.
+FROM nvcr.io/nvidia/clara/bionemo-framework:2.7.1 AS openfold-bionemo-image
 
 # Switch to root user to perform system-level operations
 USER root
@@ -27,24 +27,19 @@ WORKDIR /openfold2
 # Install uv
 RUN pip install uv
 
-
-# Install nvidia-ml-py (replacement for deprecated pynvml) for cuequivariance
-RUN pip uninstall -y triton || true && \
-    pip uninstall -y pynvml || true && \
-    pip install triton==3.3.0 && \
-    pip install nvidia-ml-py
+RUN pip install nvidia-ml-py
 
 # Install cuequivariance and its CUDA operations
-RUN pip install cuequivariance_torch==0.
-    pip install cuequivariance-ops-torch-cu12==0.
+RUN pip install cuequivariance_torch==0.8.0 && \
+    pip install cuequivariance-ops-torch-cu12==0.8.0
 
 RUN uv pip install --upgrade pip && \
     uv pip install --no-cache-dir wheel setuptools && \
     uv pip install --no-cache-dir --no-build-isolation -e . && \
     uv pip install --no-cache-dir \
-    biopython==1.
+    biopython==1.86 \
     mdtraj==1.11.0 \
-    modelcif==1.
+    modelcif==1.6 \
     ml_collections==1.1.0 \
     bionemo-moco==0.0.2.2 \
     "huggingface-hub>=0.24.0,<1.0" \
@@ -78,7 +73,7 @@ ENV PEPTRON_AUTO_DOWNLOAD="true"
 
 EXPOSE 7860
 
-RUN chown 1000 /app
+RUN chown -R 1000:1000 /app
 USER 1000
 
 CMD ["python", "app.py"]
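Note on the nvidia-ml-py change above: the deprecated pynvml package is replaced by nvidia-ml-py, which ships the same `pynvml` import name, so dependents such as cuequivariance keep working unchanged. A minimal sanity-check sketch (not part of the image build, assumes an NVIDIA GPU host with a working driver):

# Quick check that the NVML bindings installed by nvidia-ml-py are usable.
import pynvml  # import name provided by the nvidia-ml-py distribution

pynvml.nvmlInit()
try:
    count = pynvml.nvmlDeviceGetCount()
    print(f"NVML initialized, {count} GPU(s) visible")
finally:
    pynvml.nvmlShutdown()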
PepTron/esm2/model/attention.py
CHANGED
@@ -54,6 +54,7 @@ class ESM2TEDotProductAttention(TEDotProductAttention):
         k_channels: int | None = None,
         v_channels: int | None = None,
         cp_comm_type: str = "p2p",
+        model_comm_pgs=None,  # Added for BioNeMo 2.7+ / Megatron-Core compatibility
     ):
         """Initialize ESM2TEDotProductAttention."""
         self.config = config
@@ -168,6 +169,7 @@ class ESM2DotProductAttention(DotProductAttention):
         attn_mask_type: AttnMaskType,
         attention_type: str,
         attention_dropout: Optional[float] = None,
+        model_comm_pgs=None,  # Added for BioNeMo 2.7+ / Megatron-Core compatibility
     ) -> None:
         """Initializes the Attention class.
 
@@ -177,6 +179,7 @@ class ESM2DotProductAttention(DotProductAttention):
             attn_mask_type: The type of attention mask to be used.
             attention_type: The type of attention mechanism.
             attention_dropout: The dropout rate for attention weights. Defaults to None.
+            model_comm_pgs: Model communication process groups (for Megatron-Core compatibility).
         """
         super().__init__(
             config=config,
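The change above adds an optional model_comm_pgs keyword so that newer Megatron-Core code, which constructs attention modules with this extra argument, no longer crashes the subclass constructors. A sketch of the pattern with illustrative toy classes (not the real BioNeMo/Megatron classes):

# Accept a newer keyword without forwarding it to a base class that predates it.
class BaseAttention:
    def __init__(self, hidden_size: int, attention_dropout: float = 0.0):
        self.hidden_size = hidden_size
        self.attention_dropout = attention_dropout

class CompatAttention(BaseAttention):
    def __init__(self, hidden_size: int, attention_dropout: float = 0.0,
                 model_comm_pgs=None):
        # Keep the newer argument around for callers that need it, but do not
        # pass it to the older base-class signature.
        self.model_comm_pgs = model_comm_pgs
        super().__init__(hidden_size=hidden_size, attention_dropout=attention_dropout)

# A caller built against newer Megatron-Core can now pass the argument safely:
attn = CompatAttention(hidden_size=1280, model_comm_pgs=None)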
PepTron/esm2/model/model.py
CHANGED
@@ -446,8 +446,17 @@ class ESM2Model(MegatronBioBertModel):
         else:
             for l_no, layer in enumerate(self.encoder.layers):
                 with self.encoder.offload_context:
-
-                    if (
+                    # Check if use_cudagraph is supported (not available in newer Megatron-Core)
+                    if hasattr(layer, 'use_cudagraph'):
+                        layer.use_cudagraph = True
+                    # Check if cuda_graphs is supported (not available in newer Megatron-Core)
+                    has_cuda_graphs = hasattr(self.encoder, 'cuda_graphs')
+                    use_standard_forward = (
+                        not has_cuda_graphs or
+                        (has_cuda_graphs and len(self.encoder.cuda_graphs) == 0) or
+                        not self.encoder.training
+                    )
+                    if use_standard_forward:
                         hidden_states, context = layer(
                             hidden_states=hidden_states,
                             attention_mask=attention_mask,
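The added code probes version-dependent attributes with hasattr before touching them and falls back to the standard per-layer forward whenever CUDA-graph replay is unavailable, empty, or the model is in eval mode. A self-contained sketch of that decision logic with a toy encoder (names are illustrative, not the real Megatron-Core encoder):

# Feature detection for attributes that only some Megatron-Core versions expose.
class ToyEncoder:
    def __init__(self, cuda_graphs=None, training=True):
        if cuda_graphs is not None:  # older versions expose this attribute
            self.cuda_graphs = cuda_graphs
        self.training = training

def should_use_standard_forward(encoder) -> bool:
    has_cuda_graphs = hasattr(encoder, "cuda_graphs")
    return (
        not has_cuda_graphs
        or len(encoder.cuda_graphs) == 0
        or not encoder.training
    )

print(should_use_standard_forward(ToyEncoder()))                # True: no cuda_graphs attribute
print(should_use_standard_forward(ToyEncoder(cuda_graphs=[])))  # True: no captured graphs
print(should_use_standard_forward(ToyEncoder(cuda_graphs=[object()], training=False)))  # True: eval mode
print(should_use_standard_forward(ToyEncoder(cuda_graphs=[object()])))                  # False: replay graphs while training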
PepTron/peptron/data/data.py
CHANGED
@@ -348,8 +348,44 @@ class OpenFoldDataset(torch.utils.data.Dataset):
 
 class OpenFoldBatchCollator:
     def __call__(self, prots):
-
-
+        """
+        Collate a list of OpenFold-style feature dicts into a single batch.
+        Many features are padded to a *per-example* fixed size (e.g. predict mode sets
+        crop_size = num_res). When batching multiple proteins, the per-example sizes
+        can differ (L=138 vs L=134), so naive torch.stack fails. Here we pad tensors
+        to the max shape observed in the batch (per key/leaf) and then stack.
+        """
+        def pad_and_stack(xs):
+            if not isinstance(xs[0], torch.Tensor):
+                return xs
+            # Fast path: already uniform shapes.
+            first_shape = tuple(xs[0].shape)
+            if all(tuple(x.shape) == first_shape for x in xs):
+                return torch.stack(xs, dim=0)
+            # Scalars are always stackable.
+            if xs[0].ndim == 0:
+                return torch.stack(xs, dim=0)
+            # Require consistent rank; OpenFold features should meet this.
+            nd = xs[0].ndim
+            if any(x.ndim != nd for x in xs):
+                raise RuntimeError(
+                    f"Cannot collate tensors with different ranks: {[tuple(x.shape) for x in xs]}"
+                )
+            # Pad each dimension to the maximum size in this batch for this leaf.
+            max_shape = [max(int(x.shape[d]) for x in xs) for d in range(nd)]
+            padded = []
+            for x in xs:
+                if list(x.shape) == max_shape:
+                    padded.append(x)
+                    continue
+                # Create an output tensor filled with zeros (safe default for OpenFold features)
+                # and copy the existing values into the top-left slice.
+                out = x.new_zeros(max_shape)
+                slices = tuple(slice(0, int(s)) for s in x.shape)
+                out[slices] = x
+                padded.append(out)
+            return torch.stack(padded, dim=0)
+        return dict_multimap(pad_and_stack, prots)
 
 
 def collate_fn(data_list):
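To make the collator's padding behaviour concrete, here is a minimal, self-contained illustration with toy feature dicts and a simplified leaf-wise map (the real collator applies pad_and_stack through OpenFold's dict_multimap over full feature dictionaries):

import torch

def pad_and_stack(xs):
    # Stack directly when shapes already match or the leaves are scalars.
    if xs[0].ndim == 0 or all(x.shape == xs[0].shape for x in xs):
        return torch.stack(xs, dim=0)
    nd = xs[0].ndim
    max_shape = [max(int(x.shape[d]) for x in xs) for d in range(nd)]
    padded = []
    for x in xs:
        out = x.new_zeros(max_shape)  # zero padding, copy values into the top-left slice
        out[tuple(slice(0, s) for s in x.shape)] = x
        padded.append(out)
    return torch.stack(padded, dim=0)

# Two "proteins" with different residue counts (L=3 vs L=2).
prots = [
    {"aatype": torch.ones(3, dtype=torch.long), "pair": torch.ones(3, 3)},
    {"aatype": torch.ones(2, dtype=torch.long), "pair": torch.ones(2, 2)},
]
batch = {k: pad_and_stack([p[k] for p in prots]) for k in prots[0]}
print(batch["aatype"].shape)  # torch.Size([2, 3])   -- second example zero-padded
print(batch["pair"].shape)    # torch.Size([2, 3, 3])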