Add missing seqcoding method implementation to adapter class
Browse files- adapter.py +22 -0
adapter.py
CHANGED
|
@@ -754,6 +754,28 @@ class AbLang2PairedHuggingFaceAdapter(AbEncoding, AbRestore, AbAlignment, AbScor
|
|
| 754 |
plls.append(float('nan'))
|
| 755 |
return np.array(plls)
|
| 756 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 757 |
def confidence(self, seqs, **kwargs):
|
| 758 |
"""Confidence calculation - match original ablang2 implementation by excluding all special tokens from loss."""
|
| 759 |
# Format input: join VH and VL with '|'
|
|
|
|
| 754 |
plls.append(float('nan'))
|
| 755 |
return np.array(plls)
|
| 756 |
|
| 757 |
+
def seqcoding(self, seqs, **kwargs):
    """Return one fixed-size embedding per input sequence.

    Each entry of ``seqs`` is either an already formatted string or a
    list/tuple of chains (e.g. ``(VH, VL)``), which is joined with ``'|'``
    before being handed to ``self._encode_sequences``.

    Args:
        seqs: Iterable of strings and/or list/tuple chain groups.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A numpy array with one row per input sequence. The original
        docstring states the embedding width is 480 -- this depends on the
        underlying model and is not checked here.
    """
    # Paired chains are joined VH|VL; plain strings pass through untouched.
    formatted_seqs = [
        '|'.join(s) if isinstance(s, (list, tuple)) else s
        for s in seqs
    ]

    embeddings = self._encode_sequences(formatted_seqs)

    # A rank-3 result is assumed to be [batch_size, seq_len, hidden_size]:
    # average over the sequence axis and keep the batch axis.
    # NOTE(review): every position returned by _encode_sequences is averaged,
    # so padding/special tokens are included unless that helper already
    # strips them -- confirm against the reference implementation.
    if embeddings.ndim == 3:
        seq_embeddings = embeddings.mean(dim=1)
    else:
        # Anything else is treated as already pooled and returned as-is.
        seq_embeddings = embeddings

    # detach() is required because Tensor.numpy() raises on tensors that
    # still track gradients; it is a no-op for tensors made under no_grad.
    return seq_embeddings.detach().cpu().numpy()
|
| 778 |
+
|
| 779 |
def confidence(self, seqs, **kwargs):
|
| 780 |
"""Confidence calculation - match original ablang2 implementation by excluding all special tokens from loss."""
|
| 781 |
# Format input: join VH and VL with '|'
|