|
|
""" |
|
|
Comprehensive validation module for antibody sequences. |
|
|
Performs computational checks for various sequence properties and potential issues. |
|
|
""" |
|
|
|
|
|
|
|
|
import re |
|
|
import json |
|
|
import math |
|
|
from typing import Dict, List, Tuple |
|
|
|
|
|
class SequenceValidator:
    """Hold an amino acid sequence together with its validation settings.

    On construction the sequence is upper-cased and the supplied
    configuration is merged, section by section, with the built-in
    defaults. The merged result is available as ``self.config`` and the
    defaults themselves as ``self.default_config``.
    """

    # pKa lookup keyed by one-letter residue code, plus the 'N_term' and
    # 'C_term' entries. It is not read by the constructor; presumably it is
    # consumed by charge-related calculations elsewhere -- confirm.
    pka_values = {
        'K': 10.0,
        'R': 12.0,
        'H': 6.0,
        'D': 4.0,
        'E': 4.4,
        'C': 8.5,
        'Y': 10.0,
        'N_term': 8.0,
        'C_term': 3.1,
    }

    def __init__(self, sequence: str, config: Dict = None):
        """
        Initialize sequence validator with optional configuration.

        Args:
            sequence: The amino acid sequence to validate. It is stored
                upper-cased in ``self.sequence``.
            config: Optional configuration dictionary with validation
                parameters, organised as ``{section: {param: value}}``.
                Values given here take precedence over the defaults;
                parameters and sections without a default are kept as-is.
                The dictionary passed in is never modified.

        Raises:
            AttributeError: If ``sequence`` has no ``upper()`` method, or if
                a section that has defaults is supplied as a non-dict value.
        """
        self.sequence = sequence.upper()

        # Copy the top level and every dict-valued section. The merge below
        # writes default values into these dictionaries, and without the
        # copy those writes would land in the caller's own objects.
        self.config = {
            section: params.copy() if isinstance(params, dict) else params
            for section, params in (config or {}).items()
        }

        self.default_config = {
            "signal_peptide": {
                "enabled": True,
                "min_length": 15,
                "max_length": 30,
                "required": False,
                "strip": False,
                "confidence_threshold": 0.6,
                "n_region_basic_threshold": 0.3,
                "h_region_hydrophobic_threshold": 0.6,
            },
        }

        # Fill in every default the caller did not set. ``setdefault`` keeps
        # an explicitly supplied value (including None) and preserves the
        # caller's key order, appending the missing defaults after it.
        for section, defaults in self.default_config.items():
            merged = self.config.setdefault(section, {})
            for param, value in defaults.items():
                merged.setdefault(param, value)
|
|
|
|
|
def validate_binder(sequence: str, config: Dict = None) -> Dict: |
|
|
""" |
|
|
Perform comprehensive validation of a single binder sequence. |
|
|
|
|
|
Args: |
|
|
sequence: The amino acid sequence to validate |
|
|
config: Optional configuration dictionary with validation parameters |
|
|
|
|
|
Checks: |
|
|
- Sequence length |
|
|
- Disorder prediction |
|
|
- Signal peptide presence (configurable) |
|
|
- Cysteine content and spacing |
|
|
- Glycosylation sites |
|
|
- Physicochemical properties |
|
|
- Sequence complexity |
|
|
|
|
|
Returns: |
|
|
Dict containing comprehensive validation results |
|
|
""" |
|
|
validator = SequenceValidator(sequence, config) |