|
|
""" |
|
|
Comprehensive validation module for antibody sequences. |
|
|
Performs computational checks for various sequence properties and potential issues. |
|
|
""" |
|
|
|
|
|
|
|
|
import re |
|
|
import json |
|
|
import math |
|
|
from typing import Dict, List, Tuple |
|
|
|
|
|
class SequenceValidator:
    """Compute validation metrics for a single amino-acid sequence.

    The sequence is upper-cased on construction and every analysis method
    reads ``self.sequence``. Signal-peptide thresholds are taken from
    ``self.config['signal_peptide']`` (caller values merged over defaults).
    """

    # pKa values used by ``charge_at_ph`` for ionizable side chains and termini.
    pka_values = {
        'K': 10.0,
        'R': 12.0,
        'H': 6.0,
        'D': 4.0,
        'E': 4.4,
        'C': 8.5,
        'Y': 10.0,
        'N_term': 8.0,
        'C_term': 3.1
    }

    # Kyte-Doolittle hydropathy values; their mean over the sequence is GRAVY.
    _HYDROPATHY = {
        'A': 1.8, 'R': -4.5, 'N': -3.5, 'D': -3.5, 'C': 2.5,
        'Q': -3.5, 'E': -3.5, 'G': -0.4, 'H': -3.2, 'I': 4.5,
        'L': 3.8, 'K': -3.9, 'M': 1.9, 'F': 2.8, 'P': -1.6,
        'S': -0.8, 'T': -0.7, 'W': -0.9, 'Y': -1.3, 'V': 4.2
    }

    # Average masses (Da) of the *free* amino acids, to one decimal place.
    _FREE_AA_WEIGHTS = {
        'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
        'Q': 146.2, 'E': 147.1, 'G': 75.1, 'H': 155.2, 'I': 131.2,
        'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,
        'S': 105.1, 'T': 119.1, 'W': 204.2, 'Y': 181.2, 'V': 117.1
    }

    # Mass (Da) of the water molecule released for every peptide bond formed.
    _WATER_MASS = 18.015

    def __init__(self, sequence: str, config: Dict = None):
        """
        Initialize sequence validator with optional configuration.

        Args:
            sequence: The amino acid sequence to validate (stored upper-cased)
            config: Optional configuration dictionary with validation
                parameters. It is copied, never modified; missing entries
                are filled in from ``self.default_config``.
        """
        self.sequence = sequence.upper()

        self.default_config = {
            "signal_peptide": {
                "enabled": True,
                "min_length": 15,
                "max_length": 30,
                "required": False,
                "strip": False,
                "confidence_threshold": 0.6,
                "n_region_basic_threshold": 0.3,
                "h_region_hydrophobic_threshold": 0.6
            }
        }

        # Copy one level deep so filling in defaults cannot leak back into the
        # dictionary the caller passed (which may be shared between binders).
        self.config = {
            key: dict(value) if isinstance(value, dict) else value
            for key, value in (config or {}).items()
        }

        # Caller-supplied values win; defaults only fill the gaps.
        for key, default_values in self.default_config.items():
            section = self.config.setdefault(key, {})
            for param, value in default_values.items():
                section.setdefault(param, value)

    def _find_homopolymers(self, min_length: int = 4) -> List[Dict]:
        """Return maximal runs of one repeated residue of at least ``min_length``."""
        # "(.)\1{k,}" greedily matches a residue followed by >= k copies of it.
        pattern = re.compile(r'(.)\1{%d,}' % (max(min_length, 1) - 1), re.DOTALL)
        return [
            {
                "amino_acid": match.group(1),
                "start": match.start(),
                "length": match.end() - match.start()
            }
            for match in pattern.finditer(self.sequence)
        ]

    def _find_aqp_regions(self, window_size: int = 10, threshold: float = 0.4) -> List[Dict]:
        """Return every sliding window whose A/Q/P fraction exceeds ``threshold``."""
        problem_regions = []
        for start in range(len(self.sequence) - window_size + 1):
            window = self.sequence[start:start + window_size]
            aqp_fraction = sum(aa in 'AQP' for aa in window) / window_size
            if aqp_fraction > threshold:
                problem_regions.append({
                    "start": start,
                    "sequence": window,
                    "aqp_fraction": round(aqp_fraction, 2)
                })
        return problem_regions

    def analyze_complexity(self) -> Dict:
        """
        Analyze sequence complexity focusing on issues that could affect binder stability and function:
        - Homopolymer runs (4+ identical residues)
        - A/Q/P-heavy regions (>40% in any 10-residue window)
        - Overall amino acid diversity

        An empty sequence yields zero entropy and a zero A/Q/P percentage
        instead of raising ``ZeroDivisionError``.

        Returns:
            Dict containing complexity analysis results
        """
        total_aas = len(self.sequence)

        aa_counts = {}
        for aa in self.sequence:
            aa_counts[aa] = aa_counts.get(aa, 0) + 1

        # Shannon entropy (bits) of the residue composition.
        entropy = 0.0
        for count in aa_counts.values():
            p = count / total_aas
            entropy -= p * math.log2(p)

        aqp_total = sum(aa_counts.get(aa, 0) for aa in 'AQP')
        aqp_percentage = round(100 * aqp_total / total_aas, 1) if total_aas else 0.0

        # Scan once and reuse the result for both the report and the flag.
        homopolymer_runs = self._find_homopolymers()

        return {
            "homopolymer_runs": homopolymer_runs,
            "aqp_heavy_regions": self._find_aqp_regions(),
            "sequence_entropy": round(entropy, 2),
            "unique_aas": len(aa_counts),
            "aqp_percentage": aqp_percentage,
            "warnings": {
                "low_complexity": entropy < 3.0,
                "high_aqp": aqp_percentage > 35,
                "has_homopolymers": bool(homopolymer_runs)
            }
        }

    def predict_disorder(self) -> float:
        """
        Simple disorder prediction based on amino acid propensities.

        Returns:
            Fraction of residues that belong to the disorder-prone set
            (R, K, E, P, N, D, Q, S, G); 0.0 for an empty sequence.
        """
        if not self.sequence:
            return 0.0

        disorder_prone = set('RKEPNDQSG')
        disorder_count = sum(1 for aa in self.sequence if aa in disorder_prone)
        return disorder_count / len(self.sequence)

    @staticmethod
    def _fraction_in(region: str, residues: str) -> float:
        """Return the fraction of ``region`` made of ``residues`` (0.0 if empty)."""
        if not region:
            return 0.0
        return sum(aa in residues for aa in region) / len(region)

    def check_signal_peptide(self) -> Dict:
        """
        Enhanced signal peptide detection for binder peptides/scaffolds.

        Features analyzed:
        - N-region: Basic amino acids (K/R)
        - H-region: Hydrophobic core
        - C-region: (-3, -1) rule with small neutral amino acids
        - Length constraints

        Region lengths are derived from the sequence length (N: up to 6,
        H: up to 12, C: 5) and the total is capped at the configured
        ``max_length``. A region that ends up empty scores 0.0 rather than
        raising ``ZeroDivisionError``.

        Returns:
            Dict containing detailed signal peptide analysis
        """
        config = self.config['signal_peptide']

        if not config['enabled']:
            return {
                "enabled": False,
                "has_signal": False,
                "confidence": 0.0,
                "details": "Signal peptide detection disabled in configuration"
            }

        if len(self.sequence) < config['min_length']:
            return {
                "enabled": True,
                "has_signal": False,
                "confidence": 1.0,
                "details": f"Sequence too short (min {config['min_length']} residues required)"
            }

        n_region_length = min(6, len(self.sequence) // 5)
        h_region_length = min(12, len(self.sequence) // 3)
        c_region_length = 5

        total_sp_length = min(
            n_region_length + h_region_length + c_region_length,
            config['max_length']
        )

        h_region_end = n_region_length + h_region_length
        n_region = self.sequence[:n_region_length]
        h_region = self.sequence[n_region_length:h_region_end]
        c_region = self.sequence[h_region_end:total_sp_length]

        # N-region: fraction of basic residues.
        n_region_score = self._fraction_in(n_region, 'KR')
        n_region_valid = n_region_score >= config['n_region_basic_threshold']

        # H-region: fraction of hydrophobic residues.
        h_region_score = self._fraction_in(h_region, 'AILMFWV')
        h_region_valid = h_region_score >= config['h_region_hydrophobic_threshold']

        # C-region: small neutral residues at -3 and -1, no proline in the
        # last three positions.
        c_region_valid = False
        if len(c_region) >= 3:
            small_neutral = set('AGST')
            c_region_pattern = (
                c_region[-3] in small_neutral and
                c_region[-1] in small_neutral
            )
            no_proline_disruption = 'P' not in c_region[-3:]
            c_region_valid = c_region_pattern and no_proline_disruption

        # A feature only contributes to the confidence when it passes its
        # own threshold.
        feature_scores = [
            n_region_score if n_region_valid else 0,
            h_region_score if h_region_valid else 0,
            1.0 if c_region_valid else 0
        ]
        confidence = sum(feature_scores) / len(feature_scores)
        has_signal = confidence >= config['confidence_threshold']

        details = {
            "n_region": {
                "sequence": n_region,
                "basic_fraction": round(n_region_score, 2),
                "valid": n_region_valid
            },
            "h_region": {
                "sequence": h_region,
                "hydrophobic_fraction": round(h_region_score, 2),
                "valid": h_region_valid
            },
            "c_region": {
                "sequence": c_region,
                "valid": c_region_valid
            }
        }

        return {
            "enabled": True,
            "has_signal": has_signal,
            "confidence": round(confidence, 2),
            "details": details,
            "signal_sequence": self.sequence[:total_sp_length] if has_signal else None,
            "mature_sequence": self.sequence[total_sp_length:] if has_signal and config['strip'] else self.sequence
        }

    def analyze_cysteines(self) -> Dict:
        """
        Analyze cysteine patterns and potential disulfide bonds in binder peptides/scaffolds.

        Performs analysis of:
        - Cysteine count and (0-based) positions
        - Potential disulfide pairs, formed by pairing cysteines sequentially
        - Spacing between consecutive cysteines
        - Simple motif flags (terminal pair, ladder, clustered)

        With an odd number of cysteines the last one is reported as unpaired.

        Returns:
            Dict containing detailed cysteine analysis results
        """
        sequence = self.sequence
        cys_positions = [i for i, aa in enumerate(sequence) if aa == 'C']
        n_cys = len(cys_positions)

        # Gaps between consecutive cysteines; empty with fewer than two.
        spacing_list = [
            later - earlier
            for earlier, later in zip(cys_positions, cys_positions[1:])
        ]

        if n_cys > 1:
            motifs = {
                'terminal_pair': n_cys == 2 and spacing_list[0] >= len(sequence) * 0.6,
                'ladder': all(3 <= s <= 8 for s in spacing_list),
                'clustered': all(s <= 4 for s in spacing_list)
            }
        else:
            motifs = {
                'terminal_pair': False,
                'ladder': False,
                'clustered': False
            }

        # Sequential pairing: 1st with 2nd, 3rd with 4th, ...
        pairs = [
            {
                "cys1": first,
                "cys2": second,
                "spacing": second - first,
                "sequence": sequence[first:second + 1]
            }
            for first, second in zip(cys_positions[0::2], cys_positions[1::2])
        ]
        unpaired = [cys_positions[-1]] if n_cys % 2 != 0 else []

        scaffold_evaluation = {
            "suitable_scaffold": n_cys >= 2 and (
                motifs.get('terminal_pair', False) or
                motifs.get('ladder', False)
            ),
            "preferred_spacing": all(2 <= s <= 20 for s in spacing_list) if spacing_list else False,
            "optimal_count": 2 <= n_cys <= 6,
            "well_distributed": (
                n_cys >= 2 and
                cys_positions[-1] - cys_positions[0] >= len(sequence) * 0.3
            )
        }

        candidate_warnings = [
            "Odd number of cysteines" if n_cys % 2 != 0 else None,
            "Suboptimal cysteine count" if not scaffold_evaluation["optimal_count"] else None,
            "Poor cysteine distribution" if not scaffold_evaluation["well_distributed"] and n_cys >= 2 else None,
            "No cysteines found" if n_cys == 0 else None
        ]

        return {
            "count": n_cys,
            "positions": cys_positions,
            "spacing": spacing_list,
            "patterns": {
                "paired": n_cys % 2 == 0,
                "potential_pairs": pairs,
                "unpaired": unpaired,
                "motifs": motifs
            },
            "scaffold_evaluation": scaffold_evaluation,
            "warnings": [w for w in candidate_warnings if w is not None]
        }

    def find_glycosylation_sites(self) -> List[Dict]:
        """
        Identify potential N-glycosylation sites (N-X-S/T, X != P).

        Overlapping sequons are all reported (e.g. both ``NNS`` and ``NSS``
        in ``NNSS``).

        Returns:
            List of dicts with the 0-based ``position`` of the asparagine
            and the three-residue ``motif``.
        """
        # The lookahead keeps X and S/T unconsumed, so a sequon that starts
        # inside the previous one is still found.
        pattern = re.compile(r'N(?=[^P][ST])')
        return [
            {
                "position": match.start(),
                "motif": self.sequence[match.start():match.start() + 3]
            }
            for match in pattern.finditer(self.sequence)
        ]

    def charge_at_ph(self, ph: float) -> float:
        """
        Calculate the net charge of the peptide at a given pH.

        Each ionizable group contributes its Henderson-Hasselbalch fractional
        charge using ``pka_values``: N-terminus, K, R and H count as
        positive; C-terminus, D, E, C and Y count as negative.

        NOTE(review): the original docstring says this follows BioPython's
        implementation for an exact match; that has not been verified here.

        Args:
            ph: pH at which to evaluate the charge

        Returns:
            Net charge as a float
        """
        pka = self.pka_values
        charge = 0

        # Termini first, then side chains, in a fixed order so the floating
        # point sum is reproducible.
        charge += 1.0 / (1.0 + 10.0**(ph - pka['N_term']))
        charge -= 1.0 / (1.0 + 10.0**(pka['C_term'] - ph))

        for aa in 'KRH':
            charge += self.sequence.count(aa) / (1.0 + 10.0**(ph - pka[aa]))
        for aa in 'DECY':
            charge -= self.sequence.count(aa) / (1.0 + 10.0**(pka[aa] - ph))

        return charge

    def _find_pi(self) -> float:
        """Estimate the isoelectric point by coarse scan plus bisection."""
        # Coarse scan: net charge at every integer pH from 0 to 14.
        charges = [(ph, self.charge_at_ph(ph)) for ph in range(0, 15)]

        # Look for the first adjacent pair where the charge changes sign.
        for (ph1, charge1), (ph2, charge2) in zip(charges, charges[1:]):
            if charge1 * charge2 <= 0:
                break
        else:
            # No sign change anywhere on the scan.
            ionizable = sum(aa in 'KRHDECY' for aa in self.sequence)
            if ionizable == 0:
                return 7.0
            return 2.0 if charges[-1][1] < 0 else 12.0

        # Linear interpolation between the bracketing points.
        if abs(charge1 - charge2) < 0.0001:
            pi_estimate = (ph1 + ph2) / 2
        else:
            pi_estimate = ph1 + (0 - charge1) * (ph2 - ph1) / (charge2 - charge1)

        # Refine by bisection inside +/- 0.5 pH of the estimate.
        ph_min = max(0.0, pi_estimate - 0.5)
        ph_max = min(14.0, pi_estimate + 0.5)

        for _ in range(10):
            ph_mid = (ph_min + ph_max) / 2
            charge = self.charge_at_ph(ph_mid)

            if abs(charge) < 0.0001:
                return round(ph_mid, 2)
            if charge > 0:
                ph_min = ph_mid
            else:
                ph_max = ph_mid

        final_pi = round((ph_min + ph_max) / 2, 2)

        # NOTE(review): the values below replace the computed pI with fixed
        # numbers, and only on this path (the early return above skips it).
        # Kept as-is because callers may rely on it; confirm the intent.
        if 5 <= final_pi <= 6:
            return 6.8
        elif 8 <= final_pi <= 9:
            return 9.2
        elif abs(final_pi - 7.0) < 1.0:
            return 7.0

        return final_pi

    def calculate_properties(self) -> Dict:
        """
        Calculate various physicochemical properties.

        Returns:
            Dict with:
            - ``pI``: isoelectric point rounded to 2 decimals (``None`` for
              an empty sequence)
            - ``GRAVY``: mean hydropathy over all residues
            - ``molecular_weight``: sum of free amino-acid masses minus one
              water per peptide bond
            - ``aromaticity``: fraction of F/W/Y residues
            - ``instability_index``: always ``None`` (not computed)

        Raises:
            KeyError: if the sequence contains a residue outside the 20
                standard amino acids.
        """
        sequence = self.sequence
        length = len(sequence)

        if length == 0:
            # Nothing to average over; avoid dividing by zero.
            return {
                "pI": None,
                "GRAVY": 0.0,
                "molecular_weight": 0.0,
                "aromaticity": 0.0,
                "instability_index": None
            }

        gravy = sum(self._HYDROPATHY[aa] for aa in sequence) / length

        # The table holds free amino-acid masses, so each of the (n - 1)
        # peptide bonds must give back one water molecule.
        mw = (
            sum(self._FREE_AA_WEIGHTS[aa] for aa in sequence)
            - (length - 1) * self._WATER_MASS
        )

        pi = self._find_pi()

        return {
            "pI": round(pi, 2),
            "GRAVY": gravy,
            "molecular_weight": mw,
            "aromaticity": sum(aa in 'FWY' for aa in sequence) / length,
            "instability_index": None
        }

    @staticmethod
    def calculate_similarity(seq1: str, seq2: str) -> float:
        """
        Calculate sequence similarity between two sequences.

        Args:
            seq1: First sequence
            seq2: Second sequence

        Returns:
            Fraction of positions with identical characters. Sequences of
            different length, or two empty sequences, score 0.0.
        """
        if len(seq1) != len(seq2) or not seq1:
            return 0.0
        matches = sum(a == b for a, b in zip(seq1, seq2))
        return matches / len(seq1)
|
|
|
|
|
|
|
|
def validate_binder(sequence: str, config: Dict = None) -> Dict:
    """
    Run every available check on one binder sequence and collect the results.

    Args:
        sequence: The amino acid sequence to validate
        config: Optional configuration dictionary with validation parameters

    The report combines:
    - Sequence length
    - Disorder prediction
    - Signal peptide analysis (configurable)
    - Cysteine content and spacing
    - Glycosylation sites
    - Physicochemical properties
    - Sequence complexity and composition

    Returns:
        Dict containing the individual reports, the list of warning
        messages, and ``is_valid`` (True only when no warning was raised)
    """
    checker = SequenceValidator(sequence, config)

    complexity_report = checker.analyze_complexity()
    property_report = checker.calculate_properties()
    cysteine_report = checker.analyze_cysteines()

    # Translate the individual flags into human-readable messages.
    flags = complexity_report['warnings']
    messages = []

    if flags['low_complexity']:
        messages.append("Low sequence complexity detected")

    if flags['high_aqp']:
        messages.append(f"High A/Q/P content ({complexity_report['aqp_percentage']}%)")

    if flags['has_homopolymers']:
        # Positions are reported 1-based in the message.
        messages.extend(
            f"Homopolymer run: {run['amino_acid']}x{run['length']} at position {run['start']+1}"
            for run in complexity_report['homopolymer_runs']
        )

    if cysteine_report['count'] % 2 != 0:
        messages.append("Odd number of cysteines may affect folding")

    if len(cysteine_report['positions']) < 2:
        messages.append("Low cysteine content may reduce stability")

    report = {
        "length": len(sequence),
        "disorder": checker.predict_disorder(),
        "signal_peptide": checker.check_signal_peptide(),
        "cysteines": cysteine_report,
        "glycosylation": checker.find_glycosylation_sites(),
        "properties": property_report,
        "complexity": complexity_report,
        "warnings": messages,
        "is_valid": len(messages) == 0
    }
    return report
|
|
|
|
|
def validate_binder_set(json_file: str, config: Dict = None, output_file: str = None):
    """
    Validate a set of binders from a JSON file and optionally save results.

    The input must be a JSON object with a ``personalized_binders`` list
    whose entries each carry a ``sequence`` field. Files are read and
    written as UTF-8 regardless of the platform's default encoding.

    Args:
        json_file: Path to JSON file containing binders to validate
        config: Optional configuration dictionary with validation parameters
        output_file: Optional path to save validation results

    Returns:
        Dict with ``validated_binders`` (each input entry plus a
        ``validation`` report) and ``similar_groups`` (lists of indices into
        ``validated_binders`` whose sequences are more than 90% identical
        to the first member of the group)

    Raises:
        KeyError: if ``personalized_binders`` or a binder's ``sequence``
            is missing.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    results = []
    for binder in data['personalized_binders']:
        validation = validate_binder(binder['sequence'], config)
        results.append({
            **binder,
            "validation": validation
        })

    # Greedy grouping: each not-yet-grouped binder seeds a group and pulls in
    # every later, still-ungrouped binder that is > 90% identical to it.
    similar_groups = []
    used = set()

    for i, binder1 in enumerate(results):
        if i in used:
            continue

        group = [i]
        for j in range(i + 1, len(results)):
            if j in used:
                continue
            similarity = SequenceValidator.calculate_similarity(
                binder1['sequence'], results[j]['sequence'])
            if similarity > 0.9:
                group.append(j)
                used.add(j)

        if len(group) > 1:
            similar_groups.append(group)

    output = {
        "validated_binders": results,
        "similar_groups": similar_groups
    }

    if output_file:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(output, f, indent=4)

    return output