| | """ |
| | Parse Docking Results and Calculate Inhibition Factors |
| | |
| | This module translates in silico molecular docking predictions into metabolic model |
| | constraints. It accounts for the gap between docking predictions and in vivo reality |
| | through: |
| | |
| | 1. Saturation model: Accounts for limited drug molecules relative to enzyme targets |
| | 2. Global relaxation: Accounts for in vivo complexity (enzyme dynamics, competitive |
| | binding, compartmentalization, etc.) |
| | |
| | Scientific justification: Docking predictions often overestimate in vivo effects due to: |
| | - Static vs dynamic enzyme structures |
| | - Competitive binding with natural substrates |
| | - Cellular compartmentalization reducing effective drug concentration |
| | - Protein-protein interactions affecting binding sites |
| | - Metabolic network robustness and alternative pathways |
| | |
| | See SCIENTIFIC_JUSTIFICATION.md for detailed rationale. |
| | """ |
| |
|
| | import pandas as pd |
| | import numpy as np |
| | from typing import Dict, List, Tuple |
| | import os |
| |
|
def calculate_inhibition_factor(binding_affinity_um: float,
                                inhibition_type: str = 'competitive',
                                confidence_score: float = 1.0,
                                drug_concentration_um: float = 10.0,
                                enzyme_concentration_um: float = None,
                                global_relaxation_factor: float = 1.0) -> float:
    """
    Calculate the flux-scaling factor implied by a docking binding affinity.

    Args:
        binding_affinity_um: Binding affinity in µM (Kd, Ki, or IC50).
            NaN or non-positive values mean "no usable prediction".
        inhibition_type: Type of interaction. 'activator' / 'activation'
            (case-insensitive) select the activation branch; every other
            value, including a missing (non-string) one, is treated as an
            inhibitor.
        confidence_score: Confidence in the docking result (0-1). A missing
            (NaN) value is treated as the default 1.0 and values outside
            [0, 1] are clamped to that range.
        drug_concentration_um: Drug concentration in µM (default 10 µM).
        enzyme_concentration_um: Enzyme concentration in µM. When given and
            positive, the bound fraction is additionally capped at
            drug_conc / enzyme_conc; when None only the saturation model
            is used.
        global_relaxation_factor: Multiplier applied to the inhibition
            strength of inhibitors (1.0 = no relaxation). It is not applied
            to activators.

    Returns:
        Factor to scale the reaction by:
        - 1.0 when there is no usable affinity or no drug present;
        - in [1, 2] for activators: 1 + confidence * drug / (drug + Kd);
        - for inhibitors: 1 - confidence * relaxation * fraction_bound
          * per-enzyme inhibition, never lower than 0.005, where
          fraction_bound = drug_conc / (drug_conc + Kd).
    """
    # No usable prediction, or no drug to bind: leave the reaction untouched.
    if pd.isna(binding_affinity_um) or binding_affinity_um <= 0:
        return 1.0
    if pd.isna(drug_concentration_um) or drug_concentration_um <= 0:
        return 1.0

    # A blank confidence cell arrives as NaN. Left alone it would propagate
    # to the final max(0.005, nan), which evaluates to 0.005 (near-total
    # inhibition), so fall back to the documented default instead.
    if pd.isna(confidence_score):
        confidence_score = 1.0
    confidence_score = min(1.0, max(0.0, float(confidence_score)))

    # A blank inhibition_type cell is a float NaN, which has no .lower().
    if isinstance(inhibition_type, str):
        kind = inhibition_type.strip().lower()
    else:
        kind = 'competitive'

    if kind in ('activator', 'activation'):
        boost = 1.0 / (1.0 + binding_affinity_um / drug_concentration_um)
        # Confidence scales the *effect*, not the whole factor, so a
        # low-confidence activator tends towards 1.0 (no effect) rather
        # than dropping below 1.0 and acting as an inhibitor.
        return float(1.0 + boost * confidence_score)

    # Saturation model: fraction of enzyme molecules with drug bound.
    fraction_bound = drug_concentration_um / (drug_concentration_um + binding_affinity_um)

    # With a known enzyme pool, the drug cannot occupy more enzyme
    # molecules than there are drug molecules.
    if enzyme_concentration_um is not None and enzyme_concentration_um > 0:
        max_fraction_bound = min(1.0, drug_concentration_um / enzyme_concentration_um)
        fraction_bound = min(fraction_bound, max_fraction_bound)

    # Piecewise mapping from affinity to the inhibition of a bound enzyme:
    # tighter binding (smaller Kd) gives stronger inhibition.
    if binding_affinity_um < 0.001:
        base_inhibition_per_enzyme = 0.995
    elif binding_affinity_um < 0.01:
        # Log-linear from 0.98 (at 0.001 µM) down to 0.95 (at 0.01 µM).
        log_kd = np.log10(binding_affinity_um)
        base_inhibition_per_enzyme = 0.98 + (log_kd + 3) * (0.95 - 0.98)
        base_inhibition_per_enzyme = max(0.95, min(0.98, base_inhibition_per_enzyme))
    elif binding_affinity_um < 0.1:
        base_inhibition_per_enzyme = 0.90 + (0.1 - binding_affinity_um) / 0.09 * 0.05
    elif binding_affinity_um < 1.0:
        base_inhibition_per_enzyme = 0.70 + (1.0 - binding_affinity_um) / 0.9 * 0.20
    elif binding_affinity_um < 10.0:
        base_inhibition_per_enzyme = 0.40 + (10.0 - binding_affinity_um) / 9.0 * 0.30
    elif binding_affinity_um < 100.0:
        base_inhibition_per_enzyme = 0.10 + (100.0 - binding_affinity_um) / 90.0 * 0.30
    else:
        base_inhibition_per_enzyme = 0.10

    inhibition_strength = fraction_bound * base_inhibition_per_enzyme
    inhibition_strength *= global_relaxation_factor

    inhibition_factor = 1.0 - inhibition_strength * confidence_score

    # Never scale a reaction all the way to zero.
    return float(max(0.005, inhibition_factor))
| |
|
| |
|
def parse_docking_csv(csv_path: str,
                      drug_concentration_um: float = 0.01,
                      global_relaxation: float = 0.15,
                      per_molecule_relaxation: Dict[str, float] = None) -> pd.DataFrame:
    """
    Parse a docking results CSV and add an 'inhibition_factor' column.

    Expected columns:
        - molecule_name (required)
        - protein_name (or enzyme_name)
        - enzyme_id (optional)
        - reaction_id (if available)
        - binding_affinity_um (required)
        - binding_energy_kcal_mol (optional)
        - inhibition_type (required)
        - confidence_score (optional; missing column or blank cells -> 1.0)
        - species (required; MGK, PRM, or RFL)
        - binding_site (optional)

    Args:
        csv_path: Path of the CSV file to read.
        drug_concentration_um: Drug concentration in µM passed to
            calculate_inhibition_factor for every row (default 0.01).
        global_relaxation: Base relaxation factor (default 0.15).
        per_molecule_relaxation: Mapping of molecule name -> positive
            divisor. The relaxation factor used for a row is
            global_relaxation / divisor; molecules not in the mapping use
            a divisor of 1.0. Names are matched case-insensitively after
            stripping whitespace. When None, the built-in table below is
            used.

    Returns:
        The parsed DataFrame with an added float 'inhibition_factor'
        column (and an 'enzyme_name' column copied from 'protein_name'
        when only the latter is present).

    Raises:
        ValueError: If a required column is missing or a relaxation
            divisor is not positive.
    """
    df = pd.read_csv(csv_path)

    # Accept 'protein_name' as an alias for 'enzyme_name'.
    if 'protein_name' in df.columns and 'enzyme_name' not in df.columns:
        df['enzyme_name'] = df['protein_name']

    required_cols = ['molecule_name', 'binding_affinity_um',
                     'inhibition_type', 'species']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    if per_molecule_relaxation is None:
        per_molecule_relaxation = {
            'imidazole': 1.0,
            'l-carnitine': 30.0,
            'methyl jasmonate': 7.0,
            'propylpyrazine': 40.0,
        }
    divisors = {str(name).strip().lower(): divisor
                for name, divisor in per_molecule_relaxation.items()}
    bad_divisors = {name: d for name, d in divisors.items() if not d > 0}
    if bad_divisors:
        raise ValueError(f"Relaxation divisors must be positive: {bad_divisors}")

    # Normalise the per-row inputs once. Unparseable affinities become NaN
    # (which the calculator maps to "no effect"), and blank confidence
    # cells fall back to the same 1.0 used when the column is absent.
    molecules = df['molecule_name'].astype(str).str.strip().str.lower()
    affinities = pd.to_numeric(df['binding_affinity_um'], errors='coerce')
    if 'confidence_score' in df.columns:
        confidences = pd.to_numeric(df['confidence_score'], errors='coerce').fillna(1.0)
    else:
        confidences = pd.Series(1.0, index=df.index)

    factors = []
    for molecule, affinity, inhibition_type, confidence in zip(
            molecules, affinities, df['inhibition_type'], confidences):
        # A blank inhibition_type cell is NaN, not a string.
        if not isinstance(inhibition_type, str):
            inhibition_type = 'competitive'
        effective_relaxation = global_relaxation / divisors.get(molecule, 1.0)
        factors.append(calculate_inhibition_factor(
            affinity,
            inhibition_type,
            confidence,
            drug_concentration_um=drug_concentration_um,
            global_relaxation_factor=effective_relaxation,
        ))

    # Built explicitly rather than via df.apply(axis=1): on a header-only
    # CSV, apply returns a DataFrame and the column assignment fails.
    df['inhibition_factor'] = pd.Series(factors, index=df.index, dtype=float)

    return df
| |
|
| |
|
def group_by_reaction(docking_df: pd.DataFrame) -> Dict[str, Dict]:
    """
    Group docking results by reaction ID and combine them per reaction.

    Rows whose 'reaction_id' is missing (or when the column is absent) are
    skipped. The frame must provide 'species' and 'inhibition_factor'
    columns.

    Returns:
        Dictionary keyed by reaction ID. Each value is a dict with:
        - 'species': species of the first row seen for that reaction
        - 'inhibition_factors', 'inhibition_types', 'binding_affinities',
          'confidence_scores': per-row values, in row order
        - 'inhibition_factor': the single row's factor, or for several
          rows the confidence-weighted average of their factors (plain
          mean when the confidence weights do not sum to a positive value)
        - 'inhibition_type': inhibition type of the first row
    """
    reaction_data: Dict[str, Dict] = {}

    for _, row in docking_df.iterrows():
        reaction_id = row.get('reaction_id')
        if pd.isna(reaction_id):
            continue

        if reaction_id not in reaction_data:
            reaction_data[reaction_id] = {
                'species': row['species'],
                'inhibition_factors': [],
                'inhibition_types': [],
                'binding_affinities': [],
                'confidence_scores': [],
            }

        entry = reaction_data[reaction_id]
        entry['inhibition_factors'].append(row['inhibition_factor'])
        entry['inhibition_types'].append(row.get('inhibition_type', 'competitive'))
        entry['binding_affinities'].append(row.get('binding_affinity_um', np.nan))
        entry['confidence_scores'].append(row.get('confidence_score', 1.0))

    for data in reaction_data.values():
        if len(data['inhibition_factors']) > 1:
            factors = np.asarray(data['inhibition_factors'], dtype=float)
            # Missing confidences count as the default 1.0; negative ones
            # carry no weight.
            weights = pd.to_numeric(pd.Series(data['confidence_scores']),
                                    errors='coerce').fillna(1.0).to_numpy(dtype=float)
            weights = np.clip(weights, 0.0, None)
            if weights.sum() > 0:
                combined = np.average(factors, weights=weights)
            else:
                # np.average raises ZeroDivisionError when the weights sum
                # to zero; fall back to an unweighted mean.
                combined = factors.mean()
            data['inhibition_factor'] = float(combined)
        else:
            data['inhibition_factor'] = data['inhibition_factors'][0]

        # The first row's type represents the whole reaction.
        data['inhibition_type'] = data['inhibition_types'][0]

    return reaction_data
| |
|
| |
|
def create_constraint_dict(molecule_name: str, docking_csv_path: str,
                           enzyme_map_path: str = None) -> Dict[str, Dict]:
    """
    Create a dictionary of constraints to apply to the model.

    Args:
        molecule_name: Molecule to select from the docking results
            (matched case-insensitively, ignoring surrounding whitespace).
        docking_csv_path: Path to the docking results CSV.
        enzyme_map_path: When truthy and the CSV has no 'reaction_id'
            column, enzymes are mapped to reactions through
            enzyme_reaction_mapper. NOTE(review): the value itself is not
            passed to the mapper; the model file list is fixed below.

    Returns:
        Dictionary: {
            'MGK': {reaction_id: {inhibition_factor: X, inhibition_type: Y}, ...},
            'PRM': {reaction_id: {inhibition_factor: X, inhibition_type: Y}, ...},
            'RFL': {reaction_id: {inhibition_factor: X, inhibition_type: Y}, ...}
        }
        Rows whose species is not one of these three codes are ignored.
    """
    constraints = {'MGK': {}, 'PRM': {}, 'RFL': {}}

    docking_df = parse_docking_csv(docking_csv_path)

    # Keep only the rows for the requested molecule.
    if 'molecule_name' in docking_df.columns:
        wanted = molecule_name.strip().lower()
        names = docking_df['molecule_name'].astype(str).str.strip().str.lower()
        docking_df = docking_df[names == wanted].copy()
    if len(docking_df) == 0:
        print(f"Warning: No docking results found for molecule '{molecule_name}'")
        return constraints

    # Derive reaction IDs from enzyme names when the CSV does not carry them.
    if enzyme_map_path and 'reaction_id' not in docking_df.columns:
        # Only the import is guarded, so an ImportError raised while the
        # mapper is running is not mistaken for "not running as a package".
        try:
            from .enzyme_reaction_mapper import map_docking_to_reactions, create_enzyme_reaction_map
        except ImportError:
            # Fallback for running as a plain script: make the parent of
            # this file's directory importable, once.
            import sys
            import os
            package_parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            if package_parent not in sys.path:
                sys.path.append(package_parent)
            from docking_integration.enzyme_reaction_mapper import map_docking_to_reactions, create_enzyme_reaction_map
        model_files = ['M. gottschalkii.xml', 'P. ruminicola.xml', 'R. flavefaciens.xml']
        enzyme_map = create_enzyme_reaction_map(model_files)
        docking_df = map_docking_to_reactions(docking_df, enzyme_map)

    # Group per species before grouping per reaction: the same reaction ID
    # can occur in more than one organism, and those rows must neither be
    # averaged together nor all credited to whichever species came first.
    species_codes = docking_df['species'].astype(str).str.strip().str.upper()
    for species, species_df in docking_df.groupby(species_codes):
        if species not in constraints:
            continue
        for reaction_id, data in group_by_reaction(species_df).items():
            constraints[species][reaction_id] = {
                'inhibition_factor': data['inhibition_factor'],
                'inhibition_type': data['inhibition_type'],
            }

    return constraints
| |
|