File size: 9,685 Bytes
43e7ae4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
"""
Molecular Analysis Module

This module contains functions for molecular structure analysis,
property calculations, and drug-likeness assessment.
"""

from rdkit import Chem
from rdkit.Chem import Draw, Descriptors, Crippen


def calculate_molecular_properties(smiles):
    """Calculate key molecular properties for drug discovery.

    Args:
        smiles: SMILES string describing the molecule.

    Returns:
        A dict with the keys 'Molecular Weight', 'LogP', 'HBD', 'HBA',
        'TPSA', 'Rotatable Bonds', 'Aromatic Rings', 'Heavy Atoms',
        'Lipinski Violations' and 'Drug-like', or None when ``smiles`` is
        not a non-blank string or cannot be parsed.
    """
    # Guard before touching the parser: it expects a string, and a blank
    # SMILES carries no structure worth describing.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles.strip())
    if mol is None:
        return None

    mol_weight = Descriptors.MolWt(mol)
    log_p = Crippen.MolLogP(mol)
    h_donors = Descriptors.NumHDonors(mol)
    h_acceptors = Descriptors.NumHAcceptors(mol)

    # Lipinski's Rule of Five, evaluated on the unrounded values so that a
    # molecule sitting just above a threshold is not masked by the
    # two-decimal rounding used for display.
    lipinski_violations = sum((
        mol_weight > 500,
        log_p > 5,
        h_donors > 5,
        h_acceptors > 10,
    ))

    return {
        'Molecular Weight': round(mol_weight, 2),
        'LogP': round(log_p, 2),
        'HBD': h_donors,
        'HBA': h_acceptors,
        'TPSA': round(Descriptors.TPSA(mol), 2),
        'Rotatable Bonds': Descriptors.NumRotatableBonds(mol),
        'Aromatic Rings': Descriptors.NumAromaticRings(mol),
        'Heavy Atoms': mol.GetNumHeavyAtoms(),
        'Lipinski Violations': lipinski_violations,
        # One violation is still tolerated as drug-like.
        'Drug-like': lipinski_violations <= 1,
    }


def generate_molecule_image(smiles, size=(300, 300)):
    """Render the structure described by a SMILES string as an image.

    Args:
        smiles: SMILES string to draw.
        size: (width, height) passed through to the drawing call.

    Returns:
        The image produced by ``Draw.MolToImage``, or None when parsing or
        drawing fails for any reason (the failure is printed, not raised).
    """
    rendered = None
    try:
        parsed = Chem.MolFromSmiles(smiles)
        if parsed:
            # Create a high-quality image
            picture = Draw.MolToImage(parsed, size=size, kekulize=True)
            if picture is not None:
                print(f"Generated image successfully: {size}, mode: {picture.mode}")
                rendered = picture
            else:
                print(f"Draw.MolToImage returned None for SMILES: {smiles}")
        else:
            print(f"Failed to parse SMILES: {smiles}")
    except Exception as err:
        print(f"Error in generate_molecule_image: {err}")
    return rendered


def validate_smiles(smiles):
    """Validate a SMILES string and describe the problem if it is invalid.

    Args:
        smiles: Candidate SMILES text; surrounding whitespace is ignored.

    Returns:
        None when the string parses, otherwise a message for the user:
        a short prompt for missing/blank/non-string input, or a Markdown
        block naming the most likely syntax issue plus tips and examples.
    """
    # Non-string input is handled like missing input instead of crashing
    # on ``.strip()``.
    if not isinstance(smiles, str) or not smiles.strip():
        return "Please enter a SMILES string"

    # Parse, diagnose and echo the same stripped text so the heuristics
    # below describe exactly what the parser saw.
    candidate = smiles.strip()
    if Chem.MolFromSmiles(candidate) is not None:
        return None

    # Cheap heuristics for the most common mistakes; first match wins.
    if candidate.count('(') != candidate.count(')'):
        issue = "Unmatched parentheses"
    elif candidate.count('[') != candidate.count(']'):
        issue = "Unmatched brackets"
    elif any(mark in candidate for mark in ('@', '\\', '/')) and 'C' not in candidate:
        issue = "Invalid stereochemistry notation"
    else:
        issue = "General syntax error"

    return "".join([
        f"❌ **Invalid SMILES string:** `{candidate}`\n\n",
        f"🔍 **Issue detected:** {issue}\n",
        "\n**💡 Tips for complex SMILES:**\n",
        "- Complex molecules are supported! The issue is likely syntax\n",
        "- Check parentheses and brackets are balanced\n",
        "- Verify ring closure numbers (e.g., C1CCCC1)\n",
        "- Use proper stereochemistry notation (@, @@, /, \\)\n",
        "- Try breaking complex molecules into smaller parts first\n\n",
        "**🧪 Examples of complex valid SMILES:**\n",
        "- `CC(=O)OC1=CC=CC=C1C(=O)O` (Aspirin)\n",
        "- `CN1C=NC2=C1C(=O)N(C(=O)N2C)C` (Caffeine)\n",
        "- `C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O` (Glucose)\n",
        # Trailing methyl restored: penicillin G carries a gem-dimethyl group.
        "- `CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C` (Penicillin)\n",
    ])


def validate_smiles_realtime(smiles):
    """Give immediate feedback on a SMILES string while the user types.

    Args:
        smiles: Current contents of the input field (may be empty or None).

    Returns:
        A ``(status_text, preview_image)`` tuple. The image is None for
        blank or invalid input, or when the preview could not be drawn.
    """
    if not smiles or not smiles.strip():
        return "✅ Ready to analyze", None

    if validate_smiles(smiles):
        return "❌ Invalid SMILES", None

    # The preview is best-effort: a drawing failure must not change the
    # "valid" verdict, but it is reported instead of silently discarded.
    preview = None
    try:
        mol = Chem.MolFromSmiles(smiles.strip())
        if mol is not None:
            preview = Draw.MolToImage(mol, size=(200, 200), kekulize=True)
    except Exception as e:
        print(f"Could not render SMILES preview: {e}")

    return "✅ Valid SMILES", preview


def analyze_molecule(smiles):
    """Analyze a molecule and return a Markdown report plus its image.

    Args:
        smiles: SMILES string to analyze.

    Returns:
        A ``(report, image)`` tuple. On validation or calculation failure
        the report is the error message and the image is None. When only
        the drawing fails, the report gains a warning and the image is None.
    """
    # Validate SMILES first
    validation_error = validate_smiles(smiles)
    if validation_error:
        return validation_error, None

    # Validation parses the stripped text, so analyse that same text.
    smiles = smiles.strip()

    # Calculate properties
    properties = calculate_molecular_properties(smiles)
    if not properties:
        return "Error calculating molecular properties", None

    # The SMILES goes inside a fenced code block so Markdown renders it
    # verbatim.
    result = (
        f"**Molecular Analysis for:**\n```\n{smiles}\n```\n\n"
        "**Basic Properties:**\n"
        f"- Molecular Weight: {properties['Molecular Weight']} g/mol\n"
        f"- LogP: {properties['LogP']}\n"
        f"- TPSA: {properties['TPSA']} Å²\n"
        f"- Heavy Atoms: {properties['Heavy Atoms']}\n\n"
        "**Hydrogen Bonding:**\n"
        f"- HBD (Donors): {properties['HBD']}\n"
        f"- HBA (Acceptors): {properties['HBA']}\n\n"
        "**Structural Features:**\n"
        f"- Rotatable Bonds: {properties['Rotatable Bonds']}\n"
        f"- Aromatic Rings: {properties['Aromatic Rings']}\n\n"
        "**Drug-likeness:**\n"
        f"- Lipinski Violations: {properties['Lipinski Violations']}/4\n"
        f"- Drug-like: {'Yes' if properties['Drug-like'] else 'No'}\n"
    )

    # Generate molecular structure image; a drawing problem only adds a
    # warning to the report.
    try:
        molecule_img = generate_molecule_image(smiles)
    except Exception as e:
        molecule_img = None
        result += f"\n\n⚠️ **Warning:** Error generating molecular structure: {str(e)}"
    else:
        if molecule_img is None:
            result += "\n\n⚠️ **Warning:** Could not generate molecular structure image"

    return result, molecule_img


def analyze_molecule_image_only(smiles):
    """Analyze a molecule and return only the image for compact UI.

    Args:
        smiles: SMILES string to draw.

    Returns:
        The structure image (500x400, falling back to 300x300), a plain
        placeholder image carrying an error text if drawing raised, or None
        when the SMILES is invalid or no image could be produced.
    """
    # Validate SMILES first
    validation_error = validate_smiles(smiles)
    if validation_error:
        print(f"SMILES validation error: {validation_error}")
        return None

    # Validation parses the stripped text, so draw that same text.
    smiles = smiles.strip()

    # Generate molecule image
    try:
        molecule_img = generate_molecule_image(smiles, size=(500, 400))
        if molecule_img is not None:
            print(f"Successfully generated image for SMILES: {smiles}")
            return molecule_img

        print(f"Failed to generate image for SMILES: {smiles}")
        # Try with a different size as fallback
        molecule_img = generate_molecule_image(smiles, size=(300, 300))
        if molecule_img is None:
            print(f"Fallback image generation also failed for SMILES: {smiles}")
        return molecule_img
    except Exception as e:
        print(f"Error generating molecule image: {e}")

    # Drawing raised: build a simple text image so the UI still has
    # something to show.
    try:
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new('RGB', (500, 400), color='white')
        draw = ImageDraw.Draw(img)
        try:
            # Try to use a default font
            font = ImageFont.load_default()
        except Exception:
            font = None

        # Only mark the SMILES as truncated when it actually was.
        shown = smiles if len(smiles) <= 50 else f"{smiles[:50]}..."
        text = f"Error generating molecule\nSMILES: {shown}"
        draw.text((50, 200), text, fill='red', font=font)
        return img
    except Exception as fallback_error:
        print(f"Could not create fallback image: {fallback_error}")
        return None


def get_molecule_properties_for_hover(smiles):
    """Get molecular properties formatted for hover tooltip.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        Markdown text listing the calculated properties, or a string
        starting with ``**Error:**`` when validation or calculation fails.
    """
    # Validate SMILES first
    validation_error = validate_smiles(smiles)
    if validation_error:
        print(f"SMILES validation error in properties: {validation_error}")
        return f"**Error:** {validation_error}"

    # Validation parses the stripped text, so analyse that same text.
    smiles = smiles.strip()

    # Calculate properties
    properties = calculate_molecular_properties(smiles)
    if not properties:
        print(f"Failed to calculate properties for SMILES: {smiles}")
        return f"**Error:** Could not calculate molecular properties for {smiles}"

    # Format properties for display
    hover_text = (
        "**Basic Properties:**\n"
        f"• Molecular Weight: {properties['Molecular Weight']} g/mol\n"
        f"• LogP: {properties['LogP']}\n"
        f"• TPSA: {properties['TPSA']} Å²\n"
        f"• Heavy Atoms: {properties['Heavy Atoms']}\n\n"
        "**Hydrogen Bonding:**\n"
        f"• HBD (Donors): {properties['HBD']}\n"
        f"• HBA (Acceptors): {properties['HBA']}\n\n"
        "**Structural Features:**\n"
        f"• Rotatable Bonds: {properties['Rotatable Bonds']}\n"
        f"• Aromatic Rings: {properties['Aromatic Rings']}\n\n"
        "**Drug-likeness:**\n"
        f"• Lipinski Violations: {properties['Lipinski Violations']}/4\n"
        f"• Drug-like: {'Yes' if properties['Drug-like'] else 'No'}"
    )

    print(f"Generated properties text for SMILES: {smiles}")
    return hover_text