# Provenance: copied from a Hugging Face file view (uploader: gsstec,
# commit 45befc5 "Upload 4 files", verified, 8.81 kB).
import gradio as gr
import os
import json
from fastapi import FastAPI
from fastapi.responses import JSONResponse
# Lightweight drug-likeness scoring API built on RDKit.
# NOTE: this does not run DiffDock or any real docking. The "confidence" score is a
# heuristic derived from ligand descriptors only; the protein input is accepted but unused.
def _score_drug_likeness(mw, logp, rotatable_bonds, lipinski_pass):
    """Return the heuristic drug-likeness score, clamped to the range [0.0, 1.0]."""
    score = 0.5  # base score

    # Reward Lipinski Rule-of-Five compliance.
    if lipinski_pass:
        score += 0.2

    # Molecular weight: 300-500 is treated as the optimal range.
    if 300 <= mw <= 500:
        score += 0.1
    elif mw > 500:
        score -= 0.1

    # LogP: 0-3 is treated as the optimal range.
    if 0 <= logp <= 3:
        score += 0.1
    elif logp > 5:
        score -= 0.15

    # Rotatable bonds: fewer is treated as better.
    if rotatable_bonds <= 5:
        score += 0.1
    elif rotatable_bonds > 10:
        score -= 0.1

    return max(0.0, min(1.0, score))


def _pdb_atom_line_to_pdbqt(line, partial_charge=0.0):
    """Convert one PDB ATOM/HETATM record into a PDBQT atom record.

    PDBQT keeps PDB columns 1-66 unchanged (they already contain the
    coordinates, occupancy and temperature factor), then adds the partial
    charge in columns 71-76 and the atom type starting at column 78.
    """
    # Pad short records so the fixed-column slices below are always valid.
    record = line.rstrip("\r\n").ljust(78)
    # The PDB element symbol occupies BOTH columns 77-78 (right-justified);
    # taking a single column would truncate two-letter elements such as Cl/Br.
    # NOTE(review): the element symbol is used as the atom type. This is not
    # full AutoDock typing (e.g. aromatic "A", acceptor "OA", donor "HD").
    element = record[76:78].strip().capitalize()
    return f"{record[:66]}    {partial_charge:6.3f} {element:<2}"


def _pdb_block_to_pdbqt(pdb_block):
    """Return PDBQT atom records for every ATOM/HETATM line, or None if there are none."""
    records = [
        _pdb_atom_line_to_pdbqt(line)
        for line in pdb_block.splitlines()
        if line.startswith(("HETATM", "ATOM"))
    ]
    return "\n".join(records) if records else None


def run_diffdock_inference(protein_pdb_content, ligand_smiles_string):
    """
    Score a ligand's drug-likeness with RDKit and build a simplified PDBQT block.

    Despite the name, no docking is performed: the score is a heuristic built
    from ligand descriptors (molecular weight, LogP, rotatable bonds and
    Lipinski compliance).

    Args:
        protein_pdb_content: Protein PDB text. Accepted for interface
            compatibility; it is not read by this function.
        ligand_smiles_string: Ligand SMILES string. Surrounding whitespace is
            ignored.

    Returns:
        A JSON-serializable dict. On success it contains "success": True,
        "diffdock_confidence_score", "hardware_allocation",
        "molecular_properties", "ligand_pdbqt" (None when 3D generation
        fails) and "note". On failure it contains "success": False and
        "error_log"; unexpected errors additionally set
        "diffdock_confidence_score" to -1.0. This function does not raise.
    """
    try:
        # Validate ligand input (protein is optional for drug-likeness analysis)
        if not ligand_smiles_string or not ligand_smiles_string.strip():
            return {
                "success": False,
                "error_log": "Missing ligand SMILES string"
            }

        # Imported lazily so a missing RDKit install is reported through the
        # error dict below instead of breaking module import.
        from rdkit import Chem
        from rdkit.Chem import AllChem, Descriptors

        # Parse the same stripped text that was validated above, so pasted
        # leading/trailing whitespace or newlines cannot affect parsing.
        mol = Chem.MolFromSmiles(ligand_smiles_string.strip())
        if mol is None:
            return {
                "success": False,
                "error_log": "Invalid SMILES string"
            }

        # Descriptors used by the heuristic score and reported to the caller.
        mw = Descriptors.MolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = Descriptors.NumHDonors(mol)
        hba = Descriptors.NumHAcceptors(mol)
        tpsa = Descriptors.TPSA(mol)
        rotatable_bonds = Descriptors.NumRotatableBonds(mol)

        # Lipinski's Rule of Five compliance
        lipinski_pass = mw <= 500 and logp <= 5 and hbd <= 5 and hba <= 10

        confidence_score = _score_drug_likeness(
            mw, logp, rotatable_bonds, lipinski_pass
        )

        # 3D/PDBQT generation is deliberately best-effort: any failure leaves
        # "ligand_pdbqt" as None while the descriptor results are still returned.
        ligand_pdbqt = None
        try:
            mol_3d = Chem.AddHs(mol)
            # EmbedMolecule returns -1 when no conformer could be generated;
            # only continue when coordinates actually exist.
            if AllChem.EmbedMolecule(mol_3d, randomSeed=42) >= 0:
                AllChem.MMFFOptimizeMolecule(mol_3d)
                ligand_pdbqt = _pdb_block_to_pdbqt(Chem.MolToPDBBlock(mol_3d))
        except Exception:
            ligand_pdbqt = None

        # Build result with explicit JSON-serializable types
        return {
            "success": True,
            "diffdock_confidence_score": float(round(confidence_score, 4)),
            "hardware_allocation": str("HF_FREE_CPU_TIER"),
            "molecular_properties": {
                "molecular_weight": float(round(mw, 2)),
                "logP": float(round(logp, 2)),
                "h_bond_donors": int(hbd),
                "h_bond_acceptors": int(hba),
                "tpsa": float(round(tpsa, 2)),
                "rotatable_bonds": int(rotatable_bonds),
                "lipinski_compliant": bool(lipinski_pass)
            },
            "ligand_pdbqt": ligand_pdbqt,
            "note": str("RDKit-based drug-likeness scoring with PDBQT generation")
        }
    except Exception as runtime_fault:
        # Top-level boundary: report any unexpected failure (including a
        # missing RDKit install or a non-string input) as an error result.
        return {
            "success": False,
            "error_log": str(runtime_fault)[:200],
            "diffdock_confidence_score": float(-1.0)
        }
# 🌐 FastAPI app for custom endpoints
app = FastAPI()


# Both decorators register the same handler, so monitors that probe with HEAD
# and those that probe with GET are answered by one function.
@app.get("/ping")
@app.head("/ping")
async def ping():
    """Health check endpoint for UptimeRobot monitoring - supports both GET and HEAD"""
    # Static payload: nothing here is measured at request time.
    health_payload = {
        "status": "online",
        "service": "RDKit Drug-Likeness Engine",
        "hardware": "HF_FREE_CPU_RDKIT_CORE",
        "uptime": "active",
    }
    return JSONResponse(content=health_payload)
# Create Gradio interface
# Layout: inputs and the submit button in the left column, the analysis result
# in the right column, followed by static help text.
# NOTE: the bodies of the multi-line Markdown strings are kept flush-left on
# purpose so that no extra leading whitespace is added to the rendered text.
with gr.Blocks(title="GSS DiffDock Engine") as demo:
    gr.Markdown("# Gaston Software Solutions LLP — Window 8 Engine")
    gr.Markdown("**Active Mode**: RDKit Molecular Analysis on CPU")
    gr.Markdown("Analyzes drug-likeness and binding potential using computational chemistry.")
    with gr.Row():
        with gr.Column():
            protein_input = gr.Textbox(
                label="Protein PDB Content",
                placeholder="Paste PDB file content here (used for context)...",
                lines=10
            )
            ligand_input = gr.Textbox(
                label="Ligand SMILES String",
                placeholder="e.g., CC(=O)Oc1ccccc1C(=O)O (Aspirin)",
                lines=2,
                value="CC(=O)Oc1ccccc1C(=O)O"
            )
            submit_btn = gr.Button("Analyze Drug Candidate", variant="primary")
        with gr.Column():
            # The handler returns a dict. A Textbox would display its Python
            # string form (True/None, single quotes) instead of JSON, so a JSON
            # component is used; this also matches the response shape
            # documented under "API Usage" below.
            output_json = gr.JSON(label="Analysis Result (JSON)")
    submit_btn.click(
        fn=run_diffdock_inference,
        inputs=[protein_input, ligand_input],
        outputs=output_json,
        api_name="execute_diffdock_prediction"
    )
    gr.Markdown("---")
    gr.Markdown("### Example SMILES Strings")
    gr.Markdown("""
- **Aspirin**: `CC(=O)Oc1ccccc1C(=O)O`
- **Ibuprofen**: `CC(C)Cc1ccc(cc1)C(C)C(=O)O`
- **Remdesivir**: `CCC(CC)COC(=O)C(C)NP(=O)(OCC1C(C(C(O1)n2ccc(=O)[nH]c2=O)O)O)Oc3ccccc3`
- **Chloroquine**: `CCN(CC)CCCC(C)Nc1ccnc2cc(Cl)ccc12`
""")
    gr.Markdown("---")
    gr.Markdown("### API Usage")
    gr.Markdown("""
**Endpoint**: `/api/execute_diffdock_prediction`
**Request**:
```bash
curl -X POST "https://YOUR-SPACE.hf.space/api/execute_diffdock_prediction" \\
-H "Content-Type: application/json" \\
-d '{"data": ["PDB_CONTENT", "SMILES_STRING"]}'
```
**Response**:
```json
{
"data": [{
"success": true,
"diffdock_confidence_score": 0.7500,
"molecular_properties": {
"molecular_weight": 180.16,
"logP": 1.19,
"h_bond_donors": 1,
"h_bond_acceptors": 4,
"lipinski_compliant": true
}
}]
}
```
**Note**: This uses RDKit for fast drug-likeness analysis. Scores are based on Lipinski's Rule of Five and molecular properties that correlate with binding affinity.
""")
# Launch with proper configuration for Hugging Face Spaces
if __name__ == "__main__":
    import uvicorn

    # Listen on every interface, on port 7860.
    listen_host, listen_port = "0.0.0.0", 7860

    # Attach the Gradio UI at the root path of the FastAPI app, then serve the
    # combined application with uvicorn.
    app = gr.mount_gradio_app(app, demo, path="/")
    uvicorn.run(app, host=listen_host, port=listen_port)
# Made with Bob