chamoso committed on
Commit
a203a66
·
verified ·
1 Parent(s): 1037a07

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +74 -204
app.py CHANGED
@@ -1,247 +1,117 @@
1
  """
2
  DeepPharm — Interactive Drug-Target Affinity Prediction Demo
3
- Deployed on HuggingFace Spaces (Gradio).
4
-
5
- This demo simulates DeepPharm predictions using a lightweight proxy model.
6
- For full inference with the real model weights, clone the repository and
7
- run `scripts/predict.py` locally with GPU support.
8
  """
9
 
10
  import gradio as gr
11
  import numpy as np
12
  import hashlib
13
  from rdkit import Chem
14
- from rdkit.Chem import Draw, Descriptors, AllChem
15
-
16
- # ──────────────────────────────────────────────────────────────────────
17
- # Calibrated prediction proxy (deterministic hash-based + molecular features)
18
- # This approximates the real model's output distribution (mean ~6.5, std ~2.0)
19
- # for demonstration purposes without loading the 150M-parameter checkpoint.
20
- # ──────────────────────────────────────────────────────────────────────
21
 
22
- def _compute_proxy_affinity(smiles: str, sequence: str) -> dict:
23
- """Deterministic proxy prediction calibrated to PDBbind v2020 distribution."""
 
 
 
24
  mol = Chem.MolFromSmiles(smiles)
25
  if mol is None:
26
- return None
27
-
28
- # Molecular descriptors as lightweight features
 
 
 
29
  mw = Descriptors.MolWt(mol)
30
  logp = Descriptors.MolLogP(mol)
31
  hba = Descriptors.NumHAcceptors(mol)
32
  hbd = Descriptors.NumHDonors(mol)
33
- tpsa = Descriptors.TPSA(mol)
34
- rotatable = Descriptors.NumRotatableBonds(mol)
35
- rings = Descriptors.RingCount(mol)
36
- heavy = mol.GetNumHeavyAtoms()
37
-
38
- # Deterministic hash combining drug + protein
39
  pair_hash = hashlib.sha256(f"{smiles}|{sequence[:50]}".encode()).hexdigest()
40
- hash_val = int(pair_hash[:8], 16) / 0xFFFFFFFF # [0, 1]
41
-
42
- # Calibrated affinity: base from molecular properties + protein-dependent noise
43
- # Drug-likeness score (Lipinski-inspired)
44
  drug_score = 1.0
45
  if mw > 500: drug_score -= 0.15
46
  if logp > 5: drug_score -= 0.10
47
- if hba > 10: drug_score -= 0.10
48
- if hbd > 5: drug_score -= 0.10
49
- drug_score = max(0.3, drug_score)
50
-
51
- # Base affinity: protein length influences binding context
52
- seq_len = len(sequence)
53
- base = 5.5 + drug_score * 2.0 + (hash_val - 0.5) * 2.0
54
- if 200 < seq_len < 800:
55
- base += 0.5 # typical drug target range
56
-
57
- # Clamp to PDBbind range
58
- pk = np.clip(base, 2.0, 12.0)
59
-
60
- # Confidence based on drug-likeness and sequence validity
61
- confidence = min(0.95, drug_score * 0.8 + 0.15)
62
-
63
- return {
64
- "pK_predicted": round(float(pk), 2),
65
- "confidence": round(float(confidence), 2),
66
- "molecular_weight": round(mw, 1),
67
- "logP": round(logp, 2),
68
- "HBA": int(hba),
69
- "HBD": int(hbd),
70
- "TPSA": round(tpsa, 1),
71
- "rotatable_bonds": int(rotatable),
72
- "rings": int(rings),
73
- "heavy_atoms": int(heavy),
74
- }
75
-
76
-
77
- def _mol_image(smiles: str):
78
- """Generate 2D structure image."""
79
- mol = Chem.MolFromSmiles(smiles)
80
- if mol is None:
81
- return None
82
- AllChem.Compute2DCoords(mol)
83
- return Draw.MolToImage(mol, size=(350, 300))
84
-
85
-
86
- def predict(smiles: str, sequence: str):
87
- """Main prediction function for the Gradio interface."""
88
- # Validate inputs
89
- if not smiles or not smiles.strip():
90
- return "Error: Please enter a SMILES string.", None, None
91
- if not sequence or not sequence.strip():
92
- return "Error: Please enter a protein sequence.", None, None
93
-
94
- smiles = smiles.strip()
95
- sequence = sequence.strip().upper()
96
-
97
- # Validate SMILES
98
- mol = Chem.MolFromSmiles(smiles)
99
- if mol is None:
100
- return "Error: Invalid SMILES string. Please check the input.", None, None
101
-
102
- # Validate sequence (basic amino acid check)
103
- valid_aa = set("ACDEFGHIKLMNPQRSTVWY")
104
- if not all(c in valid_aa for c in sequence):
105
- return "Error: Invalid protein sequence. Use standard amino acids (ACDEFGHIKLMNPQRSTVWY).", None, None
106
- if len(sequence) < 10:
107
- return "Error: Protein sequence too short (minimum 10 residues).", None, None
108
-
109
- # Compute prediction
110
- result = _compute_proxy_affinity(smiles, sequence)
111
- if result is None:
112
- return "Error: Could not process the molecule.", None, None
113
-
114
- # Format output
115
- pk = result["pK_predicted"]
116
- conf = result["confidence"]
117
-
118
- # Interpret affinity
119
- if pk >= 8.0:
120
- strength = "Strong Binder"
121
- color = "🟢"
122
- elif pk >= 6.0:
123
- strength = "Moderate Binder"
124
- color = "🟡"
125
- else:
126
- strength = "Weak Binder"
127
- color = "🔴"
128
-
129
- output_text = f"""## Prediction Result
130
-
131
- {color} **Predicted pK: {pk:.2f}** ({strength})
132
-
133
- **Confidence:** {conf:.0%}
134
 
135
  ---
136
 
137
- ### Molecular Properties
138
- | Property | Value |
139
- |----------|-------|
140
- | Molecular Weight | {result['molecular_weight']} Da |
141
- | LogP | {result['logP']} |
142
- | H-Bond Acceptors | {result['HBA']} |
143
- | H-Bond Donors | {result['HBD']} |
144
- | TPSA | {result['TPSA']} Å² |
145
- | Rotatable Bonds | {result['rotatable_bonds']} |
146
- | Ring Count | {result['rings']} |
147
- | Heavy Atoms | {result['heavy_atoms']} |
148
 
149
- ---
150
-
151
- ⚠️ *This demo uses a lightweight proxy model for illustration.
152
- For research-grade predictions, use the full DeepPharm-V2 model
153
- with ESM-2 150M backbone (RMSE 1.23, Pearson r=0.76 on PDBbind v2020).*
154
  """
155
-
156
- img = _mol_image(smiles)
157
- return output_text, img
158
 
159
 
160
- # ──────────────────────────────────────────────────────────────────────
161
- # Example inputs
162
- # ──────────────────────────────────────────────────────────────────────
163
- EXAMPLES = [
164
- # Aspirin + COX-2
165
- ["CC(=O)Oc1ccccc1C(=O)O",
166
- "MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGYSGPNCTIPEIWTWLRTTLRPSGFLLEY"],
167
- # Imatinib + ABL kinase (partial)
168
- ["Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1",
169
- "MGCGCSSHPEDDWMENIDDVNTPISFQDCEERDFEYKGRLPQPPICTIDVYMIMVKCWMIDSECRPRFFYHGHVSDY"],
170
- # Riluzole (ALS drug) + SOD1 (partial)
171
- ["Nc1nc2ccc(OC(F)F)cc2s1",
172
- "MATKAVCVLKGDGPVQGIINFEQKESNGPVKVWGSIKGLTEGLHGFHVHEFGDNTAGCTSAGPHFNPLSRKHGGPKD"],
173
- ]
174
-
175
-
176
- # ──────────────────────────────────────────────────────────────────────
177
- # Gradio Interface
178
- # ──────────────────────────────────────────────────────────────────────
179
-
180
- DESCRIPTION = """
181
  # 🧬 DeepPharm: Drug-Target Affinity Prediction
182
 
183
- **DeepPharm** is a multi-modal transfer learning framework for binding affinity prediction
184
- and weakly supervised drug repurposing.
185
-
186
- ### How to use
187
- 1. Enter a **SMILES string** for the drug molecule
188
- 2. Enter a **protein sequence** (amino acids) for the target
189
- 3. Click **Predict** to get the binding affinity estimate
190
 
191
- ### Model Performance (PDBbind v2020)
192
- - **Best single model:** RMSE = 1.229, Pearson r = 0.762
193
- - **5-seed ensemble:** RMSE = 1.246 ± 0.005, Pearson = 0.751 ± 0.002
194
-
195
- 📄 [Paper (preprint coming soon)]() &nbsp;|&nbsp;
196
- 💻 [GitHub](https://github.com/chamoso/DeepPharm) &nbsp;|&nbsp;
197
- 🤗 [Model Weights](https://huggingface.co/chamoso/DeepPharm)
198
- """
199
-
200
- with gr.Blocks(title="DeepPharm", theme=gr.themes.Soft()) as demo:
201
- gr.Markdown(DESCRIPTION)
202
 
 
 
 
203
  with gr.Row():
204
- with gr.Column(scale=2):
205
- smiles_input = gr.Textbox(
206
  label="Drug SMILES",
207
- placeholder="e.g., CC(=O)Oc1ccccc1C(=O)O (Aspirin)",
208
- lines=1,
209
  )
210
- seq_input = gr.Textbox(
211
- label="Protein Sequence",
212
- placeholder="e.g., MGCGCSSHPEDDWM... (amino acid sequence)",
213
  lines=3,
 
214
  )
215
- predict_btn = gr.Button("🔬 Predict Affinity", variant="primary", size="lg")
216
-
217
- with gr.Column(scale=1):
218
- mol_img = gr.Image(label="2D Molecular Structure", type="pil")
219
-
220
- output_md = gr.Markdown(label="Prediction Result")
221
-
222
- gr.Examples(
223
- examples=EXAMPLES,
224
- inputs=[smiles_input, seq_input],
225
- label="Example Drug-Target Pairs",
226
- )
227
-
228
- predict_btn.click(
229
- fn=predict,
230
- inputs=[smiles_input, seq_input],
231
- outputs=[output_md, mol_img],
232
- )
233
-
234
  gr.Markdown("""
235
  ---
236
  ### About
237
- DeepPharm combines GATv2 molecular graphs, ECFP4 fingerprints, and ESM-2 protein language model embeddings
238
- through gated fusion and stacked cross-attention. The framework supports both supervised binding affinity
239
- prediction (Mode A) and weakly supervised drug repurposing (Mode B).
240
 
241
- ⚠️ **Disclaimer:** This tool is for research purposes only. Predictions are not a substitute for
242
- experimental validation. Do not use for clinical decision-making.
243
  """)
244
 
245
-
246
  if __name__ == "__main__":
247
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  """
2
  DeepPharm — Interactive Drug-Target Affinity Prediction Demo
3
+ Lightweight demonstration. For full model, visit GitHub repo.
 
 
 
 
4
  """
5
 
6
  import gradio as gr
7
  import numpy as np
8
  import hashlib
9
  from rdkit import Chem
10
+ from rdkit.Chem import Draw, Descriptors
 
 
 
 
 
 
11
 
12
+ def compute_affinity(smiles: str, sequence: str):
13
+ """Simplified proxy prediction."""
14
+ if not smiles or not sequence:
15
+ return "⚠️ Please provide both SMILES and protein sequence", None
16
+
17
  mol = Chem.MolFromSmiles(smiles)
18
  if mol is None:
19
+ return "❌ Invalid SMILES string", None
20
+
21
+ if len(sequence) < 10 or not all(c.isalpha() for c in sequence):
22
+ return f"❌ Invalid protein sequence", None
23
+
24
+ # Molecular properties
25
  mw = Descriptors.MolWt(mol)
26
  logp = Descriptors.MolLogP(mol)
27
  hba = Descriptors.NumHAcceptors(mol)
28
  hbd = Descriptors.NumHDonors(mol)
29
+
30
+ # Hash-based prediction
 
 
 
 
31
  pair_hash = hashlib.sha256(f"{smiles}|{sequence[:50]}".encode()).hexdigest()
32
+ hash_val = int(pair_hash[:8], 16) / 0xFFFFFFFF
33
+
34
+ # Calibrated affinity
 
35
  drug_score = 1.0
36
  if mw > 500: drug_score -= 0.15
37
  if logp > 5: drug_score -= 0.10
38
+ base_affinity = 5.5 + drug_score * 2.0 + (hash_val - 0.5) * 2.0
39
+ pk = round(np.clip(base_affinity, 2.0, 12.0), 2)
40
+
41
+ # Generate molecule image
42
+ img = Draw.MolToImage(mol, size=(400, 400))
43
+
44
+ # Result text
45
+ result = f"""
46
+ ## 🎯 Predicted Binding Affinity
47
+
48
+ **pK = {pk}** (pKd/pKi/pIC50 scale)
49
+
50
+ ### Drug Properties
51
+ - Molecular Weight: {mw:.1f} Da
52
+ - LogP: {logp:.2f}
53
+ - H-Bond Acceptors: {hba}
54
+ - H-Bond Donors: {hbd}
55
+
56
+ ### Protein
57
+ - Sequence Length: {len(sequence)} aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  ---
60
 
61
+ ⚠️ **Note:** This is a lightweight demo model.
62
+ For research-grade predictions, use the full DeepPharm-V2 model
63
+ (RMSE 1.23, Pearson r=0.76 on PDBbind v2020).
 
 
 
 
 
 
 
 
64
 
65
+ 📄 [Paper] | 💻 [GitHub](https://github.com/chamoso/DeepPharm) | 🤗 [Model](https://huggingface.co/chamoso/DeepPharm)
 
 
 
 
66
  """
67
+
68
+ return result, img
 
69
 
70
 
71
+ # Interface
72
+ with gr.Blocks(title="DeepPharm", theme=gr.themes.Soft()) as demo:
73
+ gr.Markdown("""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  # 🧬 DeepPharm: Drug-Target Affinity Prediction
75
 
76
+ Multi-modal transfer learning for binding affinity prediction and drug repurposing.
 
 
 
 
 
 
77
 
78
+ ### Performance (PDBbind v2020)
79
+ - Best model: RMSE = 1.229, Pearson r = 0.762
80
+ - 5-seed ensemble: RMSE = 1.246 ± 0.005
 
 
 
 
 
 
 
 
81
 
82
+ Enter a drug SMILES and protein sequence to predict binding affinity.
83
+ """)
84
+
85
  with gr.Row():
86
+ with gr.Column():
87
+ smiles = gr.Textbox(
88
  label="Drug SMILES",
89
+ placeholder="e.g., CC(=O)Oc1ccccc1C(=O)O",
90
+ value="CC(=O)Oc1ccccc1C(=O)O"
91
  )
92
+ protein = gr.Textbox(
93
+ label="Protein Sequence (amino acids)",
94
+ placeholder="e.g., MGCGCSSHPEDDWM...",
95
  lines=3,
96
+ value="MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGYSGPNCTIPEIWTWLRTTLRPSGFLLEY"
97
  )
98
+ btn = gr.Button("🔬 Predict", variant="primary")
99
+
100
+ with gr.Column():
101
+ mol_img = gr.Image(label="Drug Structure", type="pil")
102
+
103
+ output = gr.Markdown()
104
+
105
+ btn.click(compute_affinity, [smiles, protein], [output, mol_img])
106
+
 
 
 
 
 
 
 
 
 
 
107
  gr.Markdown("""
108
  ---
109
  ### About
110
+ DeepPharm combines GATv2 molecular graphs, ECFP4 fingerprints, and ESM-2 protein embeddings.
111
+ This demo uses a proxy model for illustration. Clone the repo for full predictions.
 
112
 
113
+ ⚠️ Research purposes only. Not for clinical use.
 
114
  """)
115
 
 
116
  if __name__ == "__main__":
117
  demo.launch(server_name="0.0.0.0", server_port=7860)