Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
from Bio import PDB
|
| 2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
from rdkit import Chem
|
| 4 |
-
import py3Dmol
|
| 5 |
-
import re
|
| 6 |
-
import io
|
| 7 |
import selfies as sf
|
| 8 |
import torch
|
| 9 |
import time
|
|
|
|
|
|
|
| 10 |
import gradio as gr
|
| 11 |
|
| 12 |
# إعداد العشوائية
|
|
@@ -14,7 +13,7 @@ torch.manual_seed(int(time.time()))
|
|
| 14 |
if torch.cuda.is_available():
|
| 15 |
torch.cuda.manual_seed_all(int(time.time()))
|
| 16 |
|
| 17 |
-
# تحميل
|
| 18 |
model_name = "ncfrey/ChemGPT-1.2B"
|
| 19 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 20 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
|
@@ -24,13 +23,6 @@ def load_pdb(file_obj):
|
|
| 24 |
structure = parser.get_structure('protein', file_obj)
|
| 25 |
return structure
|
| 26 |
|
| 27 |
-
def get_protein_3d_html(pdb_str):
    """Render PDB text as an embeddable py3Dmol HTML snippet.

    Args:
        pdb_str: PDB file content as a string.

    Returns:
        The HTML produced by the viewer's ``_make_html()`` for a 600x400
        view of the model drawn as a spectrum-colored cartoon.
    """
    cartoon_style = {"cartoon": {"color": "spectrum"}}

    viewer = py3Dmol.view(width=600, height=400)
    viewer.addModel(pdb_str, "pdb")
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    # NOTE(review): _make_html is a private py3Dmol method — confirm it is
    # still available in the pinned py3Dmol version.
    html = viewer._make_html()
    return html
|
| 33 |
-
|
| 34 |
def clean_and_decode_selfies(raw_output):
|
| 35 |
tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
|
| 36 |
valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
|
|
@@ -63,58 +55,33 @@ def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
|
|
| 63 |
tries += 1
|
| 64 |
return list(valid_smiles)
|
| 65 |
|
| 66 |
-
def
|
| 67 |
try:
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
return "❌ الملف فارغ أو غير صالح", None, None
|
| 73 |
-
|
| 74 |
-
pdb_file_io = io.StringIO(pdb_str)
|
| 75 |
-
try:
|
| 76 |
-
load_pdb(pdb_file_io)
|
| 77 |
-
except Exception as e:
|
| 78 |
-
return f"❌ خطأ أثناء تحليل ملف PDB:\n{str(e)}", None, None
|
| 79 |
-
|
| 80 |
-
html_3d = get_protein_3d_html(pdb_str)
|
| 81 |
|
| 82 |
prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
|
| 83 |
smiles_list = generate_multiple_valid_smiles(prompt, n=10)
|
| 84 |
|
| 85 |
if not smiles_list:
|
| 86 |
-
return "❌ لم يتم توليد أي SMILES صالحة",
|
| 87 |
-
|
| 88 |
-
smiles_txt = "\n".join(smiles_list)
|
| 89 |
-
smiles_file_path = "/tmp/generated_smiles.txt"
|
| 90 |
-
with open(smiles_file_path, "w") as f:
|
| 91 |
-
f.write(smiles_txt)
|
| 92 |
|
| 93 |
-
|
|
|
|
| 94 |
|
| 95 |
except Exception as e:
|
| 96 |
-
return f"❌
|
| 97 |
-
|
| 98 |
-
css = """
|
| 99 |
-
body {background-color: #f0f9ff;}
|
| 100 |
-
h1 {color: #004d66; text-align: center;}
|
| 101 |
-
"""
|
| 102 |
-
|
| 103 |
-
with gr.Blocks(css=css) as demo:
|
| 104 |
-
gr.Markdown("<h1>🔬 Drug-like Molecule Generation from PDB using ChemGPT</h1>")
|
| 105 |
-
gr.Markdown("🧪 Upload a PDB file containing mutations in the KRAS protein. The system will generate suitable SMILES drug candidates.")
|
| 106 |
|
|
|
|
|
|
|
|
|
|
| 107 |
with gr.Row():
|
| 108 |
-
pdb_input = gr.File(label="📁
|
| 109 |
-
run_btn = gr.Button("🚀
|
| 110 |
-
status = gr.Textbox(label="📢
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
run_btn.click(
|
| 114 |
-
fn=generate_from_pdb,
|
| 115 |
-
inputs=pdb_input,
|
| 116 |
-
outputs=[status, view3d, file_output]
|
| 117 |
-
)
|
| 118 |
|
| 119 |
-
|
| 120 |
-
demo.launch()
|
|
|
|
| 1 |
from Bio import PDB
|
| 2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
from rdkit import Chem
|
|
|
|
|
|
|
|
|
|
| 4 |
import selfies as sf
|
| 5 |
import torch
|
| 6 |
import time
|
| 7 |
+
import re
|
| 8 |
+
import io
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
# Randomness setup: seed the RNGs from the current time
|
|
|
|
| 13 |
if torch.cuda.is_available():
|
| 14 |
torch.cuda.manual_seed_all(int(time.time()))
|
| 15 |
|
| 16 |
+
# Load the ChemGPT model
|
| 17 |
model_name = "ncfrey/ChemGPT-1.2B"
|
| 18 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 19 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
|
|
|
| 23 |
structure = parser.get_structure('protein', file_obj)
|
| 24 |
return structure
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def clean_and_decode_selfies(raw_output):
|
| 27 |
tokens = re.findall(r'\[[^\[\]]+\]', raw_output)
|
| 28 |
valid_tokens = [t for t in tokens if all(x not in t for x in ['Branch', 'Ring', 'expl'])]
|
|
|
|
| 55 |
tries += 1
|
| 56 |
return list(valid_smiles)
|
| 57 |
|
| 58 |
+
def _read_pdb_text(pdb_file):
    """Return the uploaded PDB content as text ('' when nothing was uploaded)."""
    if pdb_file is None:
        return ""

    if isinstance(pdb_file, (bytes, bytearray)):
        # gr.File(type="binary") hands over the raw bytes directly.
        data = bytes(pdb_file)
    elif isinstance(pdb_file, str):
        # Newer Gradio versions pass a filesystem path instead of a file object.
        with open(pdb_file, "rb") as handle:
            data = handle.read()
    elif hasattr(pdb_file, "read"):
        # Original behavior: an open file-like object.
        data = pdb_file.read()
    elif hasattr(pdb_file, "name"):
        # Wrapper objects that only expose the temp-file path.
        with open(pdb_file.name, "rb") as handle:
            data = handle.read()
    else:
        raise TypeError(f"Unsupported upload type: {type(pdb_file).__name__}")

    # Text-mode handles already return str; only bytes need decoding.
    if isinstance(data, (bytes, bytearray)):
        return bytes(data).decode("utf-8", errors="ignore")
    return data


def generate_drugs_from_pdb(pdb_file):
    """Validate an uploaded PDB file and generate candidate SMILES strings.

    Args:
        pdb_file: The value delivered by the ``gr.File`` input. May be
            ``None`` (nothing uploaded), raw bytes, a path string, or a
            file-like object exposing ``read()`` and/or ``name``.

    Returns:
        A ``(status_message, smiles_text)`` tuple. ``smiles_text`` holds one
        SMILES per line on success and is ``""`` on any failure.
    """
    try:
        pdb_str = _read_pdb_text(pdb_file)
        if not pdb_str.strip():
            # Report a missing/empty upload explicitly instead of surfacing an
            # AttributeError from calling .read() on None.
            return "❌ الملف فارغ أو غير صالح", ""

        # Parse only to validate the upload; the structure itself is not used
        # by the generation step below.
        load_pdb(io.StringIO(pdb_str))

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)

        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", ""

        smiles_text = "\n".join(smiles_list)
        return "✅ تم توليد المركبات بنجاح", smiles_text

    except Exception as e:
        # UI boundary: turn any failure into a status message rather than
        # crashing the Gradio callback.
        return f"❌ خطأ: {str(e)}", ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
# Gradio interface
# NOTE(review): indentation was lost in this view of the file; the nesting
# below (upload + button inside the Row, text boxes beneath it) is assumed —
# confirm against the real app.py.
with gr.Blocks() as demo:
    gr.Markdown("# 🧬 توليد مركبات دوائية من ملف PDB باستخدام ChemGPT")

    with gr.Row():
        pdb_input = gr.File(label="📁 ارفع ملف PDB")
        run_btn = gr.Button("🚀 توليد SMILES")

    status = gr.Textbox(label="📢 الحالة")
    smiles_output = gr.Textbox(label="📄 المركبات (SMILES)", lines=10)

    # Wire the button to the generator; outputs map to (status, SMILES text).
    run_btn.click(
        fn=generate_drugs_from_pdb,
        inputs=pdb_input,
        outputs=[status, smiles_output],
    )

demo.launch()
|
|
|