Spaces:
Sleeping
Sleeping
| from Bio import PDB | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from rdkit import Chem | |
| import selfies as sf | |
| import torch | |
| import time | |
| import re | |
| import io | |
| import gradio as gr | |
# Seed PyTorch's random number generators from the wall clock.  The seed is
# computed once so the CPU and CUDA generators are guaranteed to receive the
# same value (two separate time.time() calls could straddle a second boundary).
_seed = int(time.time())
torch.manual_seed(_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(_seed)

# Checkpoint identifier handed to from_pretrained().  The tokenizer and model
# are loaded once at import time and used as module-level globals by
# generate_multiple_valid_smiles().
model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def load_pdb(file_obj):
    """Parse PDB data with Biopython and return the resulting structure.

    Args:
        file_obj: Forwarded unchanged to ``PDBParser.get_structure``.  The
            caller in this file passes an ``io.StringIO`` holding the PDB text.

    Returns:
        Whatever ``get_structure`` returns, created under the identifier
        ``'protein'``.
    """
    # A fresh parser is built per call, with QUIET=True as in the original.
    return PDB.PDBParser(QUIET=True).get_structure('protein', file_obj)
def clean_and_decode_selfies(raw_output):
    """Extract SELFIES tokens from model output and decode them to SMILES.

    Every bracketed token (``[...]``) is pulled out of *raw_output*.  Tokens
    whose text contains ``Branch``, ``Ring`` or ``expl`` are dropped; the rest
    are concatenated, decoded with ``sf.decoder`` and round-tripped through
    RDKit (``MolFromSmiles`` then ``MolToSmiles``).

    Args:
        raw_output: Text produced by the language model.

    Returns:
        A non-empty SMILES string, or ``None`` when no usable token remains,
        decoding raises, RDKit yields a falsy molecule, or the resulting
        SMILES is empty.
    """
    excluded_markers = ('Branch', 'Ring', 'expl')
    kept_tokens = [
        token
        for token in re.findall(r'\[[^\[\]]+\]', raw_output)
        if not any(marker in token for marker in excluded_markers)
    ]
    # Nothing left to decode: skip the chemistry libraries entirely.
    if not kept_tokens:
        return None

    try:
        smiles = sf.decoder(''.join(kept_tokens))
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return None
        # Normalise an empty SMILES to None so callers get one "invalid" value.
        return Chem.MolToSmiles(mol) or None
    except Exception:
        # Best-effort decode of arbitrary model output: any library error means
        # "not a valid molecule".  Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
def generate_multiple_valid_smiles(prompt, n=10, max_length=100):
    """Sample the model repeatedly until *n* distinct valid SMILES are found.

    Each attempt generates one sequence from the module-level ``model``,
    decodes it with the module-level ``tokenizer`` and passes the text through
    ``clean_and_decode_selfies``.  Truthy results are collected in a set, so
    duplicates count only once.

    Args:
        prompt: Text used to condition generation.
        n: Number of distinct SMILES strings to collect.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        A list of at most *n* distinct SMILES strings, in no particular order.
        Fewer are returned when the budget of ``n * 5`` attempts runs out, and
        an empty list when *n* is not positive.
    """
    if n <= 0:
        return []

    # The prompt never changes between attempts, so tokenise it once.
    inputs = tokenizer(prompt, return_tensors="pt")

    valid_smiles = set()
    max_tries = n * 5
    tries = 0
    while len(valid_smiles) < n and tries < max_tries:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            valid_smiles.add(smiles)
        tries += 1
    return list(valid_smiles)
def generate_drugs_from_pdb(pdb_file):
    """Gradio callback: read an uploaded PDB file and generate SMILES strings.

    Args:
        pdb_file: Upload object whose ``name`` attribute is the path of the
            file on disk.

    Returns:
        A ``(status, smiles_text)`` tuple of strings.  On success
        ``smiles_text`` holds one SMILES per line.  When nothing valid was
        generated, or any exception was raised, ``smiles_text`` is ``""`` and
        ``status`` carries the error message.
    """
    try:
        with open(pdb_file.name, 'r') as handle:
            contents = handle.read()
        # The parsed structure is not used; any exception raised while parsing
        # is reported through the handler below.
        load_pdb(io.StringIO(contents))

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        candidates = generate_multiple_valid_smiles(prompt, n=10)
        if candidates:
            return "✅ تم توليد المركبات بنجاح", "\n".join(candidates)
        return "❌ لم يتم توليد أي SMILES صالحة", ""
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of raising.
        return f"❌ خطأ: {str(e)}", ""
# Gradio front end: a file upload and a button feeding generate_drugs_from_pdb,
# whose two return values fill the status box and the SMILES box.
# NOTE(review): the original indentation was lost, so the nesting below (file
# input and button inside the Row, text boxes beneath it) is reconstructed;
# confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🧬 توليد مركبات دوائية من ملف PDB باستخدام ChemGPT")

    with gr.Row():
        pdb_input = gr.File(label="📁 ارفع ملف PDB")
        run_btn = gr.Button("🚀 توليد SMILES")

    status = gr.Textbox(label="📢 الحالة")
    smiles_output = gr.Textbox(label="📄 المركبات (SMILES)", lines=10)

    run_btn.click(
        fn=generate_drugs_from_pdb,
        inputs=pdb_input,
        outputs=[status, smiles_output],
    )

demo.launch()