"""Gradio demo: sample candidate molecules with ChemGPT after a PDB upload.

The uploaded PDB file is parsed with Biopython, then ChemGPT is sampled
repeatedly with a fixed text prompt.  Each sample is filtered as SELFIES,
decoded to SMILES, checked with RDKit, and the unique valid SMILES are shown
in the UI.
"""

from typing import List, Optional

from Bio import PDB
from transformers import AutoTokenizer, AutoModelForCausalLM
from rdkit import Chem
import selfies as sf
import torch
import time
import re
import io
import gradio as gr

# Seed from the wall clock so every launch samples different molecules.
# The value is computed once so CPU and CUDA generators share the same seed.
_seed = int(time.time())
torch.manual_seed(_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(_seed)

model_name = "ncfrey/ChemGPT-1.2B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# A SELFIES symbol is any bracketed run that contains no nested brackets.
_SELFIES_TOKEN_RE = re.compile(r'\[[^\[\]]+\]')

# Symbols containing any of these substrings are dropped before decoding.
# NOTE(review): presumably done to reduce decoding failures; it also strips
# every branch/ring symbol from the sample -- confirm this is intended.
_EXCLUDED_TOKEN_MARKERS = ('Branch', 'Ring', 'expl')


def load_pdb(file_obj):
    """Parse a PDB file and return the resulting Biopython structure.

    Args:
        file_obj: Whatever ``PDBParser.get_structure`` accepts as its file
            argument (callers in this file pass an open text handle).

    Returns:
        The structure object, registered under the id ``'protein'``.
    """
    parser = PDB.PDBParser(QUIET=True)
    return parser.get_structure('protein', file_obj)


def clean_and_decode_selfies(raw_output: str) -> Optional[str]:
    """Turn raw model text into an RDKit-normalised SMILES string.

    Bracketed symbols are extracted from ``raw_output``, symbols containing
    ``'Branch'``, ``'Ring'`` or ``'expl'`` are discarded, and the remainder
    is decoded from SELFIES to SMILES and round-tripped through RDKit.

    Args:
        raw_output: Text produced by the language model.

    Returns:
        The SMILES string written by ``Chem.MolToSmiles``, or ``None`` when
        decoding raises or RDKit cannot build a molecule (including the case
        where nothing is left to decode).
    """
    tokens = _SELFIES_TOKEN_RE.findall(raw_output)
    kept = [
        tok for tok in tokens
        if not any(marker in tok for marker in _EXCLUDED_TOKEN_MARKERS)
    ]
    cleaned_selfies = ''.join(kept)
    try:
        smiles = sf.decoder(cleaned_selfies)
        mol = Chem.MolFromSmiles(smiles)
    except Exception:
        # Best effort: one bad sample must not abort the sampling loop.
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
    if not mol:
        return None
    return Chem.MolToSmiles(mol)


def generate_multiple_valid_smiles(
    prompt: str, n: int = 10, max_length: int = 100
) -> List[str]:
    """Sample the model until ``n`` distinct valid SMILES are collected.

    Args:
        prompt: Text fed to the tokenizer as the generation prefix.
        n: Number of distinct SMILES wanted; at most ``n * 5`` samples are
            drawn before giving up.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The distinct valid SMILES found, in no particular order; may hold
        fewer than ``n`` entries (or none).
    """
    valid_smiles = set()
    max_tries = n * 5
    if n <= 0:
        return []

    # The prompt never changes between attempts, so tokenize it once.
    inputs = tokenizer(prompt, return_tensors="pt")

    tries = 0
    while len(valid_smiles) < n and tries < max_tries:
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=1.0,
            top_k=100,
            pad_token_id=tokenizer.eos_token_id,
        )
        selfies_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        smiles = clean_and_decode_selfies(selfies_output)
        if smiles:
            valid_smiles.add(smiles)
        tries += 1
    return list(valid_smiles)


def generate_drugs_from_pdb(pdb_file):
    """Gradio callback: parse the uploaded PDB and generate SMILES strings.

    Args:
        pdb_file: Value delivered by the ``gr.File`` input.  Either a
            filesystem path string or an object exposing the path as
            ``.name`` is accepted; ``None`` means nothing was uploaded.

    Returns:
        A ``(status, smiles_text)`` tuple of strings for the two output
        textboxes.  ``smiles_text`` is newline-separated and empty whenever
        the status reports a failure.
    """
    if pdb_file is None:
        # Explicit message instead of leaking an AttributeError to the user.
        return "❌ خطأ: لم يتم رفع ملف PDB", ""
    try:
        pdb_path = pdb_file if isinstance(pdb_file, str) else pdb_file.name
        with open(pdb_path, 'r') as f:
            pdb_str = f.read()
        # The parsed structure is not used further; parsing only surfaces
        # unreadable uploads through the except branch below.
        load_pdb(io.StringIO(pdb_str))

        prompt = "Generate a molecule in SELFIES that binds to the mutated KRAS protein"
        smiles_list = generate_multiple_valid_smiles(prompt, n=10)
        if not smiles_list:
            return "❌ لم يتم توليد أي SMILES صالحة", ""
        return "✅ تم توليد المركبات بنجاح", "\n".join(smiles_list)
    except Exception as e:
        # UI boundary: report the failure in the status box rather than crash.
        return f"❌ خطأ: {str(e)}", ""


# NOTE(review): the original indentation was lost, so the extent of the
# gr.Row() body is an assumption (file input + button side by side, with the
# textboxes below) -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🧬 توليد مركبات دوائية من ملف PDB باستخدام ChemGPT")
    with gr.Row():
        pdb_input = gr.File(label="📁 ارفع ملف PDB")
        run_btn = gr.Button("🚀 توليد SMILES")
    status = gr.Textbox(label="📢 الحالة")
    smiles_output = gr.Textbox(label="📄 المركبات (SMILES)", lines=10)
    run_btn.click(
        fn=generate_drugs_from_pdb,
        inputs=pdb_input,
        outputs=[status, smiles_output],
    )

demo.launch()