import gradio as gr
import numpy as np
import spaces
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from rdkit import Chem
from rdkit.Chem import Draw
from PIL import Image
import io

# Global variables to store model and tokenizer.
# Both stay None until load_model() has been called.
model = None
tokenizer = None


def load_model(model_path):
    """Load the fine-tuned classifier and its tokenizer into module globals.

    Args:
        model_path: Identifier handed unchanged to ``from_pretrained`` for
            both the model and the tokenizer.
    """
    global model, tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    print(f"Model loaded from {model_path}")


def draw_molecule(smiles):
    """Render a SMILES string as a 400x400 molecule image.

    Args:
        smiles: SMILES text to parse with RDKit.

    Returns:
        The image produced by ``Draw.MolToImage``, or ``None`` when RDKit
        cannot parse the string or drawing raises.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None
        return Draw.MolToImage(mol, size=(400, 400))
    except Exception as e:
        # Best effort: a failed drawing must not block the prediction itself.
        print(f"Error drawing molecule: {e}")
        return None


@spaces.GPU
def predict(input_text):
    """Classify a SMILES string as natural or unnatural and draw its structure.

    Args:
        input_text: SMILES string entered by the user.

    Returns:
        A ``(image, text)`` pair. ``image`` is the drawn molecule, or ``None``
        when it could not be drawn or no prediction was made. ``text`` is the
        formatted prediction, or an error message.
    """
    # Every return path yields two values so it always matches an interface
    # that wires this function to an image output and a text output.
    if model is None or tokenizer is None:
        return None, "Error: Model not loaded"

    smiles = (input_text or "").strip()
    if not smiles:
        return None, "Error: Please enter a SMILES string"

    # Draw molecule structure
    mol_image = draw_molecule(smiles)

    model.to('cuda')

    # Tokenize input directly and move the tensors to the GPU
    encoded = tokenizer(
        smiles,
        padding='max_length',
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    encoded = {k: v.to('cuda') for k, v in encoded.items()}

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**encoded)
    logits = outputs.logits.detach().cpu().numpy()

    # Stable softmax: subtracting the row max avoids overflow in exp().
    exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    # Cast the numpy integer to a plain int before using it as a dict key.
    pred_label = int(np.argmax(probs, axis=1)[0])

    # Map prediction to label
    label_map = {0: "Unnatural", 1: "Natural"}
    pred_label_text = label_map[pred_label]

    # With two classes, P(natural) is simply the probability of class 1.
    natural_prob = probs[0][1]

    # Format output
    result = f"Type: {pred_label_text}\n"
    result += f"Natural Product Probability: {natural_prob:.4f}\n"

    return mol_image, result


# Load model on initialization
load_model("shulik7/NP_SMILES_tokenized_PubChem_shard00_160k")

# Build the Gradio UI: one SMILES text input, and two outputs (the drawn
# structure and the textual prediction) in the order predict() returns them.
_smiles_input = gr.Textbox(
    lines=5,
    placeholder="Enter the SMILES here...",
    label="SMILES Input",
)
_structure_output = gr.Image(label="Molecule Structure", type="pil")
_prediction_output = gr.Textbox(
    lines=5,
    placeholder="Prediction results will appear here...",
    label="Prediction",
)

# Clickable example inputs shown under the form (one single-field row each).
_example_rows = [
    ["CN1C=NC2=C1C(=O)N(C(=O)N2C)C"],  # Caffeine - natural product
    ["CC(C)NCC(COC1=CC=C(C=C1)COCCOC(C)C)O"],  # Bisoprolol - synthetic
    ["CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CC(=O)O)C)C"],  # Retinoic acid - natural product
]

demo = gr.Interface(
    fn=predict,
    inputs=_smiles_input,
    outputs=[_structure_output, _prediction_output],
    title="Naturalness Prediction",
    description="Enter SMILES string to get the prediction from the fine-tuned ChemBERTa model.",
    examples=_example_rows,
    flagging_mode="never",
)

if __name__ == "__main__":
    demo.launch()