# ACA050's picture
# Create app.py
# 68d2c69 verified
# -*- coding: utf-8 -*-
"""Drug Discovery Using ANN - Hugging Face Deployment"""
import os
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from tensorflow import keras
import gradio as gr
# ----------------------
# Constants
# ----------------------
# Descriptor column names, in the order predict_protein uses when it selects
# columns from the sample frame before scaling. Kept as a list because it is
# used as a DataFrame column selector.
features = [
    'Molecular Weight',
    'LogP',
    'HBA',
    'HBD',
    'TPSA',
]
# ----------------------
# Load models and preprocessors
# ----------------------
# The three joblib artifacts are loaded in the listed order and bound to the
# module-level names the prediction function reads: `rf` (its .predict is
# called), `scaler` (its .transform is called) and `le` (its
# .inverse_transform is called).
_joblib_paths = (
    "models/rf_model.joblib",
    "models/scaler.joblib",
    "models/le.joblib",
)
rf, scaler, le = (joblib.load(path) for path in _joblib_paths)

# The Keras model is stored separately, in an .h5 file.
keras_model = keras.models.load_model("models/keras_mlp.h5")
# ----------------------
# Prediction function
# ----------------------
def predict_protein(mol_weight, logp, hba, hbd, tpsa):
    """Predict protein labels for one molecule from five descriptors.

    The five values are placed in a one-row DataFrame, reordered according to
    the module-level ``features`` list, scaled with ``scaler`` and then passed
    to both models.

    Args:
        mol_weight: Value for the 'Molecular Weight' column.
        logp: Value for the 'LogP' column.
        hba: Value for the 'HBA' column.
        hbd: Value for the 'HBD' column.
        tpsa: Value for the 'TPSA' column.

    Returns:
        A ``(pred_rf_class, top3_df)`` tuple where ``pred_rf_class`` is the
        output of ``rf.predict`` decoded through ``le.inverse_transform`` and
        ``top3_df`` is a DataFrame with columns "Protein", "Probability" and
        "Probability %" holding the highest-scoring Keras outputs (at most
        three rows), best first.
    """
    sample = pd.DataFrame([{
        'Molecular Weight': mol_weight,
        'LogP': logp,
        'HBA': hba,
        'HBD': hbd,
        'TPSA': tpsa,
    }])

    # Scale features; column order is taken from `features`, not from the
    # dict literal above.
    s_scaled = scaler.transform(sample[features].values)

    # Random Forest prediction: decode the predicted class index to a label.
    pred_rf_idx = rf.predict(s_scaled)[0]
    pred_rf_class = le.inverse_transform([pred_rf_idx])[0]

    # Keras prediction (top 3). verbose=0 keeps Keras from printing a progress
    # bar to the server log on every request.
    pred_prob = keras_model.predict(s_scaled, verbose=0)[0]
    # argsort is ascending, so take the last three and reverse for best-first.
    top3_idx = np.argsort(pred_prob)[-3:][::-1]

    # Decode all selected indices in a single call instead of one call each.
    # NOTE(review): assumes output unit i of keras_model corresponds to
    # encoder class i -- confirm against the training code.
    top3_labels = le.inverse_transform(top3_idx)

    top3_df = pd.DataFrame({
        "Protein": top3_labels,
        "Probability": [float(p) for p in pred_prob[top3_idx]],
    })
    top3_df["Probability %"] = (top3_df["Probability"] * 100).round(2)
    return pred_rf_class, top3_df
# ----------------------
# Gradio Inputs
# ----------------------
# One row per slider: (minimum, maximum, step, label, info). The row order
# matches the positional parameters of predict_protein.
_slider_specs = (
    (0, 1000, 0.1, "Molecular Weight", "Weight of molecule"),
    (-5, 10, 0.1, "LogP", "Lipid solubility"),
    (0, 20, 1, "HBA", "Hydrogen bond acceptors"),
    (0, 10, 1, "HBD", "Hydrogen bond donors"),
    (0, 200, 0.1, "TPSA", "Topological Polar Surface Area"),
)
inputs = [
    gr.Slider(low, high, step=step, label=label, info=info)
    for low, high, step, label, info in _slider_specs
]
# ----------------------
# Gradio Outputs
# ----------------------
# First output receives the decoded Random Forest label, second the top-3
# table; the order matches the tuple returned by predict_protein.
_rf_output = gr.Textbox(label="Random Forest Prediction")
_top3_output = gr.Dataframe(
    headers=["Protein", "Probability", "Probability %"],
    label="Top 3 Keras Predictions",
)
outputs = [_rf_output, _top3_output]
# ----------------------
# Markdown description
# ----------------------
# Markdown passed as the interface description. The empty first and last
# entries reproduce the leading and trailing newline of the text.
description_md = "\n".join([
    "",
    "# 🧬 Drug Discovery Protein Predictor",
    "Predict protein targets for molecules using **Random Forest** and **Keras MLP**.",
    "- Adjust molecular descriptors using sliders.",
    "- Random Forest shows the top predicted protein.",
    "- Keras MLP shows top 3 predictions with probabilities.",
    "",
])
# ----------------------
# Gradio Interface
# ----------------------
# Collect the interface configuration first, then build the app from it.
_interface_options = {
    "fn": predict_protein,
    "inputs": inputs,
    "outputs": outputs,
    "title": "Drug Discovery Protein Predictor",
    "description": description_md,
}
iface = gr.Interface(**_interface_options)

# Start the app. share=True works in Colab and is ignored on Hugging Face.
iface.launch(share=True)