Spaces:

ACA050
/

Drug_Discovery_App

Sleeping

App Files Files Community

Drug_Discovery_App / app.py

ACA050

Create app.py

68d2c69 verified 4 months ago

raw

history blame contribute delete

2.99 kB

	# -*- coding: utf-8 -*-
	"""Drug Discovery Using ANN - Hugging Face Deployment"""

	import os
	import numpy as np
	import pandas as pd
	import joblib
	import tensorflow as tf
	from tensorflow import keras
	import gradio as gr

	# ----------------------
	# Constants
	# ----------------------
	# Feature column names, in the order predict_protein passes them to the scaler.
	features = ['Molecular Weight','LogP','HBA','HBD','TPSA']

	# ----------------------
	# Load models and preprocessors
	# ----------------------
	# All paths are relative to the current working directory, so the app must be
	# started from the directory that contains "models/".
	# NOTE(review): joblib.load unpickles arbitrary objects; only ship artifacts
	# from a trusted source in "models/".
	rf = joblib.load("models/rf_model.joblib")
	scaler = joblib.load("models/scaler.joblib")
	le = joblib.load("models/le.joblib")
	# compile=False: the network is only used for predict() in this file, so the
	# optimizer/loss state stored in the .h5 file does not need to be rebuilt.
	keras_model = keras.models.load_model("models/keras_mlp.h5", compile=False)

	# ----------------------
	# Prediction function
	# ----------------------
	def predict_protein(mol_weight, logp, hba, hbd, tpsa):
	    """Predict the protein class for one molecule from five descriptors.

	    The descriptors are scaled with the module-level ``scaler`` and the scaled
	    row is passed to both the random-forest model (``rf``) and the Keras model
	    (``keras_model``). Class indices are decoded to labels with ``le``.

	    Args:
	        mol_weight: Value for the 'Molecular Weight' feature.
	        logp: Value for the 'LogP' feature.
	        hba: Value for the 'HBA' feature.
	        hbd: Value for the 'HBD' feature.
	        tpsa: Value for the 'TPSA' feature.

	    Returns:
	        A tuple ``(rf_label, top3_df)``: the label decoded from the random
	        forest's prediction, and a DataFrame with columns "Protein",
	        "Probability" and "Probability %" holding the (up to) three
	        highest-scoring classes of the Keras model, best first.
	    """
	    sample = pd.DataFrame([{
	        'Molecular Weight': mol_weight,
	        'LogP': logp,
	        'HBA': hba,
	        'HBD': hbd,
	        'TPSA': tpsa,
	    }])

	    # Select columns through `features` so the column order follows the
	    # module-level list rather than the dict literal above.
	    s_scaled = scaler.transform(sample[features].values)

	    # Random Forest prediction
	    pred_rf_idx = rf.predict(s_scaled)[0]
	    pred_rf_class = le.inverse_transform([pred_rf_idx])[0]

	    # Keras prediction (top 3); verbose=0 keeps the per-request progress bar
	    # out of the server logs.
	    pred_prob = keras_model.predict(s_scaled, verbose=0)[0]
	    # argsort is ascending: take the last three indices and reverse them so
	    # the most probable class comes first.
	    top3_idx = np.argsort(pred_prob)[-3:][::-1]
	    # Decode all selected indices in a single call.
	    top3_labels = le.inverse_transform(top3_idx)
	    top3_predictions = [
	        (label, float(pred_prob[i])) for label, i in zip(top3_labels, top3_idx)
	    ]

	    # Convert top3 predictions to DataFrame
	    top3_df = pd.DataFrame(top3_predictions, columns=["Protein", "Probability"])
	    top3_df["Probability %"] = (top3_df["Probability"] * 100).round(2)

	    return pred_rf_class, top3_df

	# ----------------------
	# Gradio Inputs
	# ----------------------
	# One slider per descriptor, listed in the same order as the parameters of
	# predict_protein.
	inputs = [
	    gr.Slider(
	        minimum=0,
	        maximum=1000,
	        step=0.1,
	        label="Molecular Weight",
	        info="Weight of molecule",
	    ),
	    gr.Slider(
	        minimum=-5,
	        maximum=10,
	        step=0.1,
	        label="LogP",
	        info="Lipid solubility",
	    ),
	    gr.Slider(
	        minimum=0,
	        maximum=20,
	        step=1,
	        label="HBA",
	        info="Hydrogen bond acceptors",
	    ),
	    gr.Slider(
	        minimum=0,
	        maximum=10,
	        step=1,
	        label="HBD",
	        info="Hydrogen bond donors",
	    ),
	    gr.Slider(
	        minimum=0,
	        maximum=200,
	        step=0.1,
	        label="TPSA",
	        info="Topological Polar Surface Area",
	    ),
	]

	# ----------------------
	# Gradio Outputs
	# ----------------------
	# Two widgets, matching the two values returned by predict_protein: the
	# random-forest label and the top-3 table from the Keras model.
	outputs = [
	    gr.Textbox(label="Random Forest Prediction"),
	    gr.Dataframe(
	        label="Top 3 Keras Predictions",
	        headers=["Protein", "Probability", "Probability %"],
	    ),
	]

	# ----------------------
	# Markdown description
	# ----------------------
	# Built from one literal per line so the Markdown never picks up the source
	# file's indentation (indented lines would be rendered as a code block).
	description_md = (
	    "\n"
	    "# 🧬 Drug Discovery Protein Predictor\n"
	    "\n"
	    "Predict protein targets for molecules using Random Forest and Keras MLP.\n"
	    "\n"
	    "- Adjust molecular descriptors using sliders.\n"
	    "- Random Forest shows the top predicted protein.\n"
	    "- Keras MLP shows top 3 predictions with probabilities.\n"
	)

	# ----------------------
	# Gradio Interface
	# ----------------------
	# Wire the prediction function to the input sliders and output widgets.
	iface = gr.Interface(
	    fn=predict_protein,
	    inputs=inputs,
	    outputs=outputs,
	    title="Drug Discovery Protein Predictor",
	    description=description_md,
	)

	# Launch only when executed as a script, so importing this module does not
	# start a web server as a side effect.
	# Launch app (share=True works in Colab, ignored on Hugging Face)
	if __name__ == "__main__":
	    iface.launch(share=True)