Ym420's picture
Update app.py
4765f4d verified
raw
history blame
4.88 kB
import gradio as gr
import joblib
from huggingface_hub import hf_hub_download
import pandas as pd
import numpy as np
from collections import Counter
# --- Fetch the serialized model bundle from the HF Hub and load it ---
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only point repo_id at a repository you trust.
repo_id = "Ym420/Peptide-Function"  # replace with your HF repo
model_filename = "xgb_multilabel_model_full_extendedFuture.pkl"
model_path = hf_hub_download(filename=model_filename, repo_id=repo_id)
model_package = joblib.load(model_path)

# --- Model and the column order the feature frame must use ---
model = model_package['model']
feature_columns = model_package['feature_columns']

# --- Debug: report what was loaded (shows up in the startup log) ---
print("Loaded model_package type:", type(model_package))
print("model_package keys:", [*model_package.keys()])
print("Type of model:", type(model))

# --- Metadata: residue alphabet, dipeptide vocabulary, per-residue tables ---
(
    aa_list,
    dipeptides,
    hydrophobicity_scale,
    eisenberg_scale,
    aa_mass,
    aa_charge,
    aa_boman,
    aa_flexibility,
    aa_polarizability,
    aa_aliphatic,
    aa_deltaG,
) = (
    model_package[key]
    for key in (
        'aa_list',
        'dipeptides',
        'hydrophobicity_scale',
        'eisenberg_scale',
        'aa_mass',
        'aa_charge',
        'aa_boman',
        'aa_flexibility',
        'aa_polarizability',
        'aa_aliphatic',
        'aa_deltaG',
    )
)

# --- Target cells: one output row per entry, in model label order ---
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
# --- Feature extraction ---
def extract_features_app(seq: str) -> pd.DataFrame:
    """Build the single-row feature frame for one peptide sequence.

    The row is the concatenation of two groups:

    1. Dipeptide composition: for every entry of the module-level
       ``dipeptides`` list, its count among the sequence's overlapping
       residue pairs divided by the number of pairs.
    2. Twelve physicochemical descriptors, each averaged over the same
       overlapping pairs, in this order: mass, charge, hydrophobicity,
       aromatic fraction, pI proxy, charged-residue fraction
       ("instability"), RMS of the Eisenberg value, aliphatic, Boman,
       flexibility, polarizability, deltaG.

    Residues missing from a lookup table contribute 0 to that descriptor.
    A sequence shorter than two residues has no pairs, so every feature is 0.

    Args:
        seq: Peptide sequence in one-letter code; upper-cased internally.

    Returns:
        A one-row ``float32`` DataFrame whose columns are ``feature_columns``.
    """
    seq = seq.upper()
    # Overlapping residue pairs, built once and shared by both feature groups.
    pairs = [seq[i:i + 2] for i in range(len(seq) - 1)]

    # 1. Dipeptide composition
    pair_counts = Counter(pairs)
    total = max(len(pairs), 1)  # avoid division by zero for very short input
    dipep_features = [pair_counts.get(dp, 0) / total for dp in dipeptides]

    # 2. Physicochemical features
    if not pairs:
        physchem_features = [0] * 12
    else:
        def pair_value(pair, table):
            """Average of the two residues' table values (0 if unknown)."""
            return (table.get(pair[0], 0) + table.get(pair[1], 0)) / 2.0

        def pair_mean(table):
            """Mean of ``pair_value`` over every pair in the sequence."""
            return np.mean([pair_value(p, table) for p in pairs])

        # pI proxy: 7, plus 1 for K/R/H, minus 1 for D/E. This table does not
        # depend on the sequence, so build it once instead of once per pair.
        pi_table = {
            aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list
        }

        mw = pair_mean(aa_mass)
        charge = pair_mean(aa_charge)
        hydro = pair_mean(hydrophobicity_scale)
        aromatic = np.mean(
            [(p[0] in 'FWY') + (p[1] in 'FWY') for p in pairs]
        ) / 2.0
        pI = pair_mean(pi_table)
        instability = np.mean(
            [((p[0] in 'DEKR') + (p[1] in 'DEKR')) / 2.0 for p in pairs]
        )
        # Root-mean-square of the per-pair Eisenberg value.
        hydro_moment = np.sqrt(
            np.mean([pair_value(p, eisenberg_scale) ** 2 for p in pairs])
        )
        aliphatic = pair_mean(aa_aliphatic)
        boman = pair_mean(aa_boman)
        flexibility = pair_mean(aa_flexibility)
        polarizability = pair_mean(aa_polarizability)
        deltag = pair_mean(aa_deltaG)
        physchem_features = [
            mw, charge, hydro, aromatic, pI, instability,
            hydro_moment, aliphatic, boman, flexibility, polarizability, deltag,
        ]

    features = dipep_features + physchem_features
    df = pd.DataFrame([features], columns=feature_columns)
    return df.astype('float32')  # ensure same type as training
# --- Prediction function ---
def _positive_class_probs(raw_output, n_labels):
    """Reduce a ``predict_proba`` result to one positive-class value per label.

    Two layouts are accepted for a single input row:

    * a list/tuple (or 3-D array) holding one ``(n_samples, 2)`` array per
      label, where column 1 is the positive class;
    * one 2-D ``(n_samples, n_labels)`` array that already holds the
      positive-class probability of each label.

    Raises:
        ValueError: if the output matches neither layout.
    """
    if isinstance(raw_output, (list, tuple)):
        return [float(raw_output[i][0][1]) for i in range(n_labels)]

    probs = np.asarray(raw_output)
    if probs.ndim == 3:
        return [float(probs[i][0][1]) for i in range(n_labels)]
    if probs.ndim == 2:
        # A lone binary classifier: columns are (negative, positive).
        if n_labels == 1 and probs.shape[1] == 2:
            return [float(probs[0][1])]
        if probs.shape[1] == n_labels:
            return [float(p) for p in probs[0]]
    raise ValueError(
        f"Unexpected predict_proba output shape {probs.shape} "
        f"for {n_labels} target labels"
    )


def predict_peptide(sequence: str):
    """Predict per-target probabilities for one peptide sequence.

    Whitespace is stripped from the input and the sequence is upper-cased
    before feature extraction. ``model`` may be a list of per-label
    estimators or a single multilabel estimator.

    Args:
        sequence: Peptide sequence as typed by the user; may be empty or None.

    Returns:
        A list of ``[target_name, probability]`` rows, one per entry of
        ``TARGET_CELLS`` with the probability rounded to 4 decimals, or an
        empty list when no sequence was given.
    """
    if not sequence:
        return []
    seq = "".join(sequence.split()).upper()
    if not seq:
        return []

    X = extract_features_app(seq)

    if isinstance(model, list):
        # One estimator per label, each returning an (n_samples, 2) array.
        raw_output = [est.predict_proba(X) for est in model]
    else:
        # A single estimator returns all labels at once. Wrapping it in a
        # one-element list and indexing by label fails from the second label.
        raw_output = model.predict_proba(X)

    probs = _positive_class_probs(raw_output, len(TARGET_CELLS))
    return [[target, round(prob, 4)] for target, prob in zip(TARGET_CELLS, probs)]
# --- Gradio Interface ---
# Stylesheet override that hides Gradio's footer.
custom_css = """
footer, .footer {display:none !important;}
"""


def _clear_fields():
    """Return blank values for the sequence box and the results table."""
    return "", []


# Page layout: heading, sequence input, button row, then the results table.
with gr.Blocks(theme="default", css=custom_css) as demo:
    gr.Markdown(
        "## Peptide Antimicrobial Predictor\n"
        "Enter a peptide sequence to predict efficacy/toxicity."
    )
    seq_input = gr.Textbox(label="Enter Peptide Sequence")

    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    table_output = gr.Dataframe(
        interactive=False,
        datatype=["str", "number"],
        headers=["Target Cell", "Probability of Efficacy/Toxicity"],
    )

    # Event wiring must happen inside the Blocks context.
    predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
    clear_btn.click(fn=_clear_fields, outputs=[seq_input, table_output])

    # Optional API endpoint for iOS app
    # Note: use only if Gradio version supports `api`
    # gr.api(predict_peptide, api_name="predict_peptide")

if __name__ == "__main__":
    demo.launch(show_error=True)