File size: 4,571 Bytes
82250a1 5a0ce9f 82250a1 a3aabc6 82250a1 5a0ce9f 0fe07d0 1b2c004 5a0ce9f 0fe07d0 82250a1 a3aabc6 82250a1 d8b9b3c 82250a1 d8b9b3c 82250a1 a3aabc6 82250a1 a3aabc6 0fe07d0 a3aabc6 82250a1 0fe07d0 a3aabc6 82250a1 a3aabc6 d8b9b3c a3aabc6 b0cda6a a3aabc6 d8b9b3c a3aabc6 d8b9b3c 450bdf1 a3aabc6 d8b9b3c a3aabc6 0fe07d0 82250a1 5a0ce9f 82250a1 a3aabc6 450bdf1 5a0ce9f 0fe07d0 82250a1 0fe07d0 450bdf1 82250a1 5a0ce9f 450bdf1 2392427 a810988 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import gradio as gr
import joblib
from huggingface_hub import hf_hub_download
import pandas as pd
import numpy as np
from collections import Counter
# --- Download model from HF Hub ---
# The serialized model bundle is fetched from the Hub repo below at import
# time; hf_hub_download returns the path of the local copy.
repo_id = "Ym420/Peptide-Function"
model_filename = "xgb_multilabel_model_full.pkl"
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
# SECURITY NOTE: joblib.load is pickle-based and can execute arbitrary code
# while deserializing. Only load this artifact from a repo you trust.
model_package = joblib.load(model_path)
# --- Unwrap model dict ---
# model_package is indexed like a dict; 'model' maps a target name to its
# classifier and 'feature_columns' supplies the column names used for the
# feature DataFrame built in extract_features_app.
model_dict = model_package['model'] # dict: {'Gram+': XGBClassifier, ...}
feature_columns = model_package['feature_columns']
# --- Metadata ---
# Lookup tables and vocabularies shipped with the model. The per-residue
# tables are read with `.get(residue, 0)` during feature extraction, so a
# residue missing from a table contributes 0.
aa_list = model_package['aa_list']
dipeptides = model_package['dipeptides']
hydrophobicity_scale = model_package['hydrophobicity_scale']
eisenberg_scale = model_package['eisenberg_scale']
aa_mass = model_package['aa_mass']
aa_charge = model_package['aa_charge']
aa_boman = model_package['aa_boman']
aa_flexibility = model_package['aa_flexibility']
aa_polarizability = model_package['aa_polarizability']
aa_aliphatic = model_package['aa_aliphatic']
aa_deltaG = model_package['aa_deltaG']
# --- Target cells ---
# Targets are scored in this order; a name with no entry in model_dict is
# reported with a probability of None by predict_peptide.
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
# --- Feature extraction ---
def extract_features_app(seq: str) -> pd.DataFrame:
    """Build the single-row feature frame that the classifiers are scored on.

    The row is the concatenation of two groups, in this order:

    1. Dipeptide composition: for every entry of the module-level
       ``dipeptides`` list, the fraction of overlapping residue pairs of
       ``seq`` that equal that entry.
    2. Twelve descriptors, each aggregated over the same overlapping pairs
       (all zero when ``seq`` has fewer than two residues).

    NOTE(review): these formulas presumably mirror the feature extraction
    used when the model was trained -- confirm before changing any of them.

    Args:
        seq: Peptide sequence; it is upper-cased before use. A residue that
            is missing from a lookup table contributes 0 for that table.

    Returns:
        A one-row ``float32`` DataFrame whose columns are ``feature_columns``.
    """
    seq = seq.upper()

    # Overlapping residue pairs, e.g. "ACD" -> ["AC", "CD"]. Built once and
    # shared by the composition and the descriptor features.
    pairs = [seq[i:i + 2] for i in range(len(seq) - 1)]

    # Dipeptide composition; the denominator is clamped to 1 so that empty
    # and single-residue inputs yield zeros instead of dividing by zero.
    pair_counts = Counter(pairs)
    total = max(len(pairs), 1)
    dipep_features = [pair_counts.get(dp, 0) / total for dp in dipeptides]

    def pair_value(dp, table):
        """Average the table values of the two residues of one pair."""
        return (table.get(dp[0], 0) + table.get(dp[1], 0)) / 2.0

    def pair_mean(table):
        """Mean of ``pair_value`` over every pair of the sequence."""
        return np.mean([pair_value(dp, table) for dp in pairs])

    if not pairs:
        physchem_features = [0] * 12
    else:
        # Per-residue pI proxy: 7, +1 for K/R/H, -1 for D/E. It does not
        # depend on the pair, so it is built once rather than per pair.
        pi_table = {
            aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list
        }

        mw = pair_mean(aa_mass)
        charge = pair_mean(aa_charge)
        hydro = pair_mean(hydrophobicity_scale)
        # Mean fraction of aromatic (F/W/Y) residues per pair.
        aromatic = np.mean(
            [(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in pairs]
        ) / 2.0
        pI = pair_mean(pi_table)
        # Mean fraction of D/E/K/R residues per pair.
        instability = np.mean(
            [((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in pairs]
        )
        # Root-mean-square of the pair-averaged Eisenberg values.
        hydro_moment = np.sqrt(
            np.mean([pair_value(dp, eisenberg_scale) ** 2 for dp in pairs])
        )
        aliphatic = pair_mean(aa_aliphatic)
        boman = pair_mean(aa_boman)
        flexibility = pair_mean(aa_flexibility)
        polarizability = pair_mean(aa_polarizability)
        deltag = pair_mean(aa_deltaG)
        physchem_features = [
            mw, charge, hydro, aromatic, pI, instability,
            hydro_moment, aliphatic, boman, flexibility, polarizability,
            deltag,
        ]

    features = dipep_features + physchem_features
    df = pd.DataFrame([features], columns=feature_columns)
    return df.astype('float32')
# --- Prediction function ---
def predict_peptide(sequence: str):
    """Score a peptide against every target-cell classifier.

    Args:
        sequence: Raw peptide sequence. All whitespace is removed and the
            remaining text is upper-cased. ``None`` is treated like an empty
            string.

    Returns:
        A list of ``[target, probability]`` rows, one per entry of
        ``TARGET_CELLS`` and in that order. ``probability`` is the
        positive-class probability rounded to 4 decimals, or ``None`` when
        ``model_dict`` has no classifier for that target. An empty list is
        returned when the input contains no non-whitespace characters.
    """
    # API callers may pass null/None; treat it as empty input instead of
    # failing with AttributeError on ``None.split()``.
    seq = "".join((sequence or "").split()).upper()
    if not seq:
        return []

    X = extract_features_app(seq)
    table = []
    for target in TARGET_CELLS:
        clf = model_dict.get(target)
        if clf is None:
            # Keep a row for the target so the table shape stays stable.
            table.append([target, None])
            continue
        # First (only) sample, column 1: positive-class probability (0-1).
        prob = clf.predict_proba(X)[0][1]
        table.append([target, round(float(prob), 4)])
    return table
# --- Gradio Interface ---
# CSS rule that hides the page footer elements.
custom_css = """
footer, .footer {display:none !important;}
"""

with gr.Blocks(css=custom_css, theme="default") as demo:
    gr.Markdown(
        "## Peptide Antimicrobial Predictor\n"
        "Enter a peptide sequence to predict efficacy/toxicity."
    )
    seq_input = gr.Textbox(label="Enter Peptide Sequence")

    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    # Read-only results table: one row per target cell type.
    table_output = gr.Dataframe(
        headers=["Target Cell", "Probability of Efficacy/Toxicity"],
        datatype=["str", "number"],
        interactive=False,
    )

    predict_btn.click(
        fn=predict_peptide,
        inputs=seq_input,
        outputs=table_output,
    )
    # Clear resets both the input box and the results table.
    clear_btn.click(
        fn=lambda: ("", []),
        outputs=[seq_input, table_output],
    )

    # API endpoint for iOS app
    # NOTE(review): the Predict click handler above may already expose an
    # endpoint named after the same function -- verify which route the
    # client actually calls.
    gr.api(predict_peptide, api_name="predict_peptide")

if __name__ == "__main__":
    demo.launch(show_error=True)
|