|
|
import gradio as gr |
|
|
import joblib |
|
|
from huggingface_hub import hf_hub_download |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from collections import Counter |
|
|
|
|
|
|
|
|
# Hugging Face Hub coordinates of the serialized model package.
repo_id = "Ym420/Peptide-Function"
model_filename = "xgb_multilabel_model_full.pkl"

# Executed at import time: resolve the artifact to a local file path.
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)

# SECURITY NOTE(review): joblib.load unpickles the file, and unpickling can
# execute arbitrary code embedded in it. Only keep this if the repository
# above is trusted; consider pinning a specific revision of the artifact.
model_package = joblib.load(model_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Required entries: plain indexing is used, so a missing key fails at import.
model_dict = model_package['model']
feature_columns = model_package['feature_columns']

# Optional sequence vocabularies; each falls back to its own empty list.
aa_list, dipeptides = (
    model_package.get(key, []) for key in ('aa_list', 'dipeptides')
)

# Optional per-residue lookup tables; each falls back to its own empty dict,
# in which case every residue later resolves to 0 for that table.
(
    hydrophobicity_scale,
    eisenberg_scale,
    aa_mass,
    aa_charge,
    aa_boman,
    aa_flexibility,
    aa_polarizability,
    aa_aliphatic,
    aa_deltaG,
    aa_pucker,
) = (
    model_package.get(key, {})
    for key in (
        'hydrophobicity_scale',
        'eisenberg_scale',
        'aa_mass',
        'aa_charge',
        'aa_boman',
        'aa_flexibility',
        'aa_polarizability',
        'aa_aliphatic',
        'aa_deltaG',
        'aa_pucker',
    )
)

# Keys looked up in model_dict at prediction time; the order here is the row
# order of the results table.
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
|
|
|
|
|
|
|
|
|
|
|
def extract_features_app(seq: str) -> pd.DataFrame:
    """Build the single-row feature frame passed to the classifiers.

    The row is the concatenation of:

    1. the relative frequency of every entry of the module-level
       ``dipeptides`` list among the overlapping residue pairs of ``seq``;
    2. thirteen descriptors averaged over those pairs, derived from the
       module-level lookup tables (residues missing from a table count as 0).

    Args:
        seq: Peptide sequence; it is upper-cased before processing.

    Returns:
        A one-row ``float32`` DataFrame whose columns are ``feature_columns``.
    """
    seq = seq.upper()

    # Overlapping residue pairs ("ABCD" -> "AB", "BC", "CD"). Built once and
    # shared by the composition and the physicochemical features.
    pairs = [seq[i:i + 2] for i in range(len(seq) - 1)]

    pair_counts = Counter(pairs)
    # Clamp to 1 so sequences shorter than two residues do not divide by zero.
    denominator = max(len(pairs), 1)
    dipep_features = [pair_counts.get(dp, 0) / denominator for dp in dipeptides]

    if len(seq) < 2:
        # No residue pair exists, so every pair-averaged descriptor is zero.
        physchem_features = [0.0] * 13
    else:
        def pair_value(pair, table):
            """Average the table values of the two residues in ``pair``."""
            return (table.get(pair[0], 0) + table.get(pair[1], 0)) / 2.0

        def pair_mean(table):
            """Mean of ``pair_value`` over every pair of the sequence."""
            return np.mean([pair_value(pair, table) for pair in pairs])

        # Per-residue score behind the "pI" feature: 7, plus 1 for K/R/H,
        # minus 1 for D/E. It does not depend on the pair, so it is built a
        # single time instead of once per dipeptide.
        pi_table = {
            aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list
        }

        mw = pair_mean(aa_mass)
        charge = pair_mean(aa_charge)
        hydro = pair_mean(hydrophobicity_scale)
        # Fraction of F/W/Y residues per pair, averaged over pairs.
        aromatic = np.mean(
            [(pair[0] in 'FWY') + (pair[1] in 'FWY') for pair in pairs]
        ) / 2.0
        pI = pair_mean(pi_table)
        # Fraction of D/E/K/R residues per pair, averaged over pairs.
        instability = np.mean(
            [((pair[0] in 'DEKR') + (pair[1] in 'DEKR')) / 2.0 for pair in pairs]
        )
        # Root mean square of the pair-averaged eisenberg_scale values.
        hydro_moment = np.sqrt(
            np.mean([pair_value(pair, eisenberg_scale) ** 2 for pair in pairs])
        )
        aliphatic = pair_mean(aa_aliphatic)
        boman = pair_mean(aa_boman)
        flexibility = pair_mean(aa_flexibility)
        polarizability = pair_mean(aa_polarizability)
        deltag = pair_mean(aa_deltaG)
        pucker = pair_mean(aa_pucker)

        physchem_features = [
            mw, charge, hydro, aromatic, pI, instability,
            hydro_moment, aliphatic, boman, flexibility, polarizability,
            deltag, pucker,
        ]

    features = dipep_features + physchem_features

    # NOTE(review): assumes len(dipeptides) + 13 == len(feature_columns) and
    # that the order above matches training -- confirm against the pipeline.
    df = pd.DataFrame([features], columns=feature_columns)
    return df.astype('float32')
|
|
|
|
|
|
|
|
|
|
|
def predict_peptide(sequence: str):
    """Score a peptide against every target listed in ``TARGET_CELLS``.

    Args:
        sequence: Raw user input. All whitespace is removed and the rest is
            upper-cased before feature extraction. ``None`` is handled like
            an empty string.

    Returns:
        A list of ``[target, confidence]`` rows in ``TARGET_CELLS`` order.
        ``confidence`` is the second ``predict_proba`` column for the single
        input row, rounded to four decimals, or ``None`` when ``model_dict``
        holds no classifier for that target. The list is empty when the
        input contains no residues.
    """
    # A missing value (e.g. a null sent by an API client) has no .split();
    # answer with the same empty table as for blank input instead of failing.
    if sequence is None:
        return []

    seq = "".join(sequence.split()).upper()
    if not seq:
        return []

    X = extract_features_app(seq)

    table = []
    for target in TARGET_CELLS:
        clf = model_dict.get(target)
        if clf is None:
            # Keep a row for the target so the table shape stays stable.
            table.append([target, None])
            continue

        # Column 1 is presumably the positive class -- confirm with training.
        prob = clf.predict_proba(X)[0][1]
        table.append([target, round(float(prob), 4)])

    return table
|
|
|
|
|
|
|
|
# Stylesheet handed to gr.Blocks: sets display:none on `footer` / `.footer`.
custom_css = """
footer, .footer {display:none !important;}
"""

with gr.Blocks(css=custom_css, theme="default") as demo:
    gr.Markdown("## AMP Spectrum")

    # Free-text sequence entry.
    seq_input = gr.Textbox(label="Enter Peptide Sequence")

    # Action buttons share a row.
    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    # Read-only results table fed by predict_peptide's [target, score] rows.
    table_output = gr.Dataframe(
        headers=["Target", "Confidence"],
        datatype=["str", "number"],
        interactive=False,
    )

    predict_btn.click(
        fn=predict_peptide,
        inputs=seq_input,
        outputs=table_output,
    )
    # Reset both the textbox and the table.
    clear_btn.click(
        fn=lambda: ("", []),
        outputs=[seq_input, table_output],
    )

    # Also expose the predictor as a named API endpoint.
    gr.api(predict_peptide, api_name="predict_peptide")
|
|
|
|
|
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(show_error=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|