Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import joblib
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
# --- Load model package ---
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only point this at a model file you trust.
model_package_path = "xgb_multilabel_model_full.pkl"
model_package = joblib.load(model_package_path)
model, feature_columns = (
    model_package['model'],
    model_package['feature_columns'],
)

# Metadata: lookup tables stored in the package alongside the model and read
# by the feature extraction code.
(
    aa_list,
    dipeptides,
    hydrophobicity_scale,
    aa_mass,
    aa_charge,
    aa_boman,
    aa_flexibility,
    aa_polarizability,
    aa_aliphatic,
) = (
    model_package[key]
    for key in (
        'aa_list',
        'dipeptides',
        'hydrophobicity_scale',
        'aa_mass',
        'aa_charge',
        'aa_boman',
        'aa_flexibility',
        'aa_polarizability',
        'aa_aliphatic',
    )
)
|
| 21 |
+
|
| 22 |
+
# --- Feature extraction ---
|
| 23 |
+
def extract_features(sequence: str) -> pd.DataFrame:
    """Build the single-row feature frame the model expects for one sequence.

    The sequence is upper-cased, then three feature groups are computed:

    * ``AA_<x>``: fraction of the sequence made up of each entry of ``aa_list``.
    * ``DP_<xy>``: fraction of overlapping two-character windows equal to each
      entry of ``dipeptides``.
    * ``hydrophobicity``, ``mass``, ``charge``, ``boman``, ``flexibility``,
      ``polarizability``, ``aliphatic``: mean per-character value from the
      corresponding lookup table (characters missing from a table count as 0).

    Args:
        sequence: Peptide sequence text. An empty string yields all-zero
            features instead of dividing by zero.

    Returns:
        A one-row DataFrame re-indexed to ``feature_columns``; columns not
        produced here are filled with 0 and extra ones are dropped.
    """
    from collections import Counter

    seq = sequence.upper()
    length = len(seq)
    features = {}

    # Amino acid composition
    for aa in aa_list:
        features[f"AA_{aa}"] = seq.count(aa) / length if length > 0 else 0

    # Dipeptide composition: tally every overlapping window in one pass
    # instead of rescanning the whole sequence once per dipeptide.
    window_total = length - 1
    window_counts = Counter(seq[i:i + 2] for i in range(window_total))
    for dp in dipeptides:
        features[f"DP_{dp}"] = (
            window_counts[dp] / window_total if length > 1 else 0
        )

    # Mean physicochemical properties; hydrophobicity first to keep the
    # original insertion order of the feature dict.
    property_tables = {
        'hydrophobicity': hydrophobicity_scale,
        'mass': aa_mass,
        'charge': aa_charge,
        'boman': aa_boman,
        'flexibility': aa_flexibility,
        'polarizability': aa_polarizability,
        'aliphatic': aa_aliphatic,
    }
    for prop, table in property_tables.items():
        features[prop] = (
            sum(table.get(aa, 0) for aa in seq) / length if length > 0 else 0
        )

    df = pd.DataFrame([features])
    # Align to the training-time column order.
    return df.reindex(columns=feature_columns, fill_value=0)
|
| 47 |
+
|
| 48 |
+
# --- Prediction ---
|
| 49 |
+
def predict(sequence):
    """Predict per-target probabilities for a peptide sequence.

    Args:
        sequence: Raw text from the input box. May be ``None`` or contain
            whitespace and line breaks (the input box is multi-line).

    Returns:
        A ``(text, rows)`` pair for the two output components. For blank
        input, ``text`` is "Sequence cannot be empty." and ``rows`` is
        ``None``. Otherwise ``text`` is the cleaned sequence and ``rows`` is a
        list of ``[target, probability]`` rows, matching the order of the
        results table headers ("Target Cell",
        "Probability of Efficacy/Toxicity").
    """
    # Whitespace is never a residue: drop it everywhere (not just at the ends)
    # so pasted, line-wrapped sequences do not distort length-normalised
    # features. `or ""` guards against a None value from the UI.
    sequence = "".join((sequence or "").split())
    if not sequence:
        return "Sequence cannot be empty.", None

    X = extract_features(sequence)
    # NOTE(review): the indexing below assumes predict_proba returns one
    # (n_samples, 2) array per target, as multi-output wrappers do; confirm
    # against the pickled model type.
    probs = model.predict_proba(X)

    # Rows are plain lists because that is the row format the results table
    # accepts; a list of dicts is not rendered as rows.
    # NOTE(review): assumes model.classes_ has one entry per target and that
    # the entry identifies the target cell; for multi-output wrappers it may
    # instead hold each target's class labels. Verify.
    rows = [
        [str(target), float(probs[i][0][1])]
        for i, target in enumerate(model.classes_)
    ]

    return sequence, rows
|
| 66 |
+
|
| 67 |
+
# --- Gradio Interface ---
|
| 68 |
+
# Single-input web UI: one text box in, the echoed sequence plus a results
# table out.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter peptide sequence here..."),
    outputs=[
        gr.Textbox(label="Input Sequence"),
        # One datatype per header: target name as text, probability as a
        # number. "json" is not a documented Dataframe datatype.
        gr.Dataframe(
            headers=["Target Cell", "Probability of Efficacy/Toxicity"],
            datatype=["str", "number"],
        ),
    ],
    title="Peptide Antimicrobial Predictor",
    description="Enter a peptide sequence to predict its antimicrobial efficacy/toxicity against target cells.",
)

if __name__ == "__main__":
    # 0.0.0.0 listens on all interfaces so the app is reachable from outside
    # a container; 7860 is the port Gradio uses by default.
    iface.launch(server_name="0.0.0.0", server_port=7860)
|