Update app.py
Browse files
app.py
CHANGED
|
@@ -9,21 +9,14 @@ from collections import Counter
|
|
| 9 |
repo_id = "Ym420/Peptide-Function"
|
| 10 |
model_filename = "xgb_multilabel_model_full.pkl"
|
| 11 |
|
| 12 |
-
# Download and load the saved model package
|
| 13 |
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
|
| 14 |
model_package = joblib.load(model_path)
|
| 15 |
|
| 16 |
# --- Unwrap model dict ---
|
| 17 |
-
|
| 18 |
-
# e.g., {'Gram+': XGBClassifier(...), 'Fungus': XGBClassifier(...), ...}
|
| 19 |
-
model_dict = model_package['model']
|
| 20 |
-
|
| 21 |
-
# feature_columns must match the columns returned by extract_features_app
|
| 22 |
-
# If you add new features, ensure they are included here and in extract_features_app
|
| 23 |
feature_columns = model_package['feature_columns']
|
| 24 |
|
| 25 |
# --- Metadata (all restored) ---
|
| 26 |
-
# If you add new features that depend on new tables or scales, add them here
|
| 27 |
aa_list = model_package.get('aa_list', [])
|
| 28 |
dipeptides = model_package.get('dipeptides', [])
|
| 29 |
hydrophobicity_scale = model_package.get('hydrophobicity_scale', {})
|
|
@@ -36,32 +29,31 @@ aa_polarizability = model_package.get('aa_polarizability', {})
|
|
| 36 |
aa_aliphatic = model_package.get('aa_aliphatic', {})
|
| 37 |
aa_deltaG = model_package.get('aa_deltaG', {})
|
| 38 |
|
| 39 |
-
# ---
|
| 40 |
-
|
| 41 |
-
# Or make it dynamic: TARGET_CELLS = list(model_dict.keys())
|
| 42 |
-
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
|
| 43 |
|
| 44 |
-
# --- Feature extraction ---
|
| 45 |
-
# When adding new features, compute them here and make sure their names match feature_columns
|
| 46 |
def extract_features_app(seq: str) -> pd.DataFrame:
|
| 47 |
seq = seq.upper()
|
| 48 |
-
|
| 49 |
# --- 1. Dipeptide composition ---
|
| 50 |
count = Counter([seq[i:i+2] for i in range(len(seq)-1)])
|
| 51 |
total = max(len(seq)-1, 1)
|
| 52 |
-
dipep_features =
|
| 53 |
|
| 54 |
# --- 2. Physicochemical features ---
|
| 55 |
def g(aa, table): return table.get(aa, 0)
|
| 56 |
def h(dp, table): return (g(dp[0], table) + g(dp[1], table)) / 2.0
|
| 57 |
|
| 58 |
dipeptides_seq = [seq[i:i+2] for i in range(len(seq)-1)]
|
| 59 |
-
|
| 60 |
if len(seq) < 2:
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
| 63 |
else:
|
| 64 |
-
# Compute physico-chemical properties
|
| 65 |
mw = np.mean([h(dp, aa_mass) for dp in dipeptides_seq])
|
| 66 |
charge = np.mean([h(dp, aa_charge) for dp in dipeptides_seq])
|
| 67 |
hydro = np.mean([h(dp, hydrophobicity_scale) for dp in dipeptides_seq])
|
|
@@ -75,37 +67,36 @@ def extract_features_app(seq: str) -> pd.DataFrame:
|
|
| 75 |
polarizability = np.mean([h(dp, aa_polarizability) for dp in dipeptides_seq])
|
| 76 |
deltag = np.mean([h(dp, aa_deltaG) for dp in dipeptides_seq])
|
| 77 |
|
| 78 |
-
physchem_features =
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
| 81 |
# --- Combine features ---
|
| 82 |
-
|
| 83 |
-
|
| 84 |
# --- Align with feature_columns ---
|
| 85 |
-
|
| 86 |
-
df = pd.DataFrame([features], columns=feature_columns)
|
| 87 |
df = df.astype('float32')
|
| 88 |
return df
|
| 89 |
|
| 90 |
# --- Prediction function ---
|
| 91 |
-
# Returns probability for each target cell
|
| 92 |
def predict_peptide(sequence: str):
|
| 93 |
seq = "".join(sequence.split()).upper()
|
| 94 |
if not seq:
|
| 95 |
return []
|
| 96 |
|
| 97 |
X = extract_features_app(seq)
|
| 98 |
-
|
| 99 |
table = []
|
| 100 |
for target in TARGET_CELLS:
|
| 101 |
clf = model_dict.get(target)
|
| 102 |
if clf is not None:
|
| 103 |
-
#
|
| 104 |
-
prob = clf.predict_proba(X)[0][1]
|
| 105 |
table.append([target, round(float(prob), 4)])
|
| 106 |
else:
|
| 107 |
table.append([target, None])
|
| 108 |
-
|
| 109 |
return table
|
| 110 |
|
| 111 |
# --- Gradio Interface ---
|
|
@@ -115,40 +106,28 @@ footer, .footer {display:none !important;}
|
|
| 115 |
|
| 116 |
with gr.Blocks(css=custom_css, theme="default") as demo:
|
| 117 |
gr.Markdown("## Peptide Antimicrobial Predictor\nEnter a peptide sequence to predict efficacy/toxicity.")
|
| 118 |
-
|
| 119 |
seq_input = gr.Textbox(label="Enter Peptide Sequence")
|
| 120 |
-
|
| 121 |
with gr.Row():
|
| 122 |
predict_btn = gr.Button("Predict", variant="primary")
|
| 123 |
clear_btn = gr.Button("Clear")
|
| 124 |
-
|
| 125 |
table_output = gr.Dataframe(
|
| 126 |
headers=["Target Cell", "Probability of Efficacy/Toxicity"],
|
| 127 |
datatype=["str","number"],
|
| 128 |
interactive=False
|
| 129 |
)
|
| 130 |
-
|
| 131 |
predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
|
| 132 |
clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])
|
| 133 |
-
|
| 134 |
# API endpoint for iOS app
|
| 135 |
gr.api(predict_peptide, api_name="predict_peptide")
|
| 136 |
|
| 137 |
-
if __name__ == "__main__":
|
| 138 |
demo.launch(show_error=True)
|
| 139 |
|
| 140 |
-
# --- Notes for manual update ---
|
| 141 |
-
# 1. When adding new features in your Colab model:
|
| 142 |
-
# - Add the new feature computation in extract_features_app
|
| 143 |
-
# - Update feature_columns in the model package if needed
|
| 144 |
-
# - Add any new metadata tables to the model_package if used
|
| 145 |
-
# 2. If you add new target labels:
|
| 146 |
-
# - Add them to TARGET_CELLS manually
|
| 147 |
-
# - Or switch to dynamic TARGET_CELLS = list(model_dict.keys()) for auto-detection
|
| 148 |
-
# 3. Always ensure the DataFrame returned from extract_features_app matches feature_columns in order and names
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
|
| 153 |
|
| 154 |
|
|
|
|
| 9 |
repo_id = "Ym420/Peptide-Function"
|
| 10 |
model_filename = "xgb_multilabel_model_full.pkl"
|
| 11 |
|
|
|
|
| 12 |
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
|
| 13 |
model_package = joblib.load(model_path)
|
| 14 |
|
| 15 |
# --- Unwrap model dict ---
|
| 16 |
+
model_dict = model_package['model'] # dict: {'Gram+': XGBClassifier, ...}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
feature_columns = model_package['feature_columns']
|
| 18 |
|
| 19 |
# --- Metadata (all restored) ---
|
|
|
|
| 20 |
aa_list = model_package.get('aa_list', [])
|
| 21 |
dipeptides = model_package.get('dipeptides', [])
|
| 22 |
hydrophobicity_scale = model_package.get('hydrophobicity_scale', {})
|
|
|
|
| 29 |
aa_aliphatic = model_package.get('aa_aliphatic', {})
|
| 30 |
aa_deltaG = model_package.get('aa_deltaG', {})
|
| 31 |
|
| 32 |
+
# --- Dynamic TARGET_CELLS ---
|
| 33 |
+
TARGET_CELLS = list(model_dict.keys()) # automatically detects all targets
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
# --- Feature extraction (future-proof) ---
|
|
|
|
| 36 |
def extract_features_app(seq: str) -> pd.DataFrame:
|
| 37 |
seq = seq.upper()
|
| 38 |
+
|
| 39 |
# --- 1. Dipeptide composition ---
|
| 40 |
count = Counter([seq[i:i+2] for i in range(len(seq)-1)])
|
| 41 |
total = max(len(seq)-1, 1)
|
| 42 |
+
dipep_features = {dp: count.get(dp, 0) / total for dp in dipeptides}
|
| 43 |
|
| 44 |
# --- 2. Physicochemical features ---
|
| 45 |
def g(aa, table): return table.get(aa, 0)
|
| 46 |
def h(dp, table): return (g(dp[0], table) + g(dp[1], table)) / 2.0
|
| 47 |
|
| 48 |
dipeptides_seq = [seq[i:i+2] for i in range(len(seq)-1)]
|
| 49 |
+
|
| 50 |
if len(seq) < 2:
|
| 51 |
+
physchem_features = {
|
| 52 |
+
'mw': 0, 'charge': 0, 'hydro': 0, 'aromatic': 0, 'pI': 0,
|
| 53 |
+
'instability': 0, 'hydro_moment': 0, 'aliphatic': 0,
|
| 54 |
+
'boman': 0, 'flexibility': 0, 'polarizability': 0, 'deltag': 0
|
| 55 |
+
}
|
| 56 |
else:
|
|
|
|
| 57 |
mw = np.mean([h(dp, aa_mass) for dp in dipeptides_seq])
|
| 58 |
charge = np.mean([h(dp, aa_charge) for dp in dipeptides_seq])
|
| 59 |
hydro = np.mean([h(dp, hydrophobicity_scale) for dp in dipeptides_seq])
|
|
|
|
| 67 |
polarizability = np.mean([h(dp, aa_polarizability) for dp in dipeptides_seq])
|
| 68 |
deltag = np.mean([h(dp, aa_deltaG) for dp in dipeptides_seq])
|
| 69 |
|
| 70 |
+
physchem_features = {
|
| 71 |
+
'mw': mw, 'charge': charge, 'hydro': hydro, 'aromatic': aromatic, 'pI': pI,
|
| 72 |
+
'instability': instability, 'hydro_moment': hydro_moment, 'aliphatic': aliphatic,
|
| 73 |
+
'boman': boman, 'flexibility': flexibility, 'polarizability': polarizability, 'deltag': deltag
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
# --- Combine features ---
|
| 77 |
+
all_features = {**dipep_features, **physchem_features}
|
| 78 |
+
|
| 79 |
# --- Align with feature_columns ---
|
| 80 |
+
df = pd.DataFrame([[all_features.get(col, 0) for col in feature_columns]], columns=feature_columns)
|
|
|
|
| 81 |
df = df.astype('float32')
|
| 82 |
return df
|
| 83 |
|
| 84 |
# --- Prediction function ---
|
|
|
|
| 85 |
def predict_peptide(sequence: str):
    """Predict the positive-class probability of a peptide for each target cell.

    All whitespace is removed from ``sequence`` and the remainder is
    upper-cased before features are extracted with ``extract_features_app``.

    Args:
        sequence: Peptide sequence text. ``None``, an empty string, or a
            whitespace-only string yields an empty result.

    Returns:
        A list of ``[target, probability]`` rows, one per entry in
        ``TARGET_CELLS``. The probability is rounded to 4 decimals, or is
        ``None`` when ``model_dict`` holds no classifier for that target.
    """
    # A null payload (e.g. from an API client) would otherwise raise
    # AttributeError on ``.split()``; treat it like an empty sequence.
    if sequence is None:
        return []

    seq = "".join(sequence.split()).upper()
    if not seq:
        return []

    X = extract_features_app(seq)

    table = []
    for target in TARGET_CELLS:
        clf = model_dict.get(target)
        if clf is None:
            table.append([target, None])
            continue
        # Column 1 of predict_proba is the positive-class probability.
        prob = clf.predict_proba(X)[0][1]
        table.append([target, round(float(prob), 4)])
    return table
|
| 101 |
|
| 102 |
# --- Gradio Interface ---
|
|
|
|
| 106 |
|
| 107 |
with gr.Blocks(css=custom_css, theme="default") as demo:
|
| 108 |
gr.Markdown("## Peptide Antimicrobial Predictor\nEnter a peptide sequence to predict efficacy/toxicity.")
|
| 109 |
+
|
| 110 |
seq_input = gr.Textbox(label="Enter Peptide Sequence")
|
| 111 |
+
|
| 112 |
with gr.Row():
|
| 113 |
predict_btn = gr.Button("Predict", variant="primary")
|
| 114 |
clear_btn = gr.Button("Clear")
|
| 115 |
+
|
| 116 |
table_output = gr.Dataframe(
|
| 117 |
headers=["Target Cell", "Probability of Efficacy/Toxicity"],
|
| 118 |
datatype=["str","number"],
|
| 119 |
interactive=False
|
| 120 |
)
|
| 121 |
+
|
| 122 |
predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
|
| 123 |
clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])
|
| 124 |
+
|
| 125 |
# API endpoint for iOS app
|
| 126 |
gr.api(predict_peptide, api_name="predict_peptide")
|
| 127 |
|
| 128 |
+
if __name__ == "__main__":
|
| 129 |
demo.launch(show_error=True)
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
|