Update app.py
Browse files
app.py
CHANGED
|
@@ -11,12 +11,6 @@ model_filename = "xgb_multilabel_model_full_extendedFuture.pkl"
|
|
| 11 |
|
| 12 |
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
|
| 13 |
model_package = joblib.load(model_path)
|
| 14 |
-
|
| 15 |
-
# --- Debug: Check what we loaded ---
|
| 16 |
-
print("Loaded model_package type:", type(model_package))
|
| 17 |
-
print("model_package keys:", list(model_package.keys()))
|
| 18 |
-
print("Type of model:", type(model_package['model']))
|
| 19 |
-
|
| 20 |
model = model_package['model']
|
| 21 |
feature_columns = model_package['feature_columns']
|
| 22 |
|
|
@@ -24,14 +18,12 @@ feature_columns = model_package['feature_columns']
|
|
| 24 |
aa_list = model_package['aa_list']
|
| 25 |
dipeptides = model_package['dipeptides']
|
| 26 |
hydrophobicity_scale = model_package['hydrophobicity_scale']
|
| 27 |
-
eisenberg_scale = model_package['eisenberg_scale']
|
| 28 |
aa_mass = model_package['aa_mass']
|
| 29 |
aa_charge = model_package['aa_charge']
|
| 30 |
aa_boman = model_package['aa_boman']
|
| 31 |
aa_flexibility = model_package['aa_flexibility']
|
| 32 |
aa_polarizability = model_package['aa_polarizability']
|
| 33 |
aa_aliphatic = model_package['aa_aliphatic']
|
| 34 |
-
aa_deltaG = model_package['aa_deltaG']
|
| 35 |
|
| 36 |
# --- Target cells ---
|
| 37 |
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
|
|
@@ -40,19 +32,19 @@ TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
|
|
| 40 |
def extract_features_app(seq: str) -> pd.DataFrame:
|
| 41 |
seq = seq.upper()
|
| 42 |
|
| 43 |
-
# 1. Dipeptide composition
|
| 44 |
count = Counter([seq[i:i+2] for i in range(len(seq)-1)])
|
| 45 |
total = max(len(seq)-1, 1)
|
| 46 |
dipep_features = [count.get(dp, 0) / total for dp in dipeptides]
|
| 47 |
|
| 48 |
-
# 2. Physicochemical features
|
| 49 |
def g(aa, table): return table.get(aa, 0)
|
| 50 |
def h(dp, table): return (g(dp[0], table) + g(dp[1], table)) / 2.0
|
| 51 |
|
| 52 |
dipeptides_seq = [seq[i:i+2] for i in range(len(seq)-1)]
|
| 53 |
|
| 54 |
if len(seq) < 2:
|
| 55 |
-
physchem_features = [0]*
|
| 56 |
else:
|
| 57 |
mw = np.mean([h(dp, aa_mass) for dp in dipeptides_seq])
|
| 58 |
charge = np.mean([h(dp, aa_charge) for dp in dipeptides_seq])
|
|
@@ -60,15 +52,14 @@ def extract_features_app(seq: str) -> pd.DataFrame:
|
|
| 60 |
aromatic = np.mean([(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in dipeptides_seq]) / 2.0
|
| 61 |
pI = np.mean([h(dp, {aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list}) for dp in dipeptides_seq])
|
| 62 |
instability = np.mean([((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in dipeptides_seq])
|
| 63 |
-
hydro_moment = np.sqrt(np.mean([(h(dp,
|
| 64 |
aliphatic = np.mean([h(dp, aa_aliphatic) for dp in dipeptides_seq])
|
| 65 |
boman = np.mean([h(dp, aa_boman) for dp in dipeptides_seq])
|
| 66 |
flexibility = np.mean([h(dp, aa_flexibility) for dp in dipeptides_seq])
|
| 67 |
polarizability = np.mean([h(dp, aa_polarizability) for dp in dipeptides_seq])
|
| 68 |
-
|
| 69 |
-
|
| 70 |
physchem_features = [mw, charge, hydro, aromatic, pI, instability,
|
| 71 |
-
hydro_moment, aliphatic, boman, flexibility, polarizability
|
| 72 |
|
| 73 |
features = dipep_features + physchem_features
|
| 74 |
|
|
@@ -83,20 +74,18 @@ def predict_peptide(sequence: str):
|
|
| 83 |
return []
|
| 84 |
|
| 85 |
X = extract_features_app(seq)
|
| 86 |
-
|
| 87 |
-
#
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
else:
|
| 91 |
-
probs_list = [model.predict_proba(X)] # single model
|
| 92 |
|
| 93 |
table = []
|
| 94 |
for i, target in enumerate(TARGET_CELLS):
|
| 95 |
prob = float(probs_list[i][0][1])
|
| 96 |
table.append([target, round(prob, 4)])
|
| 97 |
-
|
| 98 |
return table
|
| 99 |
|
|
|
|
| 100 |
# --- Gradio Interface ---
|
| 101 |
custom_css = """
|
| 102 |
footer, .footer {display:none !important;}
|
|
@@ -120,11 +109,11 @@ with gr.Blocks(css=custom_css, theme="default") as demo:
|
|
| 120 |
predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
|
| 121 |
clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])
|
| 122 |
|
| 123 |
-
#
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
| 126 |
|
| 127 |
if __name__ == "__main__":
|
| 128 |
demo.launch(show_error=True)
|
| 129 |
-
|
| 130 |
-
|
|
|
|
| 11 |
|
| 12 |
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
|
| 13 |
model_package = joblib.load(model_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
model = model_package['model']
|
| 15 |
feature_columns = model_package['feature_columns']
|
| 16 |
|
|
|
|
| 18 |
aa_list = model_package['aa_list']
|
| 19 |
dipeptides = model_package['dipeptides']
|
| 20 |
hydrophobicity_scale = model_package['hydrophobicity_scale']
|
|
|
|
| 21 |
aa_mass = model_package['aa_mass']
|
| 22 |
aa_charge = model_package['aa_charge']
|
| 23 |
aa_boman = model_package['aa_boman']
|
| 24 |
aa_flexibility = model_package['aa_flexibility']
|
| 25 |
aa_polarizability = model_package['aa_polarizability']
|
| 26 |
aa_aliphatic = model_package['aa_aliphatic']
|
|
|
|
| 27 |
|
| 28 |
# --- Target cells ---
|
| 29 |
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
|
|
|
|
| 32 |
def extract_features_app(seq: str) -> pd.DataFrame:
|
| 33 |
seq = seq.upper()
|
| 34 |
|
| 35 |
+
# --- 1. Dipeptide composition ---
|
| 36 |
count = Counter([seq[i:i+2] for i in range(len(seq)-1)])
|
| 37 |
total = max(len(seq)-1, 1)
|
| 38 |
dipep_features = [count.get(dp, 0) / total for dp in dipeptides]
|
| 39 |
|
| 40 |
+
# --- 2. Physicochemical features ---
|
| 41 |
def g(aa, table): return table.get(aa, 0)
|
| 42 |
def h(dp, table): return (g(dp[0], table) + g(dp[1], table)) / 2.0
|
| 43 |
|
| 44 |
dipeptides_seq = [seq[i:i+2] for i in range(len(seq)-1)]
|
| 45 |
|
| 46 |
if len(seq) < 2:
|
| 47 |
+
physchem_features = [0]*11
|
| 48 |
else:
|
| 49 |
mw = np.mean([h(dp, aa_mass) for dp in dipeptides_seq])
|
| 50 |
charge = np.mean([h(dp, aa_charge) for dp in dipeptides_seq])
|
|
|
|
| 52 |
aromatic = np.mean([(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in dipeptides_seq]) / 2.0
|
| 53 |
pI = np.mean([h(dp, {aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list}) for dp in dipeptides_seq])
|
| 54 |
instability = np.mean([((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in dipeptides_seq])
|
| 55 |
+
hydro_moment = np.sqrt(np.mean([(h(dp, hydrophobicity_scale))**2 for dp in dipeptides_seq]))
|
| 56 |
aliphatic = np.mean([h(dp, aa_aliphatic) for dp in dipeptides_seq])
|
| 57 |
boman = np.mean([h(dp, aa_boman) for dp in dipeptides_seq])
|
| 58 |
flexibility = np.mean([h(dp, aa_flexibility) for dp in dipeptides_seq])
|
| 59 |
polarizability = np.mean([h(dp, aa_polarizability) for dp in dipeptides_seq])
|
| 60 |
+
|
|
|
|
| 61 |
physchem_features = [mw, charge, hydro, aromatic, pI, instability,
|
| 62 |
+
hydro_moment, aliphatic, boman, flexibility, polarizability]
|
| 63 |
|
| 64 |
features = dipep_features + physchem_features
|
| 65 |
|
|
|
|
| 74 |
return []
|
| 75 |
|
| 76 |
X = extract_features_app(seq)
|
| 77 |
+
|
| 78 |
+
#probs_list = model.predict_proba(X) # list of arrays per target
|
| 79 |
+
# --- probs_list: list of arrays from each estimator ---
|
| 80 |
+
probs_list = [est.predict_proba(X) for est in model] # model is a list
|
|
|
|
|
|
|
| 81 |
|
| 82 |
table = []
|
| 83 |
for i, target in enumerate(TARGET_CELLS):
|
| 84 |
prob = float(probs_list[i][0][1])
|
| 85 |
table.append([target, round(prob, 4)])
|
|
|
|
| 86 |
return table
|
| 87 |
|
| 88 |
+
|
| 89 |
# --- Gradio Interface ---
|
| 90 |
custom_css = """
|
| 91 |
footer, .footer {display:none !important;}
|
|
|
|
| 109 |
predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
|
| 110 |
clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])
|
| 111 |
|
| 112 |
+
# API endpoint for iOS app
|
| 113 |
+
gr.api(predict_peptide, api_name="predict_peptide")
|
| 114 |
+
|
| 115 |
+
#if __name__ == "__main__":
|
| 116 |
+
# demo.launch()
|
| 117 |
|
| 118 |
if __name__ == "__main__":
|
| 119 |
demo.launch(show_error=True)
|
|
|
|
|
|