Ym420 committed on
Commit
752bac2
·
verified ·
1 Parent(s): 2b374d6

Upload app_11futures.py

Browse files
Files changed (1) hide show
  1. app_11futures.py +119 -0
app_11futures.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ from huggingface_hub import hf_hub_download
4
+ import pandas as pd
5
+ import numpy as np
6
+ from collections import Counter
7
+
8
# --- Download model from HF Hub ---
repo_id = "Ym420/Peptide-Function"  # replace with your HF repo
model_filename = "xgb_multilabel_model_full.pkl"

# NOTE(review): joblib.load unpickles the downloaded file, and unpickling can
# run code embedded in it -- only point repo_id at a repository you trust.
model_path = hf_hub_download(filename=model_filename, repo_id=repo_id)
model_package = joblib.load(model_path)

# The saved package bundles the estimator with the column order used to build
# its input frame.
model = model_package["model"]
feature_columns = model_package["feature_columns"]

# --- Metadata ---
# Lookup tables stored alongside the model; pulled out in one pass so the
# variable list and the key list can be compared side by side.
(
    aa_list,
    dipeptides,
    hydrophobicity_scale,
    aa_mass,
    aa_charge,
    aa_boman,
    aa_flexibility,
    aa_polarizability,
    aa_aliphatic,
) = (
    model_package[key]
    for key in (
        "aa_list",
        "dipeptides",
        "hydrophobicity_scale",
        "aa_mass",
        "aa_charge",
        "aa_boman",
        "aa_flexibility",
        "aa_polarizability",
        "aa_aliphatic",
    )
)

# --- Target cells ---
# Labels reported for the model's outputs, in output order.
# NOTE(review): order presumably matches the training label order -- confirm.
TARGET_CELLS = [
    "Gram+",
    "Fungus",
    "Mammalian Cell",
    "Cancer",
    "Gram-",
]
30
+
31
+ # --- Feature extraction ---
32
def extract_features_app(seq: str) -> pd.DataFrame:
    """Build the one-row feature frame passed to the model for a peptide.

    The feature vector is the relative frequency of every entry of the
    module-level ``dipeptides`` list, followed by eleven descriptors averaged
    over the sequence's overlapping residue pairs.

    Args:
        seq: Peptide sequence. It is upper-cased before use. A residue that is
            missing from a lookup table contributes 0 for that descriptor.

    Returns:
        A single-row ``float32`` DataFrame with ``feature_columns`` as its
        columns. For sequences shorter than two residues every feature is 0.
    """
    seq = seq.upper()

    # Overlapping residue pairs, computed once and shared by both feature
    # groups below.
    pairs = [seq[i:i + 2] for i in range(len(seq) - 1)]

    # --- 1. Dipeptide composition ---
    pair_counts = Counter(pairs)
    total = max(len(pairs), 1)  # keeps the division defined when there are no pairs
    dipep_features = [pair_counts.get(dp, 0) / total for dp in dipeptides]

    # --- 2. Physicochemical features ---
    def pair_value(dp, table):
        """Mean of the two residues' table values; unknown residues count as 0."""
        return (table.get(dp[0], 0) + table.get(dp[1], 0)) / 2.0

    def mean_over_pairs(table):
        """Average ``pair_value`` across every residue pair of the sequence."""
        return np.mean([pair_value(dp, table) for dp in pairs])

    if not pairs:
        physchem_features = [0] * 11
    else:
        # Built once per call instead of once per pair: 7, +1 for K/R/H,
        # -1 for D/E.
        pi_table = {
            aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list
        }

        # NOTE(review): these formulas presumably mirror the training-time
        # extraction; changing any of them would need a retrained model.
        mw = mean_over_pairs(aa_mass)
        charge = mean_over_pairs(aa_charge)
        hydro = mean_over_pairs(hydrophobicity_scale)
        # Fraction of F/W/Y among pair positions.
        aromatic = np.mean(
            [(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in pairs]
        ) / 2.0
        pI = mean_over_pairs(pi_table)
        # Fraction of D/E/K/R among pair positions, stored under this name.
        instability = np.mean(
            [((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in pairs]
        )
        # Root mean square of the per-pair hydrophobicity.
        hydro_moment = np.sqrt(
            np.mean([pair_value(dp, hydrophobicity_scale) ** 2 for dp in pairs])
        )
        aliphatic = mean_over_pairs(aa_aliphatic)
        boman = mean_over_pairs(aa_boman)
        flexibility = mean_over_pairs(aa_flexibility)
        polarizability = mean_over_pairs(aa_polarizability)

        physchem_features = [
            mw, charge, hydro, aromatic, pI, instability,
            hydro_moment, aliphatic, boman, flexibility, polarizability,
        ]

    features = dipep_features + physchem_features

    df = pd.DataFrame([features], columns=feature_columns)
    return df.astype('float32')  # ensure same type as training
69
+
70
+ # --- Prediction function ---
71
def predict_peptide(sequence: str):
    """Score one peptide against every entry of ``TARGET_CELLS``.

    All whitespace is removed from the input and the rest is upper-cased
    before feature extraction.

    Args:
        sequence: Raw peptide sequence as typed by the user or sent by an API
            client. ``None``, empty and whitespace-only inputs are accepted.

    Returns:
        A list of ``[target_name, probability]`` rows, one per target, with
        the probability rounded to 4 decimals. An empty list is returned when
        there is no sequence to score.
    """
    # NOTE(review): the signature's annotations are left exactly as they were;
    # gr.api is documented to derive its endpoint schema from type hints.

    # API clients can submit null instead of an empty string; treat it the
    # same as "nothing entered" rather than failing on .split().
    if not sequence:
        return []

    seq = "".join(sequence.split()).upper()
    if not seq:
        return []

    X = extract_features_app(seq)

    # Assumes predict_proba returns one array per target, in TARGET_CELLS
    # order, each indexed [row][class] with class 1 as the positive class
    # -- TODO confirm against the saved model.
    probs_list = model.predict_proba(X)

    return [
        [target, round(float(probs_list[i][0][1]), 4)]
        for i, target in enumerate(TARGET_CELLS)
    ]
87
+
88
+
89
# --- Gradio Interface ---
# Hides footer elements on the rendered page.
custom_css = """
footer, .footer {display:none !important;}
"""

with gr.Blocks(theme="default", css=custom_css) as demo:
    gr.Markdown(
        "## Peptide Antimicrobial Predictor\n"
        "Enter a peptide sequence to predict efficacy/toxicity."
    )

    seq_input = gr.Textbox(label="Enter Peptide Sequence")

    # Both action buttons share one row.
    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    # Read-only results table: one row per target cell.
    table_output = gr.Dataframe(
        interactive=False,
        datatype=["str", "number"],
        headers=["Target Cell", "Probability of Efficacy/Toxicity"],
    )

    predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
    # Clear resets the textbox and empties the table.
    clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])

    # API endpoint for iOS app
    gr.api(predict_peptide, api_name="predict_peptide")

if __name__ == "__main__":
    demo.launch(show_error=True)