Ym420 committed on
Commit
d2a0224
·
verified ·
1 Parent(s): 7e036f2

Upload extendedFuturePucker_app copy.py

Browse files
Files changed (1) hide show
  1. extendedFuturePucker_app copy.py +150 -0
extendedFuturePucker_app copy.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ from huggingface_hub import hf_hub_download
4
+ import pandas as pd
5
+ import numpy as np
6
+ from collections import Counter
7
+
8
# --- Fetch the serialized model bundle from the Hugging Face Hub ---
repo_id = "Ym420/Peptide-Function"
model_filename = "xgb_multilabel_model_full.pkl"

# Download the bundle file, then deserialize it.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point repo_id at a repository you trust.
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
model_package = joblib.load(model_path)

# --- Unpack the bundle ---
# 'model' maps each target-cell label to its own classifier,
# e.g. {'Gram+': XGBClassifier(...), 'Fungus': XGBClassifier(...), ...}
model_dict = model_package['model']

# Column names (and order) of the feature frame the classifiers expect.
# extract_features_app must emit exactly these columns; any new feature has to
# be added both there and to this list inside the bundle.
feature_columns = model_package['feature_columns']

# --- Lookup tables used by feature extraction ---
# Each table is optional in the bundle and falls back to an empty container.
# Tables for new features that rely on extra scales belong here as well.
aa_list = model_package.get('aa_list', [])
dipeptides = model_package.get('dipeptides', [])
hydrophobicity_scale = model_package.get('hydrophobicity_scale', {})
eisenberg_scale = model_package.get('eisenberg_scale', {})
aa_mass = model_package.get('aa_mass', {})
aa_charge = model_package.get('aa_charge', {})
aa_boman = model_package.get('aa_boman', {})
aa_flexibility = model_package.get('aa_flexibility', {})
aa_polarizability = model_package.get('aa_polarizability', {})
aa_aliphatic = model_package.get('aa_aliphatic', {})
aa_deltaG = model_package.get('aa_deltaG', {})
aa_pucker = model_package.get('aa_pucker', {})

# --- Target cells ---
# Labels reported by the app, in display order. Maintained by hand; when the
# model gains labels either extend this list or derive it dynamically with
# TARGET_CELLS = list(model_dict.keys()).
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
44
+
45
+ # --- Feature extraction ---
46
+ # When adding new features, compute them here and make sure their names match feature_columns
47
def extract_features_app(seq: str) -> pd.DataFrame:
    """Build the single-row feature frame for one peptide sequence.

    The feature vector is the dipeptide composition (one frequency per entry
    of the module-level ``dipeptides`` list) followed by 13 physicochemical
    descriptors. Every descriptor is an aggregate over the overlapping residue
    pairs of ``seq``; residues missing from a lookup table contribute 0.

    Args:
        seq: Peptide sequence; it is upper-cased before processing.

    Returns:
        A one-row ``float32`` DataFrame whose columns are ``feature_columns``.
        Sequences shorter than two residues get zeros for all 13
        physicochemical descriptors.

    Raises:
        ValueError: Raised by pandas when the number of computed features does
            not match ``len(feature_columns)``.
    """
    seq = seq.upper()

    # Overlapping residue pairs of the input, computed once and shared by the
    # composition counts and every physicochemical descriptor below.
    pairs = [seq[i:i + 2] for i in range(len(seq) - 1)]

    # --- 1. Dipeptide composition ---
    pair_counts = Counter(pairs)
    total = max(len(pairs), 1)  # avoid division by zero for 0/1-residue input
    dipep_features = [pair_counts.get(dp, 0) / total for dp in dipeptides]

    # --- 2. Physicochemical features ---
    def pair_value(dp, table):
        """Mean table value of the two residues in ``dp`` (unknown -> 0)."""
        return (table.get(dp[0], 0) + table.get(dp[1], 0)) / 2.0

    def pair_mean(table):
        """Average of ``pair_value`` over every pair of the sequence."""
        return np.mean([pair_value(dp, table) for dp in pairs])

    if not pairs:
        # Fewer than two residues: no pairs to aggregate, so fill all 13
        # physicochemical features with zeros.
        physchem_features = [0] * 13
    else:
        # Per-residue pI proxy: 7, plus 1 for K/R/H, minus 1 for D/E.
        # Built once per call instead of once per dipeptide.
        pi_table = {aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list}

        mw = pair_mean(aa_mass)
        charge = pair_mean(aa_charge)
        hydro = pair_mean(hydrophobicity_scale)
        aromatic = np.mean([(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in pairs]) / 2.0
        pI = pair_mean(pi_table)
        instability = np.mean([((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in pairs])
        # Root-mean-square of the per-pair Eisenberg values.
        hydro_moment = np.sqrt(np.mean([pair_value(dp, eisenberg_scale) ** 2 for dp in pairs]))
        aliphatic = pair_mean(aa_aliphatic)
        boman = pair_mean(aa_boman)
        flexibility = pair_mean(aa_flexibility)
        polarizability = pair_mean(aa_polarizability)
        deltag = pair_mean(aa_deltaG)
        # Fix: this used to average over the global `dipeptides` vocabulary
        # rather than the pairs of this sequence, which made the feature the
        # same constant for every input.
        # NOTE(review): confirm the training pipeline computes pucker over the
        # sequence's own pairs as well.
        pucker = pair_mean(aa_pucker)

        physchem_features = [mw, charge, hydro, aromatic, pI, instability,
                             hydro_moment, aliphatic, boman, flexibility,
                             polarizability, deltag, pucker]

    # --- Combine features ---
    features = dipep_features + physchem_features

    # --- Align with feature_columns ---
    # Order and names must match what the classifiers were trained on.
    df = pd.DataFrame([features], columns=feature_columns)
    return df.astype('float32')
91
+
92
+ # --- Prediction function ---
93
+ # Returns probability for each target cell
94
def predict_peptide(sequence: str):
    """Score a peptide against every label in ``TARGET_CELLS``.

    All whitespace is removed from ``sequence`` and the remainder is
    upper-cased before features are extracted.

    Args:
        sequence: Raw peptide sequence as typed by the user.

    Returns:
        A list of ``[target, probability]`` rows in ``TARGET_CELLS`` order,
        where ``probability`` is the positive-class probability rounded to
        four decimals, or ``None`` when ``model_dict`` has no classifier for
        that target. An input that is empty after whitespace removal yields
        an empty list.
    """
    cleaned = "".join(sequence.split()).upper()
    if not cleaned:
        return []

    features = extract_features_app(cleaned)

    def score(label):
        """Return the rounded positive-class probability, or None if no model."""
        classifier = model_dict.get(label)
        if classifier is None:
            return None
        # Row 0 is the only sample; column 1 is the positive class.
        positive = classifier.predict_proba(features)[0][1]
        return round(float(positive), 4)

    return [[label, score(label)] for label in TARGET_CELLS]
112
+
113
# --- Gradio Interface ---
# Stylesheet passed to gr.Blocks; it hides the page footer.
custom_css = """
footer, .footer {display:none !important;}
"""

with gr.Blocks(css=custom_css, theme="default") as demo:
    gr.Markdown("## Peptide Antimicrobial Predictor\nEnter a peptide sequence to predict efficacy/toxicity.")

    seq_input = gr.Textbox(label="Enter Peptide Sequence")

    # Action buttons share one row.
    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    # Read-only results table: one row per target cell.
    table_output = gr.Dataframe(
        headers=["Target Cell", "Probability of Efficacy/Toxicity"],
        datatype=["str","number"],
        interactive=False
    )

    # "Predict" fills the table; "Clear" empties both the textbox and the table.
    predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
    clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])

    # API endpoint for iOS app
    gr.api(predict_peptide, api_name="predict_peptide")

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(show_error=True)
141
+
142
+ # --- Notes for manual update ---
143
+ # 1. When adding new features in your Colab model:
144
+ # - Add the new feature computation in extract_features_app
145
+ # - Update feature_columns in the model package if needed
146
+ # - Add any new metadata tables to the model_package if used
147
+ # 2. If you add new target labels:
148
+ # - Add them to TARGET_CELLS manually
149
+ # - Or switch to dynamic TARGET_CELLS = list(model_dict.keys()) for auto-detection
150
+ # 3. Always ensure the DataFrame returned from extract_features_app matches feature_columns in order and names