Ym420 committed on
Commit
7e036f2
·
verified ·
1 Parent(s): 749fff4

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -148
app.py DELETED
@@ -1,148 +0,0 @@
1
- import gradio as gr
2
- import joblib
3
- from huggingface_hub import hf_hub_download
4
- import pandas as pd
5
- import numpy as np
6
- from collections import Counter
7
-
8
# --- Download model from HF Hub ---
repo_id = "Ym420/Peptide-Function"
model_filename = "xgb_multilabel_model_full.pkl"

# Download and load the saved model package.
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)

# SECURITY NOTE: joblib.load unpickles the file, and unpickling can execute
# arbitrary code embedded in it. Only point repo_id at a repository you
# control and trust; never load a package from an untrusted source.
model_package = joblib.load(model_path)

# --- Unwrap model dict ---
# model_dict contains all XGB classifiers for each target cell
# e.g., {'Gram+': XGBClassifier(...), 'Fungus': XGBClassifier(...), ...}
# 'model' and 'feature_columns' are required keys: a package without them
# fails here with KeyError instead of later, at prediction time.
model_dict = model_package['model']

# feature_columns must match the columns returned by extract_features_app.
# If you add new features, ensure they are included here and in
# extract_features_app.
feature_columns = model_package['feature_columns']

# --- Metadata (all restored) ---
# Optional lookup tables: each falls back to an empty container when the key
# is absent from the package.
# If you add new features that depend on new tables or scales, add them here.
aa_list = model_package.get('aa_list', [])
dipeptides = model_package.get('dipeptides', [])
hydrophobicity_scale = model_package.get('hydrophobicity_scale', {})
eisenberg_scale = model_package.get('eisenberg_scale', {})
aa_mass = model_package.get('aa_mass', {})
aa_charge = model_package.get('aa_charge', {})
aa_boman = model_package.get('aa_boman', {})
aa_flexibility = model_package.get('aa_flexibility', {})
aa_polarizability = model_package.get('aa_polarizability', {})
aa_aliphatic = model_package.get('aa_aliphatic', {})
aa_deltaG = model_package.get('aa_deltaG', {})

# --- Target cells ---
# The order of this list is the row order of the prediction table.
# If you add new labels in the model, you can update this list manually
# Or make it dynamic: TARGET_CELLS = list(model_dict.keys())
TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
43
-
44
# --- Feature extraction ---
# When adding new features, compute them here and make sure their names match feature_columns
def extract_features_app(seq: str) -> pd.DataFrame:
    """Build the single-row feature frame for one peptide sequence.

    The feature vector is the dipeptide composition (one value per entry of
    the module-level ``dipeptides`` list) followed by 12 physicochemical
    descriptors, each averaged over the overlapping residue pairs of the
    sequence.

    Args:
        seq: Peptide sequence; it is upper-cased before processing.

    Returns:
        A one-row ``float32`` DataFrame whose columns are
        ``feature_columns``.

    Note:
        Residues missing from a lookup table contribute 0 to that
        descriptor. For sequences shorter than two residues all 12
        physicochemical descriptors are 0. The number of computed features
        must equal ``len(feature_columns)``; otherwise the DataFrame
        construction fails.
    """
    seq = seq.upper()

    # Overlapping residue pairs, computed once and shared by both feature
    # groups below.
    dipeptides_seq = [seq[i:i + 2] for i in range(len(seq) - 1)]

    # --- 1. Dipeptide composition ---
    count = Counter(dipeptides_seq)
    total = max(len(dipeptides_seq), 1)  # avoid dividing by zero
    dipep_features = [count.get(dp, 0) / total for dp in dipeptides]

    # --- 2. Physicochemical features ---
    if len(seq) < 2:
        # For very short sequences, fill physchem features with zeros
        physchem_features = [0] * 12
    else:
        def pair_avg(dp, table):
            # Mean table value of the two residues; unknown residues count 0.
            return (table.get(dp[0], 0) + table.get(dp[1], 0)) / 2.0

        def mean_over_pairs(table):
            return np.mean([pair_avg(dp, table) for dp in dipeptides_seq])

        # Built once per call rather than once per dipeptide: 7, shifted by
        # +1 for K/R/H and by -1 for D/E.
        pi_table = {
            aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list
        }

        mw = mean_over_pairs(aa_mass)
        charge = mean_over_pairs(aa_charge)
        hydro = mean_over_pairs(hydrophobicity_scale)
        aromatic = np.mean(
            [(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in dipeptides_seq]
        ) / 2.0
        pI = mean_over_pairs(pi_table)
        instability = np.mean(
            [((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0
             for dp in dipeptides_seq]
        )
        # Root-mean-square of the per-pair Eisenberg averages.
        hydro_moment = np.sqrt(
            np.mean([pair_avg(dp, eisenberg_scale) ** 2
                     for dp in dipeptides_seq])
        )
        aliphatic = mean_over_pairs(aa_aliphatic)
        boman = mean_over_pairs(aa_boman)
        flexibility = mean_over_pairs(aa_flexibility)
        polarizability = mean_over_pairs(aa_polarizability)
        deltag = mean_over_pairs(aa_deltaG)

        physchem_features = [
            mw, charge, hydro, aromatic, pI, instability,
            hydro_moment, aliphatic, boman, flexibility, polarizability,
            deltag,
        ]

    # --- Combine features ---
    features = dipep_features + physchem_features

    # --- Align with feature_columns ---
    # Always ensure the order and names match the training data
    df = pd.DataFrame([features], columns=feature_columns)
    return df.astype('float32')
89
-
90
# --- Prediction function ---
# Returns probability for each target cell
def predict_peptide(sequence: str):
    """Predict the positive-class probability for every target cell.

    Args:
        sequence: Peptide sequence as typed by the user or sent by an API
            client. All whitespace is removed and the text is upper-cased
            before feature extraction.

    Returns:
        A list of ``[target, probability]`` rows, one per entry of
        ``TARGET_CELLS`` and in that order. The probability is rounded to
        four decimals, or ``None`` when ``model_dict`` has no classifier for
        that target. An empty list is returned when ``sequence`` is
        ``None``, empty, or whitespace only.
    """
    # An API client may send null; treat it like an empty submission instead
    # of failing on None.split().
    if not sequence:
        return []

    seq = "".join(sequence.split()).upper()
    if not seq:
        return []

    X = extract_features_app(seq)

    table = []
    for target in TARGET_CELLS:
        clf = model_dict.get(target)
        if clf is None:
            # Keep the row so the table shape stays fixed.
            table.append([target, None])
            continue
        # Positive-class probability between 0-1
        prob = clf.predict_proba(X)[0][1]
        table.append([target, round(float(prob), 4)])

    return table
110
-
111
# --- Gradio Interface ---
# Stylesheet that hides the page footer.
custom_css = (
    "\n"
    "footer, .footer {display:none !important;}\n"
)

_RESULT_HEADERS = ["Target Cell", "Probability of Efficacy/Toxicity"]
_RESULT_DATATYPES = ["str", "number"]

with gr.Blocks(css=custom_css, theme="default") as demo:
    gr.Markdown(
        "## Peptide Antimicrobial Predictor\n"
        "Enter a peptide sequence to predict efficacy/toxicity."
    )

    seq_input = gr.Textbox(label="Enter Peptide Sequence")

    # Action buttons share one row.
    with gr.Row():
        predict_btn = gr.Button("Predict", variant="primary")
        clear_btn = gr.Button("Clear")

    # Read-only results table: one row per target cell.
    table_output = gr.Dataframe(
        headers=_RESULT_HEADERS,
        datatype=_RESULT_DATATYPES,
        interactive=False,
    )

    # "Predict" fills the table from the typed sequence.
    predict_btn.click(
        fn=predict_peptide,
        inputs=seq_input,
        outputs=table_output,
    )
    # "Clear" empties both the textbox and the table.
    clear_btn.click(
        fn=lambda: ("", []),
        outputs=[seq_input, table_output],
    )

    # API endpoint for iOS app
    gr.api(predict_peptide, api_name="predict_peptide")

if __name__ == "__main__":
    # show_error=True is passed so failures are reported rather than hidden.
    demo.launch(show_error=True)
139
-
140
- # --- Notes for manual update ---
141
- # 1. When adding new features in your Colab model:
142
- # - Add the new feature computation in extract_features_app
143
- # - Update feature_columns in the model package if needed
144
- # - Add any new metadata tables to the model_package if used
145
- # 2. If you add new target labels:
146
- # - Add them to TARGET_CELLS manually
147
- # - Or switch to dynamic TARGET_CELLS = list(model_dict.keys()) for auto-detection
148
- # 3. Always ensure the DataFrame returned from extract_features_app matches feature_columns in order and names