Ym420 committed on
Commit
450bdf1
·
verified ·
1 Parent(s): 4765f4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -27
app.py CHANGED
@@ -11,12 +11,6 @@ model_filename = "xgb_multilabel_model_full_extendedFuture.pkl"
11
 
12
  model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
13
  model_package = joblib.load(model_path)
14
-
15
- # --- Debug: Check what we loaded ---
16
- print("Loaded model_package type:", type(model_package))
17
- print("model_package keys:", list(model_package.keys()))
18
- print("Type of model:", type(model_package['model']))
19
-
20
  model = model_package['model']
21
  feature_columns = model_package['feature_columns']
22
 
@@ -24,14 +18,12 @@ feature_columns = model_package['feature_columns']
24
  aa_list = model_package['aa_list']
25
  dipeptides = model_package['dipeptides']
26
  hydrophobicity_scale = model_package['hydrophobicity_scale']
27
- eisenberg_scale = model_package['eisenberg_scale']
28
  aa_mass = model_package['aa_mass']
29
  aa_charge = model_package['aa_charge']
30
  aa_boman = model_package['aa_boman']
31
  aa_flexibility = model_package['aa_flexibility']
32
  aa_polarizability = model_package['aa_polarizability']
33
  aa_aliphatic = model_package['aa_aliphatic']
34
- aa_deltaG = model_package['aa_deltaG']
35
 
36
  # --- Target cells ---
37
  TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
@@ -40,19 +32,19 @@ TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
40
  def extract_features_app(seq: str) -> pd.DataFrame:
41
  seq = seq.upper()
42
 
43
- # 1. Dipeptide composition
44
  count = Counter([seq[i:i+2] for i in range(len(seq)-1)])
45
  total = max(len(seq)-1, 1)
46
  dipep_features = [count.get(dp, 0) / total for dp in dipeptides]
47
 
48
- # 2. Physicochemical features
49
  def g(aa, table): return table.get(aa, 0)
50
  def h(dp, table): return (g(dp[0], table) + g(dp[1], table)) / 2.0
51
 
52
  dipeptides_seq = [seq[i:i+2] for i in range(len(seq)-1)]
53
 
54
  if len(seq) < 2:
55
- physchem_features = [0]*12
56
  else:
57
  mw = np.mean([h(dp, aa_mass) for dp in dipeptides_seq])
58
  charge = np.mean([h(dp, aa_charge) for dp in dipeptides_seq])
@@ -60,15 +52,14 @@ def extract_features_app(seq: str) -> pd.DataFrame:
60
  aromatic = np.mean([(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in dipeptides_seq]) / 2.0
61
  pI = np.mean([h(dp, {aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list}) for dp in dipeptides_seq])
62
  instability = np.mean([((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in dipeptides_seq])
63
- hydro_moment = np.sqrt(np.mean([(h(dp, eisenberg_scale))**2 for dp in dipeptides_seq]))
64
  aliphatic = np.mean([h(dp, aa_aliphatic) for dp in dipeptides_seq])
65
  boman = np.mean([h(dp, aa_boman) for dp in dipeptides_seq])
66
  flexibility = np.mean([h(dp, aa_flexibility) for dp in dipeptides_seq])
67
  polarizability = np.mean([h(dp, aa_polarizability) for dp in dipeptides_seq])
68
- deltag = np.mean([h(dp, aa_deltaG) for dp in dipeptides_seq])
69
-
70
  physchem_features = [mw, charge, hydro, aromatic, pI, instability,
71
- hydro_moment, aliphatic, boman, flexibility, polarizability, deltag]
72
 
73
  features = dipep_features + physchem_features
74
 
@@ -83,20 +74,18 @@ def predict_peptide(sequence: str):
83
  return []
84
 
85
  X = extract_features_app(seq)
86
-
87
- # --- Handle model being a list of estimators ---
88
- if isinstance(model, list):
89
- probs_list = [est.predict_proba(X) for est in model]
90
- else:
91
- probs_list = [model.predict_proba(X)] # single model
92
 
93
  table = []
94
  for i, target in enumerate(TARGET_CELLS):
95
  prob = float(probs_list[i][0][1])
96
  table.append([target, round(prob, 4)])
97
-
98
  return table
99
 
 
100
  # --- Gradio Interface ---
101
  custom_css = """
102
  footer, .footer {display:none !important;}
@@ -120,11 +109,11 @@ with gr.Blocks(css=custom_css, theme="default") as demo:
120
  predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
121
  clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])
122
 
123
- # Optional API endpoint for iOS app
124
- # Note: use only if Gradio version supports `api`
125
- # gr.api(predict_peptide, api_name="predict_peptide")
 
 
126
 
127
  if __name__ == "__main__":
128
  demo.launch(show_error=True)
129
-
130
-
 
11
 
12
  model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
13
  model_package = joblib.load(model_path)
 
 
 
 
 
 
14
  model = model_package['model']
15
  feature_columns = model_package['feature_columns']
16
 
 
18
  aa_list = model_package['aa_list']
19
  dipeptides = model_package['dipeptides']
20
  hydrophobicity_scale = model_package['hydrophobicity_scale']
 
21
  aa_mass = model_package['aa_mass']
22
  aa_charge = model_package['aa_charge']
23
  aa_boman = model_package['aa_boman']
24
  aa_flexibility = model_package['aa_flexibility']
25
  aa_polarizability = model_package['aa_polarizability']
26
  aa_aliphatic = model_package['aa_aliphatic']
 
27
 
28
  # --- Target cells ---
29
  TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
 
32
  def extract_features_app(seq: str) -> pd.DataFrame:
33
  seq = seq.upper()
34
 
35
+ # --- 1. Dipeptide composition ---
36
  count = Counter([seq[i:i+2] for i in range(len(seq)-1)])
37
  total = max(len(seq)-1, 1)
38
  dipep_features = [count.get(dp, 0) / total for dp in dipeptides]
39
 
40
+ # --- 2. Physicochemical features ---
41
  def g(aa, table): return table.get(aa, 0)
42
  def h(dp, table): return (g(dp[0], table) + g(dp[1], table)) / 2.0
43
 
44
  dipeptides_seq = [seq[i:i+2] for i in range(len(seq)-1)]
45
 
46
  if len(seq) < 2:
47
+ physchem_features = [0]*11
48
  else:
49
  mw = np.mean([h(dp, aa_mass) for dp in dipeptides_seq])
50
  charge = np.mean([h(dp, aa_charge) for dp in dipeptides_seq])
 
52
  aromatic = np.mean([(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in dipeptides_seq]) / 2.0
53
  pI = np.mean([h(dp, {aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list}) for dp in dipeptides_seq])
54
  instability = np.mean([((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in dipeptides_seq])
55
+ hydro_moment = np.sqrt(np.mean([(h(dp, hydrophobicity_scale))**2 for dp in dipeptides_seq]))
56
  aliphatic = np.mean([h(dp, aa_aliphatic) for dp in dipeptides_seq])
57
  boman = np.mean([h(dp, aa_boman) for dp in dipeptides_seq])
58
  flexibility = np.mean([h(dp, aa_flexibility) for dp in dipeptides_seq])
59
  polarizability = np.mean([h(dp, aa_polarizability) for dp in dipeptides_seq])
60
+
 
61
  physchem_features = [mw, charge, hydro, aromatic, pI, instability,
62
+ hydro_moment, aliphatic, boman, flexibility, polarizability]
63
 
64
  features = dipep_features + physchem_features
65
 
 
74
  return []
75
 
76
  X = extract_features_app(seq)
77
+
78
+ #probs_list = model.predict_proba(X) # list of arrays per target
79
+ # --- probs_list: list of arrays from each estimator ---
80
+ probs_list = [est.predict_proba(X) for est in model] # model is a list
 
 
81
 
82
  table = []
83
  for i, target in enumerate(TARGET_CELLS):
84
  prob = float(probs_list[i][0][1])
85
  table.append([target, round(prob, 4)])
 
86
  return table
87
 
88
+
89
  # --- Gradio Interface ---
90
  custom_css = """
91
  footer, .footer {display:none !important;}
 
109
  predict_btn.click(fn=predict_peptide, inputs=seq_input, outputs=table_output)
110
  clear_btn.click(fn=lambda: ("", []), outputs=[seq_input, table_output])
111
 
112
+ # API endpoint for iOS app
113
+ gr.api(predict_peptide, api_name="predict_peptide")
114
+
115
+ #if __name__ == "__main__":
116
+ # demo.launch()
117
 
118
  if __name__ == "__main__":
119
  demo.launch(show_error=True)