Ym420 committed on
Commit
a3aabc6
·
verified ·
1 Parent(s): 073312c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -31
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import gradio as gr
2
  import joblib
3
  from huggingface_hub import hf_hub_download
4
- import numpy as np
5
  import pandas as pd
 
 
6
 
7
  # --- Download model from HF Hub ---
8
- repo_id = "Ym420/Peptide-Function" # replace with your HF repo
9
  model_filename = "xgb_multilabel_model_full.pkl"
10
 
11
  model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
@@ -13,7 +14,7 @@ model_package = joblib.load(model_path)
13
  model = model_package['model']
14
  feature_columns = model_package['feature_columns']
15
 
16
- # Metadata
17
  aa_list = model_package['aa_list']
18
  dipeptides = model_package['dipeptides']
19
  hydrophobicity_scale = model_package['hydrophobicity_scale']
@@ -24,52 +25,63 @@ aa_flexibility = model_package['aa_flexibility']
24
  aa_polarizability = model_package['aa_polarizability']
25
  aa_aliphatic = model_package['aa_aliphatic']
26
 
27
- # --- Feature extraction ---
28
def extract_features(sequence: str) -> pd.DataFrame:
    """Turn one peptide sequence into a single-row feature frame.

    The row holds amino-acid composition (``AA_*``), dipeptide composition
    (``DP_*``), mean hydrophobicity, and the per-residue mean of six further
    property tables. It is then reindexed to ``feature_columns`` so that any
    column not computed here is filled with 0.

    Args:
        sequence: Peptide sequence; it is upper-cased before use.

    Returns:
        A one-row DataFrame whose columns are ``feature_columns``.
    """
    residues = sequence.upper()
    n_residues = len(residues)
    n_pairs = n_residues - 1

    def per_residue_mean(table):
        # Residues missing from the table contribute 0; an empty sequence
        # yields a plain 0 instead of dividing by zero.
        if n_residues > 0:
            return sum(table.get(aa, 0) for aa in residues) / n_residues
        return 0

    row = {}

    # Amino acid composition: fraction of the sequence made up of each residue.
    for aa in aa_list:
        row[f"AA_{aa}"] = residues.count(aa) / n_residues if n_residues > 0 else 0

    # Dipeptide composition over the overlapping two-residue windows.
    windows = [residues[k:k + 2] for k in range(n_pairs)]
    for dp in dipeptides:
        row[f"DP_{dp}"] = windows.count(dp) / n_pairs if n_residues > 1 else 0

    # Hydrophobicity, then the remaining physicochemical properties.
    row['hydrophobicity'] = per_residue_mean(hydrophobicity_scale)
    property_tables = {
        'mass': aa_mass,
        'charge': aa_charge,
        'boman': aa_boman,
        'flexibility': aa_flexibility,
        'polarizability': aa_polarizability,
        'aliphatic': aa_aliphatic,
    }
    for prop, table in property_tables.items():
        row[prop] = per_residue_mean(table)

    # Align to the column layout stored with the model.
    frame = pd.DataFrame([row])
    return frame.reindex(columns=feature_columns, fill_value=0)
52
 
53
  # --- Prediction function ---
54
- TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
55
-
56
-
57
  def predict_peptide(sequence: str):
58
  seq = "".join(sequence.split()).upper()
59
  if not seq:
60
  return []
61
 
62
- X = extract_features(seq)
63
- probs_list = model.predict_proba(X) # list of arrays per target cell
64
 
65
- # Format output with 4 decimal places
66
  table = []
67
  for i, target in enumerate(TARGET_CELLS):
68
  prob = float(probs_list[i][0][1])
69
  table.append([target, round(prob, 4)])
70
  return table
71
 
72
-
73
  # --- Gradio Interface ---
74
  custom_css = """
75
  footer, .footer {display:none !important;}
 
1
  import gradio as gr
2
  import joblib
3
  from huggingface_hub import hf_hub_download
 
4
  import pandas as pd
5
+ import numpy as np
6
+ from collections import Counter
7
 
8
  # --- Download model from HF Hub ---
9
+ repo_id = "GiMikawa/Peptide-Function" # replace with your HF repo
10
  model_filename = "xgb_multilabel_model_full.pkl"
11
 
12
  model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
 
14
  model = model_package['model']
15
  feature_columns = model_package['feature_columns']
16
 
17
+ # --- Metadata ---
18
  aa_list = model_package['aa_list']
19
  dipeptides = model_package['dipeptides']
20
  hydrophobicity_scale = model_package['hydrophobicity_scale']
 
25
  aa_polarizability = model_package['aa_polarizability']
26
  aa_aliphatic = model_package['aa_aliphatic']
27
 
28
+ # --- Target cells ---
29
+ TARGET_CELLS = ["Gram+", "Fungus", "Mammalian Cell", "Cancer", "Gram-"]
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # --- Feature extraction ---
32
def extract_features_app(seq: str) -> pd.DataFrame:
    """Build the single-row feature frame that is passed to the model.

    The row is the dipeptide composition (one fraction per entry of
    ``dipeptides``) followed by eleven physicochemical descriptors, each
    computed over the overlapping two-residue windows of the sequence.

    Args:
        seq: Peptide sequence; it is upper-cased before use.

    Returns:
        A one-row float32 DataFrame whose columns are ``feature_columns``.
    """
    seq = seq.upper()

    # Overlapping two-residue windows, built once and shared by both
    # feature groups.
    windows = [seq[i:i + 2] for i in range(len(seq) - 1)]

    # --- 1. Dipeptide composition ---
    window_counts = Counter(windows)
    total = max(len(windows), 1)  # avoid dividing by zero for short input
    dipep_features = [window_counts.get(dp, 0) / total for dp in dipeptides]

    # --- 2. Physicochemical features ---
    if not windows:
        # Fewer than two residues: no window exists to average over.
        physchem_features = [0] * 11
    else:
        def pair_mean(dp, table):
            # Mean of the two residues' table values; unknown residues count as 0.
            return (table.get(dp[0], 0) + table.get(dp[1], 0)) / 2.0

        def window_mean(table):
            return np.mean([pair_mean(dp, table) for dp in windows])

        # Per-residue pI stand-in: 7, plus 1 for K/R/H, minus 1 for D/E.
        # Built once here rather than once per window.
        pi_table = {
            aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list
        }

        hydro = window_mean(hydrophobicity_scale)
        mw = window_mean(aa_mass)
        charge = window_mean(aa_charge)
        # Mean fraction of F/W/Y residues per window.
        aromatic = np.mean(
            [(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in windows]
        ) / 2.0
        pI = window_mean(pi_table)
        # Mean fraction of D/E/K/R residues per window.
        instability = np.mean(
            [((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in windows]
        )
        # Root-mean-square of the per-window hydrophobicity.
        hydro_moment = np.sqrt(
            np.mean([pair_mean(dp, hydrophobicity_scale) ** 2 for dp in windows])
        )
        aliphatic = window_mean(aa_aliphatic)
        boman = window_mean(aa_boman)
        flexibility = window_mean(aa_flexibility)
        polarizability = window_mean(aa_polarizability)

        physchem_features = [mw, charge, hydro, aromatic, pI, instability,
                             hydro_moment, aliphatic, boman, flexibility,
                             polarizability]

    features = dipep_features + physchem_features

    # NOTE(review): assumes feature_columns lists the dipeptide columns first,
    # then the descriptors in the order above - confirm against training code.
    df = pd.DataFrame([features], columns=feature_columns)
    df = df.astype('float32')  # ensure same type as training
    return df
69
 
70
  # --- Prediction function ---
 
 
 
71
  def predict_peptide(sequence: str):
72
  seq = "".join(sequence.split()).upper()
73
  if not seq:
74
  return []
75
 
76
+ X = extract_features_app(seq)
77
+ probs_list = model.predict_proba(X) # list of arrays per target
78
 
 
79
  table = []
80
  for i, target in enumerate(TARGET_CELLS):
81
  prob = float(probs_list[i][0][1])
82
  table.append([target, round(prob, 4)])
83
  return table
84
 
 
85
  # --- Gradio Interface ---
86
  custom_css = """
87
  footer, .footer {display:none !important;}