Ym420 committed on
Commit
82250a1
·
verified ·
1 Parent(s): a384a25

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import pandas as pd
4
+
5
# --- Load model package ---
# NOTE(review): joblib.load unpickles the file, and unpickling can run
# arbitrary code -- only ship a model package that comes from a trusted source.
model_package_path = "xgb_multilabel_model_full.pkl"
model_package = joblib.load(model_package_path)

# The estimator and the exact column order it is fed with.
model, feature_columns = (
    model_package[key] for key in ("model", "feature_columns")
)

# Metadata stored next to the model: the residue / dipeptide vocabularies and
# the per-residue lookup tables read by the feature extraction code.
(
    aa_list,
    dipeptides,
    hydrophobicity_scale,
    aa_mass,
    aa_charge,
    aa_boman,
    aa_flexibility,
    aa_polarizability,
    aa_aliphatic,
) = (
    model_package[key]
    for key in (
        "aa_list",
        "dipeptides",
        "hydrophobicity_scale",
        "aa_mass",
        "aa_charge",
        "aa_boman",
        "aa_flexibility",
        "aa_polarizability",
        "aa_aliphatic",
    )
)
21
+
22
+ # --- Feature extraction ---
23
def extract_features(sequence: str) -> pd.DataFrame:
    """Build the single-row feature frame fed to the model for one peptide.

    Whitespace is removed and the sequence is upper-cased, then every
    feature is computed as a per-sequence average:

    * ``AA_<aa>``: occurrences of each entry of ``aa_list`` divided by the
      sequence length.
    * ``DP_<dp>``: occurrences of each entry of ``dipeptides`` among the
      overlapping two-character windows, divided by the number of windows.
    * ``hydrophobicity``, ``mass``, ``charge``, ``boman``, ``flexibility``,
      ``polarizability``, ``aliphatic``: mean of the matching lookup table
      over the sequence; characters missing from a table contribute 0.

    Args:
        sequence: Peptide sequence text. Case and whitespace are ignored.

    Returns:
        A one-row DataFrame whose columns are exactly ``feature_columns``
        (in that order). Columns not computed here are filled with 0, and
        every feature is 0 when it cannot be averaged (empty sequence, or a
        single residue for the dipeptide features).
    """
    # Function-scope import so this block does not depend on the file header.
    from collections import Counter

    # Whitespace (e.g. line breaks pasted into the multi-line textbox) is not
    # a residue: keeping it would inflate the length and dilute every feature.
    seq = "".join(sequence.split()).upper()
    length = len(seq)

    # Amino acid composition
    features = {
        f"AA_{aa}": (seq.count(aa) / length if length else 0)
        for aa in aa_list
    }

    # Dipeptide composition: count every overlapping pair in one pass instead
    # of rescanning the whole sequence once per dipeptide.
    n_pairs = length - 1
    pair_counts = Counter(seq[i:i + 2] for i in range(n_pairs))
    for dp in dipeptides:
        features[f"DP_{dp}"] = pair_counts[dp] / n_pairs if length > 1 else 0

    # Hydrophobicity and the other physicochemical properties are all plain
    # means of a per-residue lookup table.
    tables = {
        "hydrophobicity": hydrophobicity_scale,
        "mass": aa_mass,
        "charge": aa_charge,
        "boman": aa_boman,
        "flexibility": aa_flexibility,
        "polarizability": aa_polarizability,
        "aliphatic": aa_aliphatic,
    }
    for name, table in tables.items():
        total = sum(table.get(aa, 0) for aa in seq)
        features[name] = total / length if length else 0

    # Align with the column order stored in the model package.
    return pd.DataFrame([features]).reindex(columns=feature_columns, fill_value=0)
47
+
48
+ # --- Prediction ---
49
def predict(sequence):
    """Predict per-target probabilities for one peptide sequence.

    Args:
        sequence: Raw text from the input box; surrounding whitespace is
            ignored.

    Returns:
        A ``(text, table)`` pair matching the two Gradio outputs. For blank
        input the text is an error message and the table is ``None``.
        Otherwise the text is the stripped sequence and the table is a
        DataFrame with one row per entry of ``model.classes_`` and the
        columns ``"Target Cell"`` and ``"Probability of Efficacy/Toxicity"``.
    """
    sequence = sequence.strip()
    if not sequence:
        return "Sequence cannot be empty.", None

    X = extract_features(sequence)
    # Indexed below as probs[target][sample][class]: one array per target,
    # a single sample (row 0), and column 1 taken as the positive class.
    probs = model.predict_proba(X)

    # NOTE(review): model.classes_ is used here as the list of target names.
    # For scikit-learn multi-output wrappers this attribute holds the class
    # labels of each target rather than the target names -- confirm against
    # the saved model.
    rows = [
        {
            "Target Cell": col,
            "Probability of Efficacy/Toxicity": float(probs[i][0][1]),
        }
        for i, col in enumerate(model.classes_)
    ]

    # gr.Dataframe renders DataFrames (or lists of rows), not lists of dicts,
    # so hand it a frame whose columns match the configured headers.
    table = pd.DataFrame(
        rows, columns=["Target Cell", "Probability of Efficacy/Toxicity"]
    )
    return sequence, table
66
+
67
# --- Gradio Interface ---
# One text input; two outputs: the echoed sequence (or an error message) and
# a two-column table of per-target probabilities.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter peptide sequence here..."),
    outputs=[
        gr.Textbox(label="Input Sequence"),
        gr.Dataframe(
            headers=["Target Cell", "Probability of Efficacy/Toxicity"],
            # One datatype per column; "json" is not a Dataframe datatype.
            datatype=["str", "number"],
        ),
    ],
    title="Peptide Antimicrobial Predictor",
    description="Enter a peptide sequence to predict its antimicrobial efficacy/toxicity against target cells.",
)

if __name__ == "__main__":
    # Listen on all network interfaces so the app is reachable from outside
    # the host/container, on port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)