Fredaaaaaa committed on
Commit
a05f3a1
·
verified ·
1 Parent(s): fd6a421

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -116
app.py CHANGED
@@ -1,116 +1,120 @@
1
- import joblib
2
- import numpy as np
3
- from pubchempy import get_compounds
4
- from rdkit import Chem
5
- from rdkit.Chem import AllChem
6
- import gradio as gr
7
-
8
# Load the saved model and preprocessors from joblib files addressed by
# relative path; the three names below are used by the prediction code.
model, scaler, le = (
    joblib.load(artifact)
    for artifact in (
        'random_forest_model.joblib',
        'standard_scaler.joblib',
        'label_encoder.joblib',
    )
)

# Numerical columns from training (the order must match exactly). Each
# descriptor appears twice in a row: the bare name for drug 1, then the
# "_2" variant for drug 2.
_descriptor_names = (
    'molecular_weight',
    'xlogp',
    'tpsa',
    'rotatable_bond_count',
    'h_bond_donor_count',
    'h_bond_acceptor_count',
    'complexity',
    'charge',
    'exact_mass',
)
numerical_cols = [
    name + suffix for name in _descriptor_names for suffix in ('', '_2')
]
20
-
21
- # Preprocessing function
22
def get_morgan_fingerprint(smiles, radius=2, n_bits=512):
    """Return the Morgan fingerprint of a SMILES string as a 1-D NumPy array.

    Args:
        smiles: SMILES string of the molecule; may be None or empty.
        radius: Morgan fingerprint radius passed to RDKit.
        n_bits: Length of the returned bit vector.

    Returns:
        An array of length ``n_bits``. An all-zero array is returned when
        ``smiles`` is missing, RDKit yields no molecule for it, or
        fingerprinting raises, so callers always get a vector of the
        expected width.
    """
    # Nothing to parse: skip RDKit and fall back to the zero vector.
    if not smiles:
        return np.zeros(n_bits)
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return np.zeros(n_bits)
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        return np.array(fp)
    except Exception:
        # Featurisation stays best-effort, but unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return np.zeros(n_bits)
31
-
32
- # Function to extract features from PubChem
33
def extract_features(drug1_name, drug2_name):
    """Build the model input row for a pair of drug names via PubChem.

    Args:
        drug1_name: Name of the first drug, looked up in PubChem by name.
        drug2_name: Name of the second drug, looked up in PubChem by name.

    Returns:
        A 2-tuple ``(X, error)``. On success ``X`` is a single-row array
        made of the ``numerical_cols`` descriptors, the two Morgan
        fingerprints and 768 zeros, and ``error`` is None. On failure ``X``
        is None and ``error`` is a message for the user.
    """
    # Do not query PubChem with a blank name.
    if not (drug1_name and drug1_name.strip()
            and drug2_name and drug2_name.strip()):
        return None, "Please enter both drug names."

    # Fetch compounds from PubChem
    compounds1 = get_compounds(drug1_name, 'name')
    compounds2 = get_compounds(drug2_name, 'name')
    if not compounds1 or not compounds2:
        return None, f"One or both drugs not found: {drug1_name}, {drug2_name}"

    # Take the first match for each name.
    compound1, compound2 = compounds1[0], compounds2[0]

    # numerical_cols interleaves drug-1 and drug-2 column names, so every
    # column is looked up in the merged mapping. Slicing the list in half
    # and reading each half from only one drug's dict zeroes the columns
    # that belong to the other drug.
    props = {
        **_compound_properties(compound1),
        **_compound_properties(compound2, "_2"),
    }
    features = [props.get(col, 0.0) for col in numerical_cols]

    # Get SMILES for fingerprints
    fp1 = get_morgan_fingerprint(compound1.canonical_smiles)
    fp2 = get_morgan_fingerprint(compound2.canonical_smiles)

    # Descriptors, both fingerprints, then a zero block for the
    # 768-dimensional BioBERT slot (per the original comment).
    X = np.hstack([
        np.array(features, dtype=float).reshape(1, -1),
        fp1.reshape(1, -1),
        fp2.reshape(1, -1),
        np.zeros((1, 768)),
    ])

    return X, None


def _as_float(value):
    """Coerce a property value to float, using 0.0 when missing or non-numeric."""
    # NOTE(review): some pubchempy releases appear to return molecular
    # weight / exact mass as strings - confirm against the installed version.
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _compound_properties(compound, suffix=""):
    """Map each descriptor name (plus ``suffix``) to the compound's value."""
    names = (
        'molecular_weight',
        'xlogp',
        'tpsa',
        'rotatable_bond_count',
        'h_bond_donor_count',
        'h_bond_acceptor_count',
        'complexity',
        'exact_mass',
    )
    props = {name + suffix: _as_float(getattr(compound, name)) for name in names}
    # Charge is fixed at 0, as in the original code.
    # NOTE(review): confirm whether training used a real charge value.
    props['charge' + suffix] = 0.0
    return props
80
-
81
- # Function to predict severity
82
def predict_severity(drug1, drug2):
    """Predict the interaction severity for two drug names.

    Args:
        drug1: Name of the first drug.
        drug2: Name of the second drug.

    Returns:
        A text block with the predicted severity label followed by one
        percentage line per class, or an error/prompt message when a name
        is blank or feature extraction reports an error.
    """
    # Blank input: answer immediately instead of attempting a lookup.
    if not (drug1 and drug1.strip() and drug2 and drug2.strip()):
        return "Please enter both drug names."

    # Fetch drug features from PubChem. Index the result rather than
    # unpacking it, so extra trailing fields do not break this function.
    outcome = extract_features(drug1, drug2)
    X, error = outcome[0], outcome[1]
    if error:
        return error

    # Scale and predict
    X_scaled = scaler.transform(X)
    prediction = model.predict(X_scaled)
    severity = le.inverse_transform(prediction)[0]
    probabilities = model.predict_proba(X_scaled)[0]

    # predict_proba columns follow model.classes_ (scikit-learn convention),
    # so decode those rather than assuming le.classes_ lines up by position.
    labels = le.inverse_transform(model.classes_)

    lines = [f"Predicted Severity: {severity}", "Prediction Probabilities:"]
    lines.extend(
        f" {label}: {prob:.2%}" for label, prob in zip(labels, probabilities)
    )
    return "\n".join(lines) + "\n"
100
-
101
- # Gradio Interface
102
# Two free-text drug names in, the formatted prediction text out.
# live=True is deliberately not set: in live mode Gradio re-runs the function
# whenever an input changes, and each run of predict_severity triggers two
# PubChem name lookups, so requests would fire on partially typed names.
interface = gr.Interface(
    fn=predict_severity,
    inputs=[gr.Textbox(label="Drug 1"), gr.Textbox(label="Drug 2")],
    outputs="text",
    title="Drug Interaction Severity Predictor",
    description="Enter two drug names to predict the severity of their interaction.",
)

# Launch the interface
interface.launch()
113
-
114
-
115
-
116
-
 
 
 
 
 
1
+ import joblib
2
+ import numpy as np
3
+ from pubchempy import get_compounds
4
+ from rdkit import Chem
5
+ from rdkit.Chem import AllChem
6
+ import gradio as gr
7
+
8
# Load the saved model and preprocessors from joblib files addressed by
# relative path. A failure is printed and re-raised, so the script stops
# instead of continuing without a model.
print("Loading model and preprocessors...")
try:
    model, scaler, le = (
        joblib.load(artifact)
        for artifact in (
            'random_forest_model.joblib',
            'standard_scaler.joblib',
            'label_encoder.joblib',
        )
    )
    print(f"Model loaded successfully. Type: {type(model).__name__}")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# Numerical columns from training (the order must match exactly). Each
# descriptor appears twice in a row: the bare name for drug 1, then the
# "_2" variant for drug 2.
_descriptor_names = (
    'molecular_weight',
    'xlogp',
    'tpsa',
    'rotatable_bond_count',
    'h_bond_donor_count',
    'h_bond_acceptor_count',
    'complexity',
    'charge',
    'exact_mass',
)
numerical_cols = [
    name + suffix for name in _descriptor_names for suffix in ('', '_2')
]
26
+
27
+ # Preprocessing function
28
def get_morgan_fingerprint(smiles, radius=2, n_bits=512):
    """Return the Morgan fingerprint of a SMILES string as a 1-D NumPy array.

    Args:
        smiles: SMILES string of the molecule; may be None or empty.
        radius: Morgan fingerprint radius passed to RDKit.
        n_bits: Length of the returned bit vector.

    Returns:
        An array of length ``n_bits``. An all-zero array is returned when
        ``smiles`` is missing, RDKit yields no molecule for it, or
        fingerprinting raises, so callers always get a vector of the
        expected width.
    """
    # Nothing to parse: skip RDKit and fall back to the zero vector.
    if not smiles:
        return np.zeros(n_bits)
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return np.zeros(n_bits)
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        return np.array(fp)
    except Exception:
        # Featurisation stays best-effort, but unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return np.zeros(n_bits)
37
+
38
+ # Function to extract features from PubChem
39
def extract_features(drug1_name, drug2_name):
    """Build the model input row for a pair of drug names via PubChem.

    Args:
        drug1_name: Name of the first drug, looked up in PubChem by name.
        drug2_name: Name of the second drug, looked up in PubChem by name.

    Returns:
        A 4-tuple ``(X, error, smiles1, smiles2)`` on every path. On success
        ``X`` is a single-row array made of the ``numerical_cols``
        descriptors, the two Morgan fingerprints and 768 zeros; ``error`` is
        None; and the SMILES strings are those of the first PubChem match
        for each name. On failure ``X`` and both SMILES are None and
        ``error`` is a message for the user.
    """
    # Do not query PubChem with a blank name.
    if not (drug1_name and drug1_name.strip()
            and drug2_name and drug2_name.strip()):
        return None, "Please enter both drug names.", None, None

    # Fetch compounds from PubChem
    compounds1 = get_compounds(drug1_name, 'name')
    compounds2 = get_compounds(drug2_name, 'name')
    if not compounds1 or not compounds2:
        # Same arity as the success path: the caller unpacks four values.
        return (
            None,
            f"One or both drugs not found: {drug1_name}, {drug2_name}",
            None,
            None,
        )

    # Take the first match for each name.
    compound1, compound2 = compounds1[0], compounds2[0]

    # numerical_cols interleaves drug-1 and drug-2 column names, so every
    # column is looked up in the merged mapping. Slicing the list in half
    # and reading each half from only one drug's dict zeroes the columns
    # that belong to the other drug.
    props = {
        **_compound_properties(compound1),
        **_compound_properties(compound2, "_2"),
    }
    features = [props.get(col, 0.0) for col in numerical_cols]

    # Get SMILES for fingerprints
    smiles1 = compound1.canonical_smiles
    smiles2 = compound2.canonical_smiles
    fp1 = get_morgan_fingerprint(smiles1)
    fp2 = get_morgan_fingerprint(smiles2)

    # Descriptors, both fingerprints, then a zero block for the
    # 768-dimensional BioBERT slot (per the original comment).
    X = np.hstack([
        np.array(features, dtype=float).reshape(1, -1),
        fp1.reshape(1, -1),
        fp2.reshape(1, -1),
        np.zeros((1, 768)),
    ])

    return X, None, smiles1, smiles2


def _as_float(value):
    """Coerce a property value to float, using 0.0 when missing or non-numeric."""
    # NOTE(review): some pubchempy releases appear to return molecular
    # weight / exact mass as strings - confirm against the installed version.
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _compound_properties(compound, suffix=""):
    """Map each descriptor name (plus ``suffix``) to the compound's value."""
    names = (
        'molecular_weight',
        'xlogp',
        'tpsa',
        'rotatable_bond_count',
        'h_bond_donor_count',
        'h_bond_acceptor_count',
        'complexity',
        'exact_mass',
    )
    props = {name + suffix: _as_float(getattr(compound, name)) for name in names}
    # Charge is fixed at 0, as in the original code.
    # NOTE(review): confirm whether training used a real charge value.
    props['charge' + suffix] = 0.0
    return props
86
+
87
+ # Function to predict severity
88
def predict_severity(drug1, drug2):
    """Predict the interaction severity for two drug names.

    Args:
        drug1: Name of the first drug.
        drug2: Name of the second drug.

    Returns:
        A text block with the predicted severity label, the SMILES string
        of each drug and one percentage line per class, or an error/prompt
        message when a name is blank or feature extraction reports an error.
    """
    # Blank input: answer immediately instead of attempting a lookup.
    if not (drug1 and drug1.strip() and drug2 and drug2.strip()):
        return "Please enter both drug names."

    # Fetch drug features from PubChem. Read the error field before touching
    # the SMILES fields, so a short (X, error) failure tuple cannot raise
    # ValueError the way a 4-way unpack does.
    outcome = extract_features(drug1, drug2)
    X, error = outcome[0], outcome[1]
    if error:
        return error
    smiles1, smiles2 = outcome[2], outcome[3]

    # Scale and predict
    X_scaled = scaler.transform(X)
    prediction = model.predict(X_scaled)
    severity = le.inverse_transform(prediction)[0]
    probabilities = model.predict_proba(X_scaled)[0]

    # predict_proba columns follow model.classes_ (scikit-learn convention),
    # so decode those rather than assuming le.classes_ lines up by position.
    labels = le.inverse_transform(model.classes_)

    # Format output with SMILES
    lines = [
        f"Predicted Severity: {severity}",
        f"Drug 1 SMILES: {smiles1}",
        f"Drug 2 SMILES: {smiles2}",
        "Prediction Probabilities:",
    ]
    lines.extend(
        f" {label}: {prob:.2%}" for label, prob in zip(labels, probabilities)
    )
    return "\n".join(lines) + "\n"
108
+
109
+ # Gradio Interface
110
# Two free-text drug names in, the formatted prediction text out.
# live=True is deliberately not set: in live mode Gradio re-runs the function
# whenever an input changes, and each run of predict_severity triggers two
# PubChem name lookups, so requests would fire on partially typed names.
interface = gr.Interface(
    fn=predict_severity,
    inputs=[gr.Textbox(label="Drug 1"), gr.Textbox(label="Drug 2")],
    outputs="text",
    title="Drug Interaction Severity Predictor",
    description="Enter two drug names to predict the severity of their interaction.",
)

# Launch the interface
interface.launch()