Fredaaaaaa committed on
Commit
bfdb887
·
verified ·
1 Parent(s): 3c86017

Upload 5 files

Browse files
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+ import numpy as np
3
+ from pubchempy import get_compounds
4
+ from rdkit import Chem
5
+ from rdkit.Chem import AllChem
6
+ import gradio as gr
7
+
8
+ # Load saved model and preprocessors
9
# Restore the fitted artifacts, in order: classifier, feature scaler, label
# encoder. NOTE(review): joblib.load unpickles arbitrary objects, so these
# files must only ever come from a trusted source.
model, scaler, le = (
    joblib.load(artifact_path)
    for artifact_path in (
        'random_forest_model.joblib',
        'standard_scaler.joblib',
        'label_encoder.joblib',
    )
)
12
+
13
+ # Define numerical columns from training (match exactly)
14
# Column order is interleaved: every descriptor of drug 1 is immediately
# followed by the same descriptor of drug 2 (suffix "_2").
numerical_cols = [
    base_name + suffix
    for base_name in (
        'molecular_weight',
        'xlogp',
        'tpsa',
        'rotatable_bond_count',
        'h_bond_donor_count',
        'h_bond_acceptor_count',
        'complexity',
        'charge',
        'exact_mass',
    )
    for suffix in ('', '_2')
]
20
+
21
+ # Preprocessing function
22
def get_morgan_fingerprint(smiles, radius=2, n_bits=512):
    """Return the Morgan fingerprint of a SMILES string as a NumPy vector.

    Args:
        smiles: SMILES string of the molecule. ``None`` or an empty string
            is treated as "no structure available".
        radius: Morgan radius passed to RDKit.
        n_bits: Length of the bit vector, and of the returned array.

    Returns:
        A 1-D array of length ``n_bits``. An all-zero vector is returned
        when the SMILES is missing, cannot be parsed, or RDKit raises.
    """
    # A missing structure cannot be fingerprinted; answer without calling
    # RDKit instead of relying on the exception handler below.
    if not smiles:
        return np.zeros(n_bits)

    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:  # RDKit signals an unparsable SMILES with None.
            return np.zeros(n_bits)
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        return np.array(fp)
    except Exception:
        # Deliberate best-effort: a fingerprinting failure degrades to an
        # all-zero vector. `except Exception` (not a bare `except:`) lets
        # KeyboardInterrupt and SystemExit propagate.
        return np.zeros(n_bits)
31
+
32
+ # Function to extract features from PubChem
33
def extract_features(drug1_name, drug2_name):
    """Build the single-row model input for a pair of drugs via PubChem.

    Both names are looked up on PubChem and the first match of each is used.
    The row is laid out as: the descriptors listed in ``numerical_cols`` (in
    that order), the Morgan fingerprint of drug 1, the Morgan fingerprint of
    drug 2, then 768 zeros standing in for the BioBERT embedding.

    Args:
        drug1_name: Name of the first drug.
        drug2_name: Name of the second drug.

    Returns:
        ``(X, None)`` on success, where ``X`` is a 2-D array with one row, or
        ``(None, message)`` when a lookup fails or a drug is not found.
    """
    try:
        compounds1 = get_compounds(drug1_name, 'name')
        compounds2 = get_compounds(drug2_name, 'name')
    except Exception as exc:
        # Network/HTTP failures are reported through the same (None, message)
        # channel as "not found" so the UI shows a message instead of crashing.
        return None, f"PubChem lookup failed: {exc}"

    if not compounds1 or not compounds2:
        return None, f"One or both drugs not found: {drug1_name}, {drug2_name}"

    compound1 = compounds1[0]  # Take the first match
    compound2 = compounds2[0]

    # Merge both property maps BEFORE indexing by numerical_cols. That list
    # interleaves drug-1 and drug-2 columns, so looking up its first half in
    # drug 1's map and its second half in drug 2's map silently zeroes every
    # column that belongs to the other drug.
    props = {
        **_pubchem_properties(compound1),
        **_pubchem_properties(compound2, suffix='_2'),
    }
    features = [props.get(col, 0.0) for col in numerical_cols]

    fp1 = get_morgan_fingerprint(compound1.canonical_smiles)
    fp2 = get_morgan_fingerprint(compound2.canonical_smiles)

    # Combine all features with zero padding for BioBERT (768 dimensions).
    X = np.hstack([
        np.array(features, dtype=float).reshape(1, -1),
        fp1.reshape(1, -1),
        fp2.reshape(1, -1),
        np.zeros((1, 768)),
    ])
    return X, None


def _pubchem_properties(compound, suffix=''):
    """Map one compound's PubChem descriptors to ``{name + suffix: float}``."""
    descriptor_names = (
        'molecular_weight', 'xlogp', 'tpsa', 'rotatable_bond_count',
        'h_bond_donor_count', 'h_bond_acceptor_count', 'complexity',
        # NOTE(review): charge used to be hard-coded to 0; PubChemPy exposes
        # Compound.charge, so it is read here. Confirm the training data's
        # charge column came from the same source.
        'charge',
        'exact_mass',
    )
    props = {}
    for name in descriptor_names:
        value = getattr(compound, name, None)
        try:
            # Missing descriptors become 0. float() also covers values that
            # arrive as strings, which would otherwise turn the whole
            # feature array into a string array.
            props[name + suffix] = float(value) if value else 0.0
        except (TypeError, ValueError):
            props[name + suffix] = 0.0
    return props
80
+
81
+ # Function to predict severity
82
def predict_severity(drug1, drug2):
    """Predict the interaction severity for two drug names.

    Args:
        drug1: Name of the first drug as typed by the user.
        drug2: Name of the second drug as typed by the user.

    Returns:
        A text report with the predicted severity and one probability line
        per class, or an explanatory message when an input is blank or
        feature extraction reports an error.
    """
    # The UI can call this with empty or half-typed boxes; answer without
    # hitting PubChem until both names are present.
    drug1 = (drug1 or "").strip()
    drug2 = (drug2 or "").strip()
    if not drug1 or not drug2:
        return "Please enter both drug names."

    # Fetch drug features from PubChem
    X, error = extract_features(drug1, drug2)
    if error:
        return error

    # Scale and predict
    X_scaled = scaler.transform(X)
    severity = le.inverse_transform(model.predict(X_scaled))[0]
    probabilities = model.predict_proba(X_scaled)[0]

    # predict_proba columns follow model.classes_ (encoded labels), so decode
    # those rather than assuming they line up with le.classes_ by position.
    class_labels = le.inverse_transform(model.classes_)

    lines = [f"Predicted Severity: {severity}", "Prediction Probabilities:"]
    lines.extend(
        f" {label}: {probability:.2%}"
        for label, probability in zip(class_labels, probabilities)
    )
    return "\n".join(lines) + "\n"
100
+
101
+ # Gradio Interface
102
interface = gr.Interface(
    title="Drug Interaction Severity Predictor",
    description="Enter two drug names to predict the severity of their interaction.",
    fn=predict_severity,
    # One free-text box per drug name, passed positionally to predict_severity.
    inputs=[gr.Textbox(label="Drug 1"), gr.Textbox(label="Drug 2")],
    outputs="text",
    # NOTE(review): per the Gradio docs, live=True re-runs fn whenever an
    # input changes, so partial names may be sent to PubChem. Confirm this
    # is intended.
    live=True,
)

# Start the web UI.
interface.launch()
113
+
114
+
115
+
116
+
label_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1ff7db333c6ab01dac75b277890ab1977ff2246af0d26fc8c7b66812ffcde47
3
+ size 516
random_forest_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25460b3cf770c838d5f6773f8892b2fb54116e55b21828c8e4c282464b157fca
3
+ size 909988
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ torch
3
+ gradio
standard_scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f231431d1222580f882c4ebb60cef05b5c130972ad6627610f31c6f27c8953c
3
+ size 1591