Robert Elder committed on
Commit ·
108b9bc
1
Parent(s): f6b0807
fixing melting point calculation
Browse files
- ChemID.py +10 -10
- mp/model-opera-knn-alldata.xlsx +1 -1
- mp/model-opera-knn.pkl +2 -2
ChemID.py
CHANGED
|
@@ -70,7 +70,7 @@ import padelpy
|
|
| 70 |
from functions import weight_func
|
| 71 |
class opera_data_mp():
|
| 72 |
n_neighbors = 5
|
| 73 |
-
weight_factor = 5e-3
|
| 74 |
desc_list = ['SHBd', 'nN', 'maxHBd', 'ATSC1v', 'AATS1i', 'TopoPSA', 'nT6Ring', 'nHBDon', 'WTPT-5', 'minHBd', 'nHBint2', 'IC0', 'MLFER_S', 'MLFER_BO', 'WTPT-3']
|
| 75 |
with open('mp/model-opera-knn.pkl', 'rb') as fp:
|
| 76 |
knn_all = pickle.load(fp)
|
|
@@ -611,7 +611,7 @@ def string2density(name):
|
|
| 611 |
rho_string = rho_string.replace('Relative density (water = 1): ', '')
|
| 612 |
#print(rho_string)
|
| 613 |
#tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
|
| 614 |
-
m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
|
| 615 |
groups = m.groups()
|
| 616 |
if len(groups):
|
| 617 |
for g in groups:
|
|
@@ -645,7 +645,7 @@ def string2density(name):
|
|
| 645 |
url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
|
| 646 |
fid = urllib.request.urlopen(url)
|
| 647 |
webpage = fid.read().decode('utf-8')
|
| 648 |
-
hits = re.findall('DTXSID[0-9]+', webpage)
|
| 649 |
if len(hits):
|
| 650 |
dtxsid = hits[0]
|
| 651 |
except:
|
|
@@ -679,7 +679,7 @@ def string2density(name):
|
|
| 679 |
if ifound:
|
| 680 |
rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
|
| 681 |
text = rows[ifound].text
|
| 682 |
-
value = re.sub(' \([0-9]*\)', '', text.strip())
|
| 683 |
try:
|
| 684 |
rho = float(value)
|
| 685 |
rho_origin = 'dsstox/expt'
|
|
@@ -688,7 +688,7 @@ def string2density(name):
|
|
| 688 |
if pd.isna(rho):
|
| 689 |
rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
|
| 690 |
text = rows[ifound].text
|
| 691 |
-
value = re.sub(' \([0-9]*\)', '', text.strip())
|
| 692 |
try:
|
| 693 |
rho = float(value)
|
| 694 |
rho_origin = 'dsstox/pred'
|
|
@@ -993,7 +993,7 @@ def string2mp(name, namespace='name'):
|
|
| 993 |
#print(mp_string)
|
| 994 |
#tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
|
| 995 |
#m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
|
| 996 |
-
m = re.match('(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?C)',mp_string)
|
| 997 |
if m is not None:
|
| 998 |
groups = m.groups()
|
| 999 |
if len(groups):
|
|
@@ -1003,7 +1003,7 @@ def string2mp(name, namespace='name'):
|
|
| 1003 |
mp_list.append(tmp_mp)
|
| 1004 |
except:
|
| 1005 |
continue
|
| 1006 |
-
m = re.match('(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?F)',mp_string)
|
| 1007 |
if m is not None:
|
| 1008 |
groups = m.groups()
|
| 1009 |
if len(groups):
|
|
@@ -1039,7 +1039,7 @@ def string2mp(name, namespace='name'):
|
|
| 1039 |
url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
|
| 1040 |
fid = urllib.request.urlopen(url)
|
| 1041 |
webpage = fid.read().decode('utf-8')
|
| 1042 |
-
hits = re.findall('DTXSID[0-9]+', webpage)
|
| 1043 |
if len(hits):
|
| 1044 |
dtxsid = hits[0]
|
| 1045 |
except:
|
|
@@ -1078,7 +1078,7 @@ def string2mp(name, namespace='name'):
|
|
| 1078 |
if ifound:
|
| 1079 |
rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
|
| 1080 |
text = rows[ifound].text
|
| 1081 |
-
value = re.sub(' \([0-9]*\)', '', text.strip())
|
| 1082 |
try:
|
| 1083 |
mp = float(value)
|
| 1084 |
mp_origin = 'dsstox/expt'
|
|
@@ -1087,7 +1087,7 @@ def string2mp(name, namespace='name'):
|
|
| 1087 |
if pd.isna(mp):
|
| 1088 |
rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
|
| 1089 |
text = rows[ifound].text
|
| 1090 |
-
value = re.sub(' \([0-9]*\)', '', text.strip())
|
| 1091 |
try:
|
| 1092 |
mp = float(value)
|
| 1093 |
mp_origin = 'dsstox/pred'
|
|
|
|
| 70 |
from functions import weight_func
|
| 71 |
class opera_data_mp():
|
| 72 |
n_neighbors = 5
|
| 73 |
+
#weight_factor = 5e-3
|
| 74 |
desc_list = ['SHBd', 'nN', 'maxHBd', 'ATSC1v', 'AATS1i', 'TopoPSA', 'nT6Ring', 'nHBDon', 'WTPT-5', 'minHBd', 'nHBint2', 'IC0', 'MLFER_S', 'MLFER_BO', 'WTPT-3']
|
| 75 |
with open('mp/model-opera-knn.pkl', 'rb') as fp:
|
| 76 |
knn_all = pickle.load(fp)
|
|
|
|
| 611 |
rho_string = rho_string.replace('Relative density (water = 1): ', '')
|
| 612 |
#print(rho_string)
|
| 613 |
#tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
|
| 614 |
+
m = re.match(r'((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
|
| 615 |
groups = m.groups()
|
| 616 |
if len(groups):
|
| 617 |
for g in groups:
|
|
|
|
| 645 |
url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
|
| 646 |
fid = urllib.request.urlopen(url)
|
| 647 |
webpage = fid.read().decode('utf-8')
|
| 648 |
+
hits = re.findall(r'DTXSID[0-9]+', webpage)
|
| 649 |
if len(hits):
|
| 650 |
dtxsid = hits[0]
|
| 651 |
except:
|
|
|
|
| 679 |
if ifound:
|
| 680 |
rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
|
| 681 |
text = rows[ifound].text
|
| 682 |
+
value = re.sub(r' \([0-9]*\)', '', text.strip())
|
| 683 |
try:
|
| 684 |
rho = float(value)
|
| 685 |
rho_origin = 'dsstox/expt'
|
|
|
|
| 688 |
if pd.isna(rho):
|
| 689 |
rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
|
| 690 |
text = rows[ifound].text
|
| 691 |
+
value = re.sub(r' \([0-9]*\)', '', text.strip())
|
| 692 |
try:
|
| 693 |
rho = float(value)
|
| 694 |
rho_origin = 'dsstox/pred'
|
|
|
|
| 993 |
#print(mp_string)
|
| 994 |
#tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
|
| 995 |
#m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
|
| 996 |
+
m = re.match(r'(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?C)',mp_string)
|
| 997 |
if m is not None:
|
| 998 |
groups = m.groups()
|
| 999 |
if len(groups):
|
|
|
|
| 1003 |
mp_list.append(tmp_mp)
|
| 1004 |
except:
|
| 1005 |
continue
|
| 1006 |
+
m = re.match(r'(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?F)',mp_string)
|
| 1007 |
if m is not None:
|
| 1008 |
groups = m.groups()
|
| 1009 |
if len(groups):
|
|
|
|
| 1039 |
url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
|
| 1040 |
fid = urllib.request.urlopen(url)
|
| 1041 |
webpage = fid.read().decode('utf-8')
|
| 1042 |
+
hits = re.findall(r'DTXSID[0-9]+', webpage)
|
| 1043 |
if len(hits):
|
| 1044 |
dtxsid = hits[0]
|
| 1045 |
except:
|
|
|
|
| 1078 |
if ifound:
|
| 1079 |
rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
|
| 1080 |
text = rows[ifound].text
|
| 1081 |
+
value = re.sub(r' \([0-9]*\)', '', text.strip())
|
| 1082 |
try:
|
| 1083 |
mp = float(value)
|
| 1084 |
mp_origin = 'dsstox/expt'
|
|
|
|
| 1087 |
if pd.isna(mp):
|
| 1088 |
rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
|
| 1089 |
text = rows[ifound].text
|
| 1090 |
+
value = re.sub(r' \([0-9]*\)', '', text.strip())
|
| 1091 |
try:
|
| 1092 |
mp = float(value)
|
| 1093 |
mp_origin = 'dsstox/pred'
|
mp/model-opera-knn-alldata.xlsx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6551300
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29df92a2ae63cacaaaefb7c2fede726114ea50351903457cac663fa8356bb006
|
| 3 |
size 6551300
|
mp/model-opera-knn.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36df3f65fdb081bd89543a127e09f8c1bcce2eb0663f6c7ace135dc054a9f3a5
|
| 3 |
+
size 5498897
|