Robert Elder committed on
Commit
108b9bc
·
1 Parent(s): f6b0807

fixing melting point calculation

Browse files
ChemID.py CHANGED
@@ -70,7 +70,7 @@ import padelpy
70
  from functions import weight_func
71
  class opera_data_mp():
72
  n_neighbors = 5
73
- weight_factor = 5e-3
74
  desc_list = ['SHBd', 'nN', 'maxHBd', 'ATSC1v', 'AATS1i', 'TopoPSA', 'nT6Ring', 'nHBDon', 'WTPT-5', 'minHBd', 'nHBint2', 'IC0', 'MLFER_S', 'MLFER_BO', 'WTPT-3']
75
  with open('mp/model-opera-knn.pkl', 'rb') as fp:
76
  knn_all = pickle.load(fp)
@@ -611,7 +611,7 @@ def string2density(name):
611
  rho_string = rho_string.replace('Relative density (water = 1): ', '')
612
  #print(rho_string)
613
  #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
614
- m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
615
  groups = m.groups()
616
  if len(groups):
617
  for g in groups:
@@ -645,7 +645,7 @@ def string2density(name):
645
  url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
646
  fid = urllib.request.urlopen(url)
647
  webpage = fid.read().decode('utf-8')
648
- hits = re.findall('DTXSID[0-9]+', webpage)
649
  if len(hits):
650
  dtxsid = hits[0]
651
  except:
@@ -679,7 +679,7 @@ def string2density(name):
679
  if ifound:
680
  rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
681
  text = rows[ifound].text
682
- value = re.sub(' \([0-9]*\)', '', text.strip())
683
  try:
684
  rho = float(value)
685
  rho_origin = 'dsstox/expt'
@@ -688,7 +688,7 @@ def string2density(name):
688
  if pd.isna(rho):
689
  rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
690
  text = rows[ifound].text
691
- value = re.sub(' \([0-9]*\)', '', text.strip())
692
  try:
693
  rho = float(value)
694
  rho_origin = 'dsstox/pred'
@@ -993,7 +993,7 @@ def string2mp(name, namespace='name'):
993
  #print(mp_string)
994
  #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
995
  #m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
996
- m = re.match('(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?C)',mp_string)
997
  if m is not None:
998
  groups = m.groups()
999
  if len(groups):
@@ -1003,7 +1003,7 @@ def string2mp(name, namespace='name'):
1003
  mp_list.append(tmp_mp)
1004
  except:
1005
  continue
1006
- m = re.match('(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?F)',mp_string)
1007
  if m is not None:
1008
  groups = m.groups()
1009
  if len(groups):
@@ -1039,7 +1039,7 @@ def string2mp(name, namespace='name'):
1039
  url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
1040
  fid = urllib.request.urlopen(url)
1041
  webpage = fid.read().decode('utf-8')
1042
- hits = re.findall('DTXSID[0-9]+', webpage)
1043
  if len(hits):
1044
  dtxsid = hits[0]
1045
  except:
@@ -1078,7 +1078,7 @@ def string2mp(name, namespace='name'):
1078
  if ifound:
1079
  rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
1080
  text = rows[ifound].text
1081
- value = re.sub(' \([0-9]*\)', '', text.strip())
1082
  try:
1083
  mp = float(value)
1084
  mp_origin = 'dsstox/expt'
@@ -1087,7 +1087,7 @@ def string2mp(name, namespace='name'):
1087
  if pd.isna(mp):
1088
  rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
1089
  text = rows[ifound].text
1090
- value = re.sub(' \([0-9]*\)', '', text.strip())
1091
  try:
1092
  mp = float(value)
1093
  mp_origin = 'dsstox/pred'
 
70
  from functions import weight_func
71
  class opera_data_mp():
72
  n_neighbors = 5
73
+ #weight_factor = 5e-3
74
  desc_list = ['SHBd', 'nN', 'maxHBd', 'ATSC1v', 'AATS1i', 'TopoPSA', 'nT6Ring', 'nHBDon', 'WTPT-5', 'minHBd', 'nHBint2', 'IC0', 'MLFER_S', 'MLFER_BO', 'WTPT-3']
75
  with open('mp/model-opera-knn.pkl', 'rb') as fp:
76
  knn_all = pickle.load(fp)
 
611
  rho_string = rho_string.replace('Relative density (water = 1): ', '')
612
  #print(rho_string)
613
  #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
614
+ m = re.match(r'((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
615
  groups = m.groups()
616
  if len(groups):
617
  for g in groups:
 
645
  url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
646
  fid = urllib.request.urlopen(url)
647
  webpage = fid.read().decode('utf-8')
648
+ hits = re.findall(r'DTXSID[0-9]+', webpage)
649
  if len(hits):
650
  dtxsid = hits[0]
651
  except:
 
679
  if ifound:
680
  rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
681
  text = rows[ifound].text
682
+ value = re.sub(r' \([0-9]*\)', '', text.strip())
683
  try:
684
  rho = float(value)
685
  rho_origin = 'dsstox/expt'
 
688
  if pd.isna(rho):
689
  rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
690
  text = rows[ifound].text
691
+ value = re.sub(r' \([0-9]*\)', '', text.strip())
692
  try:
693
  rho = float(value)
694
  rho_origin = 'dsstox/pred'
 
993
  #print(mp_string)
994
  #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
995
  #m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
996
+ m = re.match(r'(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?C)',mp_string)
997
  if m is not None:
998
  groups = m.groups()
999
  if len(groups):
 
1003
  mp_list.append(tmp_mp)
1004
  except:
1005
  continue
1006
+ m = re.match(r'(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?F)',mp_string)
1007
  if m is not None:
1008
  groups = m.groups()
1009
  if len(groups):
 
1039
  url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
1040
  fid = urllib.request.urlopen(url)
1041
  webpage = fid.read().decode('utf-8')
1042
+ hits = re.findall(r'DTXSID[0-9]+', webpage)
1043
  if len(hits):
1044
  dtxsid = hits[0]
1045
  except:
 
1078
  if ifound:
1079
  rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
1080
  text = rows[ifound].text
1081
+ value = re.sub(r' \([0-9]*\)', '', text.strip())
1082
  try:
1083
  mp = float(value)
1084
  mp_origin = 'dsstox/expt'
 
1087
  if pd.isna(mp):
1088
  rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
1089
  text = rows[ifound].text
1090
+ value = re.sub(r' \([0-9]*\)', '', text.strip())
1091
  try:
1092
  mp = float(value)
1093
  mp_origin = 'dsstox/pred'
mp/model-opera-knn-alldata.xlsx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef6d799fcec1a459968cbf0957adfa0cfc636b08995e9c1e920480d7b6542e09
3
  size 6551300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29df92a2ae63cacaaaefb7c2fede726114ea50351903457cac663fa8356bb006
3
  size 6551300
mp/model-opera-knn.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bea1836e5f60a763e8510023d13dcef92bb028bdf3c43523d06cfd99f9ff80c
3
- size 5498909
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36df3f65fdb081bd89543a127e09f8c1bcce2eb0663f6c7ace135dc054a9f3a5
3
+ size 5498897