Spaces:

dmsaylor
/

CHRIS

Running

App Files Files Community

Robert Elder committed on Apr 24, 2025

Commit

108b9bc

1 Parent(s): f6b0807

fixing melting point calculation

Browse files

Files changed (3) hide show

ChemID.py +10 -10
mp/model-opera-knn-alldata.xlsx +1 -1
mp/model-opera-knn.pkl +2 -2

ChemID.py CHANGED Viewed

@@ -70,7 +70,7 @@ import padelpy
 from functions import weight_func
 class opera_data_mp():
     n_neighbors = 5
-    weight_factor = 5e-3
     desc_list = ['SHBd', 'nN', 'maxHBd', 'ATSC1v', 'AATS1i', 'TopoPSA', 'nT6Ring', 'nHBDon', 'WTPT-5', 'minHBd', 'nHBint2', 'IC0', 'MLFER_S', 'MLFER_BO', 'WTPT-3']
     with open('mp/model-opera-knn.pkl', 'rb') as fp:
         knn_all = pickle.load(fp)
@@ -611,7 +611,7 @@ def string2density(name):
                                             rho_string = rho_string.replace('Relative density (water = 1): ', '')
                                             #print(rho_string)
                                             #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
-                                            m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
                                             groups = m.groups()
                                             if len(groups):
                                                 for g in groups:
@@ -645,7 +645,7 @@ def string2density(name):
                 url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
                 fid = urllib.request.urlopen(url)
                 webpage = fid.read().decode('utf-8')
-                hits = re.findall('DTXSID[0-9]+', webpage)
                 if len(hits):
                     dtxsid = hits[0]
             except:
@@ -679,7 +679,7 @@ def string2density(name):
                     if ifound:
                         rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
                         text = rows[ifound].text
-                        value = re.sub(' \([0-9]*\)', '', text.strip())
                         try:
                             rho = float(value)
                             rho_origin = 'dsstox/expt'
@@ -688,7 +688,7 @@ def string2density(name):
                         if pd.isna(rho):
                             rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
                             text = rows[ifound].text
-                            value = re.sub(' \([0-9]*\)', '', text.strip())
                             try:
                                 rho = float(value)
                                 rho_origin = 'dsstox/pred'
@@ -993,7 +993,7 @@ def string2mp(name, namespace='name'):
                                             #print(mp_string)
                                             #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
                                             #m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
-                                            m = re.match('(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?C)',mp_string)
                                             if m is not None:
                                                 groups = m.groups()
                                                 if len(groups):
@@ -1003,7 +1003,7 @@ def string2mp(name, namespace='name'):
                                                             mp_list.append(tmp_mp)
                                                         except:
                                                             continue
-                                            m = re.match('(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?F)',mp_string)
                                             if m is not None:
                                                 groups = m.groups()
                                                 if len(groups):
@@ -1039,7 +1039,7 @@ def string2mp(name, namespace='name'):
                 url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
                 fid = urllib.request.urlopen(url)
                 webpage = fid.read().decode('utf-8')
-                hits = re.findall('DTXSID[0-9]+', webpage)
                 if len(hits):
                     dtxsid = hits[0]
             except:
@@ -1078,7 +1078,7 @@ def string2mp(name, namespace='name'):
                     if ifound:
                         rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
                         text = rows[ifound].text
-                        value = re.sub(' \([0-9]*\)', '', text.strip())
                         try:
                             mp = float(value)
                             mp_origin = 'dsstox/expt'
@@ -1087,7 +1087,7 @@ def string2mp(name, namespace='name'):
                         if pd.isna(mp):
                             rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
                             text = rows[ifound].text
-                            value = re.sub(' \([0-9]*\)', '', text.strip())
                             try:
                                 mp = float(value)
                                 mp_origin = 'dsstox/pred'

 from functions import weight_func
 class opera_data_mp():
     n_neighbors = 5
+    #weight_factor = 5e-3
     desc_list = ['SHBd', 'nN', 'maxHBd', 'ATSC1v', 'AATS1i', 'TopoPSA', 'nT6Ring', 'nHBDon', 'WTPT-5', 'minHBd', 'nHBint2', 'IC0', 'MLFER_S', 'MLFER_BO', 'WTPT-3']
     with open('mp/model-opera-knn.pkl', 'rb') as fp:
         knn_all = pickle.load(fp)
                                             rho_string = rho_string.replace('Relative density (water = 1): ', '')
                                             #print(rho_string)
                                             #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
+                                            m = re.match(r'((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
                                             groups = m.groups()
                                             if len(groups):
                                                 for g in groups:
                 url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
                 fid = urllib.request.urlopen(url)
                 webpage = fid.read().decode('utf-8')
+                hits = re.findall(r'DTXSID[0-9]+', webpage)
                 if len(hits):
                     dtxsid = hits[0]
             except:
                     if ifound:
                         rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
                         text = rows[ifound].text
+                        value = re.sub(r' \([0-9]*\)', '', text.strip())
                         try:
                             rho = float(value)
                             rho_origin = 'dsstox/expt'
                         if pd.isna(rho):
                             rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
                             text = rows[ifound].text
+                            value = re.sub(r' \([0-9]*\)', '', text.strip())
                             try:
                                 rho = float(value)
                                 rho_origin = 'dsstox/pred'
                                             #print(mp_string)
                                             #tmp_rho = re.match('(?:\d+(?:\.\d*)?|\.\d+)',rho_string)
                                             #m = re.match('((?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?',rho_string)
+                                            m = re.match(r'(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?C)',mp_string)
                                             if m is not None:
                                                 groups = m.groups()
                                                 if len(groups):
                                                             mp_list.append(tmp_mp)
                                                         except:
                                                             continue
+                                            m = re.match(r'(-?(?:\d+(?:\.\d*)?|\.\d+))(?:-((?:\d+(?:\.\d*)?|\.\d+)))?( ?°?F)',mp_string)
                                             if m is not None:
                                                 groups = m.groups()
                                                 if len(groups):
                 url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
                 fid = urllib.request.urlopen(url)
                 webpage = fid.read().decode('utf-8')
+                hits = re.findall(r'DTXSID[0-9]+', webpage)
                 if len(hits):
                     dtxsid = hits[0]
             except:
                     if ifound:
                         rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
                         text = rows[ifound].text
+                        value = re.sub(r' \([0-9]*\)', '', text.strip())
                         try:
                             mp = float(value)
                             mp_origin = 'dsstox/expt'
                         if pd.isna(mp):
                             rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
                             text = rows[ifound].text
+                            value = re.sub(r' \([0-9]*\)', '', text.strip())
                             try:
                                 mp = float(value)
                                 mp_origin = 'dsstox/pred'

mp/model-opera-knn-alldata.xlsx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef6d799fcec1a459968cbf0957adfa0cfc636b08995e9c1e920480d7b6542e09
 size 6551300

 version https://git-lfs.github.com/spec/v1
+oid sha256:29df92a2ae63cacaaaefb7c2fede726114ea50351903457cac663fa8356bb006
 size 6551300

mp/model-opera-knn.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bea1836e5f60a763e8510023d13dcef92bb028bdf3c43523d06cfd99f9ff80c
-size 5498909

 version https://git-lfs.github.com/spec/v1
+oid sha256:36df3f65fdb081bd89543a127e09f8c1bcce2eb0663f6c7ace135dc054a9f3a5
+size 5498897