Spaces:

dmsaylor
/

CHRIS

Running

App Files Files Community

robert.elder committed on Jul 13, 2022

Commit

41b73a8

1 Parent(s): c8e574b

better error checking on selenium codeblocks

Browse files

Files changed (1) hide show

ChemID.py +64 -54

ChemID.py CHANGED Viewed

@@ -39,7 +39,7 @@ dfmp_expt = pd.read_csv('PHYSPROP_MP_data.tsv', sep='\t')
 #df_pred = pd.read_excel('Comptox_pred_data.xlsx')
 df_pred = pd.read_csv('Comptox_pred_data.tsv', sep='\t')
-def ResolveChemical(chemName, IDtype):
     #LogP_func = Crippen.MolLogP
     LogP_func = getLogP
@@ -52,6 +52,9 @@ def ResolveChemical(chemName, IDtype):
     rho = None
     mp = None
     im64 = None
     error = 0
     if IDtype == 'CAS':
@@ -156,7 +159,10 @@ def ResolveChemical(chemName, IDtype):
         if not cas:
             cas = 'Not found'
-    return (name, cas, smiles, Mw, LogP, rho, mp, im64, error)
 #Generates an image of the molecule represented by the SMILES code given.
 #Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
@@ -498,7 +504,7 @@ def string2density(name):
             rho = float(df_pred[mask]['DENSITY_G/CM^3_TEST_PRED'])
             rho_origin = 'pred'
     # try to scrape from PubChem
-    if rho is None:
         try:
             content = None
             compounds = pcp.get_compounds(name, namespace='name')
@@ -551,7 +557,7 @@ def string2density(name):
                 rho, rho_origin = None, None
     # try to scrape from DSSTOX
     if try_dsstox:
-        if rho is None:
             dtxsid = None
             try:
                 # try to find it via the dsstox dashboard
@@ -565,6 +571,7 @@ def string2density(name):
             except:
                 pass
             if dtxsid:
                 url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
                 #print(url)
                 try:
@@ -575,45 +582,46 @@ def string2density(name):
                     driver = selenium.webdriver.Chrome(service=service, options=options)
                     driver.set_page_load_timeout(15)
                     driver.get(url)
                 except KeyboardInterrupt:
                     raise
                 except:
                     pass
-                    #print("timeout")
-                webpage = driver.page_source
-                driver.quit()
-                mysoup = bs4.BeautifulSoup(webpage, features='lxml')
-                # column of property names
-                ifound = None
-                rows = mysoup.find_all('div', attrs={'col-id':'property'})
-                for i,row in enumerate(rows):
-                    if 'Density' in row.text:
-                        ifound = i
-                        break
-                if ifound:
-                    rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
-                    text = rows[ifound].text
-                    value = re.sub(' \([0-9]*\)', '', text.strip())
-                    try:
-                        rho = float(value)
-                        rho_origin = 'expt/dsstox'
-                    except:
-                        rho, rho_origin = None, None
-                    if rho is None:
-                        rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
                         text = rows[ifound].text
                         value = re.sub(' \([0-9]*\)', '', text.strip())
                         try:
                             rho = float(value)
-                            rho_origin = 'pred/dsstox'
                         except:
                             rho, rho_origin = None, None
-                else:
-                    rho, rho_origin = None, None
             else:
                 rho, rho_origin = None, None
-    if rho is not None and np.isnan(rho): rho = None
-    if rho is None: rho_origin = None
     return rho, rho_origin
 def return_non_duplicate_index(tuples): ##Given a list of sets return index of non_duplicate items
@@ -960,6 +968,7 @@ def string2mp(name):
             except:
                 pass
             if dtxsid:
                 url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
                 #print(url)
                 try:
@@ -975,39 +984,40 @@ def string2mp(name):
                     #driver = selenium.webdriver.Chrome(driver_exe, options=options)
                     driver.set_page_load_timeout(15)
                     driver.get(url)
                 except:
                     pass
                     #print("timeout")
-                webpage = driver.page_source
-                driver.quit()
-                mysoup = bs4.BeautifulSoup(webpage, features='lxml')
                 # column of property names
-                ifound = None
-                rows = mysoup.find_all('div', attrs={'col-id':'property'})
-                for i,row in enumerate(rows):
-                    if 'Melting Point' in row.text:
-                        ifound = i
-                        break
-                if ifound:
-                    rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
-                    text = rows[ifound].text
-                    value = re.sub(' \([0-9]*\)', '', text.strip())
-                    try:
-                        mp = float(value)
-                        mp_origin = 'expt/dsstox'
-                    except:
-                        mp, mp_origin = None, None
-                    if mp is None:
-                        rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
                         text = rows[ifound].text
                         value = re.sub(' \([0-9]*\)', '', text.strip())
                         try:
                             mp = float(value)
-                            mp_origin = 'pred/dsstox'
                         except:
                             mp, mp_origin = None, None
-                else:
-                    mp, mp_origin = None, None
             else:
                 mp, mp_origin = None, None
     if mp is not None and np.isnan(mp): mp = None

 #df_pred = pd.read_excel('Comptox_pred_data.xlsx')
 df_pred = pd.read_csv('Comptox_pred_data.tsv', sep='\t')
+def ResolveChemical(chemName, IDtype, debug=False):
     #LogP_func = Crippen.MolLogP
     LogP_func = getLogP
     rho = None
     mp = None
     im64 = None
+    mp_origin = None
+    rho_origin = None
+    LogP_origin = None
     error = 0
     if IDtype == 'CAS':
         if not cas:
             cas = 'Not found'
+    if debug:
+        return (name, cas, smiles, Mw, LogP, LogP_origin, rho, rho_origin, mp, mp_origin, im64, error)
+    else:
+        return (name, cas, smiles, Mw, LogP, rho, mp, im64, error)
 #Generates an image of the molecule represented by the SMILES code given.
 #Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
             rho = float(df_pred[mask]['DENSITY_G/CM^3_TEST_PRED'])
             rho_origin = 'pred'
     # try to scrape from PubChem
+    if pd.isna(rho):
         try:
             content = None
             compounds = pcp.get_compounds(name, namespace='name')
                 rho, rho_origin = None, None
     # try to scrape from DSSTOX
     if try_dsstox:
+        if pd.isna(rho):
             dtxsid = None
             try:
                 # try to find it via the dsstox dashboard
             except:
                 pass
             if dtxsid:
+                mysoup = None
                 url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
                 #print(url)
                 try:
                     driver = selenium.webdriver.Chrome(service=service, options=options)
                     driver.set_page_load_timeout(15)
                     driver.get(url)
+                    webpage = driver.page_source
+                    driver.quit()
+                    mysoup = bs4.BeautifulSoup(webpage, features='lxml')
                 except KeyboardInterrupt:
                     raise
                 except:
                     pass
+                if mysoup:
+                    ifound = None
+                    # column of property names
+                    rows = mysoup.find_all('div', attrs={'col-id':'property'})
+                    for i,row in enumerate(rows):
+                        if 'Density' in row.text:
+                            ifound = i
+                            break
+                    if ifound:
+                        rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
                         text = rows[ifound].text
                         value = re.sub(' \([0-9]*\)', '', text.strip())
                         try:
                             rho = float(value)
+                            rho_origin = 'expt/dsstox'
                         except:
                             rho, rho_origin = None, None
+                        if pd.isna(rho):
+                            rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
+                            text = rows[ifound].text
+                            value = re.sub(' \([0-9]*\)', '', text.strip())
+                            try:
+                                rho = float(value)
+                                rho_origin = 'pred/dsstox'
+                            except:
+                                rho, rho_origin = None, None
+                    else:
+                        rho, rho_origin = None, None
             else:
                 rho, rho_origin = None, None
+    if pd.isna(rho):
+        rho = None
+        rho_origin = None
     return rho, rho_origin
 def return_non_duplicate_index(tuples): ##Given a list of sets return index of non_duplicate items
             except:
                 pass
             if dtxsid:
+                mysoup = None
                 url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
                 #print(url)
                 try:
                     #driver = selenium.webdriver.Chrome(driver_exe, options=options)
                     driver.set_page_load_timeout(15)
                     driver.get(url)
+                    webpage = driver.page_source
+                    driver.quit()
+                    mysoup = bs4.BeautifulSoup(webpage, features='lxml')
                 except:
                     pass
                     #print("timeout")
                 # column of property names
+                if mysoup:
+                    ifound = None
+                    rows = mysoup.find_all('div', attrs={'col-id':'property'})
+                    for i,row in enumerate(rows):
+                        if 'Melting Point' in row.text:
+                            ifound = i
+                            break
+                    if ifound:
+                        rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
                         text = rows[ifound].text
                         value = re.sub(' \([0-9]*\)', '', text.strip())
                         try:
                             mp = float(value)
+                            mp_origin = 'expt/dsstox'
                         except:
                             mp, mp_origin = None, None
+                        if mp is None:
+                            rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
+                            text = rows[ifound].text
+                            value = re.sub(' \([0-9]*\)', '', text.strip())
+                            try:
+                                mp = float(value)
+                                mp_origin = 'pred/dsstox'
+                            except:
+                                mp, mp_origin = None, None
+                    else:
+                        mp, mp_origin = None, None
             else:
                 mp, mp_origin = None, None
     if mp is not None and np.isnan(mp): mp = None