robert.elder committed on
Commit
41b73a8
·
1 Parent(s): c8e574b

better error checking on selenium codeblocks

Browse files
Files changed (1) hide show
  1. ChemID.py +64 -54
ChemID.py CHANGED
@@ -39,7 +39,7 @@ dfmp_expt = pd.read_csv('PHYSPROP_MP_data.tsv', sep='\t')
39
  #df_pred = pd.read_excel('Comptox_pred_data.xlsx')
40
  df_pred = pd.read_csv('Comptox_pred_data.tsv', sep='\t')
41
 
42
- def ResolveChemical(chemName, IDtype):
43
 
44
  #LogP_func = Crippen.MolLogP
45
  LogP_func = getLogP
@@ -52,6 +52,9 @@ def ResolveChemical(chemName, IDtype):
52
  rho = None
53
  mp = None
54
  im64 = None
 
 
 
55
  error = 0
56
 
57
  if IDtype == 'CAS':
@@ -156,7 +159,10 @@ def ResolveChemical(chemName, IDtype):
156
  if not cas:
157
  cas = 'Not found'
158
 
159
- return (name, cas, smiles, Mw, LogP, rho, mp, im64, error)
 
 
 
160
 
161
  #Generates an image of the molecule represented by the SMILES code given.
162
  #Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
@@ -498,7 +504,7 @@ def string2density(name):
498
  rho = float(df_pred[mask]['DENSITY_G/CM^3_TEST_PRED'])
499
  rho_origin = 'pred'
500
  # try to scrape from PubChem
501
- if rho is None:
502
  try:
503
  content = None
504
  compounds = pcp.get_compounds(name, namespace='name')
@@ -551,7 +557,7 @@ def string2density(name):
551
  rho, rho_origin = None, None
552
  # try to scrape from DSSTOX
553
  if try_dsstox:
554
- if rho is None:
555
  dtxsid = None
556
  try:
557
  # try to find it via the dsstox dashboard
@@ -565,6 +571,7 @@ def string2density(name):
565
  except:
566
  pass
567
  if dtxsid:
 
568
  url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
569
  #print(url)
570
  try:
@@ -575,45 +582,46 @@ def string2density(name):
575
  driver = selenium.webdriver.Chrome(service=service, options=options)
576
  driver.set_page_load_timeout(15)
577
  driver.get(url)
 
 
 
578
  except KeyboardInterrupt:
579
  raise
580
  except:
581
  pass
582
- #print("timeout")
583
- webpage = driver.page_source
584
- driver.quit()
585
- mysoup = bs4.BeautifulSoup(webpage, features='lxml')
586
- # column of property names
587
- ifound = None
588
- rows = mysoup.find_all('div', attrs={'col-id':'property'})
589
- for i,row in enumerate(rows):
590
- if 'Density' in row.text:
591
- ifound = i
592
- break
593
- if ifound:
594
- rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
595
- text = rows[ifound].text
596
- value = re.sub(' \([0-9]*\)', '', text.strip())
597
- try:
598
- rho = float(value)
599
- rho_origin = 'expt/dsstox'
600
- except:
601
- rho, rho_origin = None, None
602
- if rho is None:
603
- rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
604
  text = rows[ifound].text
605
  value = re.sub(' \([0-9]*\)', '', text.strip())
606
  try:
607
  rho = float(value)
608
- rho_origin = 'pred/dsstox'
609
  except:
610
  rho, rho_origin = None, None
611
- else:
612
- rho, rho_origin = None, None
 
 
 
 
 
 
 
 
 
613
  else:
614
  rho, rho_origin = None, None
615
- if rho is not None and np.isnan(rho): rho = None
616
- if rho is None: rho_origin = None
 
617
  return rho, rho_origin
618
 
619
  def return_non_duplicate_index(tuples): ##Given a list of sets return index of non_duplicate items
@@ -960,6 +968,7 @@ def string2mp(name):
960
  except:
961
  pass
962
  if dtxsid:
 
963
  url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
964
  #print(url)
965
  try:
@@ -975,39 +984,40 @@ def string2mp(name):
975
  #driver = selenium.webdriver.Chrome(driver_exe, options=options)
976
  driver.set_page_load_timeout(15)
977
  driver.get(url)
 
 
 
978
  except:
979
  pass
980
  #print("timeout")
981
- webpage = driver.page_source
982
- driver.quit()
983
- mysoup = bs4.BeautifulSoup(webpage, features='lxml')
984
  # column of property names
985
- ifound = None
986
- rows = mysoup.find_all('div', attrs={'col-id':'property'})
987
- for i,row in enumerate(rows):
988
- if 'Melting Point' in row.text:
989
- ifound = i
990
- break
991
- if ifound:
992
- rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
993
- text = rows[ifound].text
994
- value = re.sub(' \([0-9]*\)', '', text.strip())
995
- try:
996
- mp = float(value)
997
- mp_origin = 'expt/dsstox'
998
- except:
999
- mp, mp_origin = None, None
1000
- if mp is None:
1001
- rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
1002
  text = rows[ifound].text
1003
  value = re.sub(' \([0-9]*\)', '', text.strip())
1004
  try:
1005
  mp = float(value)
1006
- mp_origin = 'pred/dsstox'
1007
  except:
1008
  mp, mp_origin = None, None
1009
- else:
1010
- mp, mp_origin = None, None
 
 
 
 
 
 
 
 
 
1011
  else:
1012
  mp, mp_origin = None, None
1013
  if mp is not None and np.isnan(mp): mp = None
 
39
  #df_pred = pd.read_excel('Comptox_pred_data.xlsx')
40
  df_pred = pd.read_csv('Comptox_pred_data.tsv', sep='\t')
41
 
42
+ def ResolveChemical(chemName, IDtype, debug=False):
43
 
44
  #LogP_func = Crippen.MolLogP
45
  LogP_func = getLogP
 
52
  rho = None
53
  mp = None
54
  im64 = None
55
+ mp_origin = None
56
+ rho_origin = None
57
+ LogP_origin = None
58
  error = 0
59
 
60
  if IDtype == 'CAS':
 
159
  if not cas:
160
  cas = 'Not found'
161
 
162
+ if debug:
163
+ return (name, cas, smiles, Mw, LogP, LogP_origin, rho, rho_origin, mp, mp_origin, im64, error)
164
+ else:
165
+ return (name, cas, smiles, Mw, LogP, rho, mp, im64, error)
166
 
167
  #Generates an image of the molecule represented by the SMILES code given.
168
  #Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
 
504
  rho = float(df_pred[mask]['DENSITY_G/CM^3_TEST_PRED'])
505
  rho_origin = 'pred'
506
  # try to scrape from PubChem
507
+ if pd.isna(rho):
508
  try:
509
  content = None
510
  compounds = pcp.get_compounds(name, namespace='name')
 
557
  rho, rho_origin = None, None
558
  # try to scrape from DSSTOX
559
  if try_dsstox:
560
+ if pd.isna(rho):
561
  dtxsid = None
562
  try:
563
  # try to find it via the dsstox dashboard
 
571
  except:
572
  pass
573
  if dtxsid:
574
+ mysoup = None
575
  url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
576
  #print(url)
577
  try:
 
582
  driver = selenium.webdriver.Chrome(service=service, options=options)
583
  driver.set_page_load_timeout(15)
584
  driver.get(url)
585
+ webpage = driver.page_source
586
+ driver.quit()
587
+ mysoup = bs4.BeautifulSoup(webpage, features='lxml')
588
  except KeyboardInterrupt:
589
  raise
590
  except:
591
  pass
592
+ if mysoup:
593
+ ifound = None
594
+ # column of property names
595
+ rows = mysoup.find_all('div', attrs={'col-id':'property'})
596
+ for i,row in enumerate(rows):
597
+ if 'Density' in row.text:
598
+ ifound = i
599
+ break
600
+ if ifound:
601
+ rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
 
 
 
 
 
 
 
 
 
 
 
 
602
  text = rows[ifound].text
603
  value = re.sub(' \([0-9]*\)', '', text.strip())
604
  try:
605
  rho = float(value)
606
+ rho_origin = 'expt/dsstox'
607
  except:
608
  rho, rho_origin = None, None
609
+ if pd.isna(rho):
610
+ rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
611
+ text = rows[ifound].text
612
+ value = re.sub(' \([0-9]*\)', '', text.strip())
613
+ try:
614
+ rho = float(value)
615
+ rho_origin = 'pred/dsstox'
616
+ except:
617
+ rho, rho_origin = None, None
618
+ else:
619
+ rho, rho_origin = None, None
620
  else:
621
  rho, rho_origin = None, None
622
+ if pd.isna(rho):
623
+ rho = None
624
+ rho_origin = None
625
  return rho, rho_origin
626
 
627
  def return_non_duplicate_index(tuples): ##Given a list of sets return index of non_duplicate items
 
968
  except:
969
  pass
970
  if dtxsid:
971
+ mysoup = None
972
  url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
973
  #print(url)
974
  try:
 
984
  #driver = selenium.webdriver.Chrome(driver_exe, options=options)
985
  driver.set_page_load_timeout(15)
986
  driver.get(url)
987
+ webpage = driver.page_source
988
+ driver.quit()
989
+ mysoup = bs4.BeautifulSoup(webpage, features='lxml')
990
  except:
991
  pass
992
  #print("timeout")
 
 
 
993
  # column of property names
994
+ if mysoup:
995
+ ifound = None
996
+ rows = mysoup.find_all('div', attrs={'col-id':'property'})
997
+ for i,row in enumerate(rows):
998
+ if 'Melting Point' in row.text:
999
+ ifound = i
1000
+ break
1001
+ if ifound:
1002
+ rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
 
 
 
 
 
 
 
 
1003
  text = rows[ifound].text
1004
  value = re.sub(' \([0-9]*\)', '', text.strip())
1005
  try:
1006
  mp = float(value)
1007
+ mp_origin = 'expt/dsstox'
1008
  except:
1009
  mp, mp_origin = None, None
1010
+ if mp is None:
1011
+ rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
1012
+ text = rows[ifound].text
1013
+ value = re.sub(' \([0-9]*\)', '', text.strip())
1014
+ try:
1015
+ mp = float(value)
1016
+ mp_origin = 'pred/dsstox'
1017
+ except:
1018
+ mp, mp_origin = None, None
1019
+ else:
1020
+ mp, mp_origin = None, None
1021
  else:
1022
  mp, mp_origin = None, None
1023
  if mp is not None and np.isnan(mp): mp = None