Elder committed on
Commit
74acc1b
·
1 Parent(s): 17509ee

added density lookup

Browse files
ChemID.py CHANGED
@@ -6,6 +6,10 @@ import pubchempy as pcp
6
  import cirpy
7
  import chemicals
8
 
 
 
 
 
9
  from io import BytesIO
10
  from PIL import ImageOps
11
  import base64
@@ -24,17 +28,21 @@ def ResolveChemical(chemName, IDtype):
24
  name = None
25
  Mw = None
26
  LogP = None
 
27
  im64 = None
28
  error = 0
29
 
30
  if not is_cas(cas):
31
  error = 3 #invalid cas
32
- return (name, cas, smiles, Mw, LogP, im64, error)
33
 
34
  smiles = cas2smiles(cas)
35
 
36
  if smiles:
37
  name = cas2name(cas)
 
 
 
38
  try:
39
  mol = Chem.MolFromSmiles(smiles)
40
  except:
@@ -54,12 +62,16 @@ def ResolveChemical(chemName, IDtype):
54
  name = None
55
  Mw = None
56
  LogP = None
 
57
  im64 = None
58
  error = 0
59
 
60
  name = smiles2name(smiles)
61
  if name:
62
  cas = name2cas(name)
 
 
 
63
 
64
  try:
65
  mol = Chem.MolFromSmiles(smiles)
@@ -78,6 +90,7 @@ def ResolveChemical(chemName, IDtype):
78
  cas = None
79
  Mw = None
80
  LogP = None
 
81
  im64 = None
82
  error = 0
83
 
@@ -87,6 +100,10 @@ def ResolveChemical(chemName, IDtype):
87
  cas = name2cas(name)
88
  smiles = cas2smiles(cas)
89
 
 
 
 
 
90
  if smiles:
91
  if not cas:
92
  cas = name2cas(name)
@@ -110,6 +127,7 @@ def ResolveChemical(chemName, IDtype):
110
  cas = None
111
  Mw = None
112
  LogP = None
 
113
  im64 = None
114
  error = 4 # invalid IDtype selection, probably not possible
115
 
@@ -119,8 +137,10 @@ def ResolveChemical(chemName, IDtype):
119
  name = 'Not found'
120
  if not cas:
121
  cas = 'Not found'
 
 
122
 
123
- return (name, cas, smiles, Mw, LogP, im64, error)
124
 
125
  #Generates an image of the molecule represented by the SMILES code given.
126
  #Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
@@ -455,3 +475,111 @@ def is_cas(cas):
455
  except:
456
  return False
457
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import cirpy
7
  import chemicals
8
 
9
+ import bs4
10
+ import urllib
11
+ import json
12
+
13
  from io import BytesIO
14
  from PIL import ImageOps
15
  import base64
 
28
  name = None
29
  Mw = None
30
  LogP = None
31
+ rho = None
32
  im64 = None
33
  error = 0
34
 
35
  if not is_cas(cas):
36
  error = 3 #invalid cas
37
+ return (name, cas, smiles, Mw, LogP, rho, im64, error)
38
 
39
  smiles = cas2smiles(cas)
40
 
41
  if smiles:
42
  name = cas2name(cas)
43
+ rho = string2density(cas)
44
+ if not rho and name:
45
+ rho = string2density(name)
46
  try:
47
  mol = Chem.MolFromSmiles(smiles)
48
  except:
 
62
  name = None
63
  Mw = None
64
  LogP = None
65
+ rho = None
66
  im64 = None
67
  error = 0
68
 
69
  name = smiles2name(smiles)
70
  if name:
71
  cas = name2cas(name)
72
+ rho = string2density(name)
73
+ if not rho and cas:
74
+ rho = string2density(cas)
75
 
76
  try:
77
  mol = Chem.MolFromSmiles(smiles)
 
90
  cas = None
91
  Mw = None
92
  LogP = None
93
+ rho = None
94
  im64 = None
95
  error = 0
96
 
 
100
  cas = name2cas(name)
101
  smiles = cas2smiles(cas)
102
 
103
+ rho = string2density(name)
104
+ if not rho and cas:
105
+ rho = string2density(cas)
106
+
107
  if smiles:
108
  if not cas:
109
  cas = name2cas(name)
 
127
  cas = None
128
  Mw = None
129
  LogP = None
130
+ rho = None
131
  im64 = None
132
  error = 4 # invalid IDtype selection, probably not possible
133
 
 
137
  name = 'Not found'
138
  if not cas:
139
  cas = 'Not found'
140
+ if not rho:
141
+ rho = 'Not found'
142
 
143
+ return (name, cas, smiles, Mw, LogP, rho, im64, error)
144
 
145
  #Generates an image of the molecule represented by the SMILES code given.
146
  #Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
 
475
  except:
476
  return False
477
 
478
def _pubchem_density_values(content):
    """Collect the leading number of every 'Density' entry in a PUG View record.

    Walks Record -> 'Chemical and Physical Properties' ->
    'Experimental Properties' -> 'Density' and, for each Information entry,
    parses the number at the very start of its first markup string.
    Entries that are missing any of those keys, or whose text does not begin
    with a number, are skipped instead of raising.

    Returns a (possibly empty) list of floats.
    """
    if not isinstance(content, dict):
        return []

    # Raw string: the pattern contains backslash escapes (\d, \.) that are
    # invalid escape sequences in a normal string literal.
    number = re.compile(r'(?:\d+(?:\.\d*)?|\.\d+)')

    # Descend one heading at a time, keeping the children of every section
    # whose heading matches.
    level = content.get('Record', {}).get('Section', [])
    for heading in ('Chemical and Physical Properties', 'Experimental Properties'):
        level = [
            child
            for section in level
            if section.get('TOCHeading') == heading
            for child in section.get('Section', [])
        ]

    values = []
    for section in level:
        if section.get('TOCHeading') != 'Density':
            continue
        for info in section.get('Information', []):
            # Not every entry carries 'StringWithMarkup'; indexing it
            # unconditionally would raise and abort the whole lookup.
            markup = info.get('Value', {}).get('StringWithMarkup') or []
            text = markup[0].get('String') if markup else None
            if not isinstance(text, str):
                continue
            # match() anchors at the start, so text with a leading label
            # (anything that does not begin with a digit or '.') is ignored.
            found = number.match(text)
            if found:
                values.append(float(found.group()))
    return values


def string2density(string):
    """Look up a density for a chemical identifier via PubChem.

    Resolves ``string`` with ``pcp.get_compounds(..., namespace='name')``,
    takes the first hit, downloads that compound's PUG View JSON record and
    averages the leading numbers of its experimental 'Density' entries.
    Units and measurement conditions in the PubChem text are not parsed.

    Parameters:
        string: identifier to search for (callers pass a name or a CAS number).

    Returns:
        The mean of the parsed values (as returned by ``np.mean``), or None
        when the input is empty, the lookup or download fails, or no entry
        starts with a number.

    NOTE: a fallback that scraped the EPA CompTox (DSSTOX) dashboard with
    Selenium was prototyped here but was disabled (commented out); it has
    been dropped from this function body.
    """
    if not string:
        return None

    # 'import urllib' alone does not guarantee the 'request' submodule is loaded.
    import urllib.request

    try:
        compounds = pcp.get_compounds(string, namespace='name')
        cid = compounds[0].cid
        url = f'https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{cid}/JSON'
        # Context manager closes the connection; the timeout keeps a stalled
        # PubChem request from blocking the caller indefinitely.
        with urllib.request.urlopen(url, timeout=15) as fid:
            content = json.loads(fid.read().decode('utf-8'))
    except Exception:
        # Best-effort lookup: no hit, network/HTTP failure or malformed JSON
        # all mean "density unknown". Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None

    values = _pubchem_density_values(content)
    if not values:
        return None
    return np.mean(values)
585
+
exposure_module/exposure.py CHANGED
@@ -21,7 +21,7 @@ def exp_post():
21
  chemName = request.form["chemName"]
22
  IDtype = request.form["IDtype"]
23
 
24
- iupac, cas, smiles, MW, LogP, molImage, error = ResolveChemical(chemName, IDtype)
25
 
26
  if error > 0:
27
  return render_template('chemError.html')
@@ -66,6 +66,7 @@ def exp_post():
66
  diff = SigFigs(diff, 2)
67
  MW = SigFigs(MW, 6)
68
  LogP = SigFigs(LogP, 6)
 
69
 
70
  # Generate the rate plot using matplotlib
71
  tarray = np.arange(1., 31., 1.)
@@ -74,4 +75,4 @@ def exp_post():
74
 
75
  return render_template('exposure_report.html', polymers=polymers, pIndex=pIndex, release=release,
76
  assume=assume, area=area, vol=vol, amount=amount, diff=diff, time=time, exposure=exposure, TTC=TTC,
77
- MOS=MOS, chemName=chemName, image=pngImageB64String, MW=MW, LogP=LogP, iupac=iupac, cas=cas, smiles=smiles, molImage=molImage)
 
21
  chemName = request.form["chemName"]
22
  IDtype = request.form["IDtype"]
23
 
24
+ iupac, cas, smiles, MW, LogP, rho, molImage, error = ResolveChemical(chemName, IDtype)
25
 
26
  if error > 0:
27
  return render_template('chemError.html')
 
66
  diff = SigFigs(diff, 2)
67
  MW = SigFigs(MW, 6)
68
  LogP = SigFigs(LogP, 6)
69
+ #rho = SigFigs(rho[0], 4)
70
 
71
  # Generate the rate plot using matplotlib
72
  tarray = np.arange(1., 31., 1.)
 
75
 
76
  return render_template('exposure_report.html', polymers=polymers, pIndex=pIndex, release=release,
77
  assume=assume, area=area, vol=vol, amount=amount, diff=diff, time=time, exposure=exposure, TTC=TTC,
78
+ MOS=MOS, chemName=chemName, image=pngImageB64String, MW=MW, LogP=LogP, rho=rho, iupac=iupac, cas=cas, smiles=smiles, molImage=molImage)
exposure_module/templates/exposure_report.html CHANGED
@@ -55,6 +55,7 @@
55
  CAS :: {{cas}} <br> <br>
56
  Molecular weight :: {{MW}} <br> <br>
57
  LogKow :: {{LogP}} <br> <br>
 
58
  SMILES :: {{smiles}}
59
  </div>
60
  <div class="column">
 
55
  CAS :: {{cas}} <br> <br>
56
  Molecular weight :: {{MW}} <br> <br>
57
  LogKow :: {{LogP}} <br> <br>
58
+ Density :: {{rho}} <br> <br>
59
  SMILES :: {{smiles}}
60
  </div>
61
  <div class="column">