robert.elder committed on
Commit ·
3b7f36c
1
Parent(s): dafa47a
added MP lookup and various bug fixes
Browse files- ChemID.py +562 -119
- Comptox_pred_data.xlsx +3 -0
- PHYSPROP_MP_data.xlsx +3 -0
- exposure_module/exposure.py +8 -3
- exposure_module/templates/MwError.html +2 -1
- exposure_module/templates/exposure_report.html +2 -1
- functions.py +1 -1
ChemID.py
CHANGED
|
@@ -6,15 +6,21 @@ import pubchempy as pcp
|
|
| 6 |
import cirpy
|
| 7 |
|
| 8 |
import chemicals
|
| 9 |
-
## add custom chemical definitions (i.e., to correct confusion between methane and carbon)
|
| 10 |
-
db = chemicals.identifiers.get_pubchem_db()
|
| 11 |
-
db.load('custom_chemicals_db.tsv')
|
| 12 |
|
| 13 |
import bs4
|
| 14 |
import urllib
|
| 15 |
import requests
|
| 16 |
import json
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
from io import BytesIO
|
| 19 |
from PIL import ImageOps
|
| 20 |
import base64
|
|
@@ -23,38 +29,51 @@ import rdkit
|
|
| 23 |
from rdkit.Chem import AllChem as Chem
|
| 24 |
from rdkit.Chem import Descriptors,Descriptors3D,Draw,Crippen
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def ResolveChemical(chemName, IDtype):
|
| 27 |
|
| 28 |
-
LogP_func = Crippen.MolLogP
|
|
|
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
if IDtype == 'CAS':
|
| 31 |
cas = chemName
|
| 32 |
-
smiles = None
|
| 33 |
-
name = None
|
| 34 |
-
Mw = None
|
| 35 |
-
LogP = None
|
| 36 |
-
rho = None
|
| 37 |
-
im64 = None
|
| 38 |
-
error = 0
|
| 39 |
|
| 40 |
if not is_cas(cas):
|
| 41 |
error = 3 #invalid cas
|
| 42 |
-
return (name, cas, smiles, Mw, LogP, rho, im64, error)
|
| 43 |
|
| 44 |
smiles = cas2smiles(cas)
|
| 45 |
|
| 46 |
if smiles:
|
| 47 |
name = cas2name(cas)
|
| 48 |
-
rho = string2density(cas)
|
| 49 |
-
|
| 50 |
-
|
|
|
|
| 51 |
try:
|
| 52 |
mol = Chem.MolFromSmiles(smiles)
|
| 53 |
except:
|
| 54 |
error = 2 #invalid smiles
|
| 55 |
if mol:
|
| 56 |
Mw = Descriptors.MolWt(mol)
|
| 57 |
-
LogP = LogP_func(mol)
|
| 58 |
im = ImageFromSmiles(smiles)
|
| 59 |
im64 = Imageto64(im)
|
| 60 |
else:
|
|
@@ -63,20 +82,15 @@ def ResolveChemical(chemName, IDtype):
|
|
| 63 |
error = 1 # no smiles found
|
| 64 |
elif IDtype == 'SMILES':
|
| 65 |
smiles = chemName
|
| 66 |
-
cas = None
|
| 67 |
-
name = None
|
| 68 |
-
Mw = None
|
| 69 |
-
LogP = None
|
| 70 |
-
rho = None
|
| 71 |
-
im64 = None
|
| 72 |
-
error = 0
|
| 73 |
|
| 74 |
name = smiles2name(smiles)
|
| 75 |
if name:
|
| 76 |
cas = name2cas(name)
|
| 77 |
-
rho = string2density(name)
|
| 78 |
-
if
|
| 79 |
-
rho = string2density(cas)
|
|
|
|
|
|
|
| 80 |
|
| 81 |
try:
|
| 82 |
mol = Chem.MolFromSmiles(smiles)
|
|
@@ -84,44 +98,37 @@ def ResolveChemical(chemName, IDtype):
|
|
| 84 |
error = 2
|
| 85 |
if mol:
|
| 86 |
Mw = Descriptors.MolWt(mol)
|
| 87 |
-
LogP = LogP_func(mol)
|
| 88 |
im = ImageFromSmiles(smiles)
|
| 89 |
im64 = Imageto64(im)
|
| 90 |
else:
|
| 91 |
error = 2
|
| 92 |
elif IDtype == 'common':
|
| 93 |
name = chemName
|
| 94 |
-
smiles = None
|
| 95 |
-
cas = None
|
| 96 |
-
Mw = None
|
| 97 |
-
LogP = None
|
| 98 |
-
rho = None
|
| 99 |
-
im64 = None
|
| 100 |
-
error = 0
|
| 101 |
|
| 102 |
-
|
|
|
|
| 103 |
|
| 104 |
name, name_origin = name2iupac(name)
|
| 105 |
smiles = name2smiles(name)
|
|
|
|
| 106 |
if not smiles:
|
| 107 |
-
cas = name2cas(name)
|
| 108 |
smiles = cas2smiles(cas)
|
| 109 |
|
| 110 |
-
if
|
| 111 |
-
rho = string2density(
|
| 112 |
-
if
|
| 113 |
-
rho = string2density(
|
| 114 |
|
| 115 |
if smiles:
|
| 116 |
-
|
| 117 |
-
cas = name2cas(name)
|
| 118 |
try:
|
| 119 |
mol = Chem.MolFromSmiles(smiles)
|
| 120 |
except:
|
| 121 |
error = 2
|
| 122 |
if mol:
|
| 123 |
Mw = Descriptors.MolWt(mol)
|
| 124 |
-
LogP = LogP_func(mol)
|
| 125 |
im = ImageFromSmiles(smiles)
|
| 126 |
im64 = Imageto64(im)
|
| 127 |
else:
|
|
@@ -136,6 +143,7 @@ def ResolveChemical(chemName, IDtype):
|
|
| 136 |
Mw = None
|
| 137 |
LogP = None
|
| 138 |
rho = None
|
|
|
|
| 139 |
im64 = None
|
| 140 |
error = 4 # invalid IDtype selection, probably not possible
|
| 141 |
|
|
@@ -146,7 +154,7 @@ def ResolveChemical(chemName, IDtype):
|
|
| 146 |
if not cas:
|
| 147 |
cas = 'Not found'
|
| 148 |
|
| 149 |
-
return (name, cas, smiles, Mw, LogP, rho, im64, error)
|
| 150 |
|
| 151 |
#Generates an image of the molecule represented by the SMILES code given.
|
| 152 |
#Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
|
|
@@ -309,8 +317,6 @@ def name2iupac(string):
|
|
| 309 |
elif cm.common_name:
|
| 310 |
name = cm.common_name
|
| 311 |
origin = 'chemicals'
|
| 312 |
-
except KeyboardInterrupt:
|
| 313 |
-
raise
|
| 314 |
except:
|
| 315 |
name = None
|
| 316 |
origin = None
|
|
@@ -483,8 +489,14 @@ def is_cas(cas):
|
|
| 483 |
|
| 484 |
def string2density(name):
|
| 485 |
rho, rho_origin = None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
# try to scrape from PubChem
|
| 487 |
-
if
|
| 488 |
try:
|
| 489 |
content = None
|
| 490 |
compounds = pcp.get_compounds(name, namespace='name')
|
|
@@ -529,82 +541,513 @@ def string2density(name):
|
|
| 529 |
intr_qr = q75-q25
|
| 530 |
hi = q75+(1.5*intr_qr)
|
| 531 |
lo = q25-(1.5*intr_qr)
|
| 532 |
-
mask = (rho_list < hi) & (rho_list > lo)
|
| 533 |
rho_list = rho_list[mask]
|
| 534 |
rho = np.mean(rho_list)
|
| 535 |
rho_origin = 'pubchem'
|
| 536 |
else:
|
| 537 |
rho, rho_origin = None, None
|
| 538 |
# try to scrape from DSSTOX
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
#
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
#
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
#
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
#
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
#
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
#
|
| 608 |
-
|
| 609 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
|
|
|
|
| 6 |
import cirpy
|
| 7 |
|
| 8 |
import chemicals
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
import bs4
|
| 11 |
import urllib
|
| 12 |
import requests
|
| 13 |
import json
|
| 14 |
|
| 15 |
+
## not sure if this will be possible on pythonanywhere; use this flag to disable related code blocks
|
| 16 |
+
try_dsstox = True
|
| 17 |
+
if try_dsstox:
|
| 18 |
+
import selenium
|
| 19 |
+
import selenium.webdriver
|
| 20 |
+
from selenium.webdriver.chrome.options import Options
|
| 21 |
+
from selenium.webdriver.chrome.service import Service
|
| 22 |
+
import chromedriver_binary
|
| 23 |
+
|
| 24 |
from io import BytesIO
|
| 25 |
from PIL import ImageOps
|
| 26 |
import base64
|
|
|
|
| 29 |
from rdkit.Chem import AllChem as Chem
|
| 30 |
from rdkit.Chem import Descriptors,Descriptors3D,Draw,Crippen
|
| 31 |
|
| 32 |
+
## add custom chemical definitions (i.e., to correct confusion between methane and carbon)
|
| 33 |
+
db = chemicals.identifiers.get_pubchem_db()
|
| 34 |
+
db.load('custom_chemicals_db.tsv')
|
| 35 |
+
## load experimental and predicted properties
|
| 36 |
+
dfmp_expt = pd.read_excel('PHYSPROP_MP_data.xlsx')
|
| 37 |
+
#dfmp_pred = pd.read_excel('DSSTOX_MP_pred_data.xlsx')
|
| 38 |
+
df_pred = pd.read_excel('Comptox_pred_data.xlsx')
|
| 39 |
+
|
| 40 |
def ResolveChemical(chemName, IDtype):
|
| 41 |
|
| 42 |
+
#LogP_func = Crippen.MolLogP
|
| 43 |
+
LogP_func = getLogP
|
| 44 |
|
| 45 |
+
name = None
|
| 46 |
+
smiles = None
|
| 47 |
+
cas = None
|
| 48 |
+
Mw = None
|
| 49 |
+
LogP = None
|
| 50 |
+
rho = None
|
| 51 |
+
mp = None
|
| 52 |
+
im64 = None
|
| 53 |
+
error = 0
|
| 54 |
+
|
| 55 |
if IDtype == 'CAS':
|
| 56 |
cas = chemName
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
if not is_cas(cas):
|
| 59 |
error = 3 #invalid cas
|
| 60 |
+
return (name, cas, smiles, Mw, LogP, rho, mp, im64, error)
|
| 61 |
|
| 62 |
smiles = cas2smiles(cas)
|
| 63 |
|
| 64 |
if smiles:
|
| 65 |
name = cas2name(cas)
|
| 66 |
+
rho, rho_origin = string2density(cas)
|
| 67 |
+
mp, mp_origin = mol2mp(cas, name, smiles)
|
| 68 |
+
if rho is None and name:
|
| 69 |
+
rho, rho_origin = string2density(name)
|
| 70 |
try:
|
| 71 |
mol = Chem.MolFromSmiles(smiles)
|
| 72 |
except:
|
| 73 |
error = 2 #invalid smiles
|
| 74 |
if mol:
|
| 75 |
Mw = Descriptors.MolWt(mol)
|
| 76 |
+
LogP, LogP_origin = LogP_func(cas, mol)
|
| 77 |
im = ImageFromSmiles(smiles)
|
| 78 |
im64 = Imageto64(im)
|
| 79 |
else:
|
|
|
|
| 82 |
error = 1 # no smiles found
|
| 83 |
elif IDtype == 'SMILES':
|
| 84 |
smiles = chemName
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
name = smiles2name(smiles)
|
| 87 |
if name:
|
| 88 |
cas = name2cas(name)
|
| 89 |
+
rho, rho_origin = string2density(name)
|
| 90 |
+
if rho is None and cas:
|
| 91 |
+
rho, rho_origin = string2density(cas)
|
| 92 |
+
|
| 93 |
+
mp, mp_origin = mol2mp(cas, name, smiles)
|
| 94 |
|
| 95 |
try:
|
| 96 |
mol = Chem.MolFromSmiles(smiles)
|
|
|
|
| 98 |
error = 2
|
| 99 |
if mol:
|
| 100 |
Mw = Descriptors.MolWt(mol)
|
| 101 |
+
LogP, LogP_origin = LogP_func(cas, mol)
|
| 102 |
im = ImageFromSmiles(smiles)
|
| 103 |
im64 = Imageto64(im)
|
| 104 |
else:
|
| 105 |
error = 2
|
| 106 |
elif IDtype == 'common':
|
| 107 |
name = chemName
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
+
# try this first because sometimes iupac names don't work
|
| 110 |
+
rho, rho_origin = string2density(name)
|
| 111 |
|
| 112 |
name, name_origin = name2iupac(name)
|
| 113 |
smiles = name2smiles(name)
|
| 114 |
+
cas = name2cas(name)
|
| 115 |
if not smiles:
|
|
|
|
| 116 |
smiles = cas2smiles(cas)
|
| 117 |
|
| 118 |
+
if rho is None and cas:
|
| 119 |
+
rho, rho_origin = string2density(cas)
|
| 120 |
+
if rho is None:
|
| 121 |
+
rho, rho_origin = string2density(name)
|
| 122 |
|
| 123 |
if smiles:
|
| 124 |
+
mp, mp_origin = mol2mp(cas, name, smiles)
|
|
|
|
| 125 |
try:
|
| 126 |
mol = Chem.MolFromSmiles(smiles)
|
| 127 |
except:
|
| 128 |
error = 2
|
| 129 |
if mol:
|
| 130 |
Mw = Descriptors.MolWt(mol)
|
| 131 |
+
LogP, LogP_origin = LogP_func(cas, mol)
|
| 132 |
im = ImageFromSmiles(smiles)
|
| 133 |
im64 = Imageto64(im)
|
| 134 |
else:
|
|
|
|
| 143 |
Mw = None
|
| 144 |
LogP = None
|
| 145 |
rho = None
|
| 146 |
+
mp = None
|
| 147 |
im64 = None
|
| 148 |
error = 4 # invalid IDtype selection, probably not possible
|
| 149 |
|
|
|
|
| 154 |
if not cas:
|
| 155 |
cas = 'Not found'
|
| 156 |
|
| 157 |
+
return (name, cas, smiles, Mw, LogP, rho, mp, im64, error)
|
| 158 |
|
| 159 |
#Generates an image of the molecule represented by the SMILES code given.
|
| 160 |
#Returns None if the image cannot be generated. From https://github.com/ronaldo-prata/flask-test/blob/master/functions.py
|
|
|
|
| 317 |
elif cm.common_name:
|
| 318 |
name = cm.common_name
|
| 319 |
origin = 'chemicals'
|
|
|
|
|
|
|
| 320 |
except:
|
| 321 |
name = None
|
| 322 |
origin = None
|
|
|
|
| 489 |
|
| 490 |
def string2density(name):
|
| 491 |
rho, rho_origin = None, None
|
| 492 |
+
# predicted values from TEST (CompTox dashboard)
|
| 493 |
+
if is_cas(name):
|
| 494 |
+
mask = df_pred['CASRN'] == name
|
| 495 |
+
if sum(mask):
|
| 496 |
+
rho = float(df_pred[mask]['DENSITY_G/CM^3_TEST_PRED'])
|
| 497 |
+
rho_origin = 'pred'
|
| 498 |
# try to scrape from PubChem
|
| 499 |
+
if rho is None:
|
| 500 |
try:
|
| 501 |
content = None
|
| 502 |
compounds = pcp.get_compounds(name, namespace='name')
|
|
|
|
| 541 |
intr_qr = q75-q25
|
| 542 |
hi = q75+(1.5*intr_qr)
|
| 543 |
lo = q25-(1.5*intr_qr)
|
| 544 |
+
mask = (rho_list <= hi) & (rho_list >= lo)
|
| 545 |
rho_list = rho_list[mask]
|
| 546 |
rho = np.mean(rho_list)
|
| 547 |
rho_origin = 'pubchem'
|
| 548 |
else:
|
| 549 |
rho, rho_origin = None, None
|
| 550 |
# try to scrape from DSSTOX
|
| 551 |
+
if try_dsstox:
|
| 552 |
+
if rho is None:
|
| 553 |
+
dtxsid = None
|
| 554 |
+
try:
|
| 555 |
+
# try to find it via the dsstox dashboard
|
| 556 |
+
name_urlsafe = urllib.parse.quote(name)
|
| 557 |
+
url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
|
| 558 |
+
fid = urllib.request.urlopen(url)
|
| 559 |
+
webpage = fid.read().decode('utf-8')
|
| 560 |
+
hits = re.findall('DTXSID[0-9]+', webpage)
|
| 561 |
+
if len(hits):
|
| 562 |
+
dtxsid = hits[0]
|
| 563 |
+
except:
|
| 564 |
+
pass
|
| 565 |
+
if dtxsid:
|
| 566 |
+
url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
|
| 567 |
+
#print(url)
|
| 568 |
+
try:
|
| 569 |
+
driver_exe = chromedriver_binary.chromedriver_filename
|
| 570 |
+
options = Options()
|
| 571 |
+
options.add_argument("--headless") # runs in background instead of showing browser window
|
| 572 |
+
service = Service(driver_exe)
|
| 573 |
+
driver = selenium.webdriver.Chrome(service=service, options=options)
|
| 574 |
+
driver.set_page_load_timeout(15)
|
| 575 |
+
driver.get(url)
|
| 576 |
+
except KeyboardInterrupt:
|
| 577 |
+
raise
|
| 578 |
+
except:
|
| 579 |
+
pass
|
| 580 |
+
#print("timeout")
|
| 581 |
+
webpage = driver.page_source
|
| 582 |
+
driver.quit()
|
| 583 |
+
mysoup = bs4.BeautifulSoup(webpage, features='lxml')
|
| 584 |
+
# column of property names
|
| 585 |
+
ifound = None
|
| 586 |
+
rows = mysoup.find_all('div', attrs={'col-id':'property'})
|
| 587 |
+
for i,row in enumerate(rows):
|
| 588 |
+
if 'Density' in row.text:
|
| 589 |
+
ifound = i
|
| 590 |
+
break
|
| 591 |
+
if ifound:
|
| 592 |
+
rows = mysoup.find_all('div', attrs={'col-id':'exavg'})
|
| 593 |
+
text = rows[ifound].text
|
| 594 |
+
value = re.sub(' \([0-9]*\)', '', text.strip())
|
| 595 |
+
try:
|
| 596 |
+
rho = float(value)
|
| 597 |
+
rho_origin = 'expt/dsstox'
|
| 598 |
+
except:
|
| 599 |
+
rho, rho_origin = None, None
|
| 600 |
+
if rho is None:
|
| 601 |
+
rows = mysoup.find_all('div', attrs={'col-id':'predavg'})
|
| 602 |
+
text = rows[ifound].text
|
| 603 |
+
value = re.sub(' \([0-9]*\)', '', text.strip())
|
| 604 |
+
try:
|
| 605 |
+
rho = float(value)
|
| 606 |
+
rho_origin = 'pred/dsstox'
|
| 607 |
+
except:
|
| 608 |
+
rho, rho_origin = None, None
|
| 609 |
+
else:
|
| 610 |
+
rho, rho_origin = None, None
|
| 611 |
+
else:
|
| 612 |
+
rho, rho_origin = None, None
|
| 613 |
+
if rho is not None and np.isnan(rho): rho = None
|
| 614 |
+
if rho is None: rho_origin = None
|
| 615 |
+
return rho, rho_origin
|
| 616 |
+
|
| 617 |
+
def return_non_duplicate_index(tuples):
    """Count, per group, the substructure matches that survive overlap removal.

    Adapted from https://github.com/curieshicy/JRgui/

    Parameters
    ----------
    tuples : sequence
        One entry per matched group; each entry is itself a sequence of
        matches, and each match is an iterable of atom positions
        (e.g. ``[((0,), (1,)), ((0, 1),)]``).

    Returns
    -------
    list of [int, int]
        ``[position_in_tuples, surviving_match_count]`` pairs, sorted by
        position.  Groups whose matches were all discarded are omitted.

    Notes
    -----
    Two matches "overlap" when they share at least one atom position.  For
    every overlapping pair the match with fewer atoms is discarded; on a tie
    the later one is discarded.  A match that has already been discarded
    still takes part in later comparisons (this mirrors the original
    algorithm and is kept on purpose so results do not change).
    """
    # Local import keeps the function usable even if the module header
    # does not import Counter.
    from collections import Counter

    # Flatten every match into a set of atom positions and remember which
    # entry of `tuples` each flattened match came from.
    atom_sets = []
    owners = []
    for group_pos, matches in enumerate(tuples):
        for match in matches:
            atom_sets.append(set(match))
            owners.append(group_pos)

    # Pairwise overlap check: drop the smaller match, or the later one on a tie.
    removed = set()
    for i, current in enumerate(atom_sets):
        for j in range(i + 1, len(atom_sets)):
            other = atom_sets[j]
            if current & other:
                if len(current) < len(other):
                    removed.add(i)
                else:
                    removed.add(j)

    # Tally surviving matches per originating group, ordered by group position.
    counts = Counter(
        owner for k, owner in enumerate(owners) if k not in removed
    )
    return [[group_pos, n] for group_pos, n in sorted(counts.items())]
|
| 659 |
+
|
| 660 |
+
def search_func_groups(smiles):
    """Find which functional-group SMARTS patterns occur in a molecule.

    Adapted from https://github.com/curieshicy/JRgui/

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule (it is passed through ``str()`` before
        parsing).

    Returns
    -------
    list of [int, int]
        ``[pattern_index, count]`` pairs, where ``pattern_index`` is the
        position of the pattern in the list below and ``count`` is the number
        of matches left after ``return_non_duplicate_index`` has removed
        overlapping matches.

    Notes
    -----
    The order of the patterns matters: ``compute_phys_properties`` uses the
    returned index to select a row of its group-contribution table.

    NOTE(review): if the SMILES cannot be parsed, ``Chem.MolFromSmiles``
    presumably returns ``None`` and the attribute access below raises, as it
    did before this rewrite -- confirm callers expect that.
    """
    smarts = [
        "[$([CX2H0](=*)=*)]",
        "[$([CX2H1]#[!#7])]",
        "[$([CX2H0]#[!#7])]",
        "[OX2H]-[C]=O",
        "[#6X3H0;!$([#6X3H0](~O)(~O)(~O))](=[#8X1])[#8X2H0]",
        "[$([#6X3H0](=[OX1]));!$([#6X3](=[#8X1])~[#8X2]);R]=O",
        "[CH;D2;$(C-!@C)](=O)",
        "[OX2H;!$([OX2H]-[#6]=[O]);!$([OX2H]-a)]",
        "[O;H1;$(O-!@c)]",
        "[#8X2H0;R;!$([#8X2H0]~[#6]=[#8])]",
        "[$([CX3H0](=[OX1]));!$([CX3](=[OX1])-[OX2]);!R]=O",
        "[OX2H0;!R;!$([OX2H0]-[#6]=[#8])]",
        "[$([#7X3,#7X3+][!#8])](=[O])~[O-]",
        "[OX1H0;!$([OX1H0]~[#6X3]);!$([OX1H0]~[#7X3]~[#8])]",
        "[#7X2H0;R]",
        "[#7X3H1;R]",
        "[#7X2H1]",
        "[#7X2H0;!R]",
        "[#6X2]#[#7X1H0]",
        "[NX3H2]",
        "[NX3H1;!R]",
        "[#7X3H0;!$([#7](~O)~O)]",
        "[SX2H]",
        "[#16X2H0;!R]",
        "[#16X2H0;R]",
        "[R;CX3H1,cX3H1]",
        "[$([R;#6X3H0]);!$([R;#6X3H0]=[#8])]",
        "[R;CX4H2]",
        "[R;CX4H]",
        "[R;CX4H0]",
        "[CX3H2]",
        "[!R;CX3H1;!$([CX3H1](=O))]",
        "[$([!R;#6X3H0]);!$([!R;#6X3H0]=[#8])]",
        "[CX4H3]",
        "[!R;CX4H2]",
        "[!R;CX4H]",
        "[!R;CX4H0]",
        "[F]",
        "[Cl]",
        "[Br]",
        "[I]",
    ]

    m = Chem.MolFromSmiles(str(smiles))

    tuples = []      # atom positions of every match, one entry per matched pattern
    index_list = []  # position in `smarts` of each entry in `tuples`
    for index, smart in enumerate(smarts):
        # Compile the pattern once and search once; an empty result means
        # the pattern does not occur in the molecule.
        matches = m.GetSubstructMatches(Chem.MolFromSmarts(smart))
        if matches:
            tuples.append(matches)
            index_list.append(index)

    # Entries look like [position_in_tuples, count]; translate the position
    # back to the pattern's index in `smarts`.
    return [
        [index_list[pos], count]
        for pos, count in return_non_duplicate_index(tuples)
    ]
|
| 681 |
+
|
| 682 |
+
# Group-contribution table used by compute_phys_properties.
# Adapted from https://github.com/curieshicy/JRgui/
# Method: K. G. Joback, R. C. Reid, ESTIMATION OF PURE-COMPONENT PROPERTIES
# FROM GROUP-CONTRIBUTIONS. Chemical Engineering Communications 57, 233-243 (1987).
#
# 41 rows, one per SMARTS pattern of search_func_groups (same order).
# 15 columns per row, named after the variables of the original code:
#   0 Tc, 1 Pc, 2 Vc, 3 Tb, 4 Tm, 5 Hfor, 6 Gf,
#   7 Cpa, 8 Cpb, 9 Cpc, 10 Cpd, 11 Hfus, 12 Hvap, 13 Ya, 14 Yb
# None marks a contribution that is not available for that group.
_JOBACK_DB = (
    (0.0026, 0.0028, 36, 26.15, 17.78, 142.14, 136.70, 2.74E+1, -5.57E-2, 1.01E-4, -5.02E-8, 4.720, 2.661, None, None),
    (0.0027, -0.0008, 46, 9.20, -11.18, 79.30, 77.71, 2.45E+1, -2.71E-2, 1.11E-4, -6.78E-8, 2.322, 1.155, None, None),
    (0.0020, 0.0016, 37, 27.38, 64.32, 115.51, 109.82, 7.87, 2.01E-2, -8.33E-6, 1.39E-9, 4.151, 3.302, None, None),
    (0.0791, 0.0077, 89, 169.09, 155.50, -426.72, -387.87, 2.41E+1, 4.27E-2, 8.04E-5, -6.87E-8, 11.051, 19.537, 1317.23, -2.578),
    (0.0481, 0.0005, 82, 81.10, 53.60, -337.92, -301.95, 2.45E+1, 4.02E-2, 4.02E-5, -4.52E-8, 6.959, 9.633, 483.88, -0.966),
    (0.0284, 0.0028, 55, 94.97, 75.97, -164.50, -126.27, 3.04E+1, -8.29E-2, 2.36E-4, -1.31E-7, None, 6.645, None, None),
    (0.0379, 0.0030, 82, 72.24, 36.90, -162.03, -143.48, 3.09E+1, -3.36E-2, 1.60E-4, -9.88E-8, 3.197, 9.093, 740.92, -1.713),
    (0.0741, 0.0112, 28, 92.88, 44.45, -208.04, -189.20, 2.57E+1, -6.91E-2, 1.77E-4, -9.88E-8, 2.406, 16.826, 2173.72, -5.057),
    (0.0240, 0.0184, -25, 76.34, 82.83, -221.65, -197.37, -2.81, 1.11E-1, -1.16E-4, 4.94E-8, 4.490, 12.499, 3018.17, -7.314),
    (0.0098, 0.0048, 13, 31.22, 23.05, -138.16, -98.22, 1.22E+1, -1.26E-2, 6.03E-5, -3.86E-8, 5.879, 4.682, 440.24, -0.953),
    (0.0380, 0.0031, 62, 76.75, 61.20, -133.22, -120.50, 6.45, 6.70E-2, -3.57E-5, 2.86E-9, 4.189, 8.972, 340.35, -0.350),
    (0.0168, 0.0015, 18, 22.42, 22.23, -132.22, -105.00, 2.55E+1, -6.32E-2, 1.11E-4, -5.48E-8, 1.188, 2.410, 122.09, -0.386),
    (0.0437, 0.0064, 91, 152.54, 127.24, -66.57, -16.83, 2.59E+1, -3.74E-3, 1.29E-4, -8.88E-8, 9.679, 16.738, None, None),
    (0.0143, 0.0101, 36, -10.50, 2.08, -247.61, -250.83, 6.82, 1.96E-2, 1.27E-5, -1.78E-8, 3.624, 5.909, 675.24, -1.340),
    (0.0085, 0.0076, 34, 57.55, 68.40, 55.52, 79.93, 8.83, -3.84E-3, 4.35E-5, -2.60E-8, 3.649, 6.528, None, None),
    (0.0130, 0.0114, 29, 52.82, 101.51, 31.65, 75.61, 1.18E+1, -2.30E-2, 1.07E-4, -6.28E-8, 7.490, 6.930, None, None),
    (None, None, None, 83.08, 68.91, 93.70, 119.66, 5.69, -4.12E-3, 1.28E-4, -8.88E-8, None, 12.169, None, None),
    (0.0255, -0.0099, None, 74.60, None, 23.61, None, None, None, None, None, None, 3.335, None, None),
    (0.0496, -0.0101, 91, 125.66, 59.89, 88.43, 89.22, 3.65E+1, -7.33E-2, 1.84E-4, -1.03E-7, 2.414, 12.851, None, None),
    (0.0243, 0.0109, 38, 73.23, 66.89, -2.02, 14.07, 2.69E+1, -4.12E-2, 1.64E-4, -9.76E-8, 3.515, 10.788, None, None),
    (0.0295, 0.0077, 35, 50.17, 52.66, 53.47, 89.39, -1.21, 7.62E-2, -4.86E-5, 1.05E-8, 5.009, 6.436, None, None),
    (0.0169, 0.0074, 9, 11.74, 48.84, 123.34, 163.16, -3.11E+1, 2.27E-1, -3.20E-4, 1.46E-7, 4.703, 1.896, None, None),
    (0.0031, 0.0084, 63, 63.56, 20.09, -17.33, -22.99, 3.53E+1, -7.58E-2, 1.85E-4, -1.03E-7, 2.360, 6.884, None, None),
    (0.0119, 0.0049, 54, 68.78, 34.40, 41.87, 33.12, 1.96E+1, -5.61E-3, 4.02E-5, -2.76E-8, 4.130, 6.817, None, None),
    (0.0019, 0.0051, 38, 52.10, 79.93, 39.10, 27.76, 1.67E+1, 4.81E-3, 2.77E-5, -2.11E-8, 1.557, 5.984, None, None),
    (0.0082, 0.0011, 41, 26.73, 8.13, 2.09, 11.30, -2.14, 5.74E-2, -1.64E-6, -1.59E-8, 1.101, 2.544, 259.65, -0.702),
    (0.0143, 0.0008, 32, 31.01, 37.02, 46.43, 54.05, -8.25, 1.01E-1, -1.42E-4, 6.78E-8, 2.394, 3.059, -245.74, 0.912),
    (0.0100, 0.0025, 48, 27.15, 7.75, -26.80, -3.68, -6.03, 8.54E-2, -8.00E-6, -1.80E-8, 0.490, 2.398, 307.53, -0.798),
    (0.0122, 0.0004, 38, 21.78, 19.88, 8.67, 40.99, -2.05E+1, 1.62E-1, -1.60E-4, 6.24E-8, 3.243, 1.942, -394.29, 1.251),
    (0.0042, 0.0061, 27, 21.32, 60.15, 79.72, 87.88, -9.09E+1, 5.57E-1, -9.00E-4, 4.69E-7, -1.373, 0.644, None, None),
    (0.0113, -0.0028, 56, 18.18, -4.32, -9.630, 3.77, 2.36E+1, -3.81E-2, 1.72E-4, -1.03E-7, -0.473, 1.724, 495.01, -1.539),
    (0.0129, -0.0006, 46, 24.96, 8.73, 37.97, 48.53, -8.00, 1.05E-1, -9.63E-5, 3.56E-8, 2.691, 2.205, 82.28, -0.242),
    (0.0117, 0.0011, 38, 24.14, 11.14, 83.99, 92.36, -2.81E+1, 2.08E-1, -3.06E-4, 1.46E-7, 3.063, 2.138, None, None),
    (0.0141, -0.0012, 65, 23.58, -5.10, -76.45, -43.96, 1.95E+1, -8.08E-3, 1.53E-4, -9.67E-8, 0.908, 2.373, 548.29, -1.719),
    (0.0189, 0.0000, 56, 22.88, 11.27, -20.64, 8.42, -9.09E-1, 9.50E-2, -5.44E-5, 1.19E-8, 2.590, 2.226, 94.16, -0.199),
    (0.0164, 0.0020, 41, 21.74, 12.64, 29.89, 58.36, -2.30E+1, 2.04E-1, -2.65E-4, 1.20E-7, 0.749, 1.691, -322.15, 1.187),
    (0.0067, 0.0043, 27, 18.25, 46.43, 82.23, 116.02, -6.62E+1, 4.27E-1, -6.41E-4, 3.01E-7, -1.460, 0.636, -573.56, 2.307),
    (0.0111, -0.0057, 27, -0.03, -15.78, -251.92, -247.19, 2.65E+1, -9.13E-2, 1.91E-4, -1.03E-7, 1.398, -0.670, None, None),
    (0.0105, -0.0049, 58, 38.13, 13.55, -71.55, -64.31, 3.33E+1, -9.63E-2, 1.87E-4, -9.96E-8, 2.515, 4.532, 625.45, -1.814),
    (0.0133, 0.0057, 71, 66.86, 43.43, -29.48, -38.06, 2.86E+1, -6.49E-2, 1.36E-4, -7.45E-8, 3.603, 6.582, 738.91, -2.038),
    (0.0068, -0.0034, 97, 93.84, 41.69, 21.06, 5.74, 3.21E+1, -6.41E-2, 1.26E-4, -6.87E-8, 2.724, 9.520, 809.55, -2.224),
)

# Column of _JOBACK_DB holding the melting-point (Tm) contribution.
_JOBACK_TM_COLUMN = 4


def compute_phys_properties(smiles):
    """Estimate a melting point from group contributions (Joback & Reid).

    The functional groups of the molecule are found with
    ``search_func_groups``; each group's Tm contribution is multiplied by the
    number of times the group occurs, the products are summed and added to
    the constant 122.5, and 273.15 is subtracted from the result (i.e. the
    estimate is converted from kelvin to degrees Celsius).

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.

    Returns
    -------
    float or None
        The estimated melting point, or ``None`` when at least one matched
        group has no Tm contribution in the table.  (Previously that case
        raised ``TypeError`` on the final subtraction.)

    Notes
    -----
    The original author flagged this estimate as "not very accurate, but a
    start".  Only the melting point was ever returned, so the other property
    estimates that used to be computed and thrown away are no longer
    evaluated; the full table is kept so they can be re-derived if needed.

    NOTE(review): an unparseable SMILES still ends in an exception (now from
    ``search_func_groups`` rather than from ``Chem.AddHs``) -- confirm no
    caller depends on the specific exception type.
    """
    contributions = []
    for group_index, count in search_func_groups(smiles):
        tm = _JOBACK_DB[group_index][_JOBACK_TM_COLUMN]
        if tm is None:
            # No tabulated Tm contribution for this group: no estimate possible.
            return None
        contributions.append(count * tm)

    melting_point = 122.5 + sum(contributions)
    return melting_point - 273.15
|
| 847 |
+
|
| 848 |
+
# A leading number, optionally signed, as it appears in PubChem property strings.
_MP_NUMBER = r'-?(?:\d+(?:\.\d*)?|\.\d+)'
# "<number>[-<number>]" immediately followed by a Celsius / Fahrenheit unit.
_MP_CELSIUS = re.compile(rf'({_MP_NUMBER})(?:-({_MP_NUMBER}))?(?: ?°?C)')
_MP_FAHRENHEIT = re.compile(rf'({_MP_NUMBER})(?:-({_MP_NUMBER}))?(?: ?°?F)')


def _mp_is_missing(value):
    """Return True when ``value`` is None or NaN (i.e. not a usable number)."""
    return value is None or np.isnan(value)


def _local_mp(cas):
    """Look a CAS number up in the ``chemicals`` package and the local tables.

    Sources are tried in order: ``chemicals.Tm``, the experimental table
    ``dfmp_expt`` and the OPERA predictions in ``df_pred``.  A NaN from one
    source is treated as "not found" so that the next source is still tried.

    Returns ``(mp, origin)`` or ``(None, None)``.
    """
    # chemicals package; the value is shifted by 273.15 (kelvin -> deg C)
    mp = chemicals.Tm(cas)
    if not _mp_is_missing(mp):
        methods = chemicals.Tm_methods(cas)
        return mp - 273.15, ('chem/' + methods[0] if methods else None)

    # experimental values; some rows hold multiple comma-separated CAS numbers,
    # so pre-filter by substring and then confirm an exact match per row
    mask = dfmp_expt['CAS'].str.contains(cas, regex=False, na=False)
    if mask.any():
        mp = None
        candidates = dfmp_expt.loc[mask, 'CAS'].str.split(', ')
        for index, cas_list in zip(candidates.index, candidates):
            if cas in cas_list:
                # index is a label, hence .loc (the last matching row wins)
                mp = float(dfmp_expt.loc[index, 'MP'])
        if not _mp_is_missing(mp):
            return mp, 'expt'

    # predicted values from OPERA (CompTox dashboard)
    predicted = df_pred.loc[df_pred['CASRN'] == cas, 'MELTING_POINT_DEGC_OPERA_PRED']
    if len(predicted):
        mp = float(predicted.iloc[0])
        if not _mp_is_missing(mp):
            return mp, 'pred'

    return None, None


def _pubchem_mp_strings(content):
    """Yield the raw 'Melting Point' strings of a PubChem PUG-View record."""
    for section in content.get('Record', {}).get('Section', []):
        if section.get('TOCHeading') != 'Chemical and Physical Properties':
            continue
        for subsection in section.get('Section', []):
            if subsection.get('TOCHeading') != 'Experimental Properties':
                continue
            for prop in subsection.get('Section', []):
                if prop.get('TOCHeading') != 'Melting Point':
                    continue
                for info in prop.get('Information', []):
                    try:
                        yield info['Value']['StringWithMarkup'][0]['String']
                    except (KeyError, IndexError, TypeError):
                        # entry without a plain-text value
                        continue


def _parse_mp_string(mp_string):
    """Return the temperatures (deg C) found at the start of ``mp_string``.

    Handles a single value or a "low-high" range followed by a C or F unit.
    """
    values = []
    match = _MP_CELSIUS.match(mp_string)
    if match is not None:
        values.extend(float(g) for g in match.groups() if g is not None)
    match = _MP_FAHRENHEIT.match(mp_string)
    if match is not None:
        # (T/F - 32) * 5/9 = T/C
        values.extend((float(g) - 32) * 5 / 9 for g in match.groups() if g is not None)
    return values


def _pubchem_mp(name):
    """Scrape the experimental melting points listed on PubChem for ``name``.

    Returns ``(mean_mp, 'pubchem')`` after discarding outliers, or
    ``(None, None)`` when the lookup fails or no value can be parsed.
    """
    try:
        compounds = pcp.get_compounds(name, namespace='name')
        cid = compounds[0].cid
        url = f'https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{cid}/JSON'
        # timeout so that an unresponsive server cannot hang the request forever
        content = json.loads(requests.get(url, timeout=30).text)
    except Exception:
        # best effort: any lookup/network/parse failure means "no data"
        return None, None
    if not content:
        return None, None

    mp_list = []
    for mp_string in _pubchem_mp_strings(content):
        mp_list.extend(_parse_mp_string(mp_string))
    if not mp_list:
        return None, None

    ## remove outliers using interquartile range (IQR)
    mp_array = np.array(mp_list)
    q75, q25 = np.percentile(mp_array, [75, 25])
    intr_qr = q75 - q25
    hi = q75 + (1.5 * intr_qr)
    lo = q25 - (1.5 * intr_qr)
    mp_array = mp_array[(mp_array <= hi) & (mp_array >= lo)]
    return np.mean(mp_array), 'pubchem'


def _dsstox_mp(name):
    """Scrape the melting point from the EPA CompTox (DSSTOX) dashboard.

    The experimental average is preferred over the predicted average.
    Returns ``(mp, origin)`` or ``(None, None)``.
    """
    # try to find the substance id via the dsstox dashboard search
    dtxsid = None
    try:
        name_urlsafe = urllib.parse.quote(name)
        url = f'https://comptox.epa.gov/dashboard/search-results?input_type=synonym_substring&inputs={name_urlsafe}'
        with urllib.request.urlopen(url, timeout=15) as fid:
            webpage = fid.read().decode('utf-8')
        hits = re.findall('DTXSID[0-9]+', webpage)
        if hits:
            dtxsid = hits[0]
    except Exception:
        pass
    if not dtxsid:
        return None, None

    url = f'https://comptox.epa.gov/dashboard/chemical/properties/{dtxsid}'
    driver = None
    try:
        options = Options()
        options.add_argument("--headless")  # runs in background instead of showing browser window
        service = Service(chromedriver_binary.chromedriver_filename)
        driver = selenium.webdriver.Chrome(service=service, options=options)
        driver.set_page_load_timeout(15)
        try:
            driver.get(url)
        except Exception:
            # page-load timeout: still use whatever has been rendered so far
            pass
        webpage = driver.page_source
    except Exception:
        # the browser could not be started or queried
        return None, None
    finally:
        # always release the browser process, even on failure
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                pass

    mysoup = bs4.BeautifulSoup(webpage, features='lxml')
    # column of property names -> row position of the melting point
    ifound = None
    for i, row in enumerate(mysoup.find_all('div', attrs={'col-id': 'property'})):
        if 'Melting Point' in row.text:
            ifound = i
            break
    # explicit None test: position 0 is a valid hit
    if ifound is None:
        return None, None

    for col_id, origin in (('exavg', 'expt/dsstox'), ('predavg', 'pred/dsstox')):
        cells = mysoup.find_all('div', attrs={'col-id': col_id})
        if ifound >= len(cells):
            continue
        # drop the trailing " (<count>)" annotation before converting
        value = re.sub(r' \([0-9]*\)', '', cells[ifound].text.strip())
        try:
            return float(value), origin
        except ValueError:
            continue
    return None, None


def string2mp(name):
    """Look up the melting point of a chemical given its CAS number or name.

    Sources are tried in order until one yields a value:

    1. (CAS input only) the ``chemicals`` package, the experimental table
       ``dfmp_expt`` and the OPERA predictions in ``df_pred``;
    2. experimental values scraped from PubChem;
    3. the EPA CompTox dashboard, only when the module-level flag
       ``try_dsstox`` is set.

    Parameters
    ----------
    name : str
        CAS registry number or chemical name.

    Returns
    -------
    tuple
        ``(mp, mp_origin)``; ``mp`` is the melting point (deg C for the
        ``chemicals``, OPERA and PubChem sources; presumably also for the
        experimental table and the dashboard -- TODO confirm) and
        ``mp_origin`` a short tag naming the source.  Both are ``None`` when
        nothing was found.
    """
    mp, mp_origin = None, None

    # local databases are keyed by CAS number
    if is_cas(name):
        mp, mp_origin = _local_mp(name)

    # try to scrape from PubChem
    if mp is None:
        mp, mp_origin = _pubchem_mp(name)

    # try to scrape from DSSTOX website...
    if mp is None and try_dsstox:
        mp, mp_origin = _dsstox_mp(name)

    if mp is not None and np.isnan(mp):
        mp = None
    if mp is None:
        mp_origin = None
    return mp, mp_origin
|
| 1014 |
+
|
| 1015 |
+
def smiles2mp(smiles):
    """Estimate the melting point of a molecule from its SMILES string.

    Thin best-effort wrapper around ``compute_phys_properties``.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule.

    Returns
    -------
    The value returned by ``compute_phys_properties(smiles)``, or ``None``
    when that computation raises.
    """
    try:
        return compute_phys_properties(smiles)
    except Exception:
        # any failure of the estimation means "no value"; unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit propagate
        return None
|
| 1021 |
+
|
| 1022 |
+
def mol2mp(cas, name, smiles):
    """Find a melting point for a chemical, using whichever identifier works.

    The CAS number is looked up first, then the name (both through
    ``string2mp``); if neither yields a value the melting point is estimated
    from the SMILES string with ``smiles2mp``.

    Parameters
    ----------
    cas, name, smiles : str or None
        Identifiers of the chemical; falsy ones are skipped.

    Returns
    -------
    tuple
        ``(mp, mp_origin)``.  ``mp_origin`` is the tag reported by
        ``string2mp`` or ``'calc'`` for an estimated value.  Both are
        ``None`` when no source produced a value.
    """
    mp, mp_origin = None, None

    # database / web lookups, most specific identifier first
    for identifier in (cas, name):
        if mp is not None or not identifier:
            continue
        try:
            mp, mp_origin = string2mp(identifier)
        except Exception:
            # a failed lookup must not prevent the remaining fallbacks
            mp, mp_origin = None, None

    # last resort: estimate from the structure
    if mp is None and smiles:
        try:
            mp = smiles2mp(smiles)
        except Exception:
            mp = None
        # only tag the origin when a value was actually obtained
        mp_origin = 'calc' if mp is not None else None

    return mp, mp_origin
|
| 1041 |
+
|
| 1042 |
+
def getLogP(cas,mol):
    """Return the octanol-water partition coefficient (log P) of a chemical.

    The OPERA prediction stored in ``df_pred`` is used when the CAS number is
    listed there with a usable value; otherwise log P is computed from the
    molecule with ``Crippen.MolLogP``.

    Parameters
    ----------
    cas : str or None
        CAS registry number used for the table lookup; skipped when falsy.
    mol : object or None
        Molecule passed to ``Crippen.MolLogP``; skipped when falsy.

    Returns
    -------
    tuple
        ``(LogP, LogP_origin)`` with origin ``'pred'`` (table) or ``'calc'``
        (Crippen), or ``(None, None)`` when neither source applies.
    """
    LogP, LogP_origin = None, None
    if cas:
        matches = df_pred.loc[df_pred['CASRN'] == cas, 'OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED']
        if len(matches):
            # take the first row explicitly: float() on a Series is deprecated
            # and fails when the CAS number is listed more than once
            value = float(matches.iloc[0])
            # an empty (NaN) table cell counts as "no prediction"
            if not np.isnan(value):
                LogP, LogP_origin = value, 'pred'
    if LogP is None and mol:
        LogP = Crippen.MolLogP(mol)
        LogP_origin = 'calc'
    return LogP, LogP_origin
|
| 1053 |
|
Comptox_pred_data.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a77e80c823abf37881e942649f9b4afa21b2b70928629cfbf6407efa3300c6c
|
| 3 |
+
size 23755078
|
PHYSPROP_MP_data.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85bcaed57718ed6ef0e6454a6351ef16f64f4d255b16f595ea5e4d8ef240842e
|
| 3 |
+
size 162360
|
exposure_module/exposure.py
CHANGED
|
@@ -21,7 +21,7 @@ def exp_post():
|
|
| 21 |
chemName = request.form["chemName"]
|
| 22 |
IDtype = request.form["IDtype"]
|
| 23 |
|
| 24 |
-
iupac, cas, smiles, MW, LogP, rho, molImage, error = ResolveChemical(chemName, IDtype)
|
| 25 |
|
| 26 |
if error > 0:
|
| 27 |
return render_template('chemError.html')
|
|
@@ -33,7 +33,9 @@ def exp_post():
|
|
| 33 |
rho = SigFigs(rho, 4)
|
| 34 |
else:
|
| 35 |
rho = 'Not found'
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
|
| 38 |
amount = float(request.form["amount"])
|
| 39 |
mass = float(request.form["mass"])
|
|
@@ -76,6 +78,8 @@ def exp_post():
|
|
| 76 |
rho = SigFigs(rho, 4)
|
| 77 |
else:
|
| 78 |
rho = 'Not found'
|
|
|
|
|
|
|
| 79 |
|
| 80 |
# Generate the rate plot using matplotlib
|
| 81 |
tarray = np.arange(1., 31., 1.)
|
|
@@ -84,4 +88,5 @@ def exp_post():
|
|
| 84 |
|
| 85 |
return render_template('exposure_report.html', polymers=polymers, pIndex=pIndex, release=release,
|
| 86 |
assume=assume, area=area, vol=vol, amount=amount, diff=diff, time=time, exposure=exposure, TTC=TTC,
|
| 87 |
-
MOS=MOS, chemName=chemName, image=pngImageB64String, MW=MW, LogP=LogP, rho=rho, iupac=iupac, cas=cas, smiles=smiles, molImage=molImage)
|
|
|
|
|
|
| 21 |
chemName = request.form["chemName"]
|
| 22 |
IDtype = request.form["IDtype"]
|
| 23 |
|
| 24 |
+
iupac, cas, smiles, MW, LogP, rho, mp, molImage, error = ResolveChemical(chemName, IDtype)
|
| 25 |
|
| 26 |
if error > 0:
|
| 27 |
return render_template('chemError.html')
|
|
|
|
| 33 |
rho = SigFigs(rho, 4)
|
| 34 |
else:
|
| 35 |
rho = 'Not found'
|
| 36 |
+
if mp is None:
|
| 37 |
+
mp = 'Not found'
|
| 38 |
+
return render_template('MwError.html', chemName=chemName, MW=MW, LogP=LogP, rho=rho, mp=mp, iupac=iupac, cas=cas, smiles=smiles, molImage=molImage)
|
| 39 |
|
| 40 |
amount = float(request.form["amount"])
|
| 41 |
mass = float(request.form["mass"])
|
|
|
|
| 78 |
rho = SigFigs(rho, 4)
|
| 79 |
else:
|
| 80 |
rho = 'Not found'
|
| 81 |
+
if mp is None:
|
| 82 |
+
mp = 'Not found'
|
| 83 |
|
| 84 |
# Generate the rate plot using matplotlib
|
| 85 |
tarray = np.arange(1., 31., 1.)
|
|
|
|
| 88 |
|
| 89 |
return render_template('exposure_report.html', polymers=polymers, pIndex=pIndex, release=release,
|
| 90 |
assume=assume, area=area, vol=vol, amount=amount, diff=diff, time=time, exposure=exposure, TTC=TTC,
|
| 91 |
+
MOS=MOS, chemName=chemName, image=pngImageB64String, MW=MW, LogP=LogP, rho=rho, mp=mp, iupac=iupac, cas=cas, smiles=smiles, molImage=molImage)
|
| 92 |
+
|
exposure_module/templates/MwError.html
CHANGED
|
@@ -46,8 +46,9 @@
|
|
| 46 |
IUPAC Name :: {{iupac}} <br> <br>
|
| 47 |
CAS :: {{cas}} <br> <br>
|
| 48 |
Molecular weight :: {{MW}} <br> <br>
|
| 49 |
-
LogKow :: {{LogP}} <br> <br>
|
| 50 |
Density :: {{rho}} <br> <br>
|
|
|
|
| 51 |
SMILES :: {{smiles}}
|
| 52 |
</div>
|
| 53 |
<div class="column">
|
|
|
|
| 46 |
IUPAC Name :: {{iupac}} <br> <br>
|
| 47 |
CAS :: {{cas}} <br> <br>
|
| 48 |
Molecular weight :: {{MW}} <br> <br>
|
| 49 |
+
LogKow :: {{'%0.2f'%LogP|float}} <br> <br>
|
| 50 |
Density :: {{rho}} <br> <br>
|
| 51 |
+
Melting point :: {{'%0.1f'%mp|float}} <br> <br>
|
| 52 |
SMILES :: {{smiles}}
|
| 53 |
</div>
|
| 54 |
<div class="column">
|
exposure_module/templates/exposure_report.html
CHANGED
|
@@ -54,8 +54,9 @@
|
|
| 54 |
IUPAC Name :: {{iupac}} <br> <br>
|
| 55 |
CAS :: {{cas}} <br> <br>
|
| 56 |
Molecular weight :: {{MW}} <br> <br>
|
| 57 |
-
LogKow :: {{LogP}} <br> <br>
|
| 58 |
Density :: {{rho}} <br> <br>
|
|
|
|
| 59 |
SMILES :: {{smiles}}
|
| 60 |
</div>
|
| 61 |
<div class="column">
|
|
|
|
| 54 |
IUPAC Name :: {{iupac}} <br> <br>
|
| 55 |
CAS :: {{cas}} <br> <br>
|
| 56 |
Molecular weight :: {{MW}} <br> <br>
|
| 57 |
+
LogKow :: {{'%0.2f'%LogP|float}} <br> <br>
|
| 58 |
Density :: {{rho}} <br> <br>
|
| 59 |
+
Melting point :: {{'%0.1f'%mp|float}} <br> <br>
|
| 60 |
SMILES :: {{smiles}}
|
| 61 |
</div>
|
| 62 |
<div class="column">
|
functions.py
CHANGED
|
@@ -8,7 +8,7 @@ import io
|
|
| 8 |
import base64
|
| 9 |
|
| 10 |
def SigFigs(number, n):
    """Round ``number`` to ``n`` significant figures.

    Parameters
    ----------
    number : int or float
        Value to round.
    n : int
        Number of significant figures to keep.

    Returns
    -------
    The rounded value.  Zero, NaN and infinities are returned unchanged.
    """
    # log10 is undefined for 0 and floor() cannot convert NaN/inf to an int
    if number == 0 or not math.isfinite(number):
        return number
    return round(number, n - int(math.floor(math.log10(math.fabs(number)))) - 1)
|
| 13 |
|
| 14 |
def Piringer(Mw, Ap):
|
|
|
|
| 8 |
import base64
|
| 9 |
|
| 10 |
def SigFigs(number, n):
    """Round ``number`` to ``n`` significant figures.

    Parameters
    ----------
    number : int or float
        Value to round.
    n : int
        Number of significant figures to keep.

    Returns
    -------
    The rounded value.  Zero, NaN and infinities are returned unchanged.
    """
    # log10 is undefined for 0 and floor() cannot convert NaN/inf to an int
    if number == 0 or not math.isfinite(number):
        return number
    return round(number, n - int(math.floor(math.log10(math.fabs(number)))) - 1)
|
| 13 |
|
| 14 |
def Piringer(Mw, Ap):
|