robert-m-elder committed on
Commit
861a5a5
·
1 Parent(s): 521c75f

coded other IDtypes

Browse files
Files changed (1) hide show
  1. ChemID.py +95 -0
ChemID.py CHANGED
@@ -18,6 +18,10 @@ def ResolveChemical(chemName, IDtype):
18
 
19
  if IDtype == 'CAS':
20
  cas = chemName
 
 
 
 
21
  smiles = None
22
  name = None
23
  Mw = None
@@ -28,12 +32,62 @@ def ResolveChemical(chemName, IDtype):
28
 
29
  if smiles:
30
  name = cas2name(cas)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  mol = Chem.MolFromSmiles(smiles)
32
  Mw = Descriptors.MolWt(mol)
33
  im = ImageFromSmiles(smiles)
34
  im64 = Imageto64(im)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  else:
36
  error = 1
 
 
 
 
 
 
 
 
37
 
38
  return (name, cas, smiles, Mw, im64, error)
39
 
@@ -69,6 +123,47 @@ def Imageto64(img):
69
 
70
  return pngImageB64String
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  # function to convert CAS to SMILES
73
  def cas2smiles(cas):
74
  smiles = None
 
18
 
19
  if IDtype == 'CAS':
20
  cas = chemName
21
+ if not is_cas(cas):
22
+ name = 'INVALID CAS'
23
+ error = 3
24
+ return (name, cas, None, None, None, error)
25
  smiles = None
26
  name = None
27
  Mw = None
 
32
 
33
  if smiles:
34
  name = cas2name(cas)
35
+ try:
36
+ mol = Chem.MolFromSmiles(smiles)
37
+ Mw = Descriptors.MolWt(mol)
38
+ im = ImageFromSmiles(smiles)
39
+ im64 = Imageto64(im)
40
+ except:
41
+ error = 2
42
+ else:
43
+ error = 1
44
+ elif IDtype == 'SMILES':
45
+ smiles = chemName
46
+ cas = None
47
+ name = None
48
+ Mw = None
49
+ im64 = None
50
+ error = 0
51
+
52
+ name = smiles2name(smiles)
53
+ cas = name2cas(name)
54
+
55
+ try:
56
  mol = Chem.MolFromSmiles(smiles)
57
  Mw = Descriptors.MolWt(mol)
58
  im = ImageFromSmiles(smiles)
59
  im64 = Imageto64(im)
60
+ except:
61
+ error = 2
62
+ elif IDtype == 'common':
63
+ name = chemName
64
+ smiles = None
65
+ cas = None
66
+ Mw = None
67
+ im64 = None
68
+ error = 0
69
+
70
+ cas = name2cas(name)
71
+ smiles = cas2smiles(cas)
72
+
73
+ if smiles:
74
+ try:
75
+ mol = Chem.MolFromSmiles(smiles)
76
+ Mw = Descriptors.MolWt(mol)
77
+ im = ImageFromSmiles(smiles)
78
+ im64 = Imageto64(im)
79
+ except:
80
+ error = 2
81
  else:
82
  error = 1
83
+ else:
84
+ ## should never be here
85
+ name = None
86
+ smiles = None
87
+ cas = None
88
+ Mw = None
89
+ im64 = None
90
+ error = 4
91
 
92
  return (name, cas, smiles, Mw, im64, error)
93
 
 
123
 
124
  return pngImageB64String
125
 
126
# function to convert SMILES to name
def smiles2name(smiles):
    """Resolve a SMILES string to a chemical name.

    Lookups are tried in order and the first non-empty result wins:

    1. ``chemicals.search_chemical`` (common name, then IUPAC name)
    2. PubChem compound search (IUPAC name of the first hit)
    3. ``cirpy.resolve`` to ``'iupac_name'``
    4. PubChem substance search (most frequent synonym across all hits)

    Every lookup is best-effort: a failure in one step falls through to
    the next one instead of propagating to the caller.

    Args:
        smiles: SMILES string to look up.

    Returns:
        The resolved name, or None when the input is empty or no lookup
        produced a name.
    """
    # nothing to look up; avoid four pointless remote/database queries
    if not smiles:
        return None

    name = None
    # first try chemicals package
    try:
        cm = chemicals.search_chemical(smiles)
        name = cm.common_name or cm.iupac_name or None
    except Exception:
        name = None
    # then try pubchem for compounds
    if not name:
        try:
            compounds = pcp.get_compounds(smiles, namespace='smiles')
            name = compounds[0].iupac_name
        except Exception:
            name = None
    # next try cirpy
    if not name:
        try:
            name = cirpy.resolve(smiles, 'iupac_name')
        except Exception:
            name = None
        # cirpy may hand back several candidates; keep the first one
        if isinstance(name, list):
            name = name[0] if name else None
    # finally try it as a pubchem substance
    if not name:
        try:
            # NOTE(review): confirm PubChem accepts the 'smiles' namespace for
            # substance searches; if it does not, this step always falls through.
            substances = pcp.get_substances(smiles, namespace='smiles')
            # sometimes there are multiple substances, and multiple synonyms per substance.
            # The previous code filtered on an undefined ``cas`` variable and iterated
            # ``iupac_name``; no CAS number is known here, so all synonyms are kept.
            allsyns = [syn for sub in substances for syn in (sub.synonyms or [])]
            # choose the most common synonym
            fd = nltk.FreqDist(allsyns)
            name = fd.most_common(1)[0][0]
        except Exception:
            name = None
    return name
166
+
167
  # function to convert CAS to SMILES
168
  def cas2smiles(cas):
169
  smiles = None