Robert Elder committed on
Commit
6a4a1e6
·
1 Parent(s): ddaa121

kludge for several specific ceramics

Browse files
ChemID.py CHANGED
@@ -1120,9 +1120,10 @@ def smiles2mp(smiles):
1120
 
1121
  def smiles2mp_opera(smiles):
1122
  descs = padelpy.from_smiles(smiles, descriptortypes='mp/descriptors.xml')
1123
- dfd = pd.DataFrame(descs,index=[0])
1124
- dfd = dfd.replace('',0)
1125
- dfd = pd.DataFrame(dfd, dtype=float)
 
1126
  X = np.array(dfd[my_opera_data_mp.desc_list])
1127
  X_scale = my_opera_data_mp.scaler_X.transform(X)
1128
  y_pred = my_opera_data_mp.scaler_y.inverse_transform(my_opera_data_mp.knn_all.predict(X_scale))
@@ -1130,7 +1131,13 @@ def smiles2mp_opera(smiles):
1130
 
1131
  def mol2mp(cas, name, smiles):
1132
  mp, mp_origin = None, None
1133
- if cas:
 
 
 
 
 
 
1134
  try:
1135
  mp, mp_origin = string2mp(cas)
1136
  except:
@@ -1146,12 +1153,6 @@ def mol2mp(cas, name, smiles):
1146
  mp_origin = 'joback-reid/calc'
1147
  except:
1148
  mp, mp_origin = None, None
1149
- if pd.isna(mp) and smiles:
1150
- try:
1151
- mp = smiles2mp_opera(smiles)
1152
- mp_origin = 'opera/calc'
1153
- except:
1154
- mp, mp_origin = None, None
1155
  return mp, mp_origin
1156
 
1157
  def getLogP(cas,mol):
 
1120
 
1121
  def smiles2mp_opera(smiles):
1122
  descs = padelpy.from_smiles(smiles, descriptortypes='mp/descriptors.xml')
1123
+ #dfd = pd.DataFrame(descs,index=[0])
1124
+ #dfd = dfd.replace('',0).infer_objects(copy=False)
1125
+ #dfd = pd.DataFrame(dfd, dtype=float)
1126
+ dfd = pd.DataFrame(descs, index=[0]).apply(pd.to_numeric, errors="coerce").fillna(0.0).astype(float)
1127
  X = np.array(dfd[my_opera_data_mp.desc_list])
1128
  X_scale = my_opera_data_mp.scaler_X.transform(X)
1129
  y_pred = my_opera_data_mp.scaler_y.inverse_transform(my_opera_data_mp.knn_all.predict(X_scale))
 
1131
 
1132
  def mol2mp(cas, name, smiles):
1133
  mp, mp_origin = None, None
1134
+ if pd.isna(mp) and smiles:
1135
+ try:
1136
+ mp = smiles2mp_opera(smiles)
1137
+ mp_origin = 'opera/calc'
1138
+ except:
1139
+ mp, mp_origin = None, None
1140
+ if pd.isna(mp) and cas:
1141
  try:
1142
  mp, mp_origin = string2mp(cas)
1143
  except:
 
1153
  mp_origin = 'joback-reid/calc'
1154
  except:
1155
  mp, mp_origin = None, None
 
 
 
 
 
 
1156
  return mp, mp_origin
1157
 
1158
  def getLogP(cas,mol):
data/ceramics_list.txt CHANGED
@@ -93,6 +93,7 @@ Ba,O,Y,Zr
93
  Ba,O,Zr
94
  Be,C,O,Si
95
  Be,O
 
96
  C,Ce,N,O,Si
97
  C,Ce,N,O,Si,Ti
98
  C,Co,W
 
93
  Ba,O,Zr
94
  Be,C,O,Si
95
  Be,O
96
+ Bi,O,V
97
  C,Ce,N,O,Si
98
  C,Ce,N,O,Si,Ti
99
  C,Co,W
data/custom_chemicals_db.tsv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dc9cf2c56f141aad1c46cc168d1a133d288a7fb51ee3290a62041fd117897f3
3
- size 13049
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a9183ad38e8e8c7e82cdf693be2a3b92f2fec1e29a9a55e910c4d6fcf617732
3
+ size 14568