Robert Elder committed on
Commit
d33329a
·
1 Parent(s): 53e90d9

quantity module + qrf updates

Browse files
.gitignore CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  __pycache__/*
2
  .idea/*
3
  .DS_Store
 
1
+ polymer_names.tsv.bkup
2
+ quantity_module/data/copy-data.sh
3
+ qrf/copy-data.sh
4
  __pycache__/*
5
  .idea/*
6
  .DS_Store
ChemID.py CHANGED
@@ -15,10 +15,10 @@ import json
15
 
16
  ORGANIC_ATOM_SET = {5, 6, 7, 8, 9, 15, 16, 17, 35, 53}
17
  METAL_ATOM_SET = set([3,4,11,12,13] + list(range(19,31+1)) + list(range(37,50+1)) + list(range(55,84+1)) + list(range(87,114+1)) + [116])
18
- with open('ceramics_list.txt', 'r') as fp:
19
  lines = fp.readlines()
20
  CERAMICS_SET = {line.strip() for line in lines}
21
- with open('salt_list.txt', 'r') as fp:
22
  lines = fp.readlines()
23
  SALT_SET = {line.strip() for line in lines}
24
 
@@ -53,13 +53,13 @@ from rdkit.Chem import Descriptors,Draw,Crippen
53
 
54
  ## add custom chemical definitions (i.e., to correct confusion between methane and carbon)
55
  db = chemicals.identifiers.get_pubchem_db()
56
- db.load('custom_chemicals_db.tsv')
57
  ## load experimental and predicted properties
58
  #dfmp_expt = pd.read_excel('PHYSPROP_MP_data.xlsx')
59
- dfmp_expt = pd.read_csv('PHYSPROP_MP_data.tsv', sep='\t')
60
  #dfmp_pred = pd.read_excel('DSSTOX_MP_pred_data.xlsx')
61
  #df_pred = pd.read_excel('Comptox_pred_data.xlsx')
62
- df_pred = pd.read_csv('Comptox_pred_data.tsv', sep='\t')
63
 
64
  ## OPERA melting point model
65
  import dill as pickle
@@ -249,7 +249,7 @@ def ImageFromSmiles(smiles):
249
  if type(smiles) is str:
250
  try:
251
  if smiles == 'C1=CC=C2C(=C1)C3=NC4=NC(=NC5=C6C=CC=CC6=C([N-]5)N=C7C8=CC=CC=C8C(=N7)N=C2[N-]3)C9=CC=CC=C94.[Mn+2]':
252
- mol = next(Chem.SDMolSupplier('MnPC.sdf', removeHs=False))
253
  image = Draw.MolToImage(mol, size=(350, 350))
254
  else:
255
  image = Draw.MolToImage(Chem.MolFromSmiles(smiles), size=(350, 350))
 
15
 
16
  ORGANIC_ATOM_SET = {5, 6, 7, 8, 9, 15, 16, 17, 35, 53}
17
  METAL_ATOM_SET = set([3,4,11,12,13] + list(range(19,31+1)) + list(range(37,50+1)) + list(range(55,84+1)) + list(range(87,114+1)) + [116])
18
+ with open('data/ceramics_list.txt', 'r') as fp:
19
  lines = fp.readlines()
20
  CERAMICS_SET = {line.strip() for line in lines}
21
+ with open('data/salt_list.txt', 'r') as fp:
22
  lines = fp.readlines()
23
  SALT_SET = {line.strip() for line in lines}
24
 
 
53
 
54
  ## add custom chemical definitions (i.e., to correct confusion between methane and carbon)
55
  db = chemicals.identifiers.get_pubchem_db()
56
+ db.load('data/custom_chemicals_db.tsv')
57
  ## load experimental and predicted properties
58
  #dfmp_expt = pd.read_excel('PHYSPROP_MP_data.xlsx')
59
+ dfmp_expt = pd.read_csv('data/PHYSPROP_MP_data.tsv', sep='\t')
60
  #dfmp_pred = pd.read_excel('DSSTOX_MP_pred_data.xlsx')
61
  #df_pred = pd.read_excel('Comptox_pred_data.xlsx')
62
+ df_pred = pd.read_csv('data/Comptox_pred_data.tsv', sep='\t')
63
 
64
  ## OPERA melting point model
65
  import dill as pickle
 
249
  if type(smiles) is str:
250
  try:
251
  if smiles == 'C1=CC=C2C(=C1)C3=NC4=NC(=NC5=C6C=CC=CC6=C([N-]5)N=C7C8=CC=CC=C8C(=N7)N=C2[N-]3)C9=CC=CC=C94.[Mn+2]':
252
+ mol = next(Chem.SDMolSupplier('data/MnPC.sdf', removeHs=False))
253
  image = Draw.MolToImage(mol, size=(350, 350))
254
  else:
255
  image = Draw.MolToImage(Chem.MolFromSmiles(smiles), size=(350, 350))
color3_module/colors.py CHANGED
@@ -4,7 +4,7 @@ import numpy as np
4
  import pandas as pd
5
  from functions import SigFigs, Piringer, WilkeChang, SheetRelease, SheetRates, RatePlot
6
  from functions import Piecewise, PowerLaw
7
- from qrf_functions import QRF_Apply, QRF_Ceramic
8
  from . import blueprint
9
  from polymers import Polymers, Polymers3
10
  from ChemID import *
 
4
  import pandas as pd
5
  from functions import SigFigs, Piringer, WilkeChang, SheetRelease, SheetRates, RatePlot
6
  from functions import Piecewise, PowerLaw
7
+ from qrf.functions import QRF_Apply, QRF_Ceramic
8
  from . import blueprint
9
  from polymers import Polymers, Polymers3
10
  from ChemID import *
Comptox_pred_data.tsv → data/Comptox_pred_data.tsv RENAMED
File without changes
MnPC.sdf → data/MnPC.sdf RENAMED
File without changes
PHYSPROP_MP_data.tsv → data/PHYSPROP_MP_data.tsv RENAMED
File without changes
ceramics_list.txt → data/ceramics_list.txt RENAMED
File without changes
custom_chemicals_db.tsv → data/custom_chemicals_db.tsv RENAMED
File without changes
salt_list.txt → data/salt_list.txt RENAMED
File without changes
exposure3_module/exposure.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  from flask import render_template, request
5
  from functions import SigFigs, Piringer, WilkeChang, SheetRelease, SheetRates, RatePlot
6
  from functions import Piecewise, PowerLaw
7
- from qrf_functions import QRF_Apply, QRF_Ceramic
8
  from . import blueprint
9
  from polymers import Polymers, Polymers3
10
  from ChemID import *
 
4
  from flask import render_template, request
5
  from functions import SigFigs, Piringer, WilkeChang, SheetRelease, SheetRates, RatePlot
6
  from functions import Piecewise, PowerLaw
7
+ from qrf.functions import QRF_Apply, QRF_Ceramic
8
  from . import blueprint
9
  from polymers import Polymers, Polymers3
10
  from ChemID import *
qrf/db-D-interp-allT-semiclean.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c5c4399929d0bb5f72e3fee3896058e7accfbf68d63df155e409a4c2b6b87a2
3
+ size 13087796
qrf_functions.py → qrf/functions.py RENAMED
@@ -9,11 +9,23 @@ import mordred.descriptors
9
  import rdkit
10
  from rdkit import Chem
11
 
 
 
 
 
 
 
 
 
12
  def QRF_Ceramic(density, polytg, quantiles=[0.03,0.5,0.97], T=37, worstcase='hi'):
13
- with open(f'qrf_model_bundle_{int(T)}.pkl','rb') as f:
 
14
  reg, imp, scaler_X, sub_desc_list = pickle.load(f)
15
- df_X = pd.read_excel('qrf_x.xlsx')
16
- df_y = pd.read_excel('qrf_y.xlsx')
 
 
 
17
  X_all = imp.transform(df_X)
18
  X_all_scale = scaler_X.transform(X_all)
19
  ## use "worst-case" solute values
@@ -39,7 +51,8 @@ def QRF_Ceramic(density, polytg, quantiles=[0.03,0.5,0.97], T=37, worstcase='hi'
39
 
40
 
41
  def QRF_Apply(density, polytg, smiles, quantiles=[0.03,0.5,0.97], T=37):
42
- with open(f'qrf_model_bundle_{int(T)}.pkl','rb') as f:
 
43
  reg, imp, scaler_X, sub_desc_list = pickle.load(f)
44
  # get list of descriptors to calculate
45
  solute_desc_list = sub_desc_list.copy()
@@ -67,7 +80,9 @@ def QRF_Apply(density, polytg, smiles, quantiles=[0.03,0.5,0.97], T=37):
67
  # return 1D array regardless of quantiles setting
68
  D_pred = D_pred[0]
69
  ## domain extrapolation check
70
- df_X = pd.read_excel('qrf_x.xlsx')
 
 
71
  X_all = imp.transform(df_X)
72
  X_all_scale = scaler_X.transform(X_all)
73
  dij = QRF_DomainExtrap(reg, X_all_scale, descs_scale)
 
9
  import rdkit
10
  from rdkit import Chem
11
 
12
+ QRF_T_list = np.array([25,30,35,37,40,45,50,55,60,65,70,75])
13
+ QRF_T_cut = 2.5
14
+ df_QRF = pd.read_excel('qrf/db-D-interp-allT-semiclean.xlsx')
15
+ df_desc = pd.read_excel('qrf/mordred-descriptors.xlsx')
16
+ calc = mordred.Calculator(mordred.descriptors)
17
+ colnames_mordred = [str(d) for d in calc.descriptors]
18
+ df_QRF = pd.merge(df_QRF, df_desc[['Solute_InChIKey',*colnames_mordred]], how='left', on='Solute_InChIKey', suffixes=('', '_dupe'))
19
+
20
  def QRF_Ceramic(density, polytg, quantiles=[0.03,0.5,0.97], T=37, worstcase='hi'):
21
+ nearest_T = QRF_T_list[np.abs(T-QRF_T_list).argmin()]
22
+ with open(f'qrf/qrf_model_bundle_{int(nearest_T)}.pkl','rb') as f:
23
  reg, imp, scaler_X, sub_desc_list = pickle.load(f)
24
+ #df_X = pd.read_excel('qrf/qrf_x.xlsx')
25
+ #df_y = pd.read_excel('qrf/qrf_y.xlsx')
26
+ mask_T = (df_QRF['T']>nearest_T-QRF_T_cut) & (df_QRF['T']<nearest_T+QRF_T_cut)
27
+ df_X = df_QRF.loc[mask_T, sub_desc_list]
28
+ df_y = df_QRF.loc[mask_T, 'LogD']
29
  X_all = imp.transform(df_X)
30
  X_all_scale = scaler_X.transform(X_all)
31
  ## use "worst-case" solute values
 
51
 
52
 
53
  def QRF_Apply(density, polytg, smiles, quantiles=[0.03,0.5,0.97], T=37):
54
+ nearest_T = QRF_T_list[np.abs(T-QRF_T_list).argmin()]
55
+ with open(f'qrf/qrf_model_bundle_{int(nearest_T)}.pkl','rb') as f:
56
  reg, imp, scaler_X, sub_desc_list = pickle.load(f)
57
  # get list of descriptors to calculate
58
  solute_desc_list = sub_desc_list.copy()
 
80
  # return 1D array regardless of quantiles setting
81
  D_pred = D_pred[0]
82
  ## domain extrapolation check
83
+ #df_X = pd.read_excel('qrf/qrf_x.xlsx')
84
+ mask_T = (df_QRF['T']>nearest_T-QRF_T_cut) & (df_QRF['T']<nearest_T+QRF_T_cut)
85
+ df_X = df_QRF.loc[mask_T, sub_desc_list]
86
  X_all = imp.transform(df_X)
87
  X_all_scale = scaler_X.transform(X_all)
88
  dij = QRF_DomainExtrap(reg, X_all_scale, descs_scale)
qrf/mordred-descriptors.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:121f72b88fa46a0f16af6a1244af761ee6b9d679af7ab2e32d545538f8b5c5b5
3
+ size 10251595
qrf/qrf_model_bundle_25.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61e7f3f4acd41d0548897c8d00cd41ba9129b4ab39e4d6d03bb9924a56bae417
3
+ size 8024827
qrf/qrf_model_bundle_30.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de06c8cfff7b657ed755fd6b3dab6d2a09c742b3a1134d5bec4dc224135bba90
3
+ size 6488642
qrf/qrf_model_bundle_35.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebb21d88ccb961223607c258be1b14f45fa67c0dd6fb6a3de41d2ade394b092e
3
+ size 13733182
qrf_model_bundle_37.pkl → qrf/qrf_model_bundle_37.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5c2b2899461c3597863a6271f0bb37658f053daf1d0cba509b98b445fb45698
3
- size 15583438
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6df089e005123321d21b33506ccf0fc4df4dafb4d953e1bd5931b92bd2445d7
3
+ size 14843969
qrf/qrf_model_bundle_40.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e99f5b40d4c460c174e90fa44f82d10f1a54f024d961499fb08c8d18d3835e5
3
+ size 2773504
qrf/qrf_model_bundle_45.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db0082b872c4ce725e9ebfb54ea2dcd5fd1621e4de13a2020d4842ba18f5753
3
+ size 7288402
qrf/qrf_model_bundle_50.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a38bef3650a1adb76ee07c4e9fa09058e16ffe3fc64ff18b31511f94d78a0d29
3
+ size 7743347
qrf/qrf_model_bundle_55.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07aaf56f2d7dbeac952097146544c8ffef0a48f05a67b5b9961aca2a5c1c1127
3
+ size 6981710
qrf/qrf_model_bundle_60.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:022eba72bc51d42134229f6a9757f07ddafb763a9ae3176dc4a3c08760e3494b
3
+ size 2651150
qrf/qrf_model_bundle_65.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c04563ec96989bd0ba4f44d09070cc543546326ca4b9aba8ef6000b8e27894d
3
+ size 15893003
qrf/qrf_model_bundle_70.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4e7d02658d5b048d5a5d8d1de63e1bd892b561f0d0dbba95cf35a9280f48c7
3
+ size 6673668
qrf/qrf_model_bundle_75.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36653c20dcb9c7cdcb72f32bd1121cc83d98a174e3969ba0c6dec719d93cac67
3
+ size 2548133
qrf/qrf_parameters_allT.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8d83fd6f4f5efd9352cb33e9a4c48d05d94c4aee1b64b2a3134d847856ae52
3
+ size 5755
qrf/qrf_train.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import numpy as np
3
+ import pandas as pd
4
+ import sklearn
5
+ import sklearn.impute
6
+ import sklearn.metrics
7
+ from quantile_forest import RandomForestQuantileRegressor
8
+ import mordred
9
+ from mordred import Calculator, descriptors
10
+
11
+ #T_target = 37
12
+ #T_cut = 2.5
13
+ qhiv, qlov = 0.97, 0.03
14
+ state = 12345
15
+
16
+ QRF_T_list = np.array([25,30,35,37,40,45,50,55,60,65,70,75])
17
+ QRF_T_cut = 2.5
18
+ df_QRF = pd.read_excel('db-D-interp-allT-semiclean.xlsx')
19
+ df_desc = pd.read_excel('mordred-descriptors.xlsx')
20
+ calc = mordred.Calculator(mordred.descriptors)
21
+ colnames_mordred = [str(d) for d in calc.descriptors]
22
+ df_QRF = pd.merge(df_QRF, df_desc[['Solute_InChIKey',*colnames_mordred]], how='left', on='Solute_InChIKey', suffixes=('', '_dupe'))
23
+
24
+ df_params = pd.read_excel('qrf_parameters_allT.xlsx')
25
+
26
+ for T_target in QRF_T_list:
27
+ print(T_target)
28
+ if 1:
29
+ if T_target == 37:
30
+ sub_desc_list = ['MW', 'Polymer_Tg', 'Polymer_Density', 'VR2_A', 'ATS0Z', 'AATS5d', 'BCUTv-1h', 'BCUTse-1l', 'Xch-7dv', 'Mp', 'Mi', 'SaasC', 'ETA_epsilon_5', 'fragCpx', 'JGI5', 'JGI8']
31
+ params = {'bootstrap': True, 'max_depth': 7, 'max_features': 0.4, 'max_samples': 1.0, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 1000} # best from -18-2.py w fout<0.040 (and 0.045)
32
+ elif T_target == 50:
33
+ sub_desc_list = ['MW', 'Polymer_Tg', 'Polymer_Density', 'ATS0m', 'ATSC2dv', 'ATSC6dv', 'ATSC0m', 'ATSC6i', 'BCUTse-1l', 'BCUTp-1h', 'Mp', 'Mi', 'SaasC']
34
+ params = {'bootstrap': True, 'max_depth': 6, 'max_features': 0.4, 'max_samples': 1.0, 'min_samples_leaf': 6, 'min_samples_split': 2, 'n_estimators': 1000} # best from -19.py and -19-2.py with fout<0.040
35
+ else:
36
+ mask_T = df_params['T']==T_target
37
+ sub_desc_list = df_params.loc[mask_T, 'sub_desc_list'].iloc[0].split('|')
38
+ params = df_params.loc[mask_T, ['bootstrap', 'max_depth', 'max_features', 'max_samples', 'min_samples_leaf', 'min_samples_split', 'n_estimators']].iloc[0].to_dict()
39
+ params['max_samples'] = float(params['max_samples'])
40
+ if 0:
41
+ sub_desc_list = ['MW', 'Polymer_Tg', 'Polymer_Density', 'VR2_A', 'ATS0Z', 'AATS5d', 'BCUTv-1h', 'BCUTse-1l', 'Xch-7dv', 'Mp', 'Mi', 'SaasC', 'ETA_epsilon_5', 'fragCpx', 'JGI5', 'JGI8']
42
+ params = {'bootstrap': True, 'max_depth': 7, 'max_features': 0.4, 'max_samples': 1.0, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 1000} # best from -18-2.py w fout<0.040 (and 0.045)
43
+
44
+ ## read data
45
+ #df_X = pd.read_excel('qrf_x.xlsx')
46
+ #df_y = pd.read_excel('qrf_y.xlsx')
47
+ mask_T = (df_QRF['T']>T_target-QRF_T_cut) & (df_QRF['T']<T_target+QRF_T_cut)
48
+ df_X = df_QRF.loc[mask_T, sub_desc_list]
49
+ df_y = df_QRF.loc[mask_T, 'LogD']
50
+ #sub_desc_list = list(df_X.columns)
51
+
52
+ ## fit transforms
53
+ imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy='mean')
54
+ imp.fit(df_X)
55
+ X_all = imp.transform(df_X)
56
+ y_all = np.array(df_y)
57
+ scaler_X = sklearn.preprocessing.StandardScaler().fit(X_all)
58
+ X_all_scale = scaler_X.transform(X_all)
59
+
60
+ reg_all = RandomForestQuantileRegressor(random_state=state, n_jobs=-1, **params)
61
+ reg_all.fit(X_all_scale,y_all)
62
+
63
+ with open(f'qrf_model_bundle_{T_target}.pkl','wb') as f:
64
+ pickle.dump([reg_all,imp,scaler_X,sub_desc_list],f)
65
+
66
+ print(sub_desc_list)
67
+ print(params)
68
+ print(mask_T.sum())
69
+ y_pred = reg_all.predict(X_all_scale)
70
+ print(y_pred.mean(),y_pred.std())
71
+ print()
72
+
qrf_x.xlsx → qrf/qrf_x.xlsx RENAMED
File without changes
qrf_y.xlsx → qrf/qrf_y.xlsx RENAMED
File without changes
qrf_train.py DELETED
@@ -1,36 +0,0 @@
1
- import pickle
2
- import numpy as np
3
- import pandas as pd
4
- import sklearn
5
- import sklearn.impute
6
- from quantile_forest import RandomForestQuantileRegressor
7
-
8
- T_target = 37
9
- T_cut = 5
10
- qhiv, qlov = 0.97, 0.03
11
- state = 12345
12
-
13
- if T_target == 37:
14
- params = {'bootstrap': True, 'max_depth': 7, 'max_features': 0.4, 'max_samples': 1.0, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 1000} # best from -18-2.py w fout<0.040 (and 0.045)
15
-
16
- if T_target == 50:
17
- params = {'bootstrap': True, 'max_depth': 6, 'max_features': 0.4, 'max_samples': 1.0, 'min_samples_leaf': 6, 'min_samples_split': 2, 'n_estimators': 1000} # best from -19.py and -19-2.py with fout<0.040
18
-
19
- ## read data
20
- df_X = pd.read_excel('qrf_x.xlsx')
21
- df_y = pd.read_excel('qrf_y.xlsx')
22
- sub_desc_list = list(df_X.columns)
23
-
24
- ## fit transforms
25
- imp = sklearn.impute.SimpleImputer(missing_values=np.nan, strategy='mean')
26
- imp.fit(df_X)
27
- X_all = imp.transform(df_X)
28
- y_all = np.array(df_y['LogD'])
29
- scaler_X = sklearn.preprocessing.StandardScaler().fit(X_all)
30
- X_all_scale = scaler_X.transform(X_all)
31
-
32
- reg_all = RandomForestQuantileRegressor(random_state=state, n_jobs=-1, **params)
33
- reg_all.fit(X_all_scale,y_all)
34
-
35
- with open(f'qrf_model_bundle_{T_target}.pkl','wb') as f:
36
- pickle.dump([reg_all,imp,scaler_X,sub_desc_list],f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
quantity_functions.py → quantity_module/functions.py RENAMED
File without changes
quantity_module/quantity.py CHANGED
@@ -4,11 +4,11 @@ import pandas as pd
4
  from flask import render_template, request
5
  from functions import SigFigs, HtmlNumber, Piringer, WilkeChang, CdfPlot
6
  #from functions import Piecewise, PowerLaw
7
- from qrf_functions import QRF_Apply, QRF_Ceramic
8
  from . import blueprint
9
  from polymers import Polymers, Polymers3
10
  from ChemID import *
11
- from quantity_functions import *
12
  import rdkit
13
  from rdkit.Chem import AllChem as Chem
14
 
@@ -104,17 +104,23 @@ def exp_post():
104
  Solvent_MW = Solvent_MWs[Solvent_Name]
105
  Solute_MW = MW
106
 
 
 
 
 
 
107
  polymer = request.form['polymer']
108
  pIndex = np.argmax(polymers == polymer)
109
 
110
- # QRF is only implemented for 37 and 50 C
111
- if polymer == 'Other polymer' and round(T) in [310,323]:
112
  use_qrf = True
113
  else:
114
  use_qrf = False
115
 
116
  if use_qrf:
117
- quantiles = list(np.linspace(0.05,0.95,181))
 
118
  if is_ceramic:
119
  diff,domain_extrap = QRF_Ceramic(Polymer_Density, Polymer_Tg, quantiles=quantiles, T=T-273.15, worstcase='lo')
120
  else:
@@ -143,13 +149,18 @@ def exp_post():
143
  else:
144
  method = 'wc'
145
  if 1:
 
146
  print('Swelling_wtfrac, T, Polymer_Tg, Solvent_Name, Solvent_MW, Solute_MW, CHRIS_category')
147
  print(Swelling_wtfrac, T, Polymer_Tg, Solvent_Name, Solvent_MW, Solute_MW, CHRIS_category)
148
  print(np.nanquantile(D_dist_noswell, [0.05,0.5,0.95]))
149
  print(np.nanquantile(D_dist_swell, [0.05,0.5,0.95]))
150
- print('M_expt, Polymer_Volume, Surface_Area, Solvent_Volume, Extraction_Time*3600, K_expt')
151
- print(M_expt, Polymer_Volume, Surface_Area, Solvent_Volume, Extraction_Time*3600, K_expt)
152
  print(np.nanquantile(M0_pred, [0.05,0.5,0.95]))
 
 
 
 
153
 
154
  # Generate the rate plot using matplotlib
155
  #pngImageB64String = CdfPlot(M0_pred[~np.isnan(M0_pred)], units=units)
@@ -172,11 +183,12 @@ def exp_post():
172
 
173
  M0_out = SigFigs(np.nanquantile(M0_pred,0.5),6)
174
  tau_out = SigFigs(tau,6)
 
175
 
176
  return render_template('quantity_report.html', show_properties=show_properties, polymers=polymers, pIndex=pIndex,
177
  area=Surface_Area, vol=Polymer_Volume, units=units, M=M_expt, M0=M0_out, time=Extraction_Time,
178
  solventvol=Solvent_Volume, solventname=Solvent_Name, swelling=Swelling_percent, K=K_expt, T=T, tau=tau_out,
179
  chemName=chemName, MW=MW, LogP=LogP, rho=rho, mp=mp, iupac=iupac, cas=cas, smiles=smiles, molImage=molImage, table=table,
180
- LogP_origin=LogP_origin, rho_origin=rho_origin, mp_origin=mp_origin, ceramic=is_ceramic, methods=[method,round(Polymer_Tg),Polymer_Density],
181
- mass=mass, density=Polymer_Density)
182
 
 
4
  from flask import render_template, request
5
  from functions import SigFigs, HtmlNumber, Piringer, WilkeChang, CdfPlot
6
  #from functions import Piecewise, PowerLaw
7
+ from qrf.functions import QRF_Apply, QRF_Ceramic
8
  from . import blueprint
9
  from polymers import Polymers, Polymers3
10
  from ChemID import *
11
+ from quantity_module.functions import *
12
  import rdkit
13
  from rdkit.Chem import AllChem as Chem
14
 
 
104
  Solvent_MW = Solvent_MWs[Solvent_Name]
105
  Solute_MW = MW
106
 
107
+ if units == 'mg':
108
+ mass_units = mass*1e3
109
+ elif units == 'µg':
110
+ mass_units = mass*1e6
111
+
112
  polymer = request.form['polymer']
113
  pIndex = np.argmax(polymers == polymer)
114
 
115
+ # QRF is implemented for 25-75 C
116
+ if polymer == 'Other polymer':
117
  use_qrf = True
118
  else:
119
  use_qrf = False
120
 
121
  if use_qrf:
122
+ quantiles = list(np.linspace(0.0,1.0,201))
123
+ #quantiles = list(np.linspace(0.05,0.95,181))
124
  if is_ceramic:
125
  diff,domain_extrap = QRF_Ceramic(Polymer_Density, Polymer_Tg, quantiles=quantiles, T=T-273.15, worstcase='lo')
126
  else:
 
149
  else:
150
  method = 'wc'
151
  if 1:
152
+ print('DEBUG')
153
  print('Swelling_wtfrac, T, Polymer_Tg, Solvent_Name, Solvent_MW, Solute_MW, CHRIS_category')
154
  print(Swelling_wtfrac, T, Polymer_Tg, Solvent_Name, Solvent_MW, Solute_MW, CHRIS_category)
155
  print(np.nanquantile(D_dist_noswell, [0.05,0.5,0.95]))
156
  print(np.nanquantile(D_dist_swell, [0.05,0.5,0.95]))
157
+ print('M_expt, Polymer_Volume, Surface_Area, Solvent_Volume, Extraction_Time*3600, K_expt, method')
158
+ print(M_expt, Polymer_Volume, Surface_Area, Solvent_Volume, Extraction_Time*3600, K_expt, method)
159
  print(np.nanquantile(M0_pred, [0.05,0.5,0.95]))
160
+ V1,V2 = get_D_dists(Swelling_wtfrac, T, Polymer_Tg, Solvent_Name, Solvent_MW, Solute_MW, 'G2', rng, return_DCs=False, N=N_sample)
161
+ V3 = get_M_dist(V2, M_expt, Polymer_Volume, Surface_Area, Solvent_Volume, Extraction_Time*3600, K_expt=K_expt)
162
+ print(np.nanquantile(V2, [0.05,0.5,0.95]))
163
+ print(np.nanquantile(V3, [0.05,0.5,0.95]))
164
 
165
  # Generate the rate plot using matplotlib
166
  #pngImageB64String = CdfPlot(M0_pred[~np.isnan(M0_pred)], units=units)
 
183
 
184
  M0_out = SigFigs(np.nanquantile(M0_pred,0.5),6)
185
  tau_out = SigFigs(tau,6)
186
+ mass_units = SigFigs(mass_units,6)
187
 
188
  return render_template('quantity_report.html', show_properties=show_properties, polymers=polymers, pIndex=pIndex,
189
  area=Surface_Area, vol=Polymer_Volume, units=units, M=M_expt, M0=M0_out, time=Extraction_Time,
190
  solventvol=Solvent_Volume, solventname=Solvent_Name, swelling=Swelling_percent, K=K_expt, T=T, tau=tau_out,
191
  chemName=chemName, MW=MW, LogP=LogP, rho=rho, mp=mp, iupac=iupac, cas=cas, smiles=smiles, molImage=molImage, table=table,
192
+ LogP_origin=LogP_origin, rho_origin=rho_origin, mp_origin=mp_origin, ceramic=is_ceramic, methods=[method,round(Polymer_Tg-273.15),Polymer_Density],
193
+ mass=mass, mass_units=mass_units, density=Polymer_Density)
194
 
quantity_module/templates/quantity_index.html CHANGED
@@ -149,7 +149,7 @@ please see the <a href="{{url_for('.static', filename='RST.html')}}"> RST inform
149
  <tr><td colspan="2"><h4> Extraction parameters <button type=button class="Info_btn" data-toggle="modal" data-target="#ExtractionModal">&#9432;</button></td></tr> </h4>
150
  <tr><th>Device surface area (cm<sup>2</sup>)</th><td> <input name="area" id="area" step="any" value="5.0" min="0.001" type="number" required></td></tr>
151
  <tr><th>Duration (hours)</th><td> <input name="time" id="time" step="any" value="24.0" min="0.001" type="number" required></td></tr>
152
- <tr><th>Temperature (&deg;C)</th><td> <input name="T" id="T" step="any" value="50.0" min="20" max="75" type="number" required></td></tr>
153
  <tr><th>Solvent</th>
154
  <td> <select name="solventname" id="solventname">
155
  <option value="{{solvents[0]}}" selected>{{solvents[0]}}</option>
 
149
  <tr><td colspan="2"><h4> Extraction parameters <button type=button class="Info_btn" data-toggle="modal" data-target="#ExtractionModal">&#9432;</button></td></tr> </h4>
150
  <tr><th>Device surface area (cm<sup>2</sup>)</th><td> <input name="area" id="area" step="any" value="5.0" min="0.001" type="number" required></td></tr>
151
  <tr><th>Duration (hours)</th><td> <input name="time" id="time" step="any" value="24.0" min="0.001" type="number" required></td></tr>
152
+ <tr><th>Temperature (&deg;C)</th><td> <input name="T" id="T" step="any" value="50.0" min="25" max="75" type="number" required></td></tr>
153
  <tr><th>Solvent</th>
154
  <td> <select name="solventname" id="solventname">
155
  <option value="{{solvents[0]}}" selected>{{solvents[0]}}</option>
quantity_module/templates/quantity_report.html CHANGED
@@ -119,6 +119,13 @@ Swelling = {{swelling}} wt% (used to estimate \( D \))<br>
119
 
120
  <p>The progress of the extraction can be expressed through the dimensionless time \( \tau \). For your extraction, \( \tau \) = {{tau}}. Extractions with \( \tau \) &geq; 0.1 result in more accurate estimates of the total quantity, and when \( \tau \) &geq; 1.0 the extracted amount may be used directly as the total quantity if the extraction is diffusion-controlled.</p>
121
 
 
 
 
 
 
 
 
122
  <p><button type="button" onclick="javascript:history.back()">Back</button></p>
123
 
124
  </body>
 
119
 
120
  <p>The progress of the extraction can be expressed through the dimensionless time \( \tau \). For your extraction, \( \tau \) = {{tau}}. Extractions with \( \tau \) &geq; 0.1 result in more accurate estimates of the total quantity, and when \( \tau \) &geq; 1.0 the extracted amount may be used directly as the total quantity if the extraction is diffusion-controlled.</p>
121
 
122
+ {% if M0>=mass_units %}
123
+ <p>
124
+ <font color="red">The predicted amount ({{M0}} {{units}}) is larger than the device mass ({{mass_units}} {{units}}) due to uncertainty and conservatism in the prediction.
125
+ In this case the device mass may be used as a conservative estimate of the total quantity of this extractable.</font>
126
+ </p>
127
+ {% endif %}
128
+
129
  <p><button type="button" onclick="javascript:history.back()">Back</button></p>
130
 
131
  </body>