Spaces:
Sleeping
Sleeping
Ana Sanchez
committed on
Commit
·
5bd2a17
1
Parent(s):
66c0de1
Add data folder
Browse files
app.py
CHANGED
|
@@ -27,17 +27,16 @@ from rdkit.Chem import AllChem
|
|
| 27 |
from rdkit.Chem import DataStructs
|
| 28 |
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
basepath = os.path.dirname(__file__)
|
|
|
|
| 33 |
|
| 34 |
-
MODEL_PATH = os.path.join(basepath, "epoch_55.pt")
|
| 35 |
CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
imgname = "I1"
|
| 38 |
-
molecule_features = "all_molecule_cellpainting_features.pkl"
|
| 39 |
-
image_features = "subset_image_cellpainting_features.pkl"
|
| 40 |
-
images_arr = "subset_npzs_dict_.npz"
|
| 41 |
|
| 42 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 43 |
model_type = "RN50"
|
|
@@ -113,7 +112,6 @@ def get_features(dataset, model, device):
|
|
| 113 |
|
| 114 |
all_ids.append(ids)
|
| 115 |
|
| 116 |
-
|
| 117 |
all_ids = list(chain.from_iterable(all_ids))
|
| 118 |
|
| 119 |
if imgs is not None and mols is not None:
|
|
@@ -158,9 +156,6 @@ def main(df, model_path, model, img_path=None, mol_path=None, image_resolution=N
|
|
| 158 |
val_img_features, val_ids = result
|
| 159 |
return val_img_features, val_ids
|
| 160 |
|
| 161 |
-
#val_img_features, val_ids = get_features(val, model, device)
|
| 162 |
-
|
| 163 |
-
#return val_img_features, val_text_features, val_ids
|
| 164 |
|
| 165 |
def img_to_numpy(file):
|
| 166 |
img = Image.open(file)
|
|
@@ -305,9 +300,6 @@ def reshape_image(arr):
|
|
| 305 |
|
| 306 |
# missing functions: save morgan to to_hdf, create index, load features, calculate similarities
|
| 307 |
|
| 308 |
-
|
| 309 |
-
#model = load(MODEL_PATH, device, model_type, image_resolution)
|
| 310 |
-
|
| 311 |
##### STREAMLIT FUNCTIONS ######
|
| 312 |
st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
|
| 313 |
|
|
@@ -375,22 +367,22 @@ def molecules_from_image():
|
|
| 375 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
| 376 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
| 377 |
mol_index_fname = "mol_index.csv"
|
| 378 |
-
mol_index = create_index(
|
| 379 |
-
molpath = os.path.join(
|
| 380 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
| 381 |
mol_imgs = draw_molecules(smiles)
|
| 382 |
mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
|
| 383 |
predefined_features = False
|
| 384 |
else:
|
| 385 |
mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
|
| 386 |
-
mol_features_torch = torch.load(
|
| 387 |
mol_features = mol_features_torch["mol_features"]
|
| 388 |
mol_ids = mol_features_torch["mol_ids"]
|
| 389 |
print(len(mol_ids))
|
| 390 |
predefined_features = True
|
| 391 |
|
| 392 |
img_index_fname = "img_index.csv"
|
| 393 |
-
img_index = create_index(
|
| 394 |
img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
|
| 395 |
|
| 396 |
print(img_features.shape)
|
|
@@ -434,8 +426,8 @@ def images_from_molecule():
|
|
| 434 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
| 435 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
| 436 |
mol_index_fname = "mol_index.csv"
|
| 437 |
-
mol_index = create_index(
|
| 438 |
-
molpath = os.path.join(
|
| 439 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
| 440 |
mol_imgs = draw_molecules(smiles)
|
| 441 |
|
|
@@ -493,6 +485,3 @@ page_names_to_funcs = {
|
|
| 493 |
|
| 494 |
selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
|
| 495 |
page_names_to_funcs[selected_page]()
|
| 496 |
-
|
| 497 |
-
# print(img_features.shape)
|
| 498 |
-
# print(img_ids)
|
|
|
|
| 27 |
from rdkit.Chem import DataStructs
|
| 28 |
|
| 29 |
|
|
|
|
|
|
|
| 30 |
basepath = os.path.dirname(__file__)
|
| 31 |
+
datapath = os.path.join(basepath, "data")
|
| 32 |
|
|
|
|
| 33 |
CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
|
| 34 |
+
MODEL_PATH = os.path.join(datapath, "epoch_55.pt")
|
| 35 |
+
npzs = os.path.join(datapath, "npzs")
|
| 36 |
+
molecule_features = os.path.join(datapath, "all_molecule_cellpainting_features.pkl")
|
| 37 |
+
image_features = os.path.join(datapath, "subset_image_cellpainting_features.pkl")
|
| 38 |
+
images_arr = os.path.join(datapath, "subset_npzs_dict_.npz")
|
| 39 |
imgname = "I1"
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 42 |
model_type = "RN50"
|
|
|
|
| 112 |
|
| 113 |
all_ids.append(ids)
|
| 114 |
|
|
|
|
| 115 |
all_ids = list(chain.from_iterable(all_ids))
|
| 116 |
|
| 117 |
if imgs is not None and mols is not None:
|
|
|
|
| 156 |
val_img_features, val_ids = result
|
| 157 |
return val_img_features, val_ids
|
| 158 |
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
def img_to_numpy(file):
|
| 161 |
img = Image.open(file)
|
|
|
|
| 300 |
|
| 301 |
# missing functions: save morgan to to_hdf, create index, load features, calculate similarities
|
| 302 |
|
|
|
|
|
|
|
|
|
|
| 303 |
##### STREAMLIT FUNCTIONS ######
|
| 304 |
st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
|
| 305 |
|
|
|
|
| 367 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
| 368 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
| 369 |
mol_index_fname = "mol_index.csv"
|
| 370 |
+
mol_index = create_index(datapath, molnames, mol_index_fname)
|
| 371 |
+
molpath = os.path.join(datapath, "mols.hdf")
|
| 372 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
| 373 |
mol_imgs = draw_molecules(smiles)
|
| 374 |
mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
|
| 375 |
predefined_features = False
|
| 376 |
else:
|
| 377 |
mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
|
| 378 |
+
mol_features_torch = torch.load(molecule_features, map_location=device)
|
| 379 |
mol_features = mol_features_torch["mol_features"]
|
| 380 |
mol_ids = mol_features_torch["mol_ids"]
|
| 381 |
print(len(mol_ids))
|
| 382 |
predefined_features = True
|
| 383 |
|
| 384 |
img_index_fname = "img_index.csv"
|
| 385 |
+
img_index = create_index(datapath, imgname, img_index_fname)
|
| 386 |
img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
|
| 387 |
|
| 388 |
print(img_features.shape)
|
|
|
|
| 426 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
| 427 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
| 428 |
mol_index_fname = "mol_index.csv"
|
| 429 |
+
mol_index = create_index(datapath, molnames, mol_index_fname)
|
| 430 |
+
molpath = os.path.join(datapath, "mols.hdf")
|
| 431 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
| 432 |
mol_imgs = draw_molecules(smiles)
|
| 433 |
|
|
|
|
| 485 |
|
| 486 |
selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
|
| 487 |
page_names_to_funcs[selected_page]()
|
|
|
|
|
|
|
|
|
data/all_molecule_cellpainting_features.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8979250025350c6ff67f986c657c14a881710cfe73e315ef5d126abaecf50b4b
|
| 3 |
+
size 62906027
|
data/cellpainting-all-imgpermol.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a10cc3285a7b1c3275c30b2aa3654d00651ae4211d5e057118f32c40725e09ff
|
| 3 |
+
size 14270985
|
data/cellpainting-unique-molecule.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65592c0ee09203c2ba5be15c159c7944c49feea65a24cfb7862bb49af7cd112a
|
| 3 |
+
size 14265091
|
data/epoch_55.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c612c6da6f943caac839b9102fe98ba944838600942897aaa035f552d9a535bd
|
| 3 |
+
size 352013623
|
data/subset_image_cellpainting_features.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a7cbfdf80d0ee6197f4e9118be2d7569399601d6c55c4db7fbc2dcbeadd9d6a
|
| 3 |
+
size 62906027
|
data/subset_npzs_dict_.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ddb3451d5d46a1eed6613c914777e013a6d9a392cfc4f9448d0b9488d099da1
|
| 3 |
+
size 3656596390
|