{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import scanpy as sc\n", "import numpy as np \n", "from scanpy import AnnData\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 14/14 [01:49<00:00, 7.86s/it]\n" ] }, { "data": { "text/plain": [ "[AnnData object with n_obs × n_vars = 171297 × 62710 backed at 'data/h5ad/reduced/plate1.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 6892456 × 62710 backed at 'data/h5ad/reduced/plate2.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 1417313 × 62710 backed at 'data/h5ad/reduced/plate3.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 155688 × 62710 backed at 'data/h5ad/reduced/plate4.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 254437 × 62710 backed at 'data/h5ad/reduced/plate5.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " 
AnnData object with n_obs × n_vars = 7062275 × 62710 backed at 'data/h5ad/reduced/plate6.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 88033 × 62710 backed at 'data/h5ad/reduced/plate7.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 7611223 × 62710 backed at 'data/h5ad/reduced/plate8.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 1217465 × 62710 backed at 'data/h5ad/reduced/plate9.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 201038 × 62710 backed at 'data/h5ad/reduced/plate10.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 2361999 × 62710 backed at 'data/h5ad/reduced/plate11.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 8480425 × 62710 backed at 'data/h5ad/reduced/plate12.h5ad'\n", " obs: 
'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 2612839 × 62710 backed at 'data/h5ad/reduced/plate13.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate',\n", " AnnData object with n_obs × n_vars = 6086200 × 62710 backed at 'data/h5ad/reduced/plate14.h5ad'\n", " obs: 'sample', 'gene_count', 'tscp_count', 'mread_count', 'drugname_drugconc', 'drug', 'cell_line', 'sublibrary', 'BARCODE', 'pcnt_mito', 'S_score', 'G2M_score', 'phase', 'pass_filter', 'cell_name', 'plate']" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Xs = [sc.read_h5ad(f\"../Data/h5ad/h5ad/plate{i}_filt_Vevo_Tahoe100M_WServicesFrom_ParseGigalab.h5ad\", backed=\"r\") for i in tqdm(range(1, 14 + 1))]\n", "Xs = [sc.read_h5ad(f\"data/h5ad/reduced/plate{i}.h5ad\", backed=\"r\") for i in tqdm(range(1, 14 + 1))]\n", "# X = sc.read_h5ad(\"data/tahoe_sampled_1M.h5ad.gz\")\n", "# Xs = [X]\n", "Xs" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [
 "# Define held-out data for the validation and test sets.\n",
 "# NOTE(review): the list names suggest one drug per mechanism of action (MoA), every drug of one MoA,\n",
 "# one cell line per organ, and every cell line of one organ -- confirm against the drug / cell-line metadata.\n",
 "val_one_per_moa = [\"Phenylephrine (hydrochloride)\",\"Darolutamide\", \"palbociclib\", \"Tolmetin\", \"Procainamide (hydrochloride)\", \"Trifluridine\", \"Simotinib\", \"Methylprednisolone succinate\", \"Dapagliflozin\", \"CP21R7\", \"Panobinostat\", \"Tofacitinib\", \"Trametinib\", \"Vinblastine (sulfate)\", \"Temsirolimus\", \"Sunitinib\", \"Ralimetinib dimesylate\", \"Tirabrutinib\", \"GSK1059615\", \"SBI-0640756\", \"Lonafarnib\", \"Retinoic acid\"]\n",
 "val_one_entire_moa = [\"Bortezomib\", \"Ixazomib\", \"Ixazomib citrate\"]\n",
 "\n",
 "val_one_per_organ = [\"CVCL_1724\", \"CVCL_0179\", \"CVCL_1715\", \"CVCL_0366\", \"CVCL_1550\", \"CVCL_0480\", \"CVCL_0069\"]\n",
 "val_one_entire_organ = [\"CVCL_0359\"]\n",
 "\n",
 "test_one_per_moa = [\"Vilanterol\", \"Flutamide\", \"Ribociclib\", \"Valdecoxib\", \"γ-Oryzanol\", \"Trimetrexate\", \"Tucatinib\", \"Triamcinolone\", \"Dapagliflozin ((2S)-1,2-propanediol, hydrate)\", \"LY2090314\", \"Tucidinostat\", \"Tofacitinib (citrate)\", \"Trametinib (DMSO_TF solvate)\", \"vincristine\", \"Torkinib\", \"Vandetanib\", \"Temuterkib\", \"Tirabrutinib (hydrochloride)\", \"Ipatasertib\", \"Tomivosertib\", \"RMC-6236\", \"Tazarotene\"]\n",
 "test_one_entire_moa = [\"Glasdegib\", \"Sonidegib\", \"Vismodegib\"]\n",
 "\n",
 "test_one_per_organ = [\"CVCL_0397\", \"CVCL_1239\", \"CVCL_0371\", \"CVCL_1716\", \"CVCL_C466\", \"CVCL_1666\", \"CVCL_0293\"]\n",
 "test_one_entire_organ = [\"CVCL_1094\"]\n",
 "\n",
 "\n",
 "def val_plate_14(x):\n",
 "    \"\"\"Return the obs names at even positions (0, 2, 4, ...) of ``x``.\"\"\"\n",
 "    return x.obs_names[0::2]\n",
 "\n",
 "\n",
 "def test_plate_14(x):\n",
 "    \"\"\"Return the obs names at odd positions (1, 3, 5, ...) of ``x``.\"\"\"\n",
 "    return x.obs_names[1::2]\n",
 "\n",
 "\n",
 "def _held_out_mask(x, drugs, cell_lines, parity):\n",
 "    \"\"\"Build the boolean mask shared by ``held_out_val`` and ``held_out_test``.\n",
 "\n",
 "    A cell is selected when its ``drug`` is in ``drugs``, or its ``cell_line`` is in\n",
 "    ``cell_lines``, or its ``plate`` is 'plate14' and its position in ``x`` modulo 2\n",
 "    equals ``parity`` (0 selects the ``val_plate_14`` half, 1 the ``test_plate_14`` half).\n",
 "\n",
 "    Returns a boolean pandas Series aligned with ``x.obs``.\n",
 "    \"\"\"\n",
 "    # Positional parity replaces ``x.obs_names.isin(x.obs_names[parity::2])``: the result is the\n",
 "    # same when obs names are unique, it avoids a membership test over millions of strings, and a\n",
 "    # duplicated obs name can no longer place the same cell in both halves of plate 14.\n",
 "    in_half = np.arange(len(x.obs_names)) % 2 == parity\n",
 "    return (\n",
 "        x.obs[\"drug\"].isin(drugs)\n",
 "        | x.obs[\"cell_line\"].isin(cell_lines)\n",
 "        | ((x.obs[\"plate\"] == \"plate14\") & in_half)\n",
 "    )\n",
 "\n",
 "\n",
 "# NOTE(review): the two masks below are not mutually exclusive -- a cell whose drug is in a\n",
 "# validation list and whose cell line is in a test list satisfies both. Confirm that the code\n",
 "# consuming these masks resolves the overlap.\n",
 "def held_out_val(x):\n",
 "    \"\"\"Boolean mask over ``x.obs`` selecting the validation hold-out cells.\"\"\"\n",
 "    return _held_out_mask(x, val_one_per_moa + val_one_entire_moa, val_one_per_organ + val_one_entire_organ, parity=0)\n",
 "\n",
 "\n",
 "def held_out_test(x):\n",
 "    \"\"\"Boolean mask over ``x.obs`` selecting the test hold-out cells.\"\"\"\n",
 "    return _held_out_mask(x, test_one_per_moa + test_one_entire_moa, test_one_per_organ + test_one_entire_organ, parity=1)"
 ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| drug | drugname_drugconc | len | dose |
|---|---|---|---|
| str | str | i64 | str |
| "(R)-Verapamil (hydrochloride)" | "[('(R)-Verapamil (hydrochlorid… | 166943 | " 5.0" |
| "(S)-Crizotinib" | "[('(S)-Crizotinib', 0.5, 'uM')… | 86840 | " 0.5" |
| "18β-Glycyrrhetinic acid" | "[('18β-Glycyrrhetinic acid', 5… | 113159 | " 5.0" |
| "4EGI-1" | "[('4EGI-1', 0.5, 'uM')]" | 128549 | " 0.5" |
| "5-Azacytidine" | "[('5-Azacytidine', 5.0, 'uM')]" | 71466 | " 5.0" |
| … | … | … | … |
| "olaparib" | "[('olaparib', 0.5, 'uM')]" | 136783 | " 0.5" |
| "palbociclib" | "[('palbociclib', 0.5, 'uM')]" | 91681 | " 0.5" |
| "venetoclax" | "[('venetoclax', 0.5, 'uM')]" | 118167 | " 0.5" |
| "vincristine" | "[('vincristine', 0.5, 'uM')]" | 35862 | " 0.5" |
| "γ-Oryzanol" | "[('γ-Oryzanol', 5.0, 'uM')]" | 103024 | " 5.0" |