{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import pathlib" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Metadata_Source | \n", "Metadata_Plate | \n", "Metadata_Well | \n", "Metadata_JCP2022 | \n", "Cells_AreaShape_Area | \n", "Cells_AreaShape_BoundingBoxArea | \n", "Cells_AreaShape_BoundingBoxMaximum_X | \n", "Cells_AreaShape_BoundingBoxMaximum_Y | \n", "Cells_AreaShape_BoundingBoxMinimum_X | \n", "Cells_AreaShape_BoundingBoxMinimum_Y | \n", "... | \n", "Nuclei_Texture_Variance_RNA_10_02_256 | \n", "Nuclei_Texture_Variance_RNA_10_03_256 | \n", "Nuclei_Texture_Variance_RNA_3_00_256 | \n", "Nuclei_Texture_Variance_RNA_3_01_256 | \n", "Nuclei_Texture_Variance_RNA_3_02_256 | \n", "Nuclei_Texture_Variance_RNA_3_03_256 | \n", "Nuclei_Texture_Variance_RNA_5_00_256 | \n", "Nuclei_Texture_Variance_RNA_5_01_256 | \n", "Nuclei_Texture_Variance_RNA_5_02_256 | \n", "Nuclei_Texture_Variance_RNA_5_03_256 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "source_2 | \n", "1053597806 | \n", "A01 | \n", "JCP2022_085227 | \n", "-0.988041 | \n", "-0.907001 | \n", "0.096069 | \n", "0.323007 | \n", "0.146602 | \n", "0.534867 | \n", "... | \n", "1.605010 | \n", "1.693312 | \n", "1.615753 | \n", "1.604152 | \n", "1.624943 | \n", "1.612151 | \n", "1.589302 | \n", "1.594195 | \n", "1.627130 | \n", "1.610461 | \n", "
| 1 | \n", "source_2 | \n", "1053597806 | \n", "K22 | \n", "JCP2022_049123 | \n", "-0.247098 | \n", "-0.389633 | \n", "-0.828097 | \n", "0.727857 | \n", "-0.708414 | \n", "0.887224 | \n", "... | \n", "-0.696735 | \n", "-0.547206 | \n", "-0.737023 | \n", "-0.739017 | \n", "-0.741798 | \n", "-0.741577 | \n", "-0.724849 | \n", "-0.714124 | \n", "-0.755699 | \n", "-0.729048 | \n", "
| 2 | \n", "source_2 | \n", "1053597806 | \n", "K21 | \n", "JCP2022_025146 | \n", "0.882814 | \n", "0.635229 | \n", "-0.866758 | \n", "1.593473 | \n", "-0.798442 | \n", "1.682966 | \n", "... | \n", "0.427341 | \n", "0.699385 | \n", "0.506531 | \n", "0.478770 | \n", "0.518382 | \n", "0.476467 | \n", "0.454404 | \n", "0.473200 | \n", "0.500973 | \n", "0.484033 | \n", "
| 3 | \n", "source_2 | \n", "1053597806 | \n", "K20 | \n", "JCP2022_109006 | \n", "1.551945 | \n", "1.409719 | \n", "-0.624934 | \n", "-1.552434 | \n", "-0.634504 | \n", "-1.846982 | \n", "... | \n", "-0.016852 | \n", "0.321772 | \n", "0.019403 | \n", "-0.023218 | \n", "0.025426 | \n", "-0.020011 | \n", "-0.036913 | \n", "-0.064012 | \n", "-0.037481 | \n", "-0.048811 | \n", "
| 4 | \n", "source_2 | \n", "1053597806 | \n", "K19 | \n", "JCP2022_040739 | \n", "0.782228 | \n", "0.394115 | \n", "0.016284 | \n", "0.335031 | \n", "0.009199 | \n", "0.228013 | \n", "... | \n", "0.549137 | \n", "0.589808 | \n", "0.643868 | \n", "0.616816 | \n", "0.655204 | \n", "0.615139 | \n", "0.594326 | \n", "0.588221 | \n", "0.632808 | \n", "0.615094 | \n", "
5 rows × 3184 columns
\n", "| \n", " | plate | \n", "Cell_Name_Vevo | \n", "Cell_ID_Cellosaur | \n", "drug | \n", "concentration | \n", "concentration_unit | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "4 | \n", "A549 | \n", "CVCL_0023 | \n", "8-Hydroxyquinoline | \n", "0.05 | \n", "uM | \n", "
| 1 | \n", "4 | \n", "HS-578T | \n", "CVCL_0332 | \n", "8-Hydroxyquinoline | \n", "0.05 | \n", "uM | \n", "
| 2 | \n", "4 | \n", "HCT15 | \n", "CVCL_0292 | \n", "8-Hydroxyquinoline | \n", "0.05 | \n", "uM | \n", "
| 3 | \n", "4 | \n", "HOP62 | \n", "CVCL_1285 | \n", "8-Hydroxyquinoline | \n", "0.05 | \n", "uM | \n", "
| 4 | \n", "4 | \n", "SK-MEL-2 | \n", "CVCL_0069 | \n", "8-Hydroxyquinoline | \n", "0.05 | \n", "uM | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 60771 | \n", "12 | \n", "hTERT-HPNE | \n", "CVCL_C466 | \n", "Adagrasib | \n", "0.05 | \n", "uM | \n", "
| 60772 | \n", "12 | \n", "NCI-H23 | \n", "CVCL_1547 | \n", "Adagrasib | \n", "0.05 | \n", "uM | \n", "
| 60773 | \n", "12 | \n", "NCI-H1792 | \n", "CVCL_1495 | \n", "Adagrasib | \n", "0.05 | \n", "uM | \n", "
| 60774 | \n", "12 | \n", "NCI-H2030 | \n", "CVCL_1517 | \n", "Adagrasib | \n", "0.05 | \n", "uM | \n", "
| 60775 | \n", "12 | \n", "LOX-IMVI | \n", "CVCL_1381 | \n", "Adagrasib | \n", "0.05 | \n", "uM | \n", "
60776 rows × 6 columns
\n", "| \n", " | drug | \n", "targets | \n", "moa-broad | \n", "moa-fine | \n", "human-approved | \n", "clinical-trials | \n", "gpt-notes-approval | \n", "canonical_smiles | \n", "pubchem_cid | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Talc | \n", "None | \n", "unclear | \n", "unclear | \n", "yes | \n", "yes | \n", "Talc used in pharma and cosmetics; safety unde... | \n", "[OH-].[OH-].[O-][Si]12O[Si]3(O[Si](O1)(O[Si](O... | \n", "165411828.0 | \n", "
| 1 | \n", "Bortezomib | \n", "PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma and mantle cell ... | \n", "B(C(CC(C)C)NC(=O)C(CC1=CC=CC=C1)NC(=O)C2=NC=CN... | \n", "387447.0 | \n", "
| 2 | \n", "Ixazomib | \n", "PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma treatment. | \n", "B(C(CC(C)C)NC(=O)CNC(=O)C1=C(C=CC(=C1)Cl)Cl)(O)O | \n", "25183872.0 | \n", "
| 3 | \n", "Ixazomib citrate | \n", "PSMB1, PSMB2, PSMB5 | \n", "inhibitor/antagonist | \n", "Proteasome inhibitor | \n", "yes | \n", "yes | \n", "Approved for multiple myeloma treatment as par... | \n", "B1(OC(=O)C(O1)(CC(=O)O)CC(=O)O)C(CC(C)C)NC(=O)... | \n", "56844015.0 | \n", "
| 4 | \n", "Lactate (calcium) | \n", "None | \n", "unclear | \n", "unclear | \n", "yes | \n", "yes | \n", "Used in medical settings, but not specifically... | \n", "C.CC(C(=O)[O-])O.[Ca+2] | \n", "168311648.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 374 | \n", "Verteporfin | \n", "YAP1 | \n", "inhibitor/antagonist | \n", "unclear | \n", "yes | \n", "yes | \n", "Used in photodynamic therapy for macular degen... | \n", "None | \n", "NaN | \n", "
| 375 | \n", "Quinidine (15% dihydroquinidine) | \n", "KCNH2 | \n", "inhibitor/antagonist | \n", "unclear | \n", "yes | \n", "yes | \n", "Approved for arrhythmias as part of quinine al... | \n", "COC1=CC2=C(C=CN=C2C=C1)[C@@H]([C@H]3C[C@@H]4CC... | \n", "441074.0 | \n", "
| 376 | \n", "Canagliflozin (hemihydrate) | \n", "SLC5A2 | \n", "inhibitor/antagonist | \n", "Glucose transporter inhibitor | \n", "yes | \n", "yes | \n", "Approved for type 2 diabetes. | \n", "CC1=C(C=C(C=C1)[C@H]2[C@@H]([C@H]([C@@H]([C@H]... | \n", "24997615.0 | \n", "
| 377 | \n", "Osimertinib (mesylate) | \n", "EGFR | \n", "inhibitor/antagonist | \n", "EGFR/ERBB inhibitor | \n", "yes | \n", "yes | \n", "Approved for non-small cell lung cancer treatm... | \n", "CN1C=C(C2=CC=CC=C21)C3=NC(=NC=C3)NC4=C(C=C(C(=... | \n", "78357807.0 | \n", "
| 378 | \n", "γ-Oryzanol | \n", "None | \n", "inhibitor/antagonist | \n", "DNA methyltransferase inhibitor | \n", "no | \n", "yes | \n", "Used in supplements; limited human data. | \n", "C[C@H](CCC=C(C)C)[C@H]1CC[C@@]2([C@@]1(CC[C@]3... | \n", "5282164.0 | \n", "
379 rows × 9 columns
\n", "| \n", " | Compartment | \n", "Feature | \n", "Channel | \n", "Suffix | \n", "Feature significance | \n", "Perturbation | \n", "Perturbation example image | \n", "Median | \n", "Gene Rank | \n", "Feature Rank | \n", "JCP2022 | \n", "Resources | \n", "Synonyms | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Cells | \n", "RadialDistributionFracAtD | \n", "\n", " | _mito_tubeness_3of16 | \n", "0.08213 | \n", "AIRPKJLLJCUQSV-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-0.964 | \n", "999999 | \n", "9 | \n", "JCP2022_001647 | \n", "None | \n", "None | \n", "
| 1 | \n", "Nuclei | \n", "AreaShapeZernike | \n", "\n", " | _6_2 | \n", "0.00003 | \n", "DTCQXSFBETURSZ-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.244 | \n", "999999 | \n", "0 | \n", "JCP2022_018053 | \n", "None | \n", "None | \n", "
| 2 | \n", "Nuclei | \n", "RadialDistribution_RadialCV | \n", "Mito | \n", "_3of4 | \n", "0.02041 | \n", "UQEZJEIWCQNVQU-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.514 | \n", "999999 | \n", "4 | \n", "JCP2022_090828 | \n", "None | \n", "None | \n", "
| 3 | \n", "Cytoplasm | \n", "RadialDistribution_FracAtD | \n", "ER | \n", "_2of4 | \n", "0.00227 | \n", "MOQWDHSSODJEDU-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.486 | \n", "999999 | \n", "2 | \n", "JCP2022_055532 | \n", "None | \n", "None | \n", "
| 4 | \n", "Cytoplasm | \n", "RadialDistribution_FracAtD | \n", "DNA | \n", "_1of4 | \n", "0.01685 | \n", "WPCDYOHCABUJAF-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.097 | \n", "999999 | \n", "1 | \n", "JCP2022_100243 | \n", "None | \n", "None | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1189393 | \n", "Cells | \n", "Texture_SumEntropy | \n", "RNA | \n", "_10_01_256 | \n", "0.02717 | \n", "KHEBJWNXOZKKBY-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.227 | \n", "999999 | \n", "2 | \n", "JCP2022_044597 | \n", "None | \n", "None | \n", "
| 1189394 | \n", "Nuclei | \n", "AreaShapeCompactness | \n", "\n", " | \n", " | 0.00000 | \n", "QGGLNUGOWVBAJC-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.906 | \n", "999999 | \n", "1 | \n", "JCP2022_073238 | \n", "None | \n", "None | \n", "
| 1189395 | \n", "Cells | \n", "AreaShapeMaxFeretDiameter | \n", "\n", " | \n", " | 0.00138 | \n", "SBJVCDHWJCQQIQ-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.688 | \n", "999999 | \n", "8 | \n", "JCP2022_082092 | \n", "None | \n", "None | \n", "
| 1189396 | \n", "Nuclei | \n", "Texture_Correlation | \n", "ER | \n", "_10_02_256 | \n", "0.19502 | \n", "FCQDLIHIOSXHMH-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.206 | \n", "999999 | \n", "5 | \n", "JCP2022_019780 | \n", "None | \n", "None | \n", "
| 1189397 | \n", "Nuclei | \n", "Correlation_RWC_ER | \n", "AGP | \n", "\n", " | 0.05743 | \n", "AXYLSEVXPWECHR-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.665 | \n", "999999 | \n", "2 | \n", "JCP2022_004567 | \n", "None | \n", "None | \n", "
1189398 rows × 13 columns
\n", "| \n", " | Compartment | \n", "Feature | \n", "Channel | \n", "Suffix | \n", "Feature significance | \n", "Perturbation | \n", "Perturbation example image | \n", "Median | \n", "Gene Rank | \n", "Feature Rank | \n", "JCP2022 | \n", "Resources | \n", "Synonyms | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Cells | \n", "RadialDistributionFracAtD | \n", "\n", " | _mito_tubeness_3of16 | \n", "0.08213 | \n", "AIRPKJLLJCUQSV-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-0.964 | \n", "999999 | \n", "9 | \n", "JCP2022_001647 | \n", "None | \n", "None | \n", "
| 1 | \n", "Nuclei | \n", "AreaShapeZernike | \n", "\n", " | _6_2 | \n", "0.00003 | \n", "DTCQXSFBETURSZ-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.244 | \n", "999999 | \n", "0 | \n", "JCP2022_018053 | \n", "None | \n", "None | \n", "
| 2 | \n", "Nuclei | \n", "RadialDistribution_RadialCV | \n", "Mito | \n", "_3of4 | \n", "0.02041 | \n", "UQEZJEIWCQNVQU-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.514 | \n", "999999 | \n", "4 | \n", "JCP2022_090828 | \n", "None | \n", "None | \n", "
| 3 | \n", "Cytoplasm | \n", "RadialDistribution_FracAtD | \n", "ER | \n", "_2of4 | \n", "0.00227 | \n", "MOQWDHSSODJEDU-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.486 | \n", "999999 | \n", "2 | \n", "JCP2022_055532 | \n", "None | \n", "None | \n", "
| 4 | \n", "Cytoplasm | \n", "RadialDistribution_FracAtD | \n", "DNA | \n", "_1of4 | \n", "0.01685 | \n", "WPCDYOHCABUJAF-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.097 | \n", "999999 | \n", "1 | \n", "JCP2022_100243 | \n", "None | \n", "None | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1189393 | \n", "Cells | \n", "Texture_SumEntropy | \n", "RNA | \n", "_10_01_256 | \n", "0.02717 | \n", "KHEBJWNXOZKKBY-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.227 | \n", "999999 | \n", "2 | \n", "JCP2022_044597 | \n", "None | \n", "None | \n", "
| 1189394 | \n", "Nuclei | \n", "AreaShapeCompactness | \n", "\n", " | \n", " | 0.00000 | \n", "QGGLNUGOWVBAJC-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.906 | \n", "999999 | \n", "1 | \n", "JCP2022_073238 | \n", "None | \n", "None | \n", "
| 1189395 | \n", "Cells | \n", "AreaShapeMaxFeretDiameter | \n", "\n", " | \n", " | 0.00138 | \n", "SBJVCDHWJCQQIQ-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.688 | \n", "999999 | \n", "8 | \n", "JCP2022_082092 | \n", "None | \n", "None | \n", "
| 1189396 | \n", "Nuclei | \n", "Texture_Correlation | \n", "ER | \n", "_10_02_256 | \n", "0.19502 | \n", "FCQDLIHIOSXHMH-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "1.206 | \n", "999999 | \n", "5 | \n", "JCP2022_019780 | \n", "None | \n", "None | \n", "
| 1189397 | \n", "Nuclei | \n", "Correlation_RWC_ER | \n", "AGP | \n", "\n", " | 0.05743 | \n", "AXYLSEVXPWECHR-UHFFFAOYSA-N | \n", "{\"img_src\": \"https://phenaid.ardigen.com/stati... | \n", "-1.665 | \n", "999999 | \n", "2 | \n", "JCP2022_004567 | \n", "None | \n", "None | \n", "
1189398 rows × 13 columns
\n", "