{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "b6059109", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 4, "id": "38498646", "metadata": { "scrolled": true }, "outputs": [], "source": [ "df = pd.read_excel(\"./EC_antibiotic.xlsx\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "c15ce735", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | COADD_ID | \n", "INHIB_AVE | \n", "SMILES | \n", "HIT | \n", "
|---|---|---|---|---|
| 0 | \n", "CO-ADD:0303753 | \n", "93.09 | \n", "N1(c2cccc(OC)c2)C(=O)NC(\\C(=C/c(ccc3[N+](=O)[O... | \n", "1.0 | \n", "
| 1 | \n", "CO-ADD:0307303 | \n", "100.26 | \n", "C(C(O)=O)(=CN(CC)c1c2cc(c(N(CC3)CCN3C(=S)NC(=O... | \n", "1.0 | \n", "
| 2 | \n", "CO-ADD:0240410 | \n", "100.00 | \n", "C(C#N)(C1c2ccc(Br)s2)=C(N)N(C(CCCC3=O)=C13)c4c... | \n", "1.0 | \n", "
| 3 | \n", "CO-ADD:0242617 | \n", "56.74 | \n", "c(CN(CC1)CCN1C)(cc(Br)c(c23)cccn2)c3O | \n", "1.0 | \n", "
| 4 | \n", "CO-ADD:0237159 | \n", "100.66 | \n", "C[C@@H]1[C@H]2[C@H](O)[C@H]3[C@H](N(C)C)C(=C(C... | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 82753 | \n", "CO-ADD:0254726 | \n", "NaN | \n", "N12C(SC(C(C)C)=N1)=NC(O)=CC2=O | \n", "NaN | \n", "
| 82754 | \n", "CO-ADD:0255128 | \n", "NaN | \n", "S(=O)(=O)(c1ccc(cc1)C)N(Cc2ccccc2)c3ccccc3C(=O... | \n", "NaN | \n", "
| 82755 | \n", "CO-ADD:0254376 | \n", "NaN | \n", "N1(c2ccccc2)\\C(=N/c3ccccc3)\\S\\C(=C/c4c(cccc4Cl... | \n", "NaN | \n", "
| 82756 | \n", "CO-ADD:0252344 | \n", "NaN | \n", "C12(c3c(cccc3)C(=NN1c4ccccc4)c5ccc(cc5)Cl)SC(=... | \n", "NaN | \n", "
| 82757 | \n", "CO-ADD:0252267 | \n", "NaN | \n", "c1(C#N)c(CCC(C2)C(C)(C)C)c2sc1\\N=C\\c(cccn3)c3 | \n", "NaN | \n", "
82758 rows × 4 columns
\n", "| \n", " | COADD_ID | \n", "INHIB_AVE | \n", "SMILES | \n", "HIT | \n", "
|---|---|---|---|---|
| 0 | \n", "CO-ADD:0303753 | \n", "93.09 | \n", "N1(c2cccc(OC)c2)C(=O)NC(\\C(=C/c(ccc3[N+](=O)[O... | \n", "1.0 | \n", "
| 1 | \n", "CO-ADD:0307303 | \n", "100.26 | \n", "C(C(O)=O)(=CN(CC)c1c2cc(c(N(CC3)CCN3C(=S)NC(=O... | \n", "1.0 | \n", "
| 2 | \n", "CO-ADD:0240410 | \n", "100.00 | \n", "C(C#N)(C1c2ccc(Br)s2)=C(N)N(C(CCCC3=O)=C13)c4c... | \n", "1.0 | \n", "
| 3 | \n", "CO-ADD:0242617 | \n", "56.74 | \n", "c(CN(CC1)CCN1C)(cc(Br)c(c23)cccn2)c3O | \n", "1.0 | \n", "
| 4 | \n", "CO-ADD:0237159 | \n", "100.66 | \n", "C[C@@H]1[C@H]2[C@H](O)[C@H]3[C@H](N(C)C)C(=C(C... | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 82753 | \n", "CO-ADD:0254726 | \n", "0.00 | \n", "N12C(SC(C(C)C)=N1)=NC(O)=CC2=O | \n", "0.0 | \n", "
| 82754 | \n", "CO-ADD:0255128 | \n", "0.00 | \n", "S(=O)(=O)(c1ccc(cc1)C)N(Cc2ccccc2)c3ccccc3C(=O... | \n", "0.0 | \n", "
| 82755 | \n", "CO-ADD:0254376 | \n", "0.00 | \n", "N1(c2ccccc2)\\C(=N/c3ccccc3)\\S\\C(=C/c4c(cccc4Cl... | \n", "0.0 | \n", "
| 82756 | \n", "CO-ADD:0252344 | \n", "0.00 | \n", "C12(c3c(cccc3)C(=NN1c4ccccc4)c5ccc(cc5)Cl)SC(=... | \n", "0.0 | \n", "
| 82757 | \n", "CO-ADD:0252267 | \n", "0.00 | \n", "c1(C#N)c(CCC(C2)C(C)(C)C)c2sc1\\N=C\\c(cccn3)c3 | \n", "0.0 | \n", "
82080 rows × 4 columns
\n", "| \n", " | COADD_ID | \n", "INHIB_AVE | \n", "SMILES | \n", "HIT | \n", "question | \n", "answer | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "CO-ADD:0303753 | \n", "93.09 | \n", "N1(c2cccc(OC)c2)C(=O)NC(\\C(=C/c(ccc3[N+](=O)[O... | \n", "1.0 | \n", "Can N1(c2cccc(OC)c2)C(=O)NC(\\C(=C/c(ccc3[N+](=... | \n", "Yes | \n", "
| 1 | \n", "CO-ADD:0307303 | \n", "100.26 | \n", "C(C(O)=O)(=CN(CC)c1c2cc(c(N(CC3)CCN3C(=S)NC(=O... | \n", "1.0 | \n", "Does the proposed molecule C(C(O)=O)(=CN(CC)c1... | \n", "Yes | \n", "
| 2 | \n", "CO-ADD:0240410 | \n", "100.00 | \n", "C(C#N)(C1c2ccc(Br)s2)=C(N)N(C(CCCC3=O)=C13)c4c... | \n", "1.0 | \n", "Does the proposed molecule C(C#N)(C1c2ccc(Br)s... | \n", "Yes | \n", "
| 3 | \n", "CO-ADD:0242617 | \n", "56.74 | \n", "c(CN(CC1)CCN1C)(cc(Br)c(c23)cccn2)c3O | \n", "1.0 | \n", "Is Escherichia coli susceptible to treatment w... | \n", "Yes | \n", "
| 4 | \n", "CO-ADD:0237159 | \n", "100.66 | \n", "C[C@@H]1[C@H]2[C@H](O)[C@H]3[C@H](N(C)C)C(=C(C... | \n", "1.0 | \n", "Is C[C@@H]1[C@H]2[C@H](O)[C@H]3[C@H](N(C)C)C(=... | \n", "Yes | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 82753 | \n", "CO-ADD:0254726 | \n", "0.00 | \n", "N12C(SC(C(C)C)=N1)=NC(O)=CC2=O | \n", "0.0 | \n", "Is N12C(SC(C(C)C)=N1)=NC(O)=CC2=O capable of i... | \n", "No | \n", "
| 82754 | \n", "CO-ADD:0255128 | \n", "0.00 | \n", "S(=O)(=O)(c1ccc(cc1)C)N(Cc2ccccc2)c3ccccc3C(=O... | \n", "0.0 | \n", "Does the proposed molecule S(=O)(=O)(c1ccc(cc1... | \n", "No | \n", "
| 82755 | \n", "CO-ADD:0254376 | \n", "0.00 | \n", "N1(c2ccccc2)\\C(=N/c3ccccc3)\\S\\C(=C/c4c(cccc4Cl... | \n", "0.0 | \n", "Can N1(c2ccccc2)\\C(=N/c3ccccc3)\\S\\C(=C/c4c(ccc... | \n", "No | \n", "
| 82756 | \n", "CO-ADD:0252344 | \n", "0.00 | \n", "C12(c3c(cccc3)C(=NN1c4ccccc4)c5ccc(cc5)Cl)SC(=... | \n", "0.0 | \n", "Can C12(c3c(cccc3)C(=NN1c4ccccc4)c5ccc(cc5)Cl)... | \n", "No | \n", "
| 82757 | \n", "CO-ADD:0252267 | \n", "0.00 | \n", "c1(C#N)c(CCC(C2)C(C)(C)C)c2sc1\\N=C\\c(cccn3)c3 | \n", "0.0 | \n", "Does the proposed molecule c1(C#N)c(CCC(C2)C(C... | \n", "No | \n", "
82080 rows × 6 columns
\n", "