Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -45,7 +45,7 @@ import panel as pn
|
|
| 45 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 46 |
from tinydb import TinyDB, Query
|
| 47 |
|
| 48 |
-
|
| 49 |
from tqdm.auto import tqdm
|
| 50 |
|
| 51 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
|
@@ -156,7 +156,6 @@ display: inline-block !important;
|
|
| 156 |
footer {
|
| 157 |
visibility: hidden
|
| 158 |
}
|
| 159 |
-
|
| 160 |
"""
|
| 161 |
|
| 162 |
|
|
@@ -192,7 +191,11 @@ def rgb_to_hex(rgb):
|
|
| 192 |
def mol_to_pharm3d(mol, mode='html'):
|
| 193 |
if mol is None:
|
| 194 |
return
|
| 195 |
-
AllChem.Compute2DCoords(mol)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
feats = FEAT_FACTORY.GetFeaturesForMol(mol)
|
| 198 |
|
|
@@ -291,13 +294,23 @@ COLUMN_ALIASES = {
|
|
| 291 |
}
|
| 292 |
|
| 293 |
DRUG_SCRENN_CPI_OPTS = [
|
| 294 |
-
'
|
| 295 |
-
'
|
| 296 |
-
'
|
| 297 |
]
|
| 298 |
|
| 299 |
DRUG_SCRENN_CPA_OPTS = [
|
| 300 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
]
|
| 302 |
|
| 303 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
|
@@ -383,6 +396,13 @@ def max_tanimoto_similarity(smi, seen_smiles_with_fp):
|
|
| 383 |
return {'Max. Tanimoto Similarity': sims[idx], 'Max. Tanimoto Similarity Compound': compound}
|
| 384 |
|
| 385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
def max_sequence_identity(seq, seen_fastas):
|
| 387 |
if seq is None or seen_fastas is None or seen_fastas.empty:
|
| 388 |
return {'Max. Sequence Identity': 0, 'Max. Sequence Identity Target': None}
|
|
@@ -395,20 +415,12 @@ def max_sequence_identity(seq, seen_fastas):
|
|
| 395 |
target = id2
|
| 396 |
return {'Max. Sequence Identity': 1, 'Max. Sequence Identity Target': target}
|
| 397 |
|
| 398 |
-
|
| 399 |
-
aligner.mode = 'local'
|
| 400 |
max_iden = 0
|
| 401 |
target = None
|
| 402 |
for fasta in seen_fastas['X2'].values:
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
if identity == 1:
|
| 406 |
-
target = fasta
|
| 407 |
-
if 'ID2' in seen_fastas.columns:
|
| 408 |
-
id2 = seen_fastas.loc[seen_fastas['X2'] == fasta, 'ID2'].values[0]
|
| 409 |
-
if pd.notnull(id2) and id2 != '':
|
| 410 |
-
target = id2
|
| 411 |
-
return {'Max. Sequence Identity': 1, 'Max. Sequence Identity Target': target}
|
| 412 |
if identity > max_iden:
|
| 413 |
max_iden = identity
|
| 414 |
target = fasta
|
|
@@ -416,7 +428,10 @@ def max_sequence_identity(seq, seen_fastas):
|
|
| 416 |
id2 = seen_fastas.loc[seen_fastas['X2'] == fasta, 'ID2'].values[0]
|
| 417 |
if pd.notnull(id2) and id2 != '':
|
| 418 |
target = id2
|
|
|
|
|
|
|
| 419 |
|
|
|
|
| 420 |
return {'Max. Sequence Identity': max_iden, 'Max. Sequence Identity Target': target}
|
| 421 |
|
| 422 |
|
|
@@ -846,12 +861,12 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
|
|
| 846 |
orig_df['Target Family'] = None
|
| 847 |
if orig_df['Target Family'].isna().any():
|
| 848 |
orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
|
| 849 |
-
orig_df.loc[orig_df['Target Family'].isna(), 'X2'].
|
| 850 |
)
|
| 851 |
orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
|
| 852 |
detect_family.cache_clear()
|
| 853 |
|
| 854 |
-
orig_df['X1^'] = orig_df['X1'].
|
| 855 |
|
| 856 |
orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
|
| 857 |
annotated_df = orig_df[~orig_df['Y'].isna()].copy()
|
|
@@ -952,66 +967,88 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
|
|
| 952 |
df_list = [prediction_df, annotated_df]
|
| 953 |
prediction_df = pd.concat([df for df in df_list if not df.empty], ignore_index=True)
|
| 954 |
|
| 955 |
-
|
| 956 |
-
|
| 957 |
-
|
| 958 |
-
family_smiles_df['FP'] = family_smiles_df['X1'].parallel_apply(smiles_to_ecfp)
|
| 959 |
-
|
| 960 |
-
@cache
|
| 961 |
-
def max_sim(smi):
|
| 962 |
-
return max_tanimoto_similarity(smi, family_smiles_df)['Max. Tanimoto Similarity']
|
| 963 |
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
max_sim.cache_clear()
|
| 969 |
|
| 970 |
-
if "
|
| 971 |
x2 = prediction_df['X2'].iloc[0]
|
| 972 |
pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
|
| 973 |
-
pos_compounds_df['FP'] = pos_compounds_df['X1'].apply(smiles_to_ecfp)
|
| 974 |
|
| 975 |
@cache
|
| 976 |
def max_sim(smiles):
|
| 977 |
return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
|
| 978 |
|
| 979 |
-
prediction_df[[
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
)
|
|
|
|
| 983 |
max_sim.cache_clear()
|
| 984 |
|
| 985 |
-
if "
|
| 986 |
x2 = prediction_df['X2'].iloc[0]
|
| 987 |
-
prediction_df['X1^'] = prediction_df['X1'].
|
| 988 |
|
| 989 |
@cache
|
| 990 |
-
def
|
| 991 |
-
|
| 992 |
-
return max_sequence_identity(x2, seen_fastas=
|
| 993 |
|
| 994 |
-
prediction_df[['Max. Sequence Identity to Known
|
| 995 |
-
'Max.
|
| 996 |
-
prediction_df['X1^'].
|
| 997 |
)
|
| 998 |
prediction_df.drop(['X1^'], axis=1, inplace=True)
|
| 999 |
|
| 1000 |
-
|
| 1001 |
|
| 1002 |
-
|
| 1003 |
-
|
| 1004 |
-
|
|
|
|
| 1005 |
|
| 1006 |
-
|
| 1007 |
-
|
| 1008 |
-
|
|
|
|
| 1009 |
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
| 1014 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1015 |
|
| 1016 |
prediction_df.drop(['N'], axis=1).to_csv(predictions_file, index=False, na_rep='')
|
| 1017 |
status = "COMPLETED"
|
|
@@ -1063,10 +1100,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
| 1063 |
|
| 1064 |
if 'X1' in df.columns:
|
| 1065 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
| 1066 |
-
df['Compound'] = df['X1'].
|
| 1067 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
| 1068 |
-
df['Scaffold'] = df['Compound'].
|
| 1069 |
-
df['Scaffold SMILES'] = df['Scaffold'].
|
| 1070 |
|
| 1071 |
if task == 'Compound-Protein Binding Affinity':
|
| 1072 |
# Convert Y^ from pIC50 to IC50
|
|
@@ -1114,17 +1151,17 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
| 1114 |
columns_unique = None
|
| 1115 |
|
| 1116 |
if 'Exclude Pharmacophore 3D' not in opts:
|
| 1117 |
-
df_html['Pharmacophore'] = df_html['Compound'].
|
| 1118 |
lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)
|
| 1119 |
|
| 1120 |
if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
|
| 1121 |
-
df_html['Compound'] = df_html['Compound'].
|
| 1122 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 1123 |
else:
|
| 1124 |
df_html.drop(['Compound'], axis=1, inplace=True)
|
| 1125 |
|
| 1126 |
if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
|
| 1127 |
-
df_html['Scaffold'] = df_html['Scaffold'].
|
| 1128 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 1129 |
else:
|
| 1130 |
df_html.drop(['Scaffold'], axis=1, inplace=True)
|
|
@@ -1138,15 +1175,20 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
| 1138 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
| 1139 |
job = 'Target Protein Identification'
|
| 1140 |
category = 'Target Family'
|
| 1141 |
-
columns_unique = df_html.columns.isin(
|
| 1142 |
-
|
|
|
|
|
|
|
|
|
|
| 1143 |
|
| 1144 |
elif n_compound >= 2 and n_protein == 1:
|
| 1145 |
unique_entity = 'Target of Interest'
|
| 1146 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
| 1147 |
job = 'Drug Hit Screening'
|
| 1148 |
category = 'Scaffold SMILES'
|
| 1149 |
-
columns_unique = df_html.columns.isin(
|
|
|
|
|
|
|
| 1150 |
|
| 1151 |
elif 'Y^' in df_html.columns:
|
| 1152 |
job = 'Interaction Pair Inference'
|
|
@@ -1154,7 +1196,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
| 1154 |
df_html.rename(columns=column_aliases, inplace=True)
|
| 1155 |
df_html.index.name = 'Index'
|
| 1156 |
if 'Target FASTA' in df_html.columns:
|
| 1157 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].
|
| 1158 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 1159 |
|
| 1160 |
num_cols = df_html.select_dtypes('number').columns
|
|
@@ -1172,7 +1214,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
| 1172 |
if 'Target ID' in df_html.columns:
|
| 1173 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
| 1174 |
if 'Target FASTA' in df_html.columns:
|
| 1175 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].
|
| 1176 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 1177 |
if 'Scaffold SMILES' in df_html.columns:
|
| 1178 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
|
@@ -1248,9 +1290,9 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
| 1248 |
|
| 1249 |
report_table = pn.widgets.Tabulator(
|
| 1250 |
df_html, formatters=formatters,
|
| 1251 |
-
frozen_columns=[
|
| 1252 |
-
'Target ID', 'Compound ID', 'Compound'
|
| 1253 |
-
]
|
| 1254 |
disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
|
| 1255 |
|
| 1256 |
for i, col in enumerate(num_cols):
|
|
@@ -1279,71 +1321,15 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
| 1279 |
# Remove keys with empty values
|
| 1280 |
pie_charts = {k: v for k, v in pie_charts.items() if any(v)}
|
| 1281 |
|
| 1282 |
-
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
}
|
| 1292 |
-
|
| 1293 |
-
.tabulator-cell:hover {
|
| 1294 |
-
z-index: 1000 !important;
|
| 1295 |
-
}
|
| 1296 |
-
|
| 1297 |
-
.tabulator-cell.tabulator-frozen:hover {
|
| 1298 |
-
z-index: 1000 !important;
|
| 1299 |
-
}
|
| 1300 |
-
|
| 1301 |
-
.image-zoom-viewer {
|
| 1302 |
-
display: inline-block;
|
| 1303 |
-
overflow: visible;
|
| 1304 |
-
z-index: 1000;
|
| 1305 |
-
}
|
| 1306 |
-
|
| 1307 |
-
.image-zoom-viewer::after {
|
| 1308 |
-
content: "";
|
| 1309 |
-
top: 0;
|
| 1310 |
-
left: 0;
|
| 1311 |
-
width: 100%;
|
| 1312 |
-
height: 100%;
|
| 1313 |
-
pointer-events: none;
|
| 1314 |
-
}
|
| 1315 |
-
|
| 1316 |
-
.image-zoom-viewer:hover::after {
|
| 1317 |
-
pointer-events: all;
|
| 1318 |
-
}
|
| 1319 |
-
|
| 1320 |
-
/* When hovering over the container, scale its child (the SVG) */
|
| 1321 |
-
.tabulator-cell:hover .image-zoom-viewer svg {
|
| 1322 |
-
padding: 3px;
|
| 1323 |
-
position: absolute;
|
| 1324 |
-
background-color: rgba(250, 250, 250, 0.854);
|
| 1325 |
-
box-shadow: 0 0 10px rgba(0, 0, 0, 0.618);
|
| 1326 |
-
border-radius: 3px;
|
| 1327 |
-
transform: scale(3); /* Scale up the SVG */
|
| 1328 |
-
transition: transform 0.3s ease;
|
| 1329 |
-
pointer-events: none; /* Prevents the SVG from blocking mouse interactions */
|
| 1330 |
-
z-index: 1000;
|
| 1331 |
-
}
|
| 1332 |
-
|
| 1333 |
-
.image-zoom-viewer svg {
|
| 1334 |
-
display: block; /* SVG is a block-level element for proper scaling */
|
| 1335 |
-
z-index: 1000;
|
| 1336 |
-
}
|
| 1337 |
-
|
| 1338 |
-
.image-zoom-viewer:hover {
|
| 1339 |
-
z-index: 1000;
|
| 1340 |
-
}
|
| 1341 |
-
"""
|
| 1342 |
-
|
| 1343 |
-
pn.extension(raw_css=[pn_css], js_files={
|
| 1344 |
-
'3Dmol': './3Dmol-min.js',
|
| 1345 |
-
'panel_custom': './panel.js',
|
| 1346 |
-
})
|
| 1347 |
|
| 1348 |
template = pn.template.VanillaTemplate(
|
| 1349 |
title=f'DeepSEQreen {job} Report',
|
|
@@ -1359,7 +1345,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
| 1359 |
if unique_df is not None:
|
| 1360 |
unique_table = pn.widgets.Tabulator(unique_df, formatters=formatters, sizing_mode='stretch_width',
|
| 1361 |
show_index=False, disabled=True,
|
| 1362 |
-
frozen_columns=['Compound ID', 'Compound', '
|
| 1363 |
# if pie_charts:
|
| 1364 |
# unique_table.width = 640
|
| 1365 |
stats_pane.append(pn.Column(f'### {unique_entity}', unique_table))
|
|
@@ -1451,11 +1437,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
|
|
| 1451 |
df_report = df.copy()
|
| 1452 |
try:
|
| 1453 |
for filter_name in filter_list:
|
| 1454 |
-
df_report[filter_name] = df_report['Compound'].
|
| 1455 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
| 1456 |
|
| 1457 |
for score_name in score_list:
|
| 1458 |
-
df_report[score_name] = df_report['Compound'].
|
| 1459 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
| 1460 |
|
| 1461 |
return (create_html_report(df_report, file=None, task=task), df_report,
|
|
@@ -1667,16 +1653,25 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1667 |
label='OR Upload Your Own Library', variant='primary')
|
| 1668 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
| 1669 |
|
| 1670 |
-
|
| 1671 |
-
|
| 1672 |
-
|
| 1673 |
-
|
| 1674 |
-
|
| 1675 |
-
|
| 1676 |
-
|
| 1677 |
-
|
| 1678 |
-
|
| 1679 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1680 |
with gr.Row():
|
| 1681 |
with gr.Column():
|
| 1682 |
drug_screen_email = gr.Textbox(
|
|
@@ -1777,14 +1772,24 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1777 |
target_library_upload_btn = gr.UploadButton(
|
| 1778 |
label='OR Upload Your Own Library', variant='primary')
|
| 1779 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
| 1780 |
-
|
| 1781 |
-
|
| 1782 |
-
|
| 1783 |
-
|
| 1784 |
-
|
| 1785 |
-
|
| 1786 |
-
|
| 1787 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1788 |
with gr.Row():
|
| 1789 |
with gr.Column():
|
| 1790 |
target_identify_email = gr.Textbox(
|
|
@@ -1823,9 +1828,11 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1823 |
label='Step 1. Select Pair Input Type and Input',
|
| 1824 |
value='Upload a CSV file containing paired compound-protein data')
|
| 1825 |
with gr.Column() as pair_upload:
|
| 1826 |
-
gr.File(
|
| 1827 |
-
|
| 1828 |
-
|
|
|
|
|
|
|
| 1829 |
with gr.Row():
|
| 1830 |
infer_csv_prompt = gr.Button(
|
| 1831 |
value="Upload Your Own Dataset Below",
|
|
@@ -1833,27 +1840,50 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1833 |
with gr.Column():
|
| 1834 |
infer_pair = gr.File(
|
| 1835 |
label='Upload CSV File Containing Paired Records',
|
| 1836 |
-
file_count="single",
|
|
|
|
|
|
|
|
|
|
| 1837 |
with gr.Column(visible=False) as pair_generate:
|
| 1838 |
with gr.Row():
|
| 1839 |
-
gr.File(
|
| 1840 |
-
|
| 1841 |
-
|
| 1842 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1843 |
with gr.Row():
|
| 1844 |
-
gr.File(
|
| 1845 |
-
|
| 1846 |
-
|
| 1847 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1848 |
with gr.Row():
|
| 1849 |
infer_library_prompt = gr.Button(
|
| 1850 |
value="Upload Your Own Libraries Below",
|
| 1851 |
-
visible=False,
|
|
|
|
|
|
|
| 1852 |
with gr.Row():
|
| 1853 |
-
infer_drug = gr.File(
|
| 1854 |
-
|
| 1855 |
-
|
| 1856 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1857 |
|
| 1858 |
with gr.Row():
|
| 1859 |
with gr.Column(min_width=200):
|
|
@@ -1862,10 +1892,12 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1862 |
"If the proteins in the target library of interest "
|
| 1863 |
"all belong to the same protein family, manually selecting the family is supported."
|
| 1864 |
)
|
|
|
|
| 1865 |
pair_infer_target_family = gr.Dropdown(
|
| 1866 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
| 1867 |
value='General',
|
| 1868 |
-
label='Step 2. Select Target Family (Optional)'
|
|
|
|
| 1869 |
|
| 1870 |
with gr.Column(min_width=200):
|
| 1871 |
HelpTip(
|
|
@@ -1877,15 +1909,17 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1877 |
pair_infer_task = gr.Dropdown(
|
| 1878 |
list(TASK_MAP.keys()),
|
| 1879 |
label='Step 3. Select a Prediction Task',
|
| 1880 |
-
value='Compound-Protein Interaction'
|
|
|
|
| 1881 |
|
| 1882 |
with gr.Column(min_width=200):
|
| 1883 |
-
HelpTip(
|
| 1884 |
-
|
| 1885 |
-
|
| 1886 |
pair_infer_preset = gr.Dropdown(
|
| 1887 |
list(PRESET_MAP.keys()),
|
| 1888 |
-
label='Step 4. Select a Preset Model'
|
|
|
|
| 1889 |
# infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
| 1890 |
# variant='primary')
|
| 1891 |
pair_infer_opts = gr.CheckboxGroup(visible=False)
|
|
@@ -2093,7 +2127,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 2093 |
alignment = aligner.align(processed_fasta, query)
|
| 2094 |
return alignment.score / max(len(processed_fasta), len(query))
|
| 2095 |
|
| 2096 |
-
alignment_df['score'] = alignment_df['X2'].
|
| 2097 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
| 2098 |
family = str(row['Target Family']).title()
|
| 2099 |
return gr.Dropdown(value=family,
|
|
@@ -2119,6 +2153,12 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 2119 |
show_progress='hidden'
|
| 2120 |
)
|
| 2121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2122 |
|
| 2123 |
def example_fill(input_type):
|
| 2124 |
return {target_id: 'Q16539',
|
|
@@ -2419,13 +2459,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 2419 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
| 2420 |
validate_columns(infer_df, ['X1', 'X2'])
|
| 2421 |
|
| 2422 |
-
infer_df['X1_ERR'] = infer_df['X1'].
|
| 2423 |
validate_seq_str, regex=SMILES_PAT)
|
| 2424 |
if not infer_df['X1_ERR'].isna().all():
|
| 2425 |
raise ValueError(
|
| 2426 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
| 2427 |
|
| 2428 |
-
infer_df['X2_ERR'] = infer_df['X2'].
|
| 2429 |
validate_seq_str, regex=FASTA_PAT)
|
| 2430 |
if not infer_df['X2_ERR'].isna().all():
|
| 2431 |
raise ValueError(
|
|
@@ -2757,7 +2797,7 @@ if __name__ == "__main__":
|
|
| 2757 |
db.update({'status': 'FAILED'}, Job.id == job['id'])
|
| 2758 |
|
| 2759 |
scheduler = BackgroundScheduler()
|
| 2760 |
-
scheduler.add_job(check_expiry, 'interval', hours=1)
|
| 2761 |
scheduler.start()
|
| 2762 |
|
| 2763 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|
|
|
|
| 45 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 46 |
from tinydb import TinyDB, Query
|
| 47 |
|
| 48 |
+
import swifter
|
| 49 |
from tqdm.auto import tqdm
|
| 50 |
|
| 51 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
|
|
|
| 156 |
footer {
|
| 157 |
visibility: hidden
|
| 158 |
}
|
|
|
|
| 159 |
"""
|
| 160 |
|
| 161 |
|
|
|
|
| 191 |
def mol_to_pharm3d(mol, mode='html'):
|
| 192 |
if mol is None:
|
| 193 |
return
|
| 194 |
+
# AllChem.Compute2DCoords(mol)
|
| 195 |
+
mol = Chem.AddHs(mol)
|
| 196 |
+
params = AllChem.ETKDGv3()
|
| 197 |
+
params.randomSeed = 0xf00d # for reproducibility
|
| 198 |
+
AllChem.EmbedMolecule(mol, params)
|
| 199 |
|
| 200 |
feats = FEAT_FACTORY.GetFeaturesForMol(mol)
|
| 201 |
|
|
|
|
| 294 |
}
|
| 295 |
|
| 296 |
DRUG_SCRENN_CPI_OPTS = [
|
| 297 |
+
'Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set',
|
| 298 |
+
'Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target',
|
| 299 |
+
'Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound',
|
| 300 |
]
|
| 301 |
|
| 302 |
DRUG_SCRENN_CPA_OPTS = [
|
| 303 |
+
'Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set',
|
| 304 |
+
]
|
| 305 |
+
|
| 306 |
+
TARGET_IDENTIFY_CPI_OPTS = [
|
| 307 |
+
'Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set',
|
| 308 |
+
'Calculate Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound',
|
| 309 |
+
'Calculate Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target',
|
| 310 |
+
]
|
| 311 |
+
|
| 312 |
+
TARGET_IDENTIFY_CPA_OPTS = [
|
| 313 |
+
'Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set',
|
| 314 |
]
|
| 315 |
|
| 316 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
|
|
|
| 396 |
return {'Max. Tanimoto Similarity': sims[idx], 'Max. Tanimoto Similarity Compound': compound}
|
| 397 |
|
| 398 |
|
| 399 |
+
def alignment_score(query, target):
|
| 400 |
+
aligner = PairwiseAligner()
|
| 401 |
+
aligner.mode = 'local'
|
| 402 |
+
alignment = aligner.align(query, target)
|
| 403 |
+
return alignment.score / max(len(query), len(target))
|
| 404 |
+
|
| 405 |
+
|
| 406 |
def max_sequence_identity(seq, seen_fastas):
|
| 407 |
if seq is None or seen_fastas is None or seen_fastas.empty:
|
| 408 |
return {'Max. Sequence Identity': 0, 'Max. Sequence Identity Target': None}
|
|
|
|
| 415 |
target = id2
|
| 416 |
return {'Max. Sequence Identity': 1, 'Max. Sequence Identity Target': target}
|
| 417 |
|
| 418 |
+
cached_alignment_score = cache(alignment_score)
|
|
|
|
| 419 |
max_iden = 0
|
| 420 |
target = None
|
| 421 |
for fasta in seen_fastas['X2'].values:
|
| 422 |
+
identity = cached_alignment_score(seq, fasta)
|
| 423 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
if identity > max_iden:
|
| 425 |
max_iden = identity
|
| 426 |
target = fasta
|
|
|
|
| 428 |
id2 = seen_fastas.loc[seen_fastas['X2'] == fasta, 'ID2'].values[0]
|
| 429 |
if pd.notnull(id2) and id2 != '':
|
| 430 |
target = id2
|
| 431 |
+
if max_iden == 1:
|
| 432 |
+
break
|
| 433 |
|
| 434 |
+
cached_alignment_score.cache_clear()
|
| 435 |
return {'Max. Sequence Identity': max_iden, 'Max. Sequence Identity Target': target}
|
| 436 |
|
| 437 |
|
|
|
|
| 861 |
orig_df['Target Family'] = None
|
| 862 |
if orig_df['Target Family'].isna().any():
|
| 863 |
orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
|
| 864 |
+
orig_df.loc[orig_df['Target Family'].isna(), 'X2'].swifter.apply(detect_family)
|
| 865 |
)
|
| 866 |
orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
|
| 867 |
detect_family.cache_clear()
|
| 868 |
|
| 869 |
+
orig_df['X1^'] = orig_df['X1'].swifter.apply(rdkit_canonicalize)
|
| 870 |
|
| 871 |
orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
|
| 872 |
annotated_df = orig_df[~orig_df['Y'].isna()].copy()
|
|
|
|
| 967 |
df_list = [prediction_df, annotated_df]
|
| 968 |
prediction_df = pd.concat([df for df in df_list if not df.empty], ignore_index=True)
|
| 969 |
|
| 970 |
+
# Advanced options for Drug Hit Screening
|
| 971 |
+
if "Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set" in opts:
|
| 972 |
+
x2 = prediction_df['X2'].iloc[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 973 |
|
| 974 |
+
prediction_df[[
|
| 975 |
+
'Max. Sequence Identity to Training Targets',
|
| 976 |
+
'Max. Id. Training Target'
|
| 977 |
+
]] = pd.Series(max_sequence_identity(x2, df_training))
|
|
|
|
| 978 |
|
| 979 |
+
if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
|
| 980 |
x2 = prediction_df['X2'].iloc[0]
|
| 981 |
pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
|
| 982 |
+
pos_compounds_df['FP'] = pos_compounds_df['X1'].swifter.apply(smiles_to_ecfp)
|
| 983 |
|
| 984 |
@cache
|
| 985 |
def max_sim(smiles):
|
| 986 |
return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
|
| 987 |
|
| 988 |
+
prediction_df[[
|
| 989 |
+
'Max. Tanimoto Similarity to Known Ligands',
|
| 990 |
+
'Max. Sim. Ligand'
|
| 991 |
+
]] = prediction_df['X1'].swifter.apply(max_sim).apply(pd.Series)
|
| 992 |
+
|
| 993 |
max_sim.cache_clear()
|
| 994 |
|
| 995 |
+
if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
|
| 996 |
x2 = prediction_df['X2'].iloc[0]
|
| 997 |
+
prediction_df['X1^'] = prediction_df['X1'].swifter.apply(rdkit_canonicalize)
|
| 998 |
|
| 999 |
@cache
|
| 1000 |
+
def max_id(compound):
|
| 1001 |
+
pos_targets_df = df_training.loc[df_training['X1'] == compound]
|
| 1002 |
+
return max_sequence_identity(x2, seen_fastas=pos_targets_df)
|
| 1003 |
|
| 1004 |
+
prediction_df[['Max. Sequence Identity to Known Targets of Hit Compound',
|
| 1005 |
+
'Max. Id. Target']] = (
|
| 1006 |
+
prediction_df['X1^'].swifter.apply(max_id).apply(pd.Series)
|
| 1007 |
)
|
| 1008 |
prediction_df.drop(['X1^'], axis=1, inplace=True)
|
| 1009 |
|
| 1010 |
+
max_id.cache_clear()
|
| 1011 |
|
| 1012 |
+
# Advanced options for Target Protein Identification
|
| 1013 |
+
if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
|
| 1014 |
+
x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
|
| 1015 |
+
prediction_df['FP'] = prediction_df['X1'].swifter.apply(smiles_to_ecfp)
|
| 1016 |
|
| 1017 |
+
prediction_df[[
|
| 1018 |
+
'Max. Tanimoto Similarity to Training Compounds',
|
| 1019 |
+
'Max. Sim. Training Compound'
|
| 1020 |
+
]] = pd.Series(max_tanimoto_similarity(x1, df_training))
|
| 1021 |
|
| 1022 |
+
if "Calculate Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound" in opts:
|
| 1023 |
+
x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
|
| 1024 |
+
pos_targets_df = df_training.loc[(df_training['X1'] == x1) & (df_training['Y'] == 1)].copy()
|
| 1025 |
+
|
| 1026 |
+
@cache
|
| 1027 |
+
def max_id(fasta):
|
| 1028 |
+
return max_sequence_identity(fasta, seen_fastas=pos_targets_df)
|
| 1029 |
+
|
| 1030 |
+
prediction_df[[
|
| 1031 |
+
'Max. Sequence Identity to Known Targets of Input Compound',
|
| 1032 |
+
'Max. Id. Target'
|
| 1033 |
+
]] = prediction_df['X2'].swifter.apply(max_id).apply(pd.Series)
|
| 1034 |
+
|
| 1035 |
+
max_id.cache_clear()
|
| 1036 |
+
|
| 1037 |
+
if "Calculate Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target" in opts:
|
| 1038 |
+
x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
|
| 1039 |
+
|
| 1040 |
+
@cache
|
| 1041 |
+
def max_sim(fasta):
|
| 1042 |
+
pos_targets_df = df_training.loc[(df_training['X2'] == fasta) & (df_training['Y'] == 1)].copy()
|
| 1043 |
+
pos_targets_df['FP'] = pos_targets_df['X1'].swifter.apply(smiles_to_ecfp)
|
| 1044 |
+
return max_tanimoto_similarity(x1, seen_smiles_with_fp=pos_targets_df)
|
| 1045 |
+
|
| 1046 |
+
prediction_df[[
|
| 1047 |
+
'Max. Tanimoto Similarity to Known Ligands of Identified Target',
|
| 1048 |
+
'Max. Sim. Ligand'
|
| 1049 |
+
]] = prediction_df['X2'].swifter.apply(max_sim).apply(pd.Series)
|
| 1050 |
+
|
| 1051 |
+
max_sim.cache_clear()
|
| 1052 |
|
| 1053 |
prediction_df.drop(['N'], axis=1).to_csv(predictions_file, index=False, na_rep='')
|
| 1054 |
status = "COMPLETED"
|
|
|
|
| 1100 |
|
| 1101 |
if 'X1' in df.columns:
|
| 1102 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
| 1103 |
+
df['Compound'] = df['X1'].swifter.apply(
|
| 1104 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
| 1105 |
+
df['Scaffold'] = df['Compound'].swifter.apply(MurckoScaffold.GetScaffoldForMol)
|
| 1106 |
+
df['Scaffold SMILES'] = df['Scaffold'].swifter.apply(lambda x: Chem.MolToSmiles(x))
|
| 1107 |
|
| 1108 |
if task == 'Compound-Protein Binding Affinity':
|
| 1109 |
# Convert Y^ from pIC50 to IC50
|
|
|
|
| 1151 |
columns_unique = None
|
| 1152 |
|
| 1153 |
if 'Exclude Pharmacophore 3D' not in opts:
|
| 1154 |
+
df_html['Pharmacophore'] = df_html['Compound'].swifter.apply(
|
| 1155 |
lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)
|
| 1156 |
|
| 1157 |
if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
|
| 1158 |
+
df_html['Compound'] = df_html['Compound'].swifter.apply(
|
| 1159 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 1160 |
else:
|
| 1161 |
df_html.drop(['Compound'], axis=1, inplace=True)
|
| 1162 |
|
| 1163 |
if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
|
| 1164 |
+
df_html['Scaffold'] = df_html['Scaffold'].swifter.apply(
|
| 1165 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 1166 |
else:
|
| 1167 |
df_html.drop(['Scaffold'], axis=1, inplace=True)
|
|
|
|
| 1175 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
| 1176 |
job = 'Target Protein Identification'
|
| 1177 |
category = 'Target Family'
|
| 1178 |
+
columns_unique = df_html.columns.isin(
|
| 1179 |
+
['ID1', 'Pharmacophore', 'Compound', 'Scaffold', 'X1', 'Scaffold SMILES',
|
| 1180 |
+
'Max. Tanimoto Similarity to Training Compounds', 'Max. Sim. Training Compound']
|
| 1181 |
+
+ list(FILTER_MAP.keys()) + list(SCORE_MAP.keys())
|
| 1182 |
+
)
|
| 1183 |
|
| 1184 |
elif n_compound >= 2 and n_protein == 1:
|
| 1185 |
unique_entity = 'Target of Interest'
|
| 1186 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
| 1187 |
job = 'Drug Hit Screening'
|
| 1188 |
category = 'Scaffold SMILES'
|
| 1189 |
+
columns_unique = df_html.columns.isin(
|
| 1190 |
+
['X2', 'ID2', 'Max. Sequence Identity to Training Targets', 'Max. Id. Training Target']
|
| 1191 |
+
)
|
| 1192 |
|
| 1193 |
elif 'Y^' in df_html.columns:
|
| 1194 |
job = 'Interaction Pair Inference'
|
|
|
|
| 1196 |
df_html.rename(columns=column_aliases, inplace=True)
|
| 1197 |
df_html.index.name = 'Index'
|
| 1198 |
if 'Target FASTA' in df_html.columns:
|
| 1199 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
|
| 1200 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 1201 |
|
| 1202 |
num_cols = df_html.select_dtypes('number').columns
|
|
|
|
| 1214 |
if 'Target ID' in df_html.columns:
|
| 1215 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
| 1216 |
if 'Target FASTA' in df_html.columns:
|
| 1217 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
|
| 1218 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 1219 |
if 'Scaffold SMILES' in df_html.columns:
|
| 1220 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
|
|
|
| 1290 |
|
| 1291 |
report_table = pn.widgets.Tabulator(
|
| 1292 |
df_html, formatters=formatters,
|
| 1293 |
+
frozen_columns=[
|
| 1294 |
+
'Index', 'Target ID', 'Compound ID', 'Compound'
|
| 1295 |
+
],
|
| 1296 |
disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
|
| 1297 |
|
| 1298 |
for i, col in enumerate(num_cols):
|
|
|
|
| 1321 |
# Remove keys with empty values
|
| 1322 |
pie_charts = {k: v for k, v in pie_charts.items() if any(v)}
|
| 1323 |
|
| 1324 |
+
pn.extension(
|
| 1325 |
+
css_files=[
|
| 1326 |
+
'./static/panel.css',
|
| 1327 |
+
],
|
| 1328 |
+
js_files={
|
| 1329 |
+
'3Dmol': './static/3Dmol-min.js',
|
| 1330 |
+
'panel_custom': './static/panel.js'
|
| 1331 |
+
}
|
| 1332 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1333 |
|
| 1334 |
template = pn.template.VanillaTemplate(
|
| 1335 |
title=f'DeepSEQreen {job} Report',
|
|
|
|
| 1345 |
if unique_df is not None:
|
| 1346 |
unique_table = pn.widgets.Tabulator(unique_df, formatters=formatters, sizing_mode='stretch_width',
|
| 1347 |
show_index=False, disabled=True,
|
| 1348 |
+
frozen_columns=['Compound ID', 'Compound', 'Target ID'])
|
| 1349 |
# if pie_charts:
|
| 1350 |
# unique_table.width = 640
|
| 1351 |
stats_pane.append(pn.Column(f'### {unique_entity}', unique_table))
|
|
|
|
| 1437 |
df_report = df.copy()
|
| 1438 |
try:
|
| 1439 |
for filter_name in filter_list:
|
| 1440 |
+
df_report[filter_name] = df_report['Compound'].swifter.apply(
|
| 1441 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
| 1442 |
|
| 1443 |
for score_name in score_list:
|
| 1444 |
+
df_report[score_name] = df_report['Compound'].swifter.apply(
|
| 1445 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
| 1446 |
|
| 1447 |
return (create_html_report(df_report, file=None, task=task), df_report,
|
|
|
|
| 1653 |
label='OR Upload Your Own Library', variant='primary')
|
| 1654 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
| 1655 |
|
| 1656 |
+
with gr.Column():
|
| 1657 |
+
HelpTip("""
|
| 1658 |
+
<b>Max. Sequence Identity between the Input Target and Targets in the Training Set</b>:
|
| 1659 |
+
this serves as an indicator of the prediction applicability/reliability —
|
| 1660 |
+
higher similarities indicate more reliable predictions (preferably > 0.85).<br>
|
| 1661 |
+
<b>Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target</b>:
|
| 1662 |
+
this serves as an indicator of both the confidence level and novelty of the predicted hit compounds —
|
| 1663 |
+
higher similarities suggest greater confidence, while lower Tanimoto similarities may indicate the novelty
|
| 1664 |
+
of the identified hit compounds compared to known drugs or true interacting compounds of the input target.<br>
|
| 1665 |
+
<b>Max. Sequence Identity between the Input Target and Known Targets of Hit Compound</b>:
|
| 1666 |
+
this serves as an additional indicator of the confidence level of the predicted hit compounds —
|
| 1667 |
+
higher identities usually lead to greater confidence in the predictions.<br>
|
| 1668 |
+
""")
|
| 1669 |
+
drug_screen_opts = gr.CheckboxGroup(
|
| 1670 |
+
label="Step 6. Select Additional Options",
|
| 1671 |
+
choices=DRUG_SCRENN_CPI_OPTS,
|
| 1672 |
+
info="Experimental features - may increase the job computation time. "
|
| 1673 |
+
"See the Help Tip on the right or the Documentation for detailed explanation."
|
| 1674 |
+
)
|
| 1675 |
with gr.Row():
|
| 1676 |
with gr.Column():
|
| 1677 |
drug_screen_email = gr.Textbox(
|
|
|
|
| 1772 |
target_library_upload_btn = gr.UploadButton(
|
| 1773 |
label='OR Upload Your Own Library', variant='primary')
|
| 1774 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
| 1775 |
+
with gr.Column():
|
| 1776 |
+
HelpTip("""
|
| 1777 |
+
<b>Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set</b>:
|
| 1778 |
+
this serves as an indicator of prediction applicability and reliability —
|
| 1779 |
+
higher similarities indicate more reliable predictions (ideally > 0.85).<br>
|
| 1780 |
+
<b>Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound</b>:
|
| 1781 |
+
this serves as an indicator of prediction confidence for the potential targets —
|
| 1782 |
+
higher similarities typically imply higher confidence levels.<br>
|
| 1783 |
+
<b>Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target</b>:
|
| 1784 |
+
this serves as an additional indicator of the confidence level in the predicted potential targets —
|
| 1785 |
+
higher similarities usually correspond to greater prediction confidence.<br>
|
| 1786 |
+
""")
|
| 1787 |
+
target_identify_opts = gr.CheckboxGroup(
|
| 1788 |
+
choices=TARGET_IDENTIFY_CPI_OPTS,
|
| 1789 |
+
label='Step 6. Select Additional Options',
|
| 1790 |
+
info="Experimental features - may increase the job computation time. "
|
| 1791 |
+
"See the Help Tip on the right or the Documentation for detailed explanation."
|
| 1792 |
+
)
|
| 1793 |
with gr.Row():
|
| 1794 |
with gr.Column():
|
| 1795 |
target_identify_email = gr.Textbox(
|
|
|
|
| 1828 |
label='Step 1. Select Pair Input Type and Input',
|
| 1829 |
value='Upload a CSV file containing paired compound-protein data')
|
| 1830 |
with gr.Column() as pair_upload:
|
| 1831 |
+
gr.File(
|
| 1832 |
+
label="Example CSV dataset",
|
| 1833 |
+
value="data/examples/interaction_pair_inference.csv",
|
| 1834 |
+
interactive=False
|
| 1835 |
+
)
|
| 1836 |
with gr.Row():
|
| 1837 |
infer_csv_prompt = gr.Button(
|
| 1838 |
value="Upload Your Own Dataset Below",
|
|
|
|
| 1840 |
with gr.Column():
|
| 1841 |
infer_pair = gr.File(
|
| 1842 |
label='Upload CSV File Containing Paired Records',
|
| 1843 |
+
file_count="single",
|
| 1844 |
+
type='filepath',
|
| 1845 |
+
visible=True
|
| 1846 |
+
)
|
| 1847 |
with gr.Column(visible=False) as pair_generate:
|
| 1848 |
with gr.Row():
|
| 1849 |
+
gr.File(
|
| 1850 |
+
label='Example SDF compound library',
|
| 1851 |
+
value='data/examples/compound_library.sdf',
|
| 1852 |
+
interactive=False
|
| 1853 |
+
)
|
| 1854 |
+
gr.File(
|
| 1855 |
+
label='Example FASTA target library',
|
| 1856 |
+
value='data/examples/target_library.fasta',
|
| 1857 |
+
interactive=False
|
| 1858 |
+
)
|
| 1859 |
with gr.Row():
|
| 1860 |
+
gr.File(
|
| 1861 |
+
label='Example CSV compound library',
|
| 1862 |
+
value='data/examples/compound_library.csv',
|
| 1863 |
+
interactive=False
|
| 1864 |
+
)
|
| 1865 |
+
gr.File(
|
| 1866 |
+
label='Example CSV target library',
|
| 1867 |
+
value='data/examples/target_library.csv',
|
| 1868 |
+
interactive=False
|
| 1869 |
+
)
|
| 1870 |
with gr.Row():
|
| 1871 |
infer_library_prompt = gr.Button(
|
| 1872 |
value="Upload Your Own Libraries Below",
|
| 1873 |
+
visible=False,
|
| 1874 |
+
variant='secondary'
|
| 1875 |
+
)
|
| 1876 |
with gr.Row():
|
| 1877 |
+
infer_drug = gr.File(
|
| 1878 |
+
label='Upload SDF/CSV File Containing Multiple Compounds',
|
| 1879 |
+
file_count="single",
|
| 1880 |
+
type='filepath'
|
| 1881 |
+
)
|
| 1882 |
+
infer_target = gr.File(
|
| 1883 |
+
label='Upload FASTA/CSV File Containing Multiple Targets',
|
| 1884 |
+
file_count="single",
|
| 1885 |
+
type='filepath'
|
| 1886 |
+
)
|
| 1887 |
|
| 1888 |
with gr.Row():
|
| 1889 |
with gr.Column(min_width=200):
|
|
|
|
| 1892 |
"If the proteins in the target library of interest "
|
| 1893 |
"all belong to the same protein family, manually selecting the family is supported."
|
| 1894 |
)
|
| 1895 |
+
|
| 1896 |
pair_infer_target_family = gr.Dropdown(
|
| 1897 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
| 1898 |
value='General',
|
| 1899 |
+
label='Step 2. Select Target Family (Optional)'
|
| 1900 |
+
)
|
| 1901 |
|
| 1902 |
with gr.Column(min_width=200):
|
| 1903 |
HelpTip(
|
|
|
|
| 1909 |
pair_infer_task = gr.Dropdown(
|
| 1910 |
list(TASK_MAP.keys()),
|
| 1911 |
label='Step 3. Select a Prediction Task',
|
| 1912 |
+
value='Compound-Protein Interaction'
|
| 1913 |
+
)
|
| 1914 |
|
| 1915 |
with gr.Column(min_width=200):
|
| 1916 |
+
HelpTip(
|
| 1917 |
+
"Select your preferred model. Please refer to documentation for detailed benchmark results."
|
| 1918 |
+
)
|
| 1919 |
pair_infer_preset = gr.Dropdown(
|
| 1920 |
list(PRESET_MAP.keys()),
|
| 1921 |
+
label='Step 4. Select a Preset Model'
|
| 1922 |
+
)
|
| 1923 |
# infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
| 1924 |
# variant='primary')
|
| 1925 |
pair_infer_opts = gr.CheckboxGroup(visible=False)
|
|
|
|
| 2127 |
alignment = aligner.align(processed_fasta, query)
|
| 2128 |
return alignment.score / max(len(processed_fasta), len(query))
|
| 2129 |
|
| 2130 |
+
alignment_df['score'] = alignment_df['X2'].swifter.apply(align_score)
|
| 2131 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
| 2132 |
family = str(row['Target Family']).title()
|
| 2133 |
return gr.Dropdown(value=family,
|
|
|
|
| 2153 |
show_progress='hidden'
|
| 2154 |
)
|
| 2155 |
|
| 2156 |
+
target_identify_task.select(
|
| 2157 |
+
fn=lambda task, opts: gr.CheckboxGroup(choices=TARGET_IDENTIFY_CPA_OPTS)
|
| 2158 |
+
if task == 'Compound-Protein Binding Affinity' else gr.CheckboxGroup(choices=DRUG_SCRENN_CPI_OPTS),
|
| 2159 |
+
inputs=[target_identify_task, target_identify_opts], outputs=target_identify_opts,
|
| 2160 |
+
show_progress='hidden'
|
| 2161 |
+
)
|
| 2162 |
|
| 2163 |
def example_fill(input_type):
|
| 2164 |
return {target_id: 'Q16539',
|
|
|
|
| 2459 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
| 2460 |
validate_columns(infer_df, ['X1', 'X2'])
|
| 2461 |
|
| 2462 |
+
infer_df['X1_ERR'] = infer_df['X1'].swifter.apply(
|
| 2463 |
validate_seq_str, regex=SMILES_PAT)
|
| 2464 |
if not infer_df['X1_ERR'].isna().all():
|
| 2465 |
raise ValueError(
|
| 2466 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
| 2467 |
|
| 2468 |
+
infer_df['X2_ERR'] = infer_df['X2'].swifter.apply(
|
| 2469 |
validate_seq_str, regex=FASTA_PAT)
|
| 2470 |
if not infer_df['X2_ERR'].isna().all():
|
| 2471 |
raise ValueError(
|
|
|
|
| 2797 |
db.update({'status': 'FAILED'}, Job.id == job['id'])
|
| 2798 |
|
| 2799 |
scheduler = BackgroundScheduler()
|
| 2800 |
+
scheduler.add_job(check_expiry, 'interval', hours=1, timezone=pytz.utc)
|
| 2801 |
scheduler.start()
|
| 2802 |
|
| 2803 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|