Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,6 @@ from pathlib import Path
|
|
| 12 |
import sys
|
| 13 |
|
| 14 |
import numpy as np
|
| 15 |
-
from Bio import SeqIO
|
| 16 |
from Bio.Align import PairwiseAligner
|
| 17 |
# from email_validator import validate_email
|
| 18 |
import gradio as gr
|
|
@@ -60,10 +59,10 @@ SESSION.mount('https://', ADAPTER)
|
|
| 60 |
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
|
| 61 |
|
| 62 |
CSS = """
|
| 63 |
-
.help-tip
|
| 64 |
position: absolute;
|
| 65 |
-
display: block;
|
| 66 |
-
top:
|
| 67 |
right: 0px;
|
| 68 |
text-align: center;
|
| 69 |
border-radius: 40%;
|
|
@@ -74,6 +73,7 @@ CSS = """
|
|
| 74 |
line-height: 26px;
|
| 75 |
cursor: default;
|
| 76 |
transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
|
|
|
|
| 77 |
}
|
| 78 |
|
| 79 |
.help-tip:hover {
|
|
@@ -85,7 +85,7 @@ CSS = """
|
|
| 85 |
content: '?';
|
| 86 |
font-weight: 700;
|
| 87 |
color: #8B0000;
|
| 88 |
-
z-index: 100;
|
| 89 |
}
|
| 90 |
|
| 91 |
.help-tip p {
|
|
@@ -161,9 +161,10 @@ visibility: hidden
|
|
| 161 |
|
| 162 |
class HelpTip:
|
| 163 |
def __new__(cls, text):
|
| 164 |
-
return gr.HTML(
|
| 165 |
-
|
| 166 |
-
|
|
|
|
| 167 |
|
| 168 |
|
| 169 |
def sa_score(row):
|
|
@@ -366,8 +367,8 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
| 366 |
target_family = TARGET_FAMILY_MAP[target_family]
|
| 367 |
# email_hash = hashlib.sha256(email.encode()).hexdigest()
|
| 368 |
COLUMN_ALIASES = COLUMN_ALIASES | {
|
| 369 |
-
'Y': 'Actual interaction' if task == 'binary' else 'Actual affinity',
|
| 370 |
-
'Y^': 'Predicted interaction' if task == 'binary' else 'Predicted affinity'
|
| 371 |
}
|
| 372 |
|
| 373 |
# target_family_list = [target_family]
|
|
@@ -387,7 +388,7 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
| 387 |
predictions = [pd.DataFrame(prediction) for prediction in predictions]
|
| 388 |
prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
|
| 389 |
|
| 390 |
-
predictions_file = f'{job_id}_predictions.csv'
|
| 391 |
prediction_df.to_csv(predictions_file, index=False)
|
| 392 |
|
| 393 |
return [predictions_file,
|
|
@@ -484,12 +485,12 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
| 484 |
includeFingerprints=False)
|
| 485 |
DF_FOR_REPORT = df.copy()
|
| 486 |
|
| 487 |
-
pie_chart = None
|
| 488 |
-
value = None
|
| 489 |
-
if 'Y^' in DF_FOR_REPORT.columns:
|
| 490 |
-
|
| 491 |
-
elif 'Y' in DF_FOR_REPORT.columns:
|
| 492 |
-
|
| 493 |
|
| 494 |
# if value:
|
| 495 |
# if DF_FOR_REPORT['X1'].nunique() > 1 >= DF_FOR_REPORT['X2'].nunique():
|
|
@@ -497,37 +498,47 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
| 497 |
# elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
|
| 498 |
# pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
|
| 499 |
|
| 500 |
-
return create_html_report(DF_FOR_REPORT), pie_chart
|
| 501 |
else:
|
| 502 |
-
return gr.HTML(
|
| 503 |
|
| 504 |
|
| 505 |
def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
|
| 506 |
-
|
|
|
|
| 507 |
cols_right = ['X1', 'X2']
|
| 508 |
-
cols_left = [col for col in cols_left if col in
|
| 509 |
-
cols_right = [col for col in cols_right if col in
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
#
|
| 516 |
-
|
| 517 |
-
for i, col in enumerate(df.columns):
|
| 518 |
-
if pd.api.types.is_numeric_dtype(df[col]):
|
| 519 |
-
styled_df = styled_df.background_gradient(subset=col, cmap=sns.light_palette(colors[i], as_cmap=True))
|
| 520 |
-
|
| 521 |
# Return the DataFrame as HTML
|
| 522 |
PandasTools.RenderImagesInAllDataFrames(images=True)
|
| 523 |
|
| 524 |
if not file:
|
| 525 |
-
|
| 526 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
else:
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
|
| 532 |
|
| 533 |
# def create_pie_chart(df, category, value, top_k):
|
|
@@ -694,6 +705,12 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 694 |
with gr.Column() as screen_page:
|
| 695 |
with gr.Row():
|
| 696 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
with gr.Row():
|
| 698 |
target_input_type = gr.Dropdown(
|
| 699 |
label='Target Input Type',
|
|
@@ -713,22 +730,17 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 713 |
info='Organism common name or scientific name (default: Human).',
|
| 714 |
placeholder='Human', show_label=False,
|
| 715 |
visible=False, interactive=True, scale=4, )
|
|
|
|
| 716 |
HelpTip(
|
| 717 |
-
"
|
| 718 |
-
"
|
| 719 |
-
"target of interest. If the input FASTA contains multiple entities, "
|
| 720 |
-
"only the first one will be used."
|
| 721 |
)
|
| 722 |
-
with gr.Column():
|
| 723 |
drug_screen_target_family = gr.Dropdown(
|
| 724 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
| 725 |
value='General',
|
| 726 |
label='Select Input Protein Family (Optional)', interactive=True)
|
| 727 |
# with gr.Column(scale=1, min_width=24):
|
| 728 |
-
|
| 729 |
-
"Identify the protein family by conducting sequence alignment. "
|
| 730 |
-
"You may select General if you find the alignment score unsatisfactory."
|
| 731 |
-
)
|
| 732 |
with gr.Row():
|
| 733 |
with gr.Column():
|
| 734 |
target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
|
|
@@ -751,12 +763,13 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 751 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()), label='Select a Prediction Task',
|
| 752 |
value='Drug-target interaction')
|
| 753 |
with gr.Column():
|
| 754 |
-
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Select a Preset Model')
|
| 755 |
-
screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 756 |
HelpTip("We recommend the appropriate model for your use case based on model performance "
|
| 757 |
"in drug-target interaction or binding affinity prediction. "
|
| 758 |
"The models were benchmarked on different target families "
|
| 759 |
"and real-world data scenarios.")
|
|
|
|
|
|
|
|
|
|
| 760 |
|
| 761 |
# drug_screen_email = gr.Textbox(
|
| 762 |
# label='Email (optional)',
|
|
@@ -787,13 +800,6 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 787 |
with gr.Column() as identify_page:
|
| 788 |
with gr.Row():
|
| 789 |
with gr.Column():
|
| 790 |
-
compound_type = gr.Dropdown(
|
| 791 |
-
label='Compound Input Type',
|
| 792 |
-
choices=['SMILES', 'SDF'],
|
| 793 |
-
info='Enter (paste) an SMILES string or upload an SMI file.',
|
| 794 |
-
value='SMILES',
|
| 795 |
-
interactive=True)
|
| 796 |
-
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
|
| 797 |
HelpTip(
|
| 798 |
"""Compound molecule in the SMILES format. You may input the SMILES string directly,
|
| 799 |
upload an SMI file, or upload an SDF file to convert to SMILES. Alternatively,
|
|
@@ -801,6 +807,14 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 801 |
representing your drug of interest.
|
| 802 |
"""
|
| 803 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
with gr.Column():
|
| 805 |
target_identify_target_family = gr.Dropdown(choices=['General'], value='General',
|
| 806 |
label='Target Protein Family')
|
|
@@ -819,12 +833,13 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 819 |
value='Drug-target interaction')
|
| 820 |
|
| 821 |
with gr.Column():
|
| 822 |
-
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
| 823 |
-
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 824 |
HelpTip("We recommend the appropriate model for your use case based on model performance "
|
| 825 |
"in drug-target interaction or binding affinity prediction. "
|
| 826 |
"The models were benchmarked on different target families "
|
| 827 |
"and real-world data scenarios.")
|
|
|
|
|
|
|
|
|
|
| 828 |
|
| 829 |
# with gr.Row():
|
| 830 |
# target_identify_email = gr.Textbox(
|
|
@@ -843,24 +858,47 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 843 |
visible=False)
|
| 844 |
with gr.TabItem(label='Interaction pair inference', id=2):
|
| 845 |
gr.Markdown('''
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
</center>
|
| 850 |
-
''')
|
| 851 |
with gr.Blocks() as infer_block:
|
| 852 |
with gr.Column() as infer_page:
|
| 853 |
-
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 858 |
|
| 859 |
with gr.Row(visible=True):
|
| 860 |
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
|
| 861 |
-
HelpTip("Choose a preset model for making the predictions.")
|
| 862 |
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
| 863 |
-
HelpTip("Choose the protein family of your target.")
|
| 864 |
pair_infer_target_family = gr.Dropdown(choices=['General'],
|
| 865 |
label='Target family',
|
| 866 |
value='General')
|
|
@@ -887,7 +925,9 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 887 |
<center>
|
| 888 |
To compute chemical properties for the predictions of drug hit screening,
|
| 889 |
target protein identification, and interaction pair inference. You may also upload
|
| 890 |
-
your own dataset.
|
|
|
|
|
|
|
| 891 |
</center>
|
| 892 |
''')
|
| 893 |
with gr.Row():
|
|
@@ -907,10 +947,10 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
|
|
| 907 |
|
| 908 |
with gr.Row():
|
| 909 |
with gr.Column():
|
| 910 |
-
csv_generate = gr.Button(value='Generate raw data (CSV)')
|
| 911 |
csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
|
| 912 |
with gr.Column():
|
| 913 |
-
html_generate = gr.Button(value='Generate report (HTML)')
|
| 914 |
html_download_file = gr.File(label='Download report (HTML)', visible=False)
|
| 915 |
|
| 916 |
|
|
@@ -1133,22 +1173,16 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1133 |
if library_upload.endswith('.csv'):
|
| 1134 |
screen_df = pd.read_csv(library_upload)
|
| 1135 |
elif library_upload.endswith('.sdf'):
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
else:
|
| 1139 |
-
raise 'Currently only
|
| 1140 |
validate_columns(screen_df, ['X1'])
|
| 1141 |
|
| 1142 |
-
if not np.isin('ID1', screen_df.columns):
|
| 1143 |
-
screen_df['ID1'] = list(range(screen_df.shape[0]))
|
| 1144 |
-
if not np.isin('ID2', screen_df.columns):
|
| 1145 |
-
screen_df['ID2'] = 'Input'
|
| 1146 |
screen_df['X2'] = fasta
|
| 1147 |
-
screen_df['Y'] = 0
|
| 1148 |
-
screen_df = screen_df.loc[:, ['ID1', 'X1', 'ID2', 'X2', 'Y']]
|
| 1149 |
|
| 1150 |
job_id = uuid4()
|
| 1151 |
-
temp_file = Path(f'{job_id}_input.csv').resolve()
|
| 1152 |
screen_df.to_csv(temp_file, index=False)
|
| 1153 |
if temp_file.is_file():
|
| 1154 |
return {screen_data_for_predict: str(temp_file),
|
|
@@ -1191,15 +1225,9 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1191 |
validate_columns(identify_df, ['X2'])
|
| 1192 |
|
| 1193 |
identify_df['X1'] = smiles
|
| 1194 |
-
|
| 1195 |
-
identify_df['ID1'] = 'Input'
|
| 1196 |
-
if not np.isin('ID2', identify_df.columns):
|
| 1197 |
-
identify_df['ID2'] = list(range(identify_df.shape[0]))
|
| 1198 |
-
identify_df = identify_df.loc[:, ['ID1', 'X1', 'ID2', 'X2']]
|
| 1199 |
-
identify_df['Y'] = 0
|
| 1200 |
-
|
| 1201 |
job_id = uuid4()
|
| 1202 |
-
temp_file = Path(f'{job_id}_input.csv').resolve()
|
| 1203 |
identify_df.to_csv(temp_file, index=False)
|
| 1204 |
if temp_file.is_file():
|
| 1205 |
return {identify_data_for_predict: str(temp_file),
|
|
@@ -1321,16 +1349,16 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1321 |
|
| 1322 |
def create_csv_raw_file(df, file_report):
|
| 1323 |
from datetime import datetime
|
| 1324 |
-
now = datetime.now().strftime("%Y-%m-%d_%H
|
| 1325 |
filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
|
| 1326 |
-
df.to_csv(filename, index=False)
|
| 1327 |
return gr.File(filename, visible=True)
|
| 1328 |
|
| 1329 |
|
| 1330 |
def create_html_report_file(df, file_report):
|
| 1331 |
from datetime import datetime
|
| 1332 |
-
now = datetime.now().strftime("%Y-%m-%d_%H
|
| 1333 |
-
filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.
|
| 1334 |
create_html_report(df, filename)
|
| 1335 |
return gr.File(filename, visible=True)
|
| 1336 |
|
|
|
|
| 12 |
import sys
|
| 13 |
|
| 14 |
import numpy as np
|
|
|
|
| 15 |
from Bio.Align import PairwiseAligner
|
| 16 |
# from email_validator import validate_email
|
| 17 |
import gradio as gr
|
|
|
|
| 59 |
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
|
| 60 |
|
| 61 |
CSS = """
|
| 62 |
+
.help-tip {
|
| 63 |
position: absolute;
|
| 64 |
+
display: inline-block;
|
| 65 |
+
top: 24px;
|
| 66 |
right: 0px;
|
| 67 |
text-align: center;
|
| 68 |
border-radius: 40%;
|
|
|
|
| 73 |
line-height: 26px;
|
| 74 |
cursor: default;
|
| 75 |
transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
|
| 76 |
+
z-index: 100 !important;
|
| 77 |
}
|
| 78 |
|
| 79 |
.help-tip:hover {
|
|
|
|
| 85 |
content: '?';
|
| 86 |
font-weight: 700;
|
| 87 |
color: #8B0000;
|
| 88 |
+
z-index: 100 !important;
|
| 89 |
}
|
| 90 |
|
| 91 |
.help-tip p {
|
|
|
|
| 161 |
|
| 162 |
class HelpTip:
    """Factory for the small "?" help bubble styled by the ``.help-tip`` CSS rules.

    Calling ``HelpTip(text)`` does not produce a ``HelpTip`` instance:
    ``__new__`` returns a ``gr.HTML`` component directly, so the call can be
    dropped into a Gradio layout like any other component.
    """

    def __new__(cls, text):
        """Build the HTML component for a help tip.

        Args:
            text: Help message placed inside the tip's ``<p>`` element. It is
                interpolated into the markup as-is (no escaping).

        Returns:
            A ``gr.HTML`` component containing the help-tip markup.
        """
        return gr.HTML(
            # elem_classes="absolute",
            # The wrapper <div> is closed explicitly so the fragment is
            # well-formed and does not rely on the browser auto-closing it.
            value=f'<div class="help-tip"><p>{text}</p></div>',
        )
|
| 168 |
|
| 169 |
|
| 170 |
def sa_score(row):
|
|
|
|
| 367 |
target_family = TARGET_FAMILY_MAP[target_family]
|
| 368 |
# email_hash = hashlib.sha256(email.encode()).hexdigest()
|
| 369 |
COLUMN_ALIASES = COLUMN_ALIASES | {
|
| 370 |
+
'Y': 'Actual interaction probability' if task == 'binary' else 'Actual binding affinity',
|
| 371 |
+
'Y^': 'Predicted interaction probability' if task == 'binary' else 'Predicted binding affinity'
|
| 372 |
}
|
| 373 |
|
| 374 |
# target_family_list = [target_family]
|
|
|
|
| 388 |
predictions = [pd.DataFrame(prediction) for prediction in predictions]
|
| 389 |
prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
|
| 390 |
|
| 391 |
+
predictions_file = f'temp/{job_id}_predictions.csv'
|
| 392 |
prediction_df.to_csv(predictions_file, index=False)
|
| 393 |
|
| 394 |
return [predictions_file,
|
|
|
|
| 485 |
includeFingerprints=False)
|
| 486 |
DF_FOR_REPORT = df.copy()
|
| 487 |
|
| 488 |
+
# pie_chart = None
|
| 489 |
+
# value = None
|
| 490 |
+
# if 'Y^' in DF_FOR_REPORT.columns:
|
| 491 |
+
# value = 'Y^'
|
| 492 |
+
# elif 'Y' in DF_FOR_REPORT.columns:
|
| 493 |
+
# value = 'Y'
|
| 494 |
|
| 495 |
# if value:
|
| 496 |
# if DF_FOR_REPORT['X1'].nunique() > 1 >= DF_FOR_REPORT['X2'].nunique():
|
|
|
|
| 498 |
# elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
|
| 499 |
# pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
|
| 500 |
|
| 501 |
+
return create_html_report(DF_FOR_REPORT), df # pie_chart
|
| 502 |
else:
|
| 503 |
+
return gr.HTML(), gr.Dataframe()
|
| 504 |
|
| 505 |
|
| 506 |
def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
    """Render a report table as an inline HTML preview or save it to an HTML file.

    The columns are reordered (identifier/score/structure columns first,
    ``X1``/``X2`` last), the rows are sorted in descending order by whichever
    of ``Y``, ``Y^``, ``ID1``, ``ID2``, ``X1``, ``X2`` are present, and the
    columns are renamed through ``COLUMN_ALIASES``.

    Args:
        df: Source DataFrame. It is copied, not modified.
        file: Optional output path. When falsy, an HTML preview string is
            returned; otherwise the table is written to this path.
        progress: Gradio progress tracker. It is not referenced in the body;
            presumably it is present so Gradio tracks tqdm progress — confirm.

    Returns:
        An HTML string wrapping a styled preview of the first 51 rows when
        ``file`` is falsy; ``None`` when the report is saved to ``file``.
    """
    df_html = df.copy()
    cols_left = ['ID1', 'ID2', 'Y', 'Y^', 'Compound', 'Scaffold', 'Scaffold SMILES', ]
    cols_right = ['X1', 'X2']
    cols_left = [col for col in cols_left if col in df_html.columns]
    cols_right = [col for col in cols_right if col in df_html.columns]
    df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
    # 'X2' is treated as optional by the filtering above, so only wrap it when
    # it is actually present instead of raising KeyError.
    if 'X2' in df_html.columns:
        df_html['X2'] = df_html['X2'].swifter.apply(wrap_text)
    df_html = df_html.sort_values(
        [col for col in ['Y', 'Y^', 'ID1', 'ID2', 'X1', 'X2'] if col in df.columns], ascending=False
    ).rename(columns=COLUMN_ALIASES)
    # PandasTools.RenderImagesInAllDataFrames(images=True)
    PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
    # Return the DataFrame as HTML
    PandasTools.RenderImagesInAllDataFrames(images=True)

    if not file:
        # Preview branch: style only the leading rows of the table.
        styled_df = df_html.iloc[:51].style
        # styled_df = df.style.format("{:.2f}")
        colors = sns.color_palette('husl', len(df_html.columns))
        for i, col in enumerate(df_html.columns):
            # Each numeric column gets a gradient built from its own palette color.
            if pd.api.types.is_numeric_dtype(df_html[col]):
                styled_df = styled_df.background_gradient(subset=col, cmap=sns.light_palette(colors[i], as_cmap=True))
        html = styled_df.to_html()
        return f'Report preview<div style="overflow:auto; height: 300px; font-family: Courier !important;">{html}</div>'
    else:
        import panel as pn
        from bokeh.resources import INLINE
        from bokeh.models import BooleanFormatter
        # NOTE(review): the keys 'float' and 'bool' look like dtype names;
        # confirm against the Tabulator docs that these formatters take
        # effect for the intended columns.
        bokeh_formatters = {
            'float': {'type': 'progress', 'legend': True},
            'bool': BooleanFormatter(),
        }
        # html = df.to_html(file)
        # return html
        pn.widgets.Tabulator(df_html, formatters=bokeh_formatters).save(file, resources=INLINE)
|
| 542 |
|
| 543 |
|
| 544 |
# def create_pie_chart(df, category, value, top_k):
|
|
|
|
| 705 |
with gr.Column() as screen_page:
|
| 706 |
with gr.Row():
|
| 707 |
with gr.Column():
|
| 708 |
+
HelpTip(
|
| 709 |
+
"Target amino acid sequence in the FASTA format. Alternatively, you may use a "
|
| 710 |
+
"UniProt ID/accession to query UniProt database for the sequence of your "
|
| 711 |
+
"target of interest. If the input FASTA contains multiple entities, "
|
| 712 |
+
"only the first one will be used."
|
| 713 |
+
)
|
| 714 |
with gr.Row():
|
| 715 |
target_input_type = gr.Dropdown(
|
| 716 |
label='Target Input Type',
|
|
|
|
| 730 |
info='Organism common name or scientific name (default: Human).',
|
| 731 |
placeholder='Human', show_label=False,
|
| 732 |
visible=False, interactive=True, scale=4, )
|
| 733 |
+
with gr.Column():
|
| 734 |
HelpTip(
|
| 735 |
+
"Identify the protein family by conducting sequence alignment. "
|
| 736 |
+
"You may select General if you find the alignment score unsatisfactory."
|
|
|
|
|
|
|
| 737 |
)
|
|
|
|
| 738 |
drug_screen_target_family = gr.Dropdown(
|
| 739 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
| 740 |
value='General',
|
| 741 |
label='Select Input Protein Family (Optional)', interactive=True)
|
| 742 |
# with gr.Column(scale=1, min_width=24):
|
| 743 |
+
|
|
|
|
|
|
|
|
|
|
| 744 |
with gr.Row():
|
| 745 |
with gr.Column():
|
| 746 |
target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
|
|
|
|
| 763 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()), label='Select a Prediction Task',
|
| 764 |
value='Drug-target interaction')
|
| 765 |
with gr.Column():
|
|
|
|
|
|
|
| 766 |
HelpTip("We recommend the appropriate model for your use case based on model performance "
|
| 767 |
"in drug-target interaction or binding affinity prediction. "
|
| 768 |
"The models were benchmarked on different target families "
|
| 769 |
"and real-world data scenarios.")
|
| 770 |
+
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Select a Preset Model')
|
| 771 |
+
screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 772 |
+
|
| 773 |
|
| 774 |
# drug_screen_email = gr.Textbox(
|
| 775 |
# label='Email (optional)',
|
|
|
|
| 800 |
with gr.Column() as identify_page:
|
| 801 |
with gr.Row():
|
| 802 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
HelpTip(
|
| 804 |
"""Compound molecule in the SMILES format. You may input the SMILES string directly,
|
| 805 |
upload an SMI file, or upload an SDF file to convert to SMILES. Alternatively,
|
|
|
|
| 807 |
representing your drug of interest.
|
| 808 |
"""
|
| 809 |
)
|
| 810 |
+
compound_type = gr.Dropdown(
|
| 811 |
+
label='Compound Input Type',
|
| 812 |
+
choices=['SMILES', 'SDF'],
|
| 813 |
+
info='Enter (paste) an SMILES string or upload an SMI file.',
|
| 814 |
+
value='SMILES',
|
| 815 |
+
interactive=True)
|
| 816 |
+
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
|
| 817 |
+
|
| 818 |
with gr.Column():
|
| 819 |
target_identify_target_family = gr.Dropdown(choices=['General'], value='General',
|
| 820 |
label='Target Protein Family')
|
|
|
|
| 833 |
value='Drug-target interaction')
|
| 834 |
|
| 835 |
with gr.Column():
|
|
|
|
|
|
|
| 836 |
HelpTip("We recommend the appropriate model for your use case based on model performance "
|
| 837 |
"in drug-target interaction or binding affinity prediction. "
|
| 838 |
"The models were benchmarked on different target families "
|
| 839 |
"and real-world data scenarios.")
|
| 840 |
+
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
| 841 |
+
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 842 |
+
|
| 843 |
|
| 844 |
# with gr.Row():
|
| 845 |
# target_identify_email = gr.Textbox(
|
|
|
|
| 858 |
visible=False)
|
| 859 |
with gr.TabItem(label='Interaction pair inference', id=2):
|
| 860 |
gr.Markdown('''
|
| 861 |
+
# <center>DeepSEQreen Interaction Pair Inference</center>
|
| 862 |
+
<center>To predict interactions/binding affinities between any drug-target pairs.</center>
|
| 863 |
+
''')
|
|
|
|
|
|
|
| 864 |
with gr.Blocks() as infer_block:
|
| 865 |
with gr.Column() as infer_page:
|
| 866 |
+
with gr.Column() as custom_upload:
|
| 867 |
+
gr.Markdown("""
|
| 868 |
+
Please upload a custom dataset CSV file with 2 required string columns and optionally 2 ID columns:
|
| 869 |
+
|
| 870 |
+
<b>X1</b>: the SMILES string of a compound\n
|
| 871 |
+
<b>X2</b>: the FASTA sequence of a target\n
|
| 872 |
+
<b>ID1</b>: the ID (PubChem or any arbitrary unique identifier) of a compound\n
|
| 873 |
+
<b>ID2</b>: the ID (UniProt or any arbitrary unique identifier) of a target
|
| 874 |
+
|
| 875 |
+
Example:
|
| 876 |
+
|
| 877 |
+
| X1 | X2 | ID1 | ID2 |
|
| 878 |
+
|---------------------------------------- |---------------|--------------|--------|
|
| 879 |
+
| CCOC(=O)Nc1ccc(NCc2ccc(F)cc2)cc1N | MVQKSRNGGV... | CHEMBL41355 | O88943 |
|
| 880 |
+
| CCCCCc1cc(O)c(C/C=C(\C)CCC=C(C)C)c(O)c1 | MTSPSSSPVF... | CHEMBL497318 | Q9Y5S1 |
|
| 881 |
+
""")
|
| 882 |
+
gr.File(label="Example custom dataset",
|
| 883 |
+
value="data/examples/interaction_pair_inference.csv",
|
| 884 |
+
interactive=False)
|
| 885 |
+
with gr.Column():
|
| 886 |
+
infer_data_for_predict = gr.File(
|
| 887 |
+
label='Custom dataset file', file_count="single", type='filepath', visible=True)
|
| 888 |
+
with gr.Column() as pair_generate:
|
| 889 |
+
gr.Markdown("""
|
| 890 |
+
Upload a SDF file which contains multiple compounds of interest and a FASTA file which contains multiple targets of
|
| 891 |
+
interest. All combinations of drug-target pairs from these two files will be automatically generated and submitted to
|
| 892 |
+
a prediction job.
|
| 893 |
+
""")
|
| 894 |
+
pair_sdf = gr.File(label='SDF file containing multiple compounds')
|
| 895 |
+
pair_fasta = gr.File(label='FASTA file containing multiple targets')
|
| 896 |
+
|
| 897 |
+
|
| 898 |
|
| 899 |
with gr.Row(visible=True):
|
| 900 |
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
|
|
|
|
| 901 |
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
|
|
|
| 902 |
pair_infer_target_family = gr.Dropdown(choices=['General'],
|
| 903 |
label='Target family',
|
| 904 |
value='General')
|
|
|
|
| 925 |
<center>
|
| 926 |
To compute chemical properties for the predictions of drug hit screening,
|
| 927 |
target protein identification, and interaction pair inference. You may also upload
|
| 928 |
+
your own dataset. The page shows only a preview report displaying at most 30 records
|
| 929 |
+
(with top predicted DTI/DTA if reporting results from a prediction job). For a full report, please
|
| 930 |
+
generate and download a raw data CSV or interactive table HTML file below.
|
| 931 |
</center>
|
| 932 |
''')
|
| 933 |
with gr.Row():
|
|
|
|
| 947 |
|
| 948 |
with gr.Row():
|
| 949 |
with gr.Column():
|
| 950 |
+
csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
|
| 951 |
csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
|
| 952 |
with gr.Column():
|
| 953 |
+
html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
|
| 954 |
html_download_file = gr.File(label='Download report (HTML)', visible=False)
|
| 955 |
|
| 956 |
|
|
|
|
| 1173 |
if library_upload.endswith('.csv'):
|
| 1174 |
screen_df = pd.read_csv(library_upload)
|
| 1175 |
elif library_upload.endswith('.sdf'):
|
| 1176 |
+
screen_df = PandasTools.LoadSDF(library_upload,
|
| 1177 |
+
smilesName='X1', molColName='Compound', includeFingerprints=True)
|
| 1178 |
else:
|
| 1179 |
+
raise gr.Error('Currently only CSV and SDF files are supported.')
|
| 1180 |
validate_columns(screen_df, ['X1'])
|
| 1181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1182 |
screen_df['X2'] = fasta
|
|
|
|
|
|
|
| 1183 |
|
| 1184 |
job_id = uuid4()
|
| 1185 |
+
temp_file = Path(f'temp/{job_id}_input.csv').resolve()
|
| 1186 |
screen_df.to_csv(temp_file, index=False)
|
| 1187 |
if temp_file.is_file():
|
| 1188 |
return {screen_data_for_predict: str(temp_file),
|
|
|
|
| 1225 |
validate_columns(identify_df, ['X2'])
|
| 1226 |
|
| 1227 |
identify_df['X1'] = smiles
|
| 1228 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1229 |
job_id = uuid4()
|
| 1230 |
+
temp_file = Path(f'temp/{job_id}_input.csv').resolve()
|
| 1231 |
identify_df.to_csv(temp_file, index=False)
|
| 1232 |
if temp_file.is_file():
|
| 1233 |
return {identify_data_for_predict: str(temp_file),
|
|
|
|
| 1349 |
|
| 1350 |
def create_csv_raw_file(df, file_report):
    """Write the report data to a timestamped CSV file and expose it for download.

    Args:
        df: DataFrame holding the report data.
        file_report: Object whose ``name`` attribute is a file path; the stem
            of that path becomes the prefix of the output file name.

    Returns:
        A visible ``gr.File`` component pointing at the CSV written under
        ``reports/``.
    """
    from datetime import datetime
    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
    # Drop by *column* name: a bare ``df.drop([...])`` targets row labels and
    # raises KeyError. ``errors='ignore'`` tolerates data that lacks one or
    # both columns. (Presumably these columns hold rendered molecule objects
    # that are not useful in a raw CSV — confirm.)
    df.drop(columns=['Compound', 'Scaffold'], errors='ignore').to_csv(filename, index=False)
    return gr.File(filename, visible=True)
|
| 1356 |
|
| 1357 |
|
| 1358 |
def create_html_report_file(df, file_report):
    """Save the full HTML report to a timestamped file and expose it for download.

    Args:
        df: DataFrame holding the report data, passed to ``create_html_report``.
        file_report: Object whose ``name`` attribute is a file path; the stem
            of that path becomes the prefix of the output file name.

    Returns:
        A visible ``gr.File`` component pointing at the HTML file written
        under ``reports/``.
    """
    from datetime import datetime

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    source_stem = Path(file_report.name).stem
    filename = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.html"
    # Passing a path makes create_html_report write the file rather than
    # return a preview string.
    create_html_report(df, filename)
    return gr.File(filename, visible=True)
|
| 1364 |
|