Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -846,20 +846,20 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
|
|
| 846 |
infer_flag = gr.State(value=False)
|
| 847 |
|
| 848 |
with gr.Tabs() as tabs:
|
| 849 |
-
with gr.TabItem(label='Drug
|
| 850 |
gr.Markdown('''
|
| 851 |
-
# <center>
|
| 852 |
-
<center>
|
| 853 |
-
To predict interactions
|
| 854 |
-
</center>
|
| 855 |
''')
|
| 856 |
with gr.Blocks() as screen_block:
|
| 857 |
with gr.Column() as screen_page:
|
| 858 |
with gr.Row():
|
| 859 |
with gr.Column():
|
| 860 |
HelpTip(
|
| 861 |
-
"Enter (paste) a amino acid sequence below manually or upload a FASTA file."
|
| 862 |
-
"If multiple entities are in the FASTA, only the first will be used."
|
| 863 |
"Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
|
| 864 |
"the sequence."
|
| 865 |
)
|
|
@@ -883,11 +883,11 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 883 |
info='Organism scientific name (default: Homo sapiens).',
|
| 884 |
placeholder='Homo sapiens', show_label=False,
|
| 885 |
visible=False, interactive=True, scale=4, )
|
| 886 |
-
target_upload_btn = gr.UploadButton(label='Upload a FASTA
|
| 887 |
visible=True, variant='primary',
|
| 888 |
size='lg')
|
| 889 |
-
target_paste_markdown = gr.Button(value='
|
| 890 |
-
target_query_btn = gr.Button(value='Query the
|
| 891 |
visible=False, scale=4)
|
| 892 |
# with gr.Row():
|
| 893 |
# example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
|
|
@@ -905,27 +905,28 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 905 |
HelpTip(
|
| 906 |
"Click Auto-detect to identify the protein family using sequence alignment. "
|
| 907 |
"This optional step allows applying a family-specific model instead of a all-family "
|
| 908 |
-
"model (general)."
|
| 909 |
"Manually select general if the alignment results are unsatisfactory."
|
| 910 |
)
|
| 911 |
drug_screen_target_family = gr.Dropdown(
|
| 912 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
| 913 |
value='General',
|
| 914 |
-
label='Step 2. Select
|
| 915 |
# with gr.Column(scale=1, min_width=24):
|
| 916 |
|
| 917 |
with gr.Row():
|
| 918 |
with gr.Column():
|
| 919 |
-
target_family_detect_btn = gr.Button(value='Auto-
|
| 920 |
|
| 921 |
with gr.Row():
|
| 922 |
with gr.Column():
|
| 923 |
HelpTip(
|
| 924 |
-
"Select a preset compound library (e.g., DrugBank)."
|
| 925 |
"Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
|
| 926 |
-
"or use an SDF file."
|
|
|
|
| 927 |
)
|
| 928 |
-
drug_library = gr.Dropdown(label='Step 3. Select
|
| 929 |
choices=list(DRUG_LIBRARY_MAP.keys()))
|
| 930 |
with gr.Row():
|
| 931 |
gr.File(label='Example SDF compound library',
|
|
@@ -933,42 +934,41 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 933 |
gr.File(label='Example CSV compound library',
|
| 934 |
value='data/examples/compound_library.csv', interactive=False)
|
| 935 |
drug_library_upload_btn = gr.UploadButton(
|
| 936 |
-
label='Upload
|
| 937 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
| 938 |
with gr.Row():
|
| 939 |
with gr.Column():
|
| 940 |
HelpTip(
|
| 941 |
"Interaction prediction provides you binding probability score between the target of "
|
| 942 |
-
"interest and each compound in the library,"
|
| 943 |
"while affinity prediction directly estimates their binding strength measured using "
|
| 944 |
"IC50."
|
| 945 |
)
|
| 946 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 947 |
-
label='Step 4. Select
|
| 948 |
value='Compound-protein interaction')
|
| 949 |
|
| 950 |
with gr.Row():
|
| 951 |
with gr.Column():
|
| 952 |
HelpTip(
|
| 953 |
"Select your preferred model, or click Recommend for the best-performing model based "
|
| 954 |
-
"on the selected task, family, and whether the target was trained."
|
| 955 |
"Please refer to documentation for detailed benchamrk results."
|
| 956 |
)
|
| 957 |
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
| 958 |
label='Step 5. Select a Preset Model')
|
| 959 |
-
screen_preset_recommend_btn = gr.Button(value='Recommend
|
| 960 |
with gr.Row():
|
| 961 |
with gr.Column():
|
| 962 |
drug_screen_email = gr.Textbox(
|
| 963 |
-
label='Step 6. Email (Optional)',
|
| 964 |
-
info="
|
| 965 |
-
"is completed."
|
| 966 |
)
|
| 967 |
|
| 968 |
with gr.Row(visible=True):
|
| 969 |
with gr.Column():
|
| 970 |
# drug_screen_clr_btn = gr.ClearButton(size='lg')
|
| 971 |
-
drug_screen_btn = gr.Button(value='
|
| 972 |
# TODO Modify the pd df directly with df['X2'] = target
|
| 973 |
|
| 974 |
screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
|
@@ -980,19 +980,19 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 980 |
|
| 981 |
with gr.TabItem(label='Target protein identification', id=1):
|
| 982 |
gr.Markdown('''
|
| 983 |
-
# <center>Target Protein Identification</center>
|
| 984 |
-
|
| 985 |
-
<center>
|
| 986 |
-
To predict interactions
|
| 987 |
-
</center>
|
| 988 |
''')
|
| 989 |
with gr.Blocks() as identify_block:
|
| 990 |
with gr.Column() as identify_page:
|
| 991 |
with gr.Row():
|
| 992 |
with gr.Column():
|
| 993 |
HelpTip(
|
| 994 |
-
"Enter (paste) a compound SMILES below manually or upload a SDF file."
|
| 995 |
-
"If multiple entities are in the SDF, only the first will be used."
|
| 996 |
"SMILES can be obtained by searching for the compound of interest in databases such "
|
| 997 |
"as NCBI, PubChem and and ChEMBL."
|
| 998 |
)
|
|
@@ -1002,7 +1002,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1002 |
info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
|
| 1003 |
value='SMILES',
|
| 1004 |
interactive=True)
|
| 1005 |
-
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
|
| 1006 |
type='binary', visible=False)
|
| 1007 |
|
| 1008 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
|
@@ -1011,23 +1011,25 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1011 |
with gr.Row():
|
| 1012 |
with gr.Column():
|
| 1013 |
HelpTip(
|
| 1014 |
-
"By default, models trained on all protein families (general) will be applied."
|
| 1015 |
-
"If the proteins in the target library of interest all belong to the same protein "
|
| 1016 |
-
"family, manually selecting the family is supported."
|
| 1017 |
)
|
| 1018 |
target_identify_target_family = gr.Dropdown(choices=['General'],
|
| 1019 |
value='General',
|
| 1020 |
-
label='Step 2. Select Target
|
| 1021 |
'Optional)')
|
| 1022 |
|
| 1023 |
with gr.Row():
|
| 1024 |
with gr.Column():
|
| 1025 |
HelpTip(
|
| 1026 |
-
"Select a preset target library (e.g., ChEMBL33_human_proteins)."
|
| 1027 |
"Alternatively, upload a CSV file with a column named X2 containing target protein "
|
| 1028 |
-
"sequences, or use an FASTA file."
|
|
|
|
|
|
|
| 1029 |
)
|
| 1030 |
-
target_library = gr.Dropdown(label='Step 3. Select
|
| 1031 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
| 1032 |
with gr.Row():
|
| 1033 |
gr.File(label='Example FASTA target library',
|
|
@@ -1035,7 +1037,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1035 |
gr.File(label='Example CSV target library',
|
| 1036 |
value='data/examples/target_library.csv', interactive=False)
|
| 1037 |
target_library_upload_btn = gr.UploadButton(
|
| 1038 |
-
label='Upload
|
| 1039 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
| 1040 |
|
| 1041 |
with gr.Row():
|
|
@@ -1047,7 +1049,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1047 |
"IC50."
|
| 1048 |
)
|
| 1049 |
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 1050 |
-
label='Step 4. Select
|
| 1051 |
value='Compound-protein interaction')
|
| 1052 |
|
| 1053 |
with gr.Row():
|
|
@@ -1057,21 +1059,21 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1057 |
"on the selected task, family, and whether the compound was trained. "
|
| 1058 |
"Please refer to documentation for detailed benchamrk results."
|
| 1059 |
)
|
| 1060 |
-
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
| 1061 |
-
|
| 1062 |
-
identify_preset_recommend_btn = gr.Button(value='Recommend
|
|
|
|
| 1063 |
|
| 1064 |
with gr.Row():
|
| 1065 |
with gr.Column():
|
| 1066 |
target_identify_email = gr.Textbox(
|
| 1067 |
-
label='Step 6. Email (Optional)',
|
| 1068 |
-
info="
|
| 1069 |
-
"is completed."
|
| 1070 |
)
|
| 1071 |
|
| 1072 |
with gr.Row(visible=True):
|
| 1073 |
# target_identify_clr_btn = gr.ClearButton(size='lg')
|
| 1074 |
-
target_identify_btn = gr.Button(value='
|
| 1075 |
|
| 1076 |
identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
| 1077 |
identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
|
|
@@ -1081,7 +1083,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1081 |
with gr.TabItem(label='Interaction pair inference', id=2):
|
| 1082 |
gr.Markdown('''
|
| 1083 |
# <center>Interaction Pair Inference</center>
|
| 1084 |
-
<center>To predict interactions
|
| 1085 |
''')
|
| 1086 |
with gr.Blocks() as infer_block:
|
| 1087 |
with gr.Column() as infer_page:
|
|
@@ -1089,22 +1091,29 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1089 |
"A custom interation pair dataset can be a CSV file with 2 required columns "
|
| 1090 |
"(X1 for smiles and X2 for sequences) "
|
| 1091 |
"and optionally 2 ID columns (ID1 for compound ID and ID2 for target ID), "
|
| 1092 |
-
"or generated from a FASTA file containing multiple"
|
| 1093 |
-
"sequences and a SDF file containing multiple compounds."
|
|
|
|
|
|
|
|
|
|
| 1094 |
)
|
| 1095 |
infer_type = gr.Dropdown(
|
| 1096 |
-
choices=['Upload a CSV
|
| 1097 |
'Upload a compound library and a target library'],
|
| 1098 |
label='Step 1. Select Pair Input Type and Input',
|
| 1099 |
-
value='Upload a CSV
|
| 1100 |
with gr.Column() as pair_upload:
|
| 1101 |
-
gr.File(label="Example
|
| 1102 |
value="data/examples/interaction_pair_inference.csv",
|
| 1103 |
interactive=False)
|
|
|
|
|
|
|
|
|
|
| 1104 |
with gr.Column():
|
| 1105 |
infer_data_for_predict = gr.File(
|
| 1106 |
-
label='Upload
|
| 1107 |
-
|
|
|
|
| 1108 |
with gr.Row():
|
| 1109 |
gr.File(label='Example SDF compound library',
|
| 1110 |
value='data/examples/compound_library.sdf', interactive=False)
|
|
@@ -1116,48 +1125,56 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1116 |
gr.File(label='Example CSV target library',
|
| 1117 |
value='data/examples/target_library.csv', interactive=False)
|
| 1118 |
with gr.Row():
|
| 1119 |
-
|
|
|
|
|
|
|
|
|
|
| 1120 |
file_count="single", type='filepath')
|
| 1121 |
-
infer_target = gr.File(label='FASTA/CSV
|
| 1122 |
file_count="single", type='filepath')
|
| 1123 |
|
| 1124 |
with gr.Row():
|
| 1125 |
with gr.Column():
|
| 1126 |
HelpTip(
|
| 1127 |
"By default, models trained on all protein families (general) will be applied. "
|
| 1128 |
-
"If the proteins in the target library of interest
|
|
|
|
| 1129 |
)
|
| 1130 |
pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
| 1131 |
value='General',
|
| 1132 |
-
label='Step 2. Select Target
|
| 1133 |
|
| 1134 |
with gr.Row():
|
| 1135 |
with gr.Column():
|
| 1136 |
HelpTip(
|
| 1137 |
-
"Interaction prediction provides you binding probability score
|
| 1138 |
-
"
|
|
|
|
|
|
|
| 1139 |
)
|
| 1140 |
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 1141 |
-
label='Step 3. Select
|
| 1142 |
value='Compound-protein interaction')
|
| 1143 |
|
| 1144 |
with gr.Row():
|
| 1145 |
with gr.Column():
|
| 1146 |
HelpTip("Select your preferred model. "
|
| 1147 |
-
"Please refer to documentation for detailed
|
| 1148 |
)
|
| 1149 |
-
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
| 1150 |
-
|
|
|
|
|
|
|
| 1151 |
|
| 1152 |
with gr.Row():
|
| 1153 |
pair_infer_email = gr.Textbox(
|
| 1154 |
-
label='Step 5. Email (Optional)',
|
| 1155 |
-
info="
|
| 1156 |
)
|
| 1157 |
|
| 1158 |
with gr.Row(visible=True):
|
| 1159 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
| 1160 |
-
pair_infer_btn = gr.Button(value='
|
| 1161 |
|
| 1162 |
infer_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
|
| 1163 |
f"When it's done, you will be redirected to the report page. "
|
|
@@ -1400,7 +1417,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1400 |
elif task == 'DTA':
|
| 1401 |
train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
|
| 1402 |
score = 'CI'
|
| 1403 |
-
if
|
| 1404 |
scenario = "Unseen drug"
|
| 1405 |
else:
|
| 1406 |
scenario = "Seen drug"
|
|
@@ -1429,21 +1446,26 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1429 |
pair_generate: gr.Column(visible=True),
|
| 1430 |
infer_data_for_predict: None,
|
| 1431 |
infer_drug: None,
|
| 1432 |
-
infer_target: None
|
|
|
|
|
|
|
| 1433 |
}
|
| 1434 |
match upload_type:
|
| 1435 |
-
case "Upload a CSV
|
| 1436 |
return {
|
| 1437 |
pair_upload: gr.Column(visible=True),
|
| 1438 |
pair_generate: gr.Column(visible=False),
|
| 1439 |
infer_data_for_predict: None,
|
| 1440 |
infer_drug: None,
|
| 1441 |
-
infer_target: None
|
|
|
|
|
|
|
| 1442 |
}
|
| 1443 |
|
| 1444 |
|
| 1445 |
infer_type.select(fn=infer_type_change, inputs=infer_type,
|
| 1446 |
-
outputs=[pair_upload, pair_generate, infer_data_for_predict, infer_drug, infer_target
|
|
|
|
| 1447 |
|
| 1448 |
|
| 1449 |
def drug_screen_validate(fasta, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
| 846 |
infer_flag = gr.State(value=False)
|
| 847 |
|
| 848 |
with gr.Tabs() as tabs:
|
| 849 |
+
with gr.TabItem(label='Drug Hit Screening', id=0):
|
| 850 |
gr.Markdown('''
|
| 851 |
+
# <center>Drug Hit Screening</center>
|
| 852 |
+
<center>
|
| 853 |
+
To predict interactions or binding affinities of a single target against a compound library.
|
| 854 |
+
</center>
|
| 855 |
''')
|
| 856 |
with gr.Blocks() as screen_block:
|
| 857 |
with gr.Column() as screen_page:
|
| 858 |
with gr.Row():
|
| 859 |
with gr.Column():
|
| 860 |
HelpTip(
|
| 861 |
+
"Enter (paste) a amino acid sequence below manually or upload a FASTA file. "
|
| 862 |
+
"If multiple entities are in the FASTA, only the first will be used. "
|
| 863 |
"Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
|
| 864 |
"the sequence."
|
| 865 |
)
|
|
|
|
| 883 |
info='Organism scientific name (default: Homo sapiens).',
|
| 884 |
placeholder='Homo sapiens', show_label=False,
|
| 885 |
visible=False, interactive=True, scale=4, )
|
| 886 |
+
target_upload_btn = gr.UploadButton(label='Upload a FASTA File', type='binary',
|
| 887 |
visible=True, variant='primary',
|
| 888 |
size='lg')
|
| 889 |
+
target_paste_markdown = gr.Button(value='OR Paste Your Sequence Below', visible=True)
|
| 890 |
+
target_query_btn = gr.Button(value='Query the Sequence', variant='primary',
|
| 891 |
visible=False, scale=4)
|
| 892 |
# with gr.Row():
|
| 893 |
# example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
|
|
|
|
| 905 |
HelpTip(
|
| 906 |
"Click Auto-detect to identify the protein family using sequence alignment. "
|
| 907 |
"This optional step allows applying a family-specific model instead of a all-family "
|
| 908 |
+
"model (general). "
|
| 909 |
"Manually select general if the alignment results are unsatisfactory."
|
| 910 |
)
|
| 911 |
drug_screen_target_family = gr.Dropdown(
|
| 912 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
| 913 |
value='General',
|
| 914 |
+
label='Step 2. Select Target Family (Optional)', interactive=True)
|
| 915 |
# with gr.Column(scale=1, min_width=24):
|
| 916 |
|
| 917 |
with gr.Row():
|
| 918 |
with gr.Column():
|
| 919 |
+
target_family_detect_btn = gr.Button(value='OR Let Us Auto-Detect for You', variant='primary')
|
| 920 |
|
| 921 |
with gr.Row():
|
| 922 |
with gr.Column():
|
| 923 |
HelpTip(
|
| 924 |
+
"Select a preset compound library (e.g., DrugBank). "
|
| 925 |
"Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
|
| 926 |
+
"or use an SDF file (Max. 10,000 compounds per task). Example CSV and SDF files are "
|
| 927 |
+
"provided below and can be downloaded by clicking the lower right corner."
|
| 928 |
)
|
| 929 |
+
drug_library = gr.Dropdown(label='Step 3. Select a Preset Compound Library',
|
| 930 |
choices=list(DRUG_LIBRARY_MAP.keys()))
|
| 931 |
with gr.Row():
|
| 932 |
gr.File(label='Example SDF compound library',
|
|
|
|
| 934 |
gr.File(label='Example CSV compound library',
|
| 935 |
value='data/examples/compound_library.csv', interactive=False)
|
| 936 |
drug_library_upload_btn = gr.UploadButton(
|
| 937 |
+
label='OR Upload Your Own Library', variant='primary')
|
| 938 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
| 939 |
with gr.Row():
|
| 940 |
with gr.Column():
|
| 941 |
HelpTip(
|
| 942 |
"Interaction prediction provides you binding probability score between the target of "
|
| 943 |
+
"interest and each compound in the library, "
|
| 944 |
"while affinity prediction directly estimates their binding strength measured using "
|
| 945 |
"IC50."
|
| 946 |
)
|
| 947 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 948 |
+
label='Step 4. Select the Prediction Task You Want to Conduct',
|
| 949 |
value='Compound-protein interaction')
|
| 950 |
|
| 951 |
with gr.Row():
|
| 952 |
with gr.Column():
|
| 953 |
HelpTip(
|
| 954 |
"Select your preferred model, or click Recommend for the best-performing model based "
|
| 955 |
+
"on the selected task, family, and whether the target was trained. "
|
| 956 |
"Please refer to documentation for detailed benchamrk results."
|
| 957 |
)
|
| 958 |
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
| 959 |
label='Step 5. Select a Preset Model')
|
| 960 |
+
screen_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You', variant='primary')
|
| 961 |
with gr.Row():
|
| 962 |
with gr.Column():
|
| 963 |
drug_screen_email = gr.Textbox(
|
| 964 |
+
label='Step 6. Input Your Email Address (Optional)',
|
| 965 |
+
info="Your email address will be used to notify you about the completion of your job."
|
|
|
|
| 966 |
)
|
| 967 |
|
| 968 |
with gr.Row(visible=True):
|
| 969 |
with gr.Column():
|
| 970 |
# drug_screen_clr_btn = gr.ClearButton(size='lg')
|
| 971 |
+
drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
|
| 972 |
# TODO Modify the pd df directly with df['X2'] = target
|
| 973 |
|
| 974 |
screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
|
|
|
| 980 |
|
| 981 |
with gr.TabItem(label='Target protein identification', id=1):
|
| 982 |
gr.Markdown('''
|
| 983 |
+
# <center>Target Protein Identification</center>
|
| 984 |
+
|
| 985 |
+
<center>
|
| 986 |
+
To predict interactions or binding affinities of a single compound against a protein library.
|
| 987 |
+
</center>
|
| 988 |
''')
|
| 989 |
with gr.Blocks() as identify_block:
|
| 990 |
with gr.Column() as identify_page:
|
| 991 |
with gr.Row():
|
| 992 |
with gr.Column():
|
| 993 |
HelpTip(
|
| 994 |
+
"Enter (paste) a compound SMILES below manually or upload a SDF file. "
|
| 995 |
+
"If multiple entities are in the SDF, only the first will be used. "
|
| 996 |
"SMILES can be obtained by searching for the compound of interest in databases such "
|
| 997 |
"as NCBI, PubChem and and ChEMBL."
|
| 998 |
)
|
|
|
|
| 1002 |
info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
|
| 1003 |
value='SMILES',
|
| 1004 |
interactive=True)
|
| 1005 |
+
compound_upload_btn = gr.UploadButton(label='OR Upload a SDF File', variant='primary',
|
| 1006 |
type='binary', visible=False)
|
| 1007 |
|
| 1008 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
|
|
|
| 1011 |
with gr.Row():
|
| 1012 |
with gr.Column():
|
| 1013 |
HelpTip(
|
| 1014 |
+
"By default, models trained on all protein families (general) will be applied. "
|
| 1015 |
+
# "If the proteins in the target library of interest all belong to the same protein "
|
| 1016 |
+
# "family, manually selecting the family is supported."
|
| 1017 |
)
|
| 1018 |
target_identify_target_family = gr.Dropdown(choices=['General'],
|
| 1019 |
value='General',
|
| 1020 |
+
label='Step 2. Select Target Family ('
|
| 1021 |
'Optional)')
|
| 1022 |
|
| 1023 |
with gr.Row():
|
| 1024 |
with gr.Column():
|
| 1025 |
HelpTip(
|
| 1026 |
+
"Select a preset target library (e.g., ChEMBL33_human_proteins). "
|
| 1027 |
"Alternatively, upload a CSV file with a column named X2 containing target protein "
|
| 1028 |
+
"sequences, or use an FASTA file (Max. 10,000 targets per task). "
|
| 1029 |
+
"Example CSV and SDF files are provided below "
|
| 1030 |
+
"and can be downloaded by clicking the lower right corner."
|
| 1031 |
)
|
| 1032 |
+
target_library = gr.Dropdown(label='Step 3. Select a Preset Target Library',
|
| 1033 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
| 1034 |
with gr.Row():
|
| 1035 |
gr.File(label='Example FASTA target library',
|
|
|
|
| 1037 |
gr.File(label='Example CSV target library',
|
| 1038 |
value='data/examples/target_library.csv', interactive=False)
|
| 1039 |
target_library_upload_btn = gr.UploadButton(
|
| 1040 |
+
label='OR Upload Your Own Library', variant='primary')
|
| 1041 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
| 1042 |
|
| 1043 |
with gr.Row():
|
|
|
|
| 1049 |
"IC50."
|
| 1050 |
)
|
| 1051 |
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 1052 |
+
label='Step 4. Select the Prediction Task You Want to Conduct',
|
| 1053 |
value='Compound-protein interaction')
|
| 1054 |
|
| 1055 |
with gr.Row():
|
|
|
|
| 1059 |
"on the selected task, family, and whether the compound was trained. "
|
| 1060 |
"Please refer to documentation for detailed benchamrk results."
|
| 1061 |
)
|
| 1062 |
+
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
| 1063 |
+
label='Step 5. Select a Preset Model')
|
| 1064 |
+
identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
| 1065 |
+
variant='primary')
|
| 1066 |
|
| 1067 |
with gr.Row():
|
| 1068 |
with gr.Column():
|
| 1069 |
target_identify_email = gr.Textbox(
|
| 1070 |
+
label='Step 6. Input Your Email Address (Optional)',
|
| 1071 |
+
info="Your email address will be used to notify you about the completion of your job."
|
|
|
|
| 1072 |
)
|
| 1073 |
|
| 1074 |
with gr.Row(visible=True):
|
| 1075 |
# target_identify_clr_btn = gr.ClearButton(size='lg')
|
| 1076 |
+
target_identify_btn = gr.Button(value='SUBMIT THE IDENTIFICATION JOB', variant='primary', size='lg')
|
| 1077 |
|
| 1078 |
identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
| 1079 |
identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
|
|
|
|
| 1083 |
with gr.TabItem(label='Interaction pair inference', id=2):
|
| 1084 |
gr.Markdown('''
|
| 1085 |
# <center>Interaction Pair Inference</center>
|
| 1086 |
+
<center>To predict interactions or binding affinities between up to 10,000 paired compound-protein data.</center>
|
| 1087 |
''')
|
| 1088 |
with gr.Blocks() as infer_block:
|
| 1089 |
with gr.Column() as infer_page:
|
|
|
|
| 1091 |
"A custom interation pair dataset can be a CSV file with 2 required columns "
|
| 1092 |
"(X1 for smiles and X2 for sequences) "
|
| 1093 |
"and optionally 2 ID columns (ID1 for compound ID and ID2 for target ID), "
|
| 1094 |
+
"or generated from a FASTA file containing multiple "
|
| 1095 |
+
"sequences and a SDF file containing multiple compounds. "
|
| 1096 |
+
"Currently, a maximum of 10,000 pairs is supported, "
|
| 1097 |
+
"which means that the size of CSV file or "
|
| 1098 |
+
"the product of the two library sizes should not exceed 10,000."
|
| 1099 |
)
|
| 1100 |
infer_type = gr.Dropdown(
|
| 1101 |
+
choices=['Upload a CSV file containing paired compound-protein data',
|
| 1102 |
'Upload a compound library and a target library'],
|
| 1103 |
label='Step 1. Select Pair Input Type and Input',
|
| 1104 |
+
value='Upload a CSV file containing paired compound-protein data')
|
| 1105 |
with gr.Column() as pair_upload:
|
| 1106 |
+
gr.File(label="Example CSV dataset",
|
| 1107 |
value="data/examples/interaction_pair_inference.csv",
|
| 1108 |
interactive=False)
|
| 1109 |
+
with gr.Row():
|
| 1110 |
+
infer_csv_prompt = gr.Button(value="Upload Your Own Dataset Below",
|
| 1111 |
+
visible=True)
|
| 1112 |
with gr.Column():
|
| 1113 |
infer_data_for_predict = gr.File(
|
| 1114 |
+
label='Upload CSV File Containing Paired Records',
|
| 1115 |
+
file_count="single", type='filepath', visible=True)
|
| 1116 |
+
with gr.Column(visible=False) as pair_generate:
|
| 1117 |
with gr.Row():
|
| 1118 |
gr.File(label='Example SDF compound library',
|
| 1119 |
value='data/examples/compound_library.sdf', interactive=False)
|
|
|
|
| 1125 |
gr.File(label='Example CSV target library',
|
| 1126 |
value='data/examples/target_library.csv', interactive=False)
|
| 1127 |
with gr.Row():
|
| 1128 |
+
infer_library_prompt = gr.Button(value="Upload Your Own Libraries Below",
|
| 1129 |
+
visible=False)
|
| 1130 |
+
with gr.Row():
|
| 1131 |
+
infer_drug = gr.File(label='Upload SDF/CSV File Containing Multiple Compounds',
|
| 1132 |
file_count="single", type='filepath')
|
| 1133 |
+
infer_target = gr.File(label='Upload FASTA/CSV File Containing Multiple Targets',
|
| 1134 |
file_count="single", type='filepath')
|
| 1135 |
|
| 1136 |
with gr.Row():
|
| 1137 |
with gr.Column():
|
| 1138 |
HelpTip(
|
| 1139 |
"By default, models trained on all protein families (general) will be applied. "
|
| 1140 |
+
"If the proteins in the target library of interest "
|
| 1141 |
+
"all belong to the same protein family, manually selecting the family is supported."
|
| 1142 |
)
|
| 1143 |
pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
| 1144 |
value='General',
|
| 1145 |
+
label='Step 2. Select Target Family (Optional)')
|
| 1146 |
|
| 1147 |
with gr.Row():
|
| 1148 |
with gr.Column():
|
| 1149 |
HelpTip(
|
| 1150 |
+
"Interaction prediction provides you binding probability score "
|
| 1151 |
+
"between the target of interest and each compound in the library, "
|
| 1152 |
+
"while affinity prediction directly estimates their binding strength "
|
| 1153 |
+
"measured using IC50."
|
| 1154 |
)
|
| 1155 |
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 1156 |
+
label='Step 3. Select the Prediction Task You Want to Conduct',
|
| 1157 |
value='Compound-protein interaction')
|
| 1158 |
|
| 1159 |
with gr.Row():
|
| 1160 |
with gr.Column():
|
| 1161 |
HelpTip("Select your preferred model. "
|
| 1162 |
+
"Please refer to documentation for detailed benchmark results."
|
| 1163 |
)
|
| 1164 |
+
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
| 1165 |
+
label='Step 4. Select a Preset Model')
|
| 1166 |
+
# infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
| 1167 |
+
# variant='primary')
|
| 1168 |
|
| 1169 |
with gr.Row():
|
| 1170 |
pair_infer_email = gr.Textbox(
|
| 1171 |
+
label='Step 5. Input Your Email Address (Optional)',
|
| 1172 |
+
info="Your email address will be used to notify you about the completion of your job."
|
| 1173 |
)
|
| 1174 |
|
| 1175 |
with gr.Row(visible=True):
|
| 1176 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
| 1177 |
+
pair_infer_btn = gr.Button(value='SUBMIT THE INFERENCE JOB', variant='primary', size='lg')
|
| 1178 |
|
| 1179 |
infer_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
|
| 1180 |
f"When it's done, you will be redirected to the report page. "
|
|
|
|
| 1417 |
elif task == 'DTA':
|
| 1418 |
train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
|
| 1419 |
score = 'CI'
|
| 1420 |
+
if not np.isin(smiles, train['X1']):
|
| 1421 |
scenario = "Unseen drug"
|
| 1422 |
else:
|
| 1423 |
scenario = "Seen drug"
|
|
|
|
| 1446 |
pair_generate: gr.Column(visible=True),
|
| 1447 |
infer_data_for_predict: None,
|
| 1448 |
infer_drug: None,
|
| 1449 |
+
infer_target: None,
|
| 1450 |
+
infer_csv_prompt: gr.Button(visible=False),
|
| 1451 |
+
infer_library_prompt: gr.Button(visible=True),
|
| 1452 |
}
|
| 1453 |
match upload_type:
|
| 1454 |
+
case "Upload a CSV file containing paired compound-protein data":
|
| 1455 |
return {
|
| 1456 |
pair_upload: gr.Column(visible=True),
|
| 1457 |
pair_generate: gr.Column(visible=False),
|
| 1458 |
infer_data_for_predict: None,
|
| 1459 |
infer_drug: None,
|
| 1460 |
+
infer_target: None,
|
| 1461 |
+
infer_csv_prompt: gr.Button(visible=True),
|
| 1462 |
+
infer_library_prompt: gr.Button(visible=False),
|
| 1463 |
}
|
| 1464 |
|
| 1465 |
|
| 1466 |
infer_type.select(fn=infer_type_change, inputs=infer_type,
|
| 1467 |
+
outputs=[pair_upload, pair_generate, infer_data_for_predict, infer_drug, infer_target,
|
| 1468 |
+
infer_csv_prompt, infer_library_prompt])
|
| 1469 |
|
| 1470 |
|
| 1471 |
def drug_screen_validate(fasta, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):
|