libokj committed on
Commit
a2264f5
·
1 Parent(s): 5eeb7c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -95
app.py CHANGED
@@ -12,7 +12,6 @@ from pathlib import Path
12
  import sys
13
 
14
  import numpy as np
15
- from Bio import SeqIO
16
  from Bio.Align import PairwiseAligner
17
  # from email_validator import validate_email
18
  import gradio as gr
@@ -60,10 +59,10 @@ SESSION.mount('https://', ADAPTER)
60
  UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
61
 
62
  CSS = """
63
- .help-tip > div {
64
  position: absolute;
65
- display: block;
66
- top: 0px;
67
  right: 0px;
68
  text-align: center;
69
  border-radius: 40%;
@@ -74,6 +73,7 @@ CSS = """
74
  line-height: 26px;
75
  cursor: default;
76
  transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
 
77
  }
78
 
79
  .help-tip:hover {
@@ -85,7 +85,7 @@ CSS = """
85
  content: '?';
86
  font-weight: 700;
87
  color: #8B0000;
88
- z-index: 100;
89
  }
90
 
91
  .help-tip p {
@@ -161,9 +161,10 @@ visibility: hidden
161
 
162
  class HelpTip:
163
  def __new__(cls, text):
164
- return gr.HTML(elem_classes="help-tip",
165
- value=f'<p>{text}</p>'
166
- )
 
167
 
168
 
169
  def sa_score(row):
@@ -366,8 +367,8 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
366
  target_family = TARGET_FAMILY_MAP[target_family]
367
  # email_hash = hashlib.sha256(email.encode()).hexdigest()
368
  COLUMN_ALIASES = COLUMN_ALIASES | {
369
- 'Y': 'Actual interaction' if task == 'binary' else 'Actual affinity',
370
- 'Y^': 'Predicted interaction' if task == 'binary' else 'Predicted affinity'
371
  }
372
 
373
  # target_family_list = [target_family]
@@ -387,7 +388,7 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
387
  predictions = [pd.DataFrame(prediction) for prediction in predictions]
388
  prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
389
 
390
- predictions_file = f'{job_id}_predictions.csv'
391
  prediction_df.to_csv(predictions_file, index=False)
392
 
393
  return [predictions_file,
@@ -484,12 +485,12 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
484
  includeFingerprints=False)
485
  DF_FOR_REPORT = df.copy()
486
 
487
- pie_chart = None
488
- value = None
489
- if 'Y^' in DF_FOR_REPORT.columns:
490
- value = 'Y^'
491
- elif 'Y' in DF_FOR_REPORT.columns:
492
- value = 'Y'
493
 
494
  # if value:
495
  # if DF_FOR_REPORT['X1'].nunique() > 1 >= DF_FOR_REPORT['X2'].nunique():
@@ -497,37 +498,47 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
497
  # elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
498
  # pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
499
 
500
- return create_html_report(DF_FOR_REPORT), pie_chart
501
  else:
502
- return gr.HTML(''), gr.Plot()
503
 
504
 
505
  def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
506
- cols_left = ['ID2', 'Y', 'Y^', 'ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', ]
 
507
  cols_right = ['X1', 'X2']
508
- cols_left = [col for col in cols_left if col in df.columns]
509
- cols_right = [col for col in cols_right if col in df.columns]
510
- df = df[cols_left + (df.columns.drop(cols_left + cols_right).tolist()) + cols_right]
511
- df['X2'] = df['X2'].apply(wrap_text)
512
- df.rename(COLUMN_ALIASES, inplace=True)
513
-
514
- styled_df = df.style
515
- # styled_df = df.style.format("{:.2f}")
516
- colors = sns.color_palette('husl', len(df.columns))
517
- for i, col in enumerate(df.columns):
518
- if pd.api.types.is_numeric_dtype(df[col]):
519
- styled_df = styled_df.background_gradient(subset=col, cmap=sns.light_palette(colors[i], as_cmap=True))
520
-
521
  # Return the DataFrame as HTML
522
  PandasTools.RenderImagesInAllDataFrames(images=True)
523
 
524
  if not file:
525
- html = df.to_html()
526
- return f'<div style="overflow:auto; height: 500px;">{html}</div>'
 
 
 
 
 
 
527
  else:
528
- html = df.to_html(file)
529
- return html
530
- # return gr.HTML(pn.widgets.Tabulator(df).embed())
 
 
 
 
 
 
 
531
 
532
 
533
  # def create_pie_chart(df, category, value, top_k):
@@ -694,6 +705,12 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
694
  with gr.Column() as screen_page:
695
  with gr.Row():
696
  with gr.Column():
 
 
 
 
 
 
697
  with gr.Row():
698
  target_input_type = gr.Dropdown(
699
  label='Target Input Type',
@@ -713,22 +730,17 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
713
  info='Organism common name or scientific name (default: Human).',
714
  placeholder='Human', show_label=False,
715
  visible=False, interactive=True, scale=4, )
 
716
  HelpTip(
717
- "Target amino acid sequence in the FASTA format. Alternatively, you may use a "
718
- "UniProt ID/accession to query UniProt database for the sequence of your "
719
- "target of interest. If the input FASTA contains multiple entities, "
720
- "only the first one will be used."
721
  )
722
- with gr.Column():
723
  drug_screen_target_family = gr.Dropdown(
724
  choices=list(TARGET_FAMILY_MAP.keys()),
725
  value='General',
726
  label='Select Input Protein Family (Optional)', interactive=True)
727
  # with gr.Column(scale=1, min_width=24):
728
- HelpTip(
729
- "Identify the protein family by conducting sequence alignment. "
730
- "You may select General if you find the alignment score unsatisfactory."
731
- )
732
  with gr.Row():
733
  with gr.Column():
734
  target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
@@ -751,12 +763,13 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
751
  drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()), label='Select a Prediction Task',
752
  value='Drug-target interaction')
753
  with gr.Column():
754
- drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Select a Preset Model')
755
- screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
756
  HelpTip("We recommend the appropriate model for your use case based on model performance "
757
  "in drug-target interaction or binding affinity prediction. "
758
  "The models were benchmarked on different target families "
759
  "and real-world data scenarios.")
 
 
 
760
 
761
  # drug_screen_email = gr.Textbox(
762
  # label='Email (optional)',
@@ -787,13 +800,6 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
787
  with gr.Column() as identify_page:
788
  with gr.Row():
789
  with gr.Column():
790
- compound_type = gr.Dropdown(
791
- label='Compound Input Type',
792
- choices=['SMILES', 'SDF'],
793
- info='Enter (paste) an SMILES string or upload an SMI file.',
794
- value='SMILES',
795
- interactive=True)
796
- compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
797
  HelpTip(
798
  """Compound molecule in the SMILES format. You may input the SMILES string directly,
799
  upload an SMI file, or upload an SDF file to convert to SMILES. Alternatively,
@@ -801,6 +807,14 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
801
  representing your drug of interest.
802
  """
803
  )
 
 
 
 
 
 
 
 
804
  with gr.Column():
805
  target_identify_target_family = gr.Dropdown(choices=['General'], value='General',
806
  label='Target Protein Family')
@@ -819,12 +833,13 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
819
  value='Drug-target interaction')
820
 
821
  with gr.Column():
822
- target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
823
- identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
824
  HelpTip("We recommend the appropriate model for your use case based on model performance "
825
  "in drug-target interaction or binding affinity prediction. "
826
  "The models were benchmarked on different target families "
827
  "and real-world data scenarios.")
 
 
 
828
 
829
  # with gr.Row():
830
  # target_identify_email = gr.Textbox(
@@ -843,24 +858,47 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
843
  visible=False)
844
  with gr.TabItem(label='Interaction pair inference', id=2):
845
  gr.Markdown('''
846
- # <center>DeepSEQreen Interaction Pair Inference</center>
847
- <center>
848
- To predict interactions/binding affinities between any drug-target pairs.
849
- </center>
850
- ''')
851
  with gr.Blocks() as infer_block:
852
  with gr.Column() as infer_page:
853
- HelpTip("Upload a custom drug-target pair dataset. See the documentation for details.")
854
- infer_data_for_predict = gr.File(
855
- label='Prediction dataset file', file_count="single", type='filepath')
856
- # TODO example dataset
857
- # TODO download example dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
 
859
  with gr.Row(visible=True):
860
  pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
861
- HelpTip("Choose a preset model for making the predictions.")
862
  pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
863
- HelpTip("Choose the protein family of your target.")
864
  pair_infer_target_family = gr.Dropdown(choices=['General'],
865
  label='Target family',
866
  value='General')
@@ -887,7 +925,9 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
887
  <center>
888
  To compute chemical properties for the predictions of drug hit screening,
889
  target protein identification, and interaction pair inference. You may also upload
890
- your own dataset.
 
 
891
  </center>
892
  ''')
893
  with gr.Row():
@@ -907,10 +947,10 @@ with (gr.Blocks(theme=theme, title='DeepScreen', css=CSS) as demo):
907
 
908
  with gr.Row():
909
  with gr.Column():
910
- csv_generate = gr.Button(value='Generate raw data (CSV)')
911
  csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
912
  with gr.Column():
913
- html_generate = gr.Button(value='Generate report (HTML)')
914
  html_download_file = gr.File(label='Download report (HTML)', visible=False)
915
 
916
 
@@ -1133,22 +1173,16 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1133
  if library_upload.endswith('.csv'):
1134
  screen_df = pd.read_csv(library_upload)
1135
  elif library_upload.endswith('.sdf'):
1136
- suppl = Chem.ForwardSDMolSupplier(library_upload)
1137
- screen_df = pd.DataFrame([Chem.MolToSmiles(mol) for mol in suppl], columns=['X1'])
1138
  else:
1139
- raise 'Currently only csv and sdf files are supported.'
1140
  validate_columns(screen_df, ['X1'])
1141
 
1142
- if not np.isin('ID1', screen_df.columns):
1143
- screen_df['ID1'] = list(range(screen_df.shape[0]))
1144
- if not np.isin('ID2', screen_df.columns):
1145
- screen_df['ID2'] = 'Input'
1146
  screen_df['X2'] = fasta
1147
- screen_df['Y'] = 0
1148
- screen_df = screen_df.loc[:, ['ID1', 'X1', 'ID2', 'X2', 'Y']]
1149
 
1150
  job_id = uuid4()
1151
- temp_file = Path(f'{job_id}_input.csv').resolve()
1152
  screen_df.to_csv(temp_file, index=False)
1153
  if temp_file.is_file():
1154
  return {screen_data_for_predict: str(temp_file),
@@ -1191,15 +1225,9 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1191
  validate_columns(identify_df, ['X2'])
1192
 
1193
  identify_df['X1'] = smiles
1194
- if not np.isin('ID1', identify_df.columns):
1195
- identify_df['ID1'] = 'Input'
1196
- if not np.isin('ID2', identify_df.columns):
1197
- identify_df['ID2'] = list(range(identify_df.shape[0]))
1198
- identify_df = identify_df.loc[:, ['ID1', 'X1', 'ID2', 'X2']]
1199
- identify_df['Y'] = 0
1200
-
1201
  job_id = uuid4()
1202
- temp_file = Path(f'{job_id}_input.csv').resolve()
1203
  identify_df.to_csv(temp_file, index=False)
1204
  if temp_file.is_file():
1205
  return {identify_data_for_predict: str(temp_file),
@@ -1321,16 +1349,16 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1321
 
1322
  def create_csv_raw_file(df, file_report):
1323
  from datetime import datetime
1324
- now = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
1325
  filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
1326
- df.to_csv(filename, index=False)
1327
  return gr.File(filename, visible=True)
1328
 
1329
 
1330
  def create_html_report_file(df, file_report):
1331
  from datetime import datetime
1332
- now = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
1333
- filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
1334
  create_html_report(df, filename)
1335
  return gr.File(filename, visible=True)
1336
 
 
12
  import sys
13
 
14
  import numpy as np
 
15
  from Bio.Align import PairwiseAligner
16
  # from email_validator import validate_email
17
  import gradio as gr
 
59
  UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
60
 
61
  CSS = """
62
+ .help-tip {
63
  position: absolute;
64
+ display: inline-block;
65
+ top: 24px;
66
  right: 0px;
67
  text-align: center;
68
  border-radius: 40%;
 
73
  line-height: 26px;
74
  cursor: default;
75
  transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
76
+ z-index: 100 !important;
77
  }
78
 
79
  .help-tip:hover {
 
85
  content: '?';
86
  font-weight: 700;
87
  color: #8B0000;
88
+ z-index: 100 !important;
89
  }
90
 
91
  .help-tip p {
 
161
 
162
  class HelpTip:
163
  def __new__(cls, text):
164
+ return gr.HTML(
165
+ # elem_classes="absolute",
166
+ value=f'<div class="help-tip"><p>{text}</p>',
167
+ )
168
 
169
 
170
  def sa_score(row):
 
367
  target_family = TARGET_FAMILY_MAP[target_family]
368
  # email_hash = hashlib.sha256(email.encode()).hexdigest()
369
  COLUMN_ALIASES = COLUMN_ALIASES | {
370
+ 'Y': 'Actual interaction probability' if task == 'binary' else 'Actual binding affinity',
371
+ 'Y^': 'Predicted interaction probability' if task == 'binary' else 'Predicted binding affinity'
372
  }
373
 
374
  # target_family_list = [target_family]
 
388
  predictions = [pd.DataFrame(prediction) for prediction in predictions]
389
  prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
390
 
391
+ predictions_file = f'temp/{job_id}_predictions.csv'
392
  prediction_df.to_csv(predictions_file, index=False)
393
 
394
  return [predictions_file,
 
485
  includeFingerprints=False)
486
  DF_FOR_REPORT = df.copy()
487
 
488
+ # pie_chart = None
489
+ # value = None
490
+ # if 'Y^' in DF_FOR_REPORT.columns:
491
+ # value = 'Y^'
492
+ # elif 'Y' in DF_FOR_REPORT.columns:
493
+ # value = 'Y'
494
 
495
  # if value:
496
  # if DF_FOR_REPORT['X1'].nunique() > 1 >= DF_FOR_REPORT['X2'].nunique():
 
498
  # elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
499
  # pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
500
 
501
+ return create_html_report(DF_FOR_REPORT), df # pie_chart
502
  else:
503
+ return gr.HTML(), gr.Dataframe()
504
 
505
 
506
  def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
507
+ df_html = df.copy()
508
+ cols_left = ['ID1', 'ID2', 'Y', 'Y^', 'Compound', 'Scaffold', 'Scaffold SMILES', ]
509
  cols_right = ['X1', 'X2']
510
+ cols_left = [col for col in cols_left if col in df_html.columns]
511
+ cols_right = [col for col in cols_right if col in df_html.columns]
512
+ df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
513
+ df_html['X2'] = df_html['X2'].swifter.apply(wrap_text)
514
+ df_html = df_html.sort_values(
515
+ [col for col in ['Y', 'Y^', 'ID1', 'ID2', 'X1', 'X2'] if col in df.columns], ascending=False
516
+ ).rename(columns=COLUMN_ALIASES)
517
+ # PandasTools.RenderImagesInAllDataFrames(images=True)
518
+ PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
 
 
 
 
519
  # Return the DataFrame as HTML
520
  PandasTools.RenderImagesInAllDataFrames(images=True)
521
 
522
  if not file:
523
+ styled_df = df_html.iloc[:51].style
524
+ # styled_df = df.style.format("{:.2f}")
525
+ colors = sns.color_palette('husl', len(df_html.columns))
526
+ for i, col in enumerate(df_html.columns):
527
+ if pd.api.types.is_numeric_dtype(df_html[col]):
528
+ styled_df = styled_df.background_gradient(subset=col, cmap=sns.light_palette(colors[i], as_cmap=True))
529
+ html = styled_df.to_html()
530
+ return f'Report preview<div style="overflow:auto; height: 300px; font-family: Courier !important;">{html}</div>'
531
  else:
532
+ import panel as pn
533
+ from bokeh.resources import INLINE
534
+ from bokeh.models import NumberFormatter, BooleanFormatter
535
+ bokeh_formatters = {
536
+ 'float': {'type': 'progress', 'legend': True},
537
+ 'bool': BooleanFormatter(),
538
+ }
539
+ # html = df.to_html(file)
540
+ # return html
541
+ pn.widgets.Tabulator(df_html, formatters=bokeh_formatters).save(file, resources=INLINE)
542
 
543
 
544
  # def create_pie_chart(df, category, value, top_k):
 
705
  with gr.Column() as screen_page:
706
  with gr.Row():
707
  with gr.Column():
708
+ HelpTip(
709
+ "Target amino acid sequence in the FASTA format. Alternatively, you may use a "
710
+ "UniProt ID/accession to query UniProt database for the sequence of your "
711
+ "target of interest. If the input FASTA contains multiple entities, "
712
+ "only the first one will be used."
713
+ )
714
  with gr.Row():
715
  target_input_type = gr.Dropdown(
716
  label='Target Input Type',
 
730
  info='Organism common name or scientific name (default: Human).',
731
  placeholder='Human', show_label=False,
732
  visible=False, interactive=True, scale=4, )
733
+ with gr.Column():
734
  HelpTip(
735
+ "Identify the protein family by conducting sequence alignment. "
736
+ "You may select General if you find the alignment score unsatisfactory."
 
 
737
  )
 
738
  drug_screen_target_family = gr.Dropdown(
739
  choices=list(TARGET_FAMILY_MAP.keys()),
740
  value='General',
741
  label='Select Input Protein Family (Optional)', interactive=True)
742
  # with gr.Column(scale=1, min_width=24):
743
+
 
 
 
744
  with gr.Row():
745
  with gr.Column():
746
  target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
 
763
  drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()), label='Select a Prediction Task',
764
  value='Drug-target interaction')
765
  with gr.Column():
 
 
766
  HelpTip("We recommend the appropriate model for your use case based on model performance "
767
  "in drug-target interaction or binding affinity prediction. "
768
  "The models were benchmarked on different target families "
769
  "and real-world data scenarios.")
770
+ drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Select a Preset Model')
771
+ screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
772
+
773
 
774
  # drug_screen_email = gr.Textbox(
775
  # label='Email (optional)',
 
800
  with gr.Column() as identify_page:
801
  with gr.Row():
802
  with gr.Column():
 
 
 
 
 
 
 
803
  HelpTip(
804
  """Compound molecule in the SMILES format. You may input the SMILES string directly,
805
  upload an SMI file, or upload an SDF file to convert to SMILES. Alternatively,
 
807
  representing your drug of interest.
808
  """
809
  )
810
+ compound_type = gr.Dropdown(
811
+ label='Compound Input Type',
812
+ choices=['SMILES', 'SDF'],
813
+ info='Enter (paste) an SMILES string or upload an SMI file.',
814
+ value='SMILES',
815
+ interactive=True)
816
+ compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
817
+
818
  with gr.Column():
819
  target_identify_target_family = gr.Dropdown(choices=['General'], value='General',
820
  label='Target Protein Family')
 
833
  value='Drug-target interaction')
834
 
835
  with gr.Column():
 
 
836
  HelpTip("We recommend the appropriate model for your use case based on model performance "
837
  "in drug-target interaction or binding affinity prediction. "
838
  "The models were benchmarked on different target families "
839
  "and real-world data scenarios.")
840
+ target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
841
+ identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
842
+
843
 
844
  # with gr.Row():
845
  # target_identify_email = gr.Textbox(
 
858
  visible=False)
859
  with gr.TabItem(label='Interaction pair inference', id=2):
860
  gr.Markdown('''
861
+ # <center>DeepSEQreen Interaction Pair Inference</center>
862
+ <center>To predict interactions/binding affinities between any drug-target pairs.</center>
863
+ ''')
 
 
864
  with gr.Blocks() as infer_block:
865
  with gr.Column() as infer_page:
866
+ with gr.Column() as custom_upload:
867
+ gr.Markdown("""
868
+ Please upload a custom dataset CSV file with 2 required string columns and optionally 2 ID columns:
869
+
870
+ <b>X1</b>: the SMILES string of a compound\n
871
+ <b>X2</b>: the FASTA sequence of a target\n
872
+ <b>ID1</b>: the ID (PubChem or any arbitrary unique identifier) of a compound\n
873
+ <b>ID2</b>: the ID (UniProt or any arbitrary unique identifier) of a target
874
+
875
+ Example:
876
+
877
+ | X1 | X2 | ID1 | ID2 |
878
+ |---------------------------------------- |---------------|--------------|--------|
879
+ | CCOC(=O)Nc1ccc(NCc2ccc(F)cc2)cc1N | MVQKSRNGGV... | CHEMBL41355 | O88943 |
880
+ | CCCCCc1cc(O)c(C/C=C(\C)CCC=C(C)C)c(O)c1 | MTSPSSSPVF... | CHEMBL497318 | Q9Y5S1 |
881
+ """)
882
+ gr.File(label="Example custom dataset",
883
+ value="data/examples/interaction_pair_inference.csv",
884
+ interactive=False)
885
+ with gr.Column():
886
+ infer_data_for_predict = gr.File(
887
+ label='Custom dataset file', file_count="single", type='filepath', visible=True)
888
+ with gr.Column() as pair_generate:
889
+ gr.Markdown("""
890
+ Upload an SDF file which contains multiple compounds of interest and a FASTA file which contains multiple targets of
891
+ interest. All combinations of drug-target pairs from these two files will be automatically generated and submitted to
892
+ a prediction job.
893
+ """)
894
+ pair_sdf = gr.File(label='SDF file containing multiple compounds')
895
+ pair_fasta = gr.File(label='FASTA file containing multiple targets')
896
+
897
+
898
 
899
  with gr.Row(visible=True):
900
  pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
 
901
  pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
 
902
  pair_infer_target_family = gr.Dropdown(choices=['General'],
903
  label='Target family',
904
  value='General')
 
925
  <center>
926
  To compute chemical properties for the predictions of drug hit screening,
927
  target protein identification, and interaction pair inference. You may also upload
928
+ your own dataset. The page shows only a preview report displaying at most 30 records
929
+ (with top predicted DTI/DTA if reporting results from a prediction job). For a full report, please
930
+ generate and download a raw data CSV or interactive table HTML file below.
931
  </center>
932
  ''')
933
  with gr.Row():
 
947
 
948
  with gr.Row():
949
  with gr.Column():
950
+ csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
951
  csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
952
  with gr.Column():
953
+ html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
954
  html_download_file = gr.File(label='Download report (HTML)', visible=False)
955
 
956
 
 
1173
  if library_upload.endswith('.csv'):
1174
  screen_df = pd.read_csv(library_upload)
1175
  elif library_upload.endswith('.sdf'):
1176
+ screen_df = PandasTools.LoadSDF(library_upload,
1177
+ smilesName='X1', molColName='Compound', includeFingerprints=True)
1178
  else:
1179
+ raise gr.Error('Currently only CSV and SDF files are supported.')
1180
  validate_columns(screen_df, ['X1'])
1181
 
 
 
 
 
1182
  screen_df['X2'] = fasta
 
 
1183
 
1184
  job_id = uuid4()
1185
+ temp_file = Path(f'temp/{job_id}_input.csv').resolve()
1186
  screen_df.to_csv(temp_file, index=False)
1187
  if temp_file.is_file():
1188
  return {screen_data_for_predict: str(temp_file),
 
1225
  validate_columns(identify_df, ['X2'])
1226
 
1227
  identify_df['X1'] = smiles
1228
+
 
 
 
 
 
 
1229
  job_id = uuid4()
1230
+ temp_file = Path(f'temp/{job_id}_input.csv').resolve()
1231
  identify_df.to_csv(temp_file, index=False)
1232
  if temp_file.is_file():
1233
  return {identify_data_for_predict: str(temp_file),
 
1349
 
1350
  def create_csv_raw_file(df, file_report):
1351
  from datetime import datetime
1352
+ now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
1353
  filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
1354
+ df.drop(['Compound', 'Scaffold']).to_csv(filename, index=False)
1355
  return gr.File(filename, visible=True)
1356
 
1357
 
1358
  def create_html_report_file(df, file_report):
1359
  from datetime import datetime
1360
+ now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
1361
+ filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
1362
  create_html_report(df, filename)
1363
  return gr.File(filename, visible=True)
1364