Tingxie committed on
Commit
676ad09
·
1 Parent(s): ae95de8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -48
app.py CHANGED
@@ -89,7 +89,7 @@ seafoam = Seafoam()
89
  custom_css = """
90
  <style>
91
  .file-upload-height {
92
- height:320px !important;
93
  display: none;
94
  }
95
  .file-upload-height2 {
@@ -99,7 +99,7 @@ custom_css = """
99
  height: 380px !important;
100
  }
101
  #custom_plot {
102
- height: 360px !important;
103
  }
104
  #custom_plot2 {
105
  height: 480px !important;
@@ -119,20 +119,7 @@ def draw_mass_spectrum(peak_data_path):
119
  ms2 = spectrum_processing(ms2)
120
  Mz = np.array(ms2.mz)
121
  Intens = np.array(ms2.intensities)
122
- plt.figure(figsize=(7,3))
123
- for i in range(len(Mz)):
124
- plt.axvline(x=Mz[i], ymin=0, ymax=Intens[i],c='red')
125
- plt.xlabel("m/z")
126
- plt.ylabel("Intensity")
127
- plt.title("Mass Spectrum")
128
- return plt
129
-
130
- def draw_mass_spectrum2(peak_data_path):
131
- ms2 = list(load_from_msp(peak_data_path.name))[0]
132
- ms2 = spectrum_processing(ms2)
133
- Mz = np.array(ms2.mz)
134
- Intens = np.array(ms2.intensities)
135
- plt.figure(figsize=(8,5))
136
  for i in range(len(Mz)):
137
  plt.axvline(x=Mz[i], ymin=0, ymax=Intens[i],c='red')
138
  plt.xlabel("m/z")
@@ -140,6 +127,12 @@ def draw_mass_spectrum2(peak_data_path):
140
  plt.title("Mass Spectrum")
141
  return plt
142
 
 
 
 
 
 
 
143
 
144
  dataset_repo = "Tingxie/CSU-MS2-DB"
145
  db_filename = "csu_ms2_db.db"
@@ -350,7 +343,7 @@ def rank_lib(database_name,spectrum_path,instrument_type,adduct,parent_Mass,coll
350
  #top_10_results = pd.DataFrame({'SMILES':[x[0] for x in top_10_smiles],'Struture':images,'Score':[x[1] for x in top_10_smiles],'Rank':list(range(10))})
351
  return image_descrips
352
 
353
- def rank_user_lib(candidate_file,spectrum_path,instrument_type,adduct,collision_energy):
354
  model_inference_low = ModelInference(config_path=config_path,
355
  pretrain_model_path=pretrain_model_path_low,
356
  device="cpu")
@@ -361,8 +354,21 @@ def rank_user_lib(candidate_file,spectrum_path,instrument_type,adduct,collision_
361
  pretrain_model_path=pretrain_model_path_high,
362
  device="cpu")
363
  collision_energy=float(collision_energy)
364
- candidate = pd.read_csv(candidate_file.name)
365
- candidate_smiles = list(candidate['SMILES'])
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  ms2 = list(load_from_msp(spectrum_path.name))[0]
367
  ms2 = spectrum_processing(ms2)
368
  ms2_embedding_low,ms2_embedding_median,ms2_embedding_high = MS2Embedding(ms2)
@@ -407,18 +413,21 @@ def rank_user_lib(candidate_file,spectrum_path,instrument_type,adduct,collision_
407
  return image_descrips
408
 
409
 
 
410
  with gr.Blocks(theme=seafoam) as demo:
411
  gr.HTML(custom_css)
412
- gr.Markdown('<div style="font-size:100px; font-weight:bold;">🔍 CSU-MS2 web server </div>')
413
-
414
- #gr.Markdown('<div style="font-size:20px;">You can select from a list of collected databases or upload your structural file by Clicking the button. The MS/MS spectrum to be identified can be uploaded in MSP format. You also need to set the necessary experimental parameters for the input spectrum, including accurate precursor mass and collision energy. Finally, by clicking the "Cross-Modal Retrieval" button, you can retrieve candidates from the selected database or your own structural file and access identification results that include compound structures, scores, and rankings.</div>')
 
 
 
 
 
 
 
 
415
  with gr.Tab(label="📶 Struture library", elem_id='custom_tab'):
416
- with gr.Row():
417
- with gr.Column():
418
- peak_data = gr.File(file_count="single", label="Upload MS/MS spectrum file in .msp format", elem_classes=".file-upload-height")
419
- draw_button = gr.Button("Draw Mass Spectrum")
420
- with gr.Column():
421
- spectrum_output = gr.Plot(label="Mass Spectrum",elem_id="custom_plot")
422
  with gr.Column():
423
  dataset = gr.Dropdown(["CSU_MS2_DB",
424
  "BloodExp: blood exposome database",
@@ -443,35 +452,19 @@ with gr.Blocks(theme=seafoam) as demo:
443
  "STOFF-IDENT: database of water relevant substances",
444
  "T3DB: toxic exposome database",
445
  "TCMSP: traditional chinese medicine systems pharmacology database",
446
- "YMDB: yeast metabolome database"], label="Choose a structure library")
447
- #gr.CheckboxGroup(choices=["HMDB", "Lipidmaps", "CHEMBL"], label="Choose a structure library"),
448
- with gr.Row():
449
- instru=gr.Dropdown(["HCD"], label="Instrument Type")
450
- ionmode=gr.Dropdown(["[M+H]+"], label="Adduct Type")
451
- par_ion_mass=gr.Textbox(label="Parent Ion Mass",placeholder="e.g., 180.00")
452
- collision_e=gr.Textbox(label="collision energy", placeholder="e.g., 40")
453
  with gr.Column():
454
  lib_button = gr.Button("Cross-Modal Retrieval")
455
  lib_output = gr.Gallery(height='auto',columns=4,elem_classes="gallery-height",label='Cross-modal retrieval results')
456
- #lib_output = gr.Dataframe(type="pandas")
457
  with gr.Tab("📁 Upload structure file"):
458
  with gr.Row():
459
  with gr.Column():
460
- use_dataset= gr.File(file_count="single", label="Upload the candidate structure file in csv format, columns=['SMIELS']",elem_classes="file-upload-height2")
461
- user_peak_data=gr.File(file_count="single", label="Upload MS/MS spectrum file in .msp format", elem_classes="file-upload-height2")
462
- user_draw_button = gr.Button("Draw Mass Spectrum")
463
- with gr.Column():
464
- user_spectrum_output = gr.Plot(label="Mass Spectrum",elem_id="custom_plot2")
465
- with gr.Row():
466
- user_instru=gr.Dropdown(["HCD"], label="Instrument Type")
467
- user_ionmode=gr.Dropdown(["[M+H]+"], label="Adduct Type")
468
- user_collision_e=gr.Textbox(label="collision energy", placeholder="e.g., 40")
469
  with gr.Column():
470
  user_button = gr.Button("Cross-Modal Retrieval")
471
  user_output = gr.Gallery(height='auto',columns=4,elem_classes="gallery-height",label='Cross-modal retrieval results')
472
- draw_button.click(draw_mass_spectrum, inputs=[peak_data], outputs=[spectrum_output])
473
- user_draw_button.click(draw_mass_spectrum2, inputs=[user_peak_data], outputs=[user_spectrum_output])
474
  lib_button.click(rank_lib, inputs=[dataset,peak_data,instru,ionmode,par_ion_mass,collision_e], outputs=lib_output)
475
- user_button.click(rank_user_lib, inputs=[use_dataset,user_peak_data,user_instru,user_ionmode,user_collision_e], outputs=user_output)
476
  demo.launch(share=True)
477
 
 
 
89
  custom_css = """
90
  <style>
91
  .file-upload-height {
92
+ height:330px !important;
93
  display: none;
94
  }
95
  .file-upload-height2 {
 
99
  height: 380px !important;
100
  }
101
  #custom_plot {
102
+ height: 280px !important;
103
  }
104
  #custom_plot2 {
105
  height: 480px !important;
 
119
  ms2 = spectrum_processing(ms2)
120
  Mz = np.array(ms2.mz)
121
  Intens = np.array(ms2.intensities)
122
+ plt.figure(figsize=(7,4))
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  for i in range(len(Mz)):
124
  plt.axvline(x=Mz[i], ymin=0, ymax=Intens[i],c='red')
125
  plt.xlabel("m/z")
 
127
  plt.title("Mass Spectrum")
128
  return plt
129
 
130
def search_structure_from_mass(structureDB, mass, ppm):
    """Select the rows of ``structureDB`` whose mass lies within a ppm window.

    Args:
        structureDB: pandas DataFrame that has a numeric
            ``'MonoisotopicMass'`` column.
        mass: Centre of the mass window.
        ppm: Half-width of the window in parts per million of ``mass``.

    Returns:
        The rows of ``structureDB`` with
        ``mass - mass*ppm/1e6 <= MonoisotopicMass <= mass + mass*ppm/1e6``
        (both bounds inclusive), keeping their original index labels.
        The result is empty when no row falls inside the window.
    """
    # Absolute half-width of the window, computed once for both bounds.
    tolerance = mass * ppm / 10**6
    lower_bound = mass - tolerance
    upper_bound = mass + tolerance

    monoisotopic = structureDB['MonoisotopicMass']
    in_window = (monoisotopic >= lower_bound) & (monoisotopic <= upper_bound)
    return structureDB[in_window]
136
 
137
  dataset_repo = "Tingxie/CSU-MS2-DB"
138
  db_filename = "csu_ms2_db.db"
 
343
  #top_10_results = pd.DataFrame({'SMILES':[x[0] for x in top_10_smiles],'Struture':images,'Score':[x[1] for x in top_10_smiles],'Rank':list(range(10))})
344
  return image_descrips
345
 
346
+ def rank_user_lib(candidate_file,spectrum_path,instrument_type,adduct,parent_Mass,collision_energy):
347
  model_inference_low = ModelInference(config_path=config_path,
348
  pretrain_model_path=pretrain_model_path_low,
349
  device="cpu")
 
354
  pretrain_model_path=pretrain_model_path_high,
355
  device="cpu")
356
  collision_energy=float(collision_energy)
357
+ users_candidate = pd.read_csv(candidate_file.name)
358
+ user_candidate_smiles = list(users_candidate['SMILES'])
359
+ candidate = pd.DataFrame(columns=['SMILES','MonoisotopicMass'])
360
+ for smi in user_candidate_smiles:
361
+ mol = Chem.MolFromSmiles(smi)
362
+ MonoisotopicMass = ExactMolWt(mol)
363
+ candidate.loc[len(candidate.index)] = [smi,MonoisotopicMass]
364
+ parent_Mass=float(parent_Mass)
365
+ query_ms=parent_Mass-1.008
366
+ sub_reference_library = search_structure_from_mass(candidate,query_ms,10)
367
+ if len(sub_reference_library) == 0:
368
+ sub_reference_library = search_structure_from_mass(candidate,query_ms,30)
369
+ if len(sub_reference_library) == 0:
370
+ sub_reference_library = candidate.copy()
371
+ candidate_smiles = list(sub_reference_library['SMILES'])
372
  ms2 = list(load_from_msp(spectrum_path.name))[0]
373
  ms2 = spectrum_processing(ms2)
374
  ms2_embedding_low,ms2_embedding_median,ms2_embedding_high = MS2Embedding(ms2)
 
413
  return image_descrips
414
 
415
 
416
+
417
  with gr.Blocks(theme=seafoam) as demo:
418
  gr.HTML(custom_css)
419
+ gr.Markdown('<div style="font-size:50px; font-weight:bold;">🔍 CSU-MS2 web server </div>')
420
+ with gr.Row():
421
+ with gr.Column():
422
+ peak_data = gr.File(file_count="single", label="Upload MS/MS spectrum file in .msp format", elem_classes=".file-upload-height")
423
+ spectrum_output = gr.Plot(label="Mass Spectrum",elem_id="custom_plot")
424
+ peak_data.change(fn=draw_mass_spectrum, inputs=[peak_data], outputs=[spectrum_output])
425
+ with gr.Row():
426
+ instru=gr.Dropdown(["HCD"], label="Instrument Type")
427
+ ionmode=gr.Dropdown(["[M+H]+"], label="Adduct Type")
428
+ par_ion_mass=gr.Textbox(label="Parent Ion Mass",placeholder="e.g., 180.00")
429
+ collision_e=gr.Textbox(label="collision energy", placeholder="e.g., 40")
430
  with gr.Tab(label="📶 Struture library", elem_id='custom_tab'):
 
 
 
 
 
 
431
  with gr.Column():
432
  dataset = gr.Dropdown(["CSU_MS2_DB",
433
  "BloodExp: blood exposome database",
 
452
  "STOFF-IDENT: database of water relevant substances",
453
  "T3DB: toxic exposome database",
454
  "TCMSP: traditional chinese medicine systems pharmacology database",
455
+ "YMDB: yeast metabolome database"], label="Choose a structure library")
 
 
 
 
 
 
456
  with gr.Column():
457
  lib_button = gr.Button("Cross-Modal Retrieval")
458
  lib_output = gr.Gallery(height='auto',columns=4,elem_classes="gallery-height",label='Cross-modal retrieval results')
 
459
  with gr.Tab("📁 Upload structure file"):
460
  with gr.Row():
461
  with gr.Column():
462
+ user_dataset= gr.File(file_count="single", label="Upload the candidate structure file in csv format, columns=['SMIELS']",elem_classes="file-upload-height2")
 
 
 
 
 
 
 
 
463
  with gr.Column():
464
  user_button = gr.Button("Cross-Modal Retrieval")
465
  user_output = gr.Gallery(height='auto',columns=4,elem_classes="gallery-height",label='Cross-modal retrieval results')
 
 
466
  lib_button.click(rank_lib, inputs=[dataset,peak_data,instru,ionmode,par_ion_mass,collision_e], outputs=lib_output)
467
+ user_button.click(rank_user_lib, inputs=[user_dataset,peak_data,instru,ionmode,par_ion_mass,collision_e], outputs=user_output)
468
  demo.launch(share=True)
469
 
470
+