Tingxie committed on
Commit
0ad1bee
·
1 Parent(s): 3a6f041

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +494 -494
app.py CHANGED
@@ -1,494 +1,494 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Created on Thu Oct 10 16:46:36 2024
4
-
5
- @author: ZNDX002
6
- """
7
-
8
- from __future__ import annotations
9
- import sqlite3
10
- import gradio as gr
11
- from infer import ModelInference
12
- from model import ModelCLR
13
- from matchms.importing import load_from_mgf,load_from_msp
14
- import matchms.filtering as msfilters
15
- import numpy as np
16
- from rdkit.Chem import Draw
17
- from rdkit import Chem
18
- import torch
19
- import yaml
20
- import pickle
21
- import subprocess
22
- import pandas as pd
23
- from sklearn.metrics.pairwise import cosine_similarity
24
- import tempfile
25
- import shutil
26
- import os
27
- import matplotlib.pyplot as plt
28
- import gradio as gr
29
- from typing import Iterable
30
- from gradio.themes.base import Base
31
- from gradio.themes.utils import colors, fonts, sizes
32
- import time
33
-
34
class Seafoam(Base):
    """Gradio theme for the demo, built on ``gradio.themes.base.Base``.

    Every constructor argument is keyword-only and is forwarded unchanged to
    ``Base.__init__``.  A fixed set of style overrides is then applied through
    ``Base.set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.blue,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "spacing_size": spacing_size,
            "radius_size": radius_size,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # The light-mode body background is intentionally left at the theme
        # default; only the dark-mode stripe pattern is overridden.
        style_overrides = {
            "body_background_fill_dark": "repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
            "button_primary_background_fill": "linear-gradient(90deg, *primary_300, *secondary_400)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *primary_200, *secondary_300)",
            "button_primary_text_color": "white",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *primary_600, *secondary_800)",
            "slider_color": "*secondary_300",
            "slider_color_dark": "*secondary_600",
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_large_padding": "17px",
            "body_text_color": "#000000",
        }
        super().set(**style_overrides)
84
-
85
- seafoam = Seafoam()
86
-
87
-
88
- # Define the custom CSS styles
89
- custom_css = """
90
- <style>
91
- .file-upload-height {
92
- height:320px !important;
93
- display: none;
94
- }
95
- .file-upload-height2 {
96
- height:190px !important;
97
- }
98
- .gallery-height {
99
- height: 380px !important;
100
- }
101
- #custom_plot {
102
- height: 360px !important;
103
- }
104
- #custom_plot2 {
105
- height: 480px !important;
106
- }
107
- </style>
108
-
109
- """
110
def spectrum_processing(s):
    """Apply the spectrum pre-processing pipeline used before plotting/embedding.

    The spectrum is passed through matchms ``normalize_intensities`` and then
    ``select_by_mz`` with ``mz_from=0`` and ``mz_to=1500``.

    Args:
        s: Spectrum object as yielded by the matchms importers.

    Returns:
        The result of the second filter.
    """
    normalized = msfilters.normalize_intensities(s)
    return msfilters.select_by_mz(normalized, mz_from=0, mz_to=1500)
116
-
117
def draw_mass_spectrum(peak_data_path):
    """Plot the first spectrum of an uploaded MSP file as a stick spectrum.

    Args:
        peak_data_path: Uploaded-file object whose ``.name`` attribute is the
            path of the ``.msp`` file (``None`` when nothing was uploaded).

    Returns:
        The matplotlib ``Figure`` holding the plot.

    Raises:
        gr.Error: If no file was uploaded or the file contains no spectrum.
    """
    if peak_data_path is None:
        raise gr.Error("Please upload an MS/MS spectrum file in .msp format.")

    spectra = list(load_from_msp(peak_data_path.name))
    if not spectra:
        raise gr.Error("No spectrum could be read from the uploaded .msp file.")

    # Only the first spectrum in the file is drawn.
    ms2 = spectrum_processing(spectra[0])
    mz_values = np.asarray(ms2.mz)
    intensities = np.asarray(ms2.intensities)

    # Use an explicit figure instead of pyplot's implicit "current figure" so
    # concurrent requests cannot draw into each other's plot.
    fig, ax = plt.subplots(figsize=(8.5, 5))
    ax.vlines(mz_values, 0, intensities, colors='red')
    # Intensities were normalized by spectrum_processing, so keep the 0..1 axis.
    ax.set_ylim(0, 1)
    ax.set_xlabel("m/z")
    ax.set_ylabel("Intensity")
    ax.set_title("Mass Spectrum")

    # Deregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the Figure object itself can still be rendered.
    plt.close(fig)
    return fig
133
-
134
# Module-level setup: open the candidate database and load the three
# collision-energy-specific models once at import time.

# check_same_thread=False lets the connection be used from threads other than
# the one that created it.
conn = sqlite3.connect("/csu_ms2_db.db", check_same_thread=False)
device = 'cpu'
pretrain_model_path_low = 'low/checkpoints/model.pth'
pretrain_model_path_median = 'median/checkpoints/model.pth'
pretrain_model_path_high = 'high/checkpoints/model.pth'
config_path = "low/checkpoints/config.yaml"

# Close the config file deterministically instead of leaking the handle.
with open(config_path, "r") as _config_file:
    config = yaml.load(_config_file, Loader=yaml.FullLoader)

# All three models share the architecture described by the low-energy config.
model_low = ModelCLR(**config["model_config"]).to(device)
model_median = ModelCLR(**config["model_config"]).to(device)
model_high = ModelCLR(**config["model_config"]).to(device)

# map_location makes the checkpoints loadable on this CPU-only setup even if
# they were saved from another device.
state_dict_low = torch.load(pretrain_model_path_low, map_location=device)
state_dict_median = torch.load(pretrain_model_path_median, map_location=device)
state_dict_high = torch.load(pretrain_model_path_high, map_location=device)

model_low.load_state_dict(state_dict_low)
model_low.eval()
model_median.load_state_dict(state_dict_median)
model_median.eval()
model_high.load_state_dict(state_dict_high)
model_high.eval()
153
-
154
def generate_file(file_obj):
    """Copy an uploaded file into the working directory and return the new path.

    Args:
        file_obj: Object whose ``.name`` attribute is the path of the file to copy.

    Returns:
        Path of the copy inside the module-level ``tmpdir`` directory.
    """
    global tmpdir
    try:
        target_dir = tmpdir
    except NameError:
        # No module-level ``tmpdir`` is defined in the visible file, so create
        # one on first use instead of failing with NameError.
        tmpdir = target_dir = tempfile.mkdtemp()
    # shutil.copy returns the destination path (directory + source basename).
    return shutil.copy(file_obj.name, target_dir)
160
-
161
def MS2Embedding(spectra):
    """Embed one spectrum with the low-, median- and high-energy models.

    The m/z values are rounded to 4 decimals.  Peak lists longer than 300
    entries keep only their last 300 entries; shorter ones are zero-padded to
    length 300.

    Args:
        spectra: A single spectrum object exposing ``.mz`` and ``.intensities``.

    Returns:
        Tuple ``(low, median, high)`` of NumPy arrays, one L2-normalized
        embedding per model.
    """
    max_peaks = 300

    mz_values = np.around(np.asarray(spectra.mz), decimals=4)
    intensities = np.asarray(spectra.intensities)
    if len(mz_values) > max_peaks:
        mz_values = mz_values[-max_peaks:]
        intensities = intensities[-max_peaks:]
    peak_count = len(mz_values)
    padding = (0, max_peaks - peak_count)
    mz_values = np.pad(mz_values, padding, mode='constant', constant_values=0)
    intensities = np.pad(intensities, padding, mode='constant', constant_values=0)

    # Batch of one spectrum.
    spec_mzs = torch.tensor(mz_values[np.newaxis, :]).float()
    spec_intens = torch.tensor(intensities[np.newaxis, :]).float()
    num_peaks = torch.LongTensor([peak_count])

    embeddings = []
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        for model in (model_low, model_median, model_high):
            features, mask = model.ms_encoder(spec_mzs, spec_intens, num_peaks)
            features = model.spec_esa(features, mask)
            features = model.spec_proj(features)
            features = features / features.norm(dim=-1, keepdim=True)
            embeddings.append(np.array(features.tolist()[0]))
    return tuple(embeddings)
189
-
190
def calculate_cosine_similarity(vector1, vector2):
    """Return the cosine similarity of two vectors as a scalar.

    Both inputs are reshaped to a single row before being passed to
    scikit-learn's ``cosine_similarity``; the only entry of the resulting
    1x1 matrix is returned.
    """
    row_a = vector1.reshape(1, -1)
    row_b = vector2.reshape(1, -1)
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]
192
-
193
# Maps the labels offered in the library dropdown to SQLite table names.
_LIBRARY_TABLES = {
    "CSU_MS2_DB": "ConSSDB",
    "BloodExp: blood exposome database": "BloodexpDB",
    "ChEBI: products of nature or synthetic products database": "ChebiDB",
    "ChemFOnt: Biochemical database including primary metabolites, secondary metabolites, natural products, etc": "ChemfontDB",
    "ContaminantDB: Contaminant data from different online references and databases on contaminants": "ContaminantdbDB",
    "DrugBank: drug biochemical and pharmacological information database": "DrugbankDB",
    "ECMDB: database of small molecule metabolites found in or produced by Escherichia coli": "EcmdbDB",
    "Exposome-Explorer: biomarkers of exposure to environmental risk factors for diseases": "ExposomeDB",
    "Foodb: food constituent database": "FoodbDB",
    "HMDB: human metabolome database": "HmdbDB",
    "KEGG: a collection of small molecules, biopolymers, and other chemical substances": "KeggDB",
    "KNApSAcK: integrated metabolite-plant species database": "KnapsackDB",
    "MCDB: small molecule metabolites found in cow milk": "MilkDB",
    "MiMeDB: taxonomic, microbiological, and body-site location data on most known human microbes": "MimedbDB",
    "NANPDB: database of natural products isolated from native organisms of Northern Africa": "NanpdbDB",
    "NPAtlas: natural products atlas database": "NpatlasDB",
    "Phenol-Explorer: Polyphenols": "PhenolDB",
    "PMHub: plant metabolite database": "PmhubDB",
    "PMN: plant metabolite database": "PmnDB",
    "SMPDB: small molecule pathway database": "SmpdbDB",
    "STOFF-IDENT: database of water relevant substances": "StoffDB",
    "T3DB: toxic exposome database": "T3dbDB",
    "TCMSP: traditional chinese medicine systems pharmacology database": "TcmspDB",
    "YMDB: yeast metabolome database": "YmdbDB",
}


def retrieve_similarity_scores(table_name, target_mass, collision_energy, ms2_embedding_low, ms2_embedding_median, ms2_embedding_high):
    """Rank database candidates within 20 ppm of the target mass.

    Args:
        table_name: Library dropdown label, or one of the known table names.
        target_mass: Precursor mass; 1.008 is subtracted before matching
            against ``MonoisotopicMass``.
        collision_energy: Selects the weighting of the three similarities
            (<=15, <=25, >25).
        ms2_embedding_low, ms2_embedding_median, ms2_embedding_high: Query
            embeddings (torch tensors) from the three models.

    Returns:
        Up to 10 ``((smiles,), weighted_similarity)`` tuples, best first.  The
        SMILES stays wrapped in a 1-tuple because callers index it as
        ``x[0][0]``.  Empty when nothing matches or the energy is NaN.

    Raises:
        ValueError: If ``table_name`` is not a known library.
    """
    # The table name is interpolated into SQL, so only whitelisted names pass.
    table = _LIBRARY_TABLES.get(table_name, table_name)
    if table not in _LIBRARY_TABLES.values():
        raise ValueError(f"Unknown structure library: {table_name!r}")

    if collision_energy <= 15:
        w_low, w_median, w_high = 0.4, 0.3, 0.3
    elif collision_energy <= 25:
        w_low, w_median, w_high = 0.3, 0.4, 0.3
    elif collision_energy > 25:
        w_low, w_median, w_high = 0.2, 0.3, 0.5
    else:
        # NaN compares false everywhere; no weighting applies.
        return []

    target_mass = target_mass - 1.008
    tolerance = target_mass * 20 / 1000000  # 20 ppm window

    # Fetch SMILES and embeddings together rather than one query per candidate.
    query = f"""
        SELECT SMILES, low_energy_embedding, median_energy_embedding, high_energy_embedding
        FROM {table}
        WHERE MonoisotopicMass >= ? - ? AND MonoisotopicMass <= ? + ?
    """
    cur = conn.cursor()
    try:
        cur.execute(query, (target_mass, tolerance, target_mass, tolerance))
        rows = cur.fetchall()
    finally:
        cur.close()

    weighted_similarity_scores = []
    for smiles, low_blob, median_blob, high_blob in rows:
        # Skip incomplete rows instead of discarding the whole result set.
        if smiles is None or None in (low_blob, median_blob, high_blob):
            continue
        # NOTE(security): pickle.loads is only safe while the database file
        # itself is trusted.
        low_db = torch.tensor(np.array(pickle.loads(low_blob), dtype=np.float64)).float()
        median_db = torch.tensor(np.array(pickle.loads(median_blob), dtype=np.float64)).float()
        high_db = torch.tensor(np.array(pickle.loads(high_blob), dtype=np.float64)).float()

        low_similarity = (ms2_embedding_low @ low_db.t()).item()
        median_similarity = (ms2_embedding_median @ median_db.t()).item()
        high_similarity = (ms2_embedding_high @ high_db.t()).item()

        weighted_similarity = w_low * low_similarity + w_median * median_similarity + w_high * high_similarity
        weighted_similarity_scores.append(((smiles,), weighted_similarity))

    # Highest weighted similarity first; keep the top 10.
    weighted_similarity_scores.sort(key=lambda item: item[1], reverse=True)
    return weighted_similarity_scores[:10]
298
-
299
def get_topK_result(library, ms_feature, smiles_feature, topK):
    """Return the ``topK`` library entries most similar to one spectrum embedding.

    Args:
        library: Sequence of candidates; entry ``i`` corresponds to row ``i``
            of ``smiles_feature``.
        ms_feature: 1-D tensor, the query embedding.
        smiles_feature: 2-D tensor with one candidate embedding per row.
        topK: Number of results wanted; clamped to what is available.

    Returns:
        ``(indices, scores, candidates)`` as plain lists, best match first.
        All three are empty when nothing can be returned.
    """
    # Only rows that have a matching library entry can be returned.
    usable = min(len(library), smiles_feature.shape[0])
    topK = min(int(topK), usable)
    if topK <= 0:
        return [], [], []

    with torch.no_grad():
        similarities = (ms_feature.unsqueeze(0) @ smiles_feature[:usable].t()).cpu()
        scores_, indices_ = similarities.topk(topK, dim=1, largest=True, sorted=True)

    indices = indices_.tolist()[0]
    scores = scores_.tolist()[0]
    candidates = [library[i] for i in indices]
    return indices, scores, candidates
314
-
315
-
316
def rank_lib(database_name, spectrum_path, instrument_type, adduct, parent_Mass, collision_energy):
    """Retrieve and draw the best-matching structures from a built-in library.

    Args:
        database_name: Library label chosen in the dropdown.
        spectrum_path: Uploaded ``.msp`` file object (path in ``.name``).
        instrument_type: Accepted for the UI wiring; not used here.
        adduct: Accepted for the UI wiring; not used here.
        parent_Mass: Precursor mass, as text or number.
        collision_energy: Collision energy, as text or number.

    Returns:
        List of ``(image, caption)`` tuples for the gallery.  Candidates whose
        SMILES cannot be parsed or drawn are omitted.

    Raises:
        gr.Error: On a missing/empty spectrum file or non-numeric inputs.
    """
    if spectrum_path is None:
        raise gr.Error("Please upload an MS/MS spectrum file in .msp format.")
    try:
        collision_energy = float(collision_energy)
        parent_Mass = float(parent_Mass)
    except (TypeError, ValueError) as err:
        raise gr.Error("Parent ion mass and collision energy must be numbers.") from err

    spectra = list(load_from_msp(spectrum_path.name))
    if not spectra:
        raise gr.Error("No spectrum could be read from the uploaded .msp file.")
    ms2 = spectrum_processing(spectra[0])

    embeddings = [torch.tensor(vec).float() for vec in MS2Embedding(ms2)]
    top_10_smiles = retrieve_similarity_scores(database_name, parent_Mass, collision_energy, *embeddings) or []

    bw_draw_options = Draw.MolDrawOptions()
    bw_draw_options.useBWAtomPalette()

    image_descrips = []
    for hit, score in top_10_smiles:
        smi = hit[0]  # each hit is a 1-tuple holding the SMILES
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Unparsable SMILES: a placeholder string is not a valid gallery image.
            continue
        try:
            image = Draw.MolToImage(mol, options=bw_draw_options)
        except Exception:
            continue
        image_descrips.append((image, 'SMILES: '+smi+' ' + 'Score: '+str(score)))
    return image_descrips
340
-
341
def rank_user_lib(candidate_file, spectrum_path, instrument_type, adduct, collision_energy):
    """Rank user-supplied candidate structures against an uploaded spectrum.

    Args:
        candidate_file: Uploaded CSV file object with a ``SMILES`` column.
        spectrum_path: Uploaded ``.msp`` file object (path in ``.name``).
        instrument_type: Accepted for the UI wiring; not used here.
        adduct: Accepted for the UI wiring; not used here.
        collision_energy: Collision energy, as text or number; selects the
            weighting of the three model similarities (<=15, <=25, >25).

    Returns:
        List of up to 10 ``(image, caption)`` tuples for the gallery, best
        first.  Candidates that cannot be parsed or drawn are omitted.

    Raises:
        gr.Error: On missing files, a missing ``SMILES`` column, an empty
            spectrum file or a non-numeric collision energy.
    """
    if candidate_file is None or spectrum_path is None:
        raise gr.Error("Please upload both a candidate CSV file and an .msp spectrum file.")
    try:
        collision_energy = float(collision_energy)
    except (TypeError, ValueError) as err:
        raise gr.Error("Collision energy must be a number.") from err
    if collision_energy <= 15:
        w_low, w_median, w_high = 0.4, 0.3, 0.3
    elif collision_energy <= 25:
        w_low, w_median, w_high = 0.3, 0.4, 0.3
    elif collision_energy > 25:
        w_low, w_median, w_high = 0.2, 0.3, 0.5
    else:
        # NaN falls through every comparison.
        raise gr.Error("Collision energy must be a number.")

    candidate = pd.read_csv(candidate_file.name)
    if 'SMILES' not in candidate.columns:
        raise gr.Error("The candidate file must contain a 'SMILES' column.")
    candidate_smiles = [str(smi) for smi in candidate['SMILES'].dropna()]
    if not candidate_smiles:
        return []

    spectra = list(load_from_msp(spectrum_path.name))
    if not spectra:
        raise gr.Error("No spectrum could be read from the uploaded .msp file.")
    ms2 = spectrum_processing(spectra[0])
    ms2_embedding_low, ms2_embedding_median, ms2_embedding_high = (
        torch.tensor(vec).float() for vec in MS2Embedding(ms2)
    )

    # Candidates are encoded in batches of 64 SMILES.
    contexts = [candidate_smiles[i:i + 64] for i in range(0, len(candidate_smiles), 64)]

    def encode_candidates(pretrain_model_path):
        # NOTE(review): a fresh ModelInference is built on every request, as in
        # the original code; consider caching if start-up cost matters.
        inference = ModelInference(config_path=config_path,
                                   pretrain_model_path=pretrain_model_path,
                                   device="cpu")
        return torch.cat([inference.smiles_encode(batch).cpu() for batch in contexts], 0)

    low_similarity = (ms2_embedding_low @ encode_candidates(pretrain_model_path_low).t()).numpy()
    median_similarity = (ms2_embedding_median @ encode_candidates(pretrain_model_path_median).t()).numpy()
    high_similarity = (ms2_embedding_high @ encode_candidates(pretrain_model_path_high).t()).numpy()
    weighted_similarity = w_low * low_similarity + w_median * median_similarity + w_high * high_similarity

    ranked = sorted(zip(candidate_smiles, weighted_similarity), key=lambda item: item[1], reverse=True)

    bw_draw_options = Draw.MolDrawOptions()
    bw_draw_options.useBWAtomPalette()

    image_descrips = []
    for smi, score in ranked[:10]:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Unparsable SMILES: a placeholder string is not a valid gallery image.
            continue
        try:
            image = Draw.MolToImage(mol, options=bw_draw_options)
        except Exception:
            continue
        image_descrips.append((image, 'SMILES: '+smi+' ' + 'Score: '+str(score)))
    return image_descrips
396
-
397
-
398
- with gr.Blocks(theme=seafoam) as demo:
399
- gr.HTML(custom_css)
400
- gr.Markdown('<div style="font-size:40px; font-weight:bold;">🔍 Cross-Modal MS2 Retrieval Demo </div>')
401
- gr.Markdown('''
402
- <div style=" font-weight:bold;">
403
- <span style="font-size:40px;"> Based on CSU-MS2 model </span>
404
- </div>
405
- ''')
406
-
407
- #gr.Markdown('<div style="font-size:20px;">You can select from a list of collected databases or upload your structural file by Clicking the button. The MS/MS spectrum to be identified can be uploaded in MSP format. You also need to set the necessary experimental parameters for the input spectrum, including accurate precursor mass and collision energy. Finally, by clicking the "Cross-Modal Retrieval" button, you can retrieve candidates from the selected database or your own structural file and access identification results that include compound structures, scores, and rankings.</div>')
408
- with gr.Tab(label="📶 Struture library", elem_id='custom_tab'):
409
- #Blocks特有组件,设置所有子组件按垂直排列
410
- with gr.Row():
411
- with gr.Column():
412
- peak_data = gr.File(file_count="single", label="Upload MS/MS spectrum file in .msp format", elem_classes=".file-upload-height")
413
- draw_button = gr.Button("Draw Mass Spectrum")
414
- with gr.Column():
415
- spectrum_output = gr.Plot(label="Mass Spectrum",elem_id="custom_plot")
416
- with gr.Column():
417
- dataset = gr.Dropdown(["CSU_MS2_DB",
418
- "BloodExp: blood exposome database",
419
- "ChEBI: products of nature or synthetic products database",
420
- "ChemFOnt: Biochemical database including primary metabolites, secondary metabolites, natural products, etc",
421
- "ContaminantDB: Contaminant data from different online references and databases on contaminants",
422
- "DrugBank: drug biochemical and pharmacological information database",
423
- "ECMDB: database of small molecule metabolites found in or produced by Escherichia coli",
424
- "Exposome-Explorer: biomarkers of exposure to environmental risk factors for diseases",
425
- "Foodb: food constituent database",
426
- "HMDB: human metabolome database",
427
- "KEGG: a collection of small molecules, biopolymers, and other chemical substances",
428
- "KNApSAcK: integrated metabolite-plant species database",
429
- "MCDB: small molecule metabolites found in cow milk",
430
- "MiMeDB: taxonomic, microbiological, and body-site location data on most known human microbes",
431
- "NANPDB: database of natural products isolated from native organisms of Northern Africa",
432
- "NPAtlas: natural products atlas database",
433
- "Phenol-Explorer: Polyphenols",
434
- "PMHub: plant metabolite database",
435
- "PMN: plant metabolite database",
436
- "SMPDB: small molecule pathway database",
437
- "STOFF-IDENT: database of water relevant substances",
438
- "T3DB: toxic exposome database",
439
- "TCMSP: traditional chinese medicine systems pharmacology database",
440
- "YMDB: yeast metabolome database"], label="Choose a structure library")
441
- #gr.CheckboxGroup(choices=["HMDB", "Lipidmaps", "CHEMBL"], label="Choose a structure library"),
442
- with gr.Row():
443
- instru=gr.Dropdown(["HCD"], label="Instrument Type")
444
- ionmode=gr.Dropdown(["[M+H]+"], label="Adduct Type")
445
- par_ion_mass=gr.Textbox(label="Parent Ion Mass",placeholder="e.g., 180.00")
446
- collision_e=gr.Textbox(label="collision energy", placeholder="e.g., 40")
447
- with gr.Column():
448
- lib_button = gr.Button("Cross-Modal Retrieval")
449
- lib_output = gr.Gallery(height='auto',columns=4,elem_classes="gallery-height",label='Cross-modal retrieval results')
450
- #lib_output = gr.Dataframe(type="pandas")
451
- with gr.Tab("📁 Upload structure file"):
452
- with gr.Row():
453
- with gr.Column():
454
- use_dataset= gr.File(file_count="single", label="Upload the candidate structure file in csv format, columns=['SMIELS']",elem_classes="file-upload-height2")
455
- user_peak_data=gr.File(file_count="single", label="Upload MS/MS spectrum file in .msp format", elem_classes="file-upload-height2")
456
- user_draw_button = gr.Button("Draw Mass Spectrum")
457
- with gr.Column():
458
- user_spectrum_output = gr.Plot(label="Mass Spectrum",elem_id="custom_plot2")
459
- with gr.Row():
460
- user_instru=gr.Dropdown(["HCD"], label="Instrument Type")
461
- user_ionmode=gr.Dropdown(["[M+H]+"], label="Adduct Type")
462
- user_collision_e=gr.Textbox(label="collision energy", placeholder="e.g., 40")
463
- with gr.Column():
464
- user_button = gr.Button("Cross-Modal Retrieval")
465
- user_output = gr.Gallery(height='auto',columns=4,elem_classes="gallery-height",label='Cross-modal retrieval results')
466
- draw_button.click(draw_mass_spectrum, inputs=[peak_data], outputs=[spectrum_output])
467
- user_draw_button.click(draw_mass_spectrum, inputs=[user_peak_data], outputs=[user_spectrum_output])
468
- lib_button.click(rank_lib, inputs=[dataset,peak_data,instru,ionmode,par_ion_mass,collision_e], outputs=lib_output)
469
- user_button.click(rank_user_lib, inputs=[use_dataset,user_peak_data,user_instru,user_ionmode,user_collision_e], outputs=user_output)
470
- demo.launch(share=False)
471
-
472
-
473
-
474
-
475
-
476
-
477
-
478
-
479
-
480
-
481
-
482
-
483
-
484
-
485
-
486
-
487
-
488
-
489
-
490
-
491
-
492
-
493
-
494
-
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Thu Oct 10 16:46:36 2024
4
+
5
+ @author: ZNDX002
6
+ """
7
+
8
+ from __future__ import annotations
9
+ import sqlite3
10
+ import gradio as gr
11
+ from infer import ModelInference
12
+ from model import ModelCLR
13
+ from matchms.importing import load_from_mgf,load_from_msp
14
+ import matchms.filtering as msfilters
15
+ import numpy as np
16
+ from rdkit.Chem import Draw
17
+ from rdkit import Chem
18
+ import torch
19
+ import yaml
20
+ import pickle
21
+ import subprocess
22
+ import pandas as pd
23
+ from sklearn.metrics.pairwise import cosine_similarity
24
+ import tempfile
25
+ import shutil
26
+ import os
27
+ import matplotlib.pyplot as plt
28
+ import gradio as gr
29
+ from typing import Iterable
30
+ from gradio.themes.base import Base
31
+ from gradio.themes.utils import colors, fonts, sizes
32
+ import time
33
+
34
class Seafoam(Base):
    """Gradio theme for the demo, built on ``gradio.themes.base.Base``.

    Every constructor argument is keyword-only and is forwarded unchanged to
    ``Base.__init__``.  A fixed set of style overrides is then applied through
    ``Base.set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.blue,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "spacing_size": spacing_size,
            "radius_size": radius_size,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # The light-mode body background is intentionally left at the theme
        # default; only the dark-mode stripe pattern is overridden.
        style_overrides = {
            "body_background_fill_dark": "repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
            "button_primary_background_fill": "linear-gradient(90deg, *primary_300, *secondary_400)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *primary_200, *secondary_300)",
            "button_primary_text_color": "white",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *primary_600, *secondary_800)",
            "slider_color": "*secondary_300",
            "slider_color_dark": "*secondary_600",
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_large_padding": "17px",
            "body_text_color": "#000000",
        }
        super().set(**style_overrides)
84
+
85
+ seafoam = Seafoam()
86
+
87
+
88
+ # Define the custom CSS styles
89
+ custom_css = """
90
+ <style>
91
+ .file-upload-height {
92
+ height:320px !important;
93
+ display: none;
94
+ }
95
+ .file-upload-height2 {
96
+ height:190px !important;
97
+ }
98
+ .gallery-height {
99
+ height: 380px !important;
100
+ }
101
+ #custom_plot {
102
+ height: 360px !important;
103
+ }
104
+ #custom_plot2 {
105
+ height: 480px !important;
106
+ }
107
+ </style>
108
+
109
+ """
110
def spectrum_processing(s):
    """Apply the spectrum pre-processing pipeline used before plotting/embedding.

    The spectrum is passed through matchms ``normalize_intensities`` and then
    ``select_by_mz`` with ``mz_from=0`` and ``mz_to=1500``.

    Args:
        s: Spectrum object as yielded by the matchms importers.

    Returns:
        The result of the second filter.
    """
    normalized = msfilters.normalize_intensities(s)
    return msfilters.select_by_mz(normalized, mz_from=0, mz_to=1500)
116
+
117
def draw_mass_spectrum(peak_data_path):
    """Plot the first spectrum of an uploaded MSP file as a stick spectrum.

    Args:
        peak_data_path: Uploaded-file object whose ``.name`` attribute is the
            path of the ``.msp`` file (``None`` when nothing was uploaded).

    Returns:
        The matplotlib ``Figure`` holding the plot.

    Raises:
        gr.Error: If no file was uploaded or the file contains no spectrum.
    """
    if peak_data_path is None:
        raise gr.Error("Please upload an MS/MS spectrum file in .msp format.")

    spectra = list(load_from_msp(peak_data_path.name))
    if not spectra:
        raise gr.Error("No spectrum could be read from the uploaded .msp file.")

    # Only the first spectrum in the file is drawn.
    ms2 = spectrum_processing(spectra[0])
    mz_values = np.asarray(ms2.mz)
    intensities = np.asarray(ms2.intensities)

    # Use an explicit figure instead of pyplot's implicit "current figure" so
    # concurrent requests cannot draw into each other's plot.
    fig, ax = plt.subplots(figsize=(8.5, 5))
    ax.vlines(mz_values, 0, intensities, colors='red')
    # Intensities were normalized by spectrum_processing, so keep the 0..1 axis.
    ax.set_ylim(0, 1)
    ax.set_xlabel("m/z")
    ax.set_ylabel("Intensity")
    ax.set_title("Mass Spectrum")

    # Deregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the Figure object itself can still be rendered.
    plt.close(fig)
    return fig
133
+
134
# Module-level setup: open the candidate database and load the three
# collision-energy-specific models once at import time.

# check_same_thread=False lets the connection be used from threads other than
# the one that created it.
conn = sqlite3.connect("/csu_ms2_db.db", check_same_thread=False)
device = 'cpu'
pretrain_model_path_low = 'model/low_energy/checkpoints/model.pth'
pretrain_model_path_median = 'model/median_energy/checkpoints/model.pth'
pretrain_model_path_high = 'model/high_energy/checkpoints/model.pth'
config_path = "model/low_energy/checkpoints/config.yaml"

# Close the config file deterministically instead of leaking the handle.
with open(config_path, "r") as _config_file:
    config = yaml.load(_config_file, Loader=yaml.FullLoader)

# All three models share the architecture described by the low-energy config.
model_low = ModelCLR(**config["model_config"]).to(device)
model_median = ModelCLR(**config["model_config"]).to(device)
model_high = ModelCLR(**config["model_config"]).to(device)

# map_location makes the checkpoints loadable on this CPU-only setup even if
# they were saved from another device.
state_dict_low = torch.load(pretrain_model_path_low, map_location=device)
state_dict_median = torch.load(pretrain_model_path_median, map_location=device)
state_dict_high = torch.load(pretrain_model_path_high, map_location=device)

model_low.load_state_dict(state_dict_low)
model_low.eval()
model_median.load_state_dict(state_dict_median)
model_median.eval()
model_high.load_state_dict(state_dict_high)
model_high.eval()
153
+
154
def generate_file(file_obj):
    """Copy an uploaded file into the working directory and return the new path.

    Args:
        file_obj: Object whose ``.name`` attribute is the path of the file to copy.

    Returns:
        Path of the copy inside the module-level ``tmpdir`` directory.
    """
    global tmpdir
    try:
        target_dir = tmpdir
    except NameError:
        # No module-level ``tmpdir`` is defined in the visible file, so create
        # one on first use instead of failing with NameError.
        tmpdir = target_dir = tempfile.mkdtemp()
    # shutil.copy returns the destination path (directory + source basename).
    return shutil.copy(file_obj.name, target_dir)
160
+
161
def MS2Embedding(spectra):
    """Embed one spectrum with the low-, median- and high-energy models.

    The m/z values are rounded to 4 decimals.  Peak lists longer than 300
    entries keep only their last 300 entries; shorter ones are zero-padded to
    length 300.

    Args:
        spectra: A single spectrum object exposing ``.mz`` and ``.intensities``.

    Returns:
        Tuple ``(low, median, high)`` of NumPy arrays, one L2-normalized
        embedding per model.
    """
    max_peaks = 300

    mz_values = np.around(np.asarray(spectra.mz), decimals=4)
    intensities = np.asarray(spectra.intensities)
    if len(mz_values) > max_peaks:
        mz_values = mz_values[-max_peaks:]
        intensities = intensities[-max_peaks:]
    peak_count = len(mz_values)
    padding = (0, max_peaks - peak_count)
    mz_values = np.pad(mz_values, padding, mode='constant', constant_values=0)
    intensities = np.pad(intensities, padding, mode='constant', constant_values=0)

    # Batch of one spectrum.
    spec_mzs = torch.tensor(mz_values[np.newaxis, :]).float()
    spec_intens = torch.tensor(intensities[np.newaxis, :]).float()
    num_peaks = torch.LongTensor([peak_count])

    embeddings = []
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        for model in (model_low, model_median, model_high):
            features, mask = model.ms_encoder(spec_mzs, spec_intens, num_peaks)
            features = model.spec_esa(features, mask)
            features = model.spec_proj(features)
            features = features / features.norm(dim=-1, keepdim=True)
            embeddings.append(np.array(features.tolist()[0]))
    return tuple(embeddings)
189
+
190
def calculate_cosine_similarity(vector1, vector2):
    """Return the cosine similarity of two vectors as a scalar.

    Both inputs are reshaped to a single row before being passed to
    scikit-learn's ``cosine_similarity``; the only entry of the resulting
    1x1 matrix is returned.
    """
    row_a = vector1.reshape(1, -1)
    row_b = vector2.reshape(1, -1)
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]
192
+
193
def retrieve_similarity_scores(table_name, target_mass, collision_energy, ms2_embedding_low, ms2_embedding_median, ms2_embedding_high):
    """Rank the candidates of one structure library against a query spectrum.

    Candidates are the rows of the selected table whose ``MonoisotopicMass``
    lies within 20 ppm of ``target_mass - 1.008``.  Each candidate stores three
    pickled embeddings (low / median / high energy); the dot product of each
    with the matching query embedding is combined into one weighted score.

    Args:
        table_name: Library label as offered in the UI dropdown, or the name
            of a table itself.
        target_mass: Parent ion mass; 1.008 is subtracted before matching.
            NOTE(review): presumably the proton mass for [M+H]+ - confirm.
        collision_energy: Selects the weighting band: ``<= 15``, ``(15, 25]``
            or ``> 25``.
        ms2_embedding_low: Query embedding compared with the low-energy column.
        ms2_embedding_median: Query embedding compared with the median-energy
            column.
        ms2_embedding_high: Query embedding compared with the high-energy
            column.

    Returns:
        Up to ten ``((smiles,), score)`` pairs sorted by descending score.
        The SMILES string stays wrapped in a 1-tuple because callers index
        the result as ``x[0][0]``.  The list is empty when nothing matches or
        when ``collision_energy`` is NaN.

    Raises:
        ValueError: If ``table_name`` is neither a known label nor a plain
            identifier.
    """
    library_tables = {
        "CSU_MS2_DB": "ConSSDB",
        "BloodExp: blood exposome database": "BloodexpDB",
        "ChEBI: products of nature or synthetic products database": "ChebiDB",
        "ChemFOnt: Biochemical database including primary metabolites, secondary metabolites, natural products, etc": "ChemfontDB",
        "ContaminantDB: Contaminant data from different online references and databases on contaminants": "ContaminantdbDB",
        "DrugBank: drug biochemical and pharmacological information database": "DrugbankDB",
        "ECMDB: database of small molecule metabolites found in or produced by Escherichia coli": "EcmdbDB",
        "Exposome-Explorer: biomarkers of exposure to environmental risk factors for diseases": "ExposomeDB",
        "Foodb: food constituent database": "FoodbDB",
        "HMDB: human metabolome database": "HmdbDB",
        "KEGG: a collection of small molecules, biopolymers, and other chemical substances": "KeggDB",
        "KNApSAcK: integrated metabolite-plant species database": "KnapsackDB",
        "MCDB: small molecule metabolites found in cow milk": "MilkDB",
        "MiMeDB: taxonomic, microbiological, and body-site location data on most known human microbes": "MimedbDB",
        "NANPDB: database of natural products isolated from native organisms of Northern Africa": "NanpdbDB",
        "NPAtlas: natural products atlas database": "NpatlasDB",
        "Phenol-Explorer: Polyphenols": "PhenolDB",
        "PMHub: plant metabolite database": "PmhubDB",
        "PMN: plant metabolite database": "PmnDB",
        "SMPDB: small molecule pathway database": "SmpdbDB",
        "STOFF-IDENT: database of water relevant substances": "StoffDB",
        "T3DB: toxic exposome database": "T3dbDB",
        "TCMSP: traditional chinese medicine systems pharmacology database": "TcmspDB",
        "YMDB: yeast metabolome database": "YmdbDB",
    }
    table = library_tables.get(table_name, table_name)
    # Identifiers cannot be bound as SQL parameters, so the table name ends up
    # in the statement text; accept nothing but a plain identifier there.
    if not isinstance(table, str) or not table.isidentifier():
        raise ValueError(f"Unknown structure library: {table_name!r}")

    # The band depends only on the query, so pick the weights once.
    if collision_energy <= 15:
        w_low, w_median, w_high = 0.4, 0.3, 0.3
    elif collision_energy <= 25:
        w_low, w_median, w_high = 0.3, 0.4, 0.3
    elif collision_energy > 25:
        w_low, w_median, w_high = 0.2, 0.3, 0.5
    else:
        # NaN matches no band; nothing is scored in that case.
        return []

    neutral_mass = target_mass - 1.008
    tolerance = neutral_mass * 20 / 1000000  # 20 ppm window
    # One query returns the candidates together with their embeddings instead
    # of issuing a second lookup per candidate.
    query = f"""
        SELECT SMILES, low_energy_embedding, median_energy_embedding, high_energy_embedding
        FROM {table}
        WHERE MonoisotopicMass >= ? - ? AND MonoisotopicMass <= ? + ?
    """

    scored = []
    cur = conn.cursor()
    try:
        cur.execute(query, (neutral_mass, tolerance, neutral_mass, tolerance))
        for smiles, low_blob, median_blob, high_blob in cur:
            # NOTE(review): pickle.loads can execute arbitrary code; this is
            # only acceptable while the database file is a trusted artifact.
            low_db, median_db, high_db = (
                torch.tensor(np.array(pickle.loads(blob), dtype=np.float64)).float()
                for blob in (low_blob, median_blob, high_blob)
            )
            low_similarity = (ms2_embedding_low @ low_db.t()).item()
            median_similarity = (ms2_embedding_median @ median_db.t()).item()
            high_similarity = (ms2_embedding_high @ high_db.t()).item()
            weighted_similarity = (
                w_low * low_similarity
                + w_median * median_similarity
                + w_high * high_similarity
            )
            scored.append(((smiles,), weighted_similarity))
    finally:
        # The shared connection stays open; only the cursor is released.
        cur.close()

    # Stable sort: equally scored candidates keep their table order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:10]
298
+
299
def get_topK_result(library, ms_feature, smiles_feature, topK):
    """Pick the ``topK`` library entries most similar to one spectrum feature.

    Similarity is the matrix product of ``ms_feature`` (expanded to a single
    row) with the transposed ``smiles_feature``.  ``topK`` is capped at the
    size of ``library``.

    Returns:
        A tuple ``(indices, scores, candidates)`` of equally long lists,
        ordered from the highest score downwards.
    """
    k = min(topK, len(library))
    with torch.no_grad():
        similarity = (ms_feature.unsqueeze(0) @ smiles_feature.t()).cpu()
        best = similarity.topk(k, dim=1, largest=True, sorted=True)
    # Only one query row exists, so row 0 holds the complete ranking.
    indices = best.indices[0].tolist()
    scores = best.values[0].tolist()
    candidates = [library[position] for position in indices]
    return indices, scores, candidates
314
+
315
+
316
def rank_lib(database_name, spectrum_path, instrument_type, adduct, parent_Mass, collision_energy):
    """Retrieve and render the best library candidates for an uploaded spectrum.

    Args:
        database_name: Library selection forwarded to
            ``retrieve_similarity_scores``.
        spectrum_path: Uploaded file object; its ``.name`` is read as an MSP
            file and only the first spectrum in it is used.
        instrument_type: Unused; present because the UI passes it.
        adduct: Unused; present because the UI passes it.
        parent_Mass: Parent ion mass, converted with ``float``.
        collision_energy: Collision energy, converted with ``float``.

    Returns:
        A list of ``(image, caption)`` pairs in ranking order.  ``image`` is
        the drawn molecule, or the string ``'NAN'`` when drawing failed.  The
        list is empty when the retrieval yields no candidates.
    """
    ms2 = list(load_from_msp(spectrum_path.name))[0]
    ms2 = spectrum_processing(ms2)
    collision_energy = float(collision_energy)
    parent_Mass = float(parent_Mass)
    query_low, query_median, query_high = (
        torch.tensor(embedding).float() for embedding in MS2Embedding(ms2)
    )
    hits = retrieve_similarity_scores(
        database_name, parent_Mass, collision_energy,
        query_low, query_median, query_high,
    )
    # The retrieval may hand back None instead of a list; treat it as empty.
    if not hits:
        return []

    bw_draw_options = Draw.MolDrawOptions()
    bw_draw_options.useBWAtomPalette()

    image_descrips = []
    for smiles_row, score in hits:
        # Each hit carries the SMILES wrapped in a one-element row.
        smi = smiles_row[0]
        try:
            mol = Chem.MolFromSmiles(smi)
            image = Draw.MolToImage(mol, options=bw_draw_options)
        except Exception:
            # Drawing is best-effort: keep the candidate, use a placeholder.
            image = 'NAN'
        image_descrips.append((image, f"SMILES: {smi} Score: {score}"))
    return image_descrips
340
+
341
def rank_user_lib(candidate_file, spectrum_path, instrument_type, adduct, collision_energy):
    """Rank user-supplied candidate structures against an uploaded spectrum.

    Args:
        candidate_file: Uploaded file object; its ``.name`` is read as a CSV
            that must contain a ``SMILES`` column.
        spectrum_path: Uploaded file object; its ``.name`` is read as an MSP
            file and only the first spectrum in it is used.
        instrument_type: Unused; present because the UI passes it.
        adduct: Unused; present because the UI passes it.
        collision_energy: Converted with ``float``; selects the weighting
            band ``<= 15``, ``(15, 25]`` or ``> 25``.

    Returns:
        A list of at most ten ``(image, caption)`` pairs in ranking order.
        ``image`` is the drawn molecule, or the string ``'NAN'`` when drawing
        failed.  The list is empty when the CSV holds no SMILES.

    Raises:
        ValueError: If ``collision_energy`` is not a number (including NaN).
    """
    collision_energy = float(collision_energy)
    if collision_energy <= 15:
        w_low, w_median, w_high = 0.4, 0.3, 0.3
    elif collision_energy <= 25:
        w_low, w_median, w_high = 0.3, 0.4, 0.3
    elif collision_energy > 25:
        w_low, w_median, w_high = 0.2, 0.3, 0.5
    else:
        # Only NaN reaches this point; fail with a clear message.
        raise ValueError("collision energy must be a number")

    candidate = pd.read_csv(candidate_file.name)
    # Empty cells are read as NaN and cannot be encoded or captioned.
    candidate_smiles = candidate['SMILES'].dropna().astype(str).tolist()
    if not candidate_smiles:
        return []

    ms2 = list(load_from_msp(spectrum_path.name))[0]
    ms2 = spectrum_processing(ms2)
    queries = [torch.tensor(embedding).float() for embedding in MS2Embedding(ms2)]

    # Encode the candidates in batches of 64 SMILES.
    batches = [candidate_smiles[start:start + 64]
               for start in range(0, len(candidate_smiles), 64)]
    checkpoints = (pretrain_model_path_low,
                   pretrain_model_path_median,
                   pretrain_model_path_high)
    similarities = []
    for query, checkpoint in zip(queries, checkpoints):
        # One encoder at a time, so the previous one can be released.
        encoder = ModelInference(config_path=config_path,
                                 pretrain_model_path=checkpoint,
                                 device="cpu")
        encoded = torch.cat([encoder.smiles_encode(batch).cpu() for batch in batches], 0)
        similarities.append((query @ encoded.t()).numpy())
    low_similarity, median_similarity, high_similarity = similarities
    weighted_similarity = (w_low * low_similarity
                           + w_median * median_similarity
                           + w_high * high_similarity)

    # Stable sort: equally scored candidates keep their CSV order.
    ranked = sorted(zip(candidate_smiles, weighted_similarity),
                    key=lambda pair: pair[1], reverse=True)[:10]

    bw_draw_options = Draw.MolDrawOptions()
    bw_draw_options.useBWAtomPalette()
    image_descrips = []
    for smi, score in ranked:
        try:
            mol = Chem.MolFromSmiles(smi)
            image = Draw.MolToImage(mol, options=bw_draw_options)
        except Exception:
            # Drawing is best-effort: keep the candidate, use a placeholder.
            image = 'NAN'
        image_descrips.append((image, f"SMILES: {smi} Score: {score}"))
    return image_descrips
396
+
397
+
398
# Gradio front end: one tab searches the bundled structure libraries, the
# other ranks candidates from a user-supplied CSV file.
with gr.Blocks(theme=seafoam) as demo:
    gr.HTML(custom_css)
    gr.Markdown('<div style="font-size:40px; font-weight:bold;">🔍 Cross-Modal MS2 Retrieval Demo </div>')
    # The HTML lines start at column 0 so they can never be taken for an
    # indented Markdown code block.
    gr.Markdown('''
<div style=" font-weight:bold;">
<span style="font-size:40px;"> Based on CSU-MS2 model </span>
</div>
''')

    #gr.Markdown('<div style="font-size:20px;">You can select from a list of collected databases or upload your structural file by Clicking the button. The MS/MS spectrum to be identified can be uploaded in MSP format. You also need to set the necessary experimental parameters for the input spectrum, including accurate precursor mass and collision energy. Finally, by clicking the "Cross-Modal Retrieval" button, you can retrieve candidates from the selected database or your own structural file and access identification results that include compound structures, scores, and rankings.</div>')
    with gr.Tab(label="📶 Struture library", elem_id='custom_tab'):
        # Blocks-specific layout components that control how the child
        # components are arranged.
        with gr.Row():
            with gr.Column():
                peak_data = gr.File(file_count="single", label="Upload MS/MS spectrum file in .msp format", elem_classes=".file-upload-height")
                draw_button = gr.Button("Draw Mass Spectrum")
            with gr.Column():
                spectrum_output = gr.Plot(label="Mass Spectrum", elem_id="custom_plot")
        with gr.Column():
            # These labels are the values handed to rank_lib as database_name.
            dataset = gr.Dropdown(
                [
                    "CSU_MS2_DB",
                    "BloodExp: blood exposome database",
                    "ChEBI: products of nature or synthetic products database",
                    "ChemFOnt: Biochemical database including primary metabolites, secondary metabolites, natural products, etc",
                    "ContaminantDB: Contaminant data from different online references and databases on contaminants",
                    "DrugBank: drug biochemical and pharmacological information database",
                    "ECMDB: database of small molecule metabolites found in or produced by Escherichia coli",
                    "Exposome-Explorer: biomarkers of exposure to environmental risk factors for diseases",
                    "Foodb: food constituent database",
                    "HMDB: human metabolome database",
                    "KEGG: a collection of small molecules, biopolymers, and other chemical substances",
                    "KNApSAcK: integrated metabolite-plant species database",
                    "MCDB: small molecule metabolites found in cow milk",
                    "MiMeDB: taxonomic, microbiological, and body-site location data on most known human microbes",
                    "NANPDB: database of natural products isolated from native organisms of Northern Africa",
                    "NPAtlas: natural products atlas database",
                    "Phenol-Explorer: Polyphenols",
                    "PMHub: plant metabolite database",
                    "PMN: plant metabolite database",
                    "SMPDB: small molecule pathway database",
                    "STOFF-IDENT: database of water relevant substances",
                    "T3DB: toxic exposome database",
                    "TCMSP: traditional chinese medicine systems pharmacology database",
                    "YMDB: yeast metabolome database",
                ],
                label="Choose a structure library",
            )
        with gr.Row():
            instru = gr.Dropdown(["HCD"], label="Instrument Type")
            ionmode = gr.Dropdown(["[M+H]+"], label="Adduct Type")
            par_ion_mass = gr.Textbox(label="Parent Ion Mass", placeholder="e.g., 180.00")
            collision_e = gr.Textbox(label="collision energy", placeholder="e.g., 40")
        with gr.Column():
            lib_button = gr.Button("Cross-Modal Retrieval")
            lib_output = gr.Gallery(height='auto', columns=4, elem_classes="gallery-height", label='Cross-modal retrieval results')
    with gr.Tab("📁 Upload structure file"):
        with gr.Row():
            with gr.Column():
                # rank_user_lib reads the 'SMILES' column, so the label must
                # name exactly that column.
                use_dataset = gr.File(file_count="single", label="Upload the candidate structure file in csv format, columns=['SMILES']", elem_classes="file-upload-height2")
                user_peak_data = gr.File(file_count="single", label="Upload MS/MS spectrum file in .msp format", elem_classes="file-upload-height2")
                user_draw_button = gr.Button("Draw Mass Spectrum")
            with gr.Column():
                user_spectrum_output = gr.Plot(label="Mass Spectrum", elem_id="custom_plot2")
        with gr.Row():
            user_instru = gr.Dropdown(["HCD"], label="Instrument Type")
            user_ionmode = gr.Dropdown(["[M+H]+"], label="Adduct Type")
            user_collision_e = gr.Textbox(label="collision energy", placeholder="e.g., 40")
        with gr.Column():
            user_button = gr.Button("Cross-Modal Retrieval")
            user_output = gr.Gallery(height='auto', columns=4, elem_classes="gallery-height", label='Cross-modal retrieval results')
    # Event wiring: the order of `inputs` matches each handler's parameters.
    draw_button.click(draw_mass_spectrum, inputs=[peak_data], outputs=[spectrum_output])
    user_draw_button.click(draw_mass_spectrum, inputs=[user_peak_data], outputs=[user_spectrum_output])
    lib_button.click(rank_lib, inputs=[dataset, peak_data, instru, ionmode, par_ion_mass, collision_e], outputs=lib_output)
    user_button.click(rank_user_lib, inputs=[use_dataset, user_peak_data, user_instru, user_ionmode, user_collision_e], outputs=user_output)
demo.launch(share=False)
471
+
472
+
473
+
474
+
475
+
476
+
477
+
478
+
479
+
480
+
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+
489
+
490
+
491
+
492
+
493
+
494
+