|
|
""" |
|
|
DreaMS Gradio Web Application |
|
|
|
|
|
This module provides a web interface for the DreaMS (Deep Representations Empowering |
|
|
the Annotation of Mass Spectra) tool using Gradio. It allows users to upload MS/MS |
|
|
files and perform library matching with DreaMS embeddings. |
|
|
|
|
|
Author: DreaMS Team |
|
|
License: MIT |
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
import spaces |
|
|
import shutil |
|
|
import urllib.request |
|
|
from datetime import datetime |
|
|
from functools import partial |
|
|
import matplotlib.pyplot as plt |
|
|
import matplotlib |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from pathlib import Path |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
from rdkit import Chem |
|
|
from rdkit.Chem.Draw import rdMolDraw2D |
|
|
import base64 |
|
|
from io import BytesIO |
|
|
from PIL import Image |
|
|
import io |
|
|
import dreams.utils.spectra as su |
|
|
import dreams.utils.io as dio |
|
|
from dreams.utils.data import MSData |
|
|
from dreams.api import dreams_embeddings |
|
|
from dreams.definitions import * |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Default `img_size` arguments of smiles_to_html_img / spectrum_to_html_img.
SMILES_IMG_SIZE = 120
SPECTRUM_IMG_SIZE = 800

# Filesystem locations: the spectral library lives inside the data directory,
# the example inputs in a separate top-level folder.
DATA_PATH = Path('./DreaMS/data')
LIBRARY_PATH = DATA_PATH / 'MassSpecGym_DreaMS.hdf5'
EXAMPLE_PATH = Path('./data')

# Rendered molecule HTML snippets, keyed by "<smiles>_<img_size>".
_smiles_cache = dict()
|
|
|
|
|
def clear_smiles_cache():
    """Empty the module-level SMILES image cache so its memory can be reclaimed"""
    # The dict is emptied in place (never rebound), so no `global`
    # declaration is required to reach it.
    _smiles_cache.clear()
    print("SMILES image cache cleared")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_input_file(file_path):
    """
    Validate that the input path is an existing file with a supported format

    Args:
        file_path: Path to the input file (str or path-like); may be None or empty

    Returns:
        bool: True if the path is an existing regular file whose extension is
            .mgf or .mzML (compared case-insensitively), False otherwise
    """
    if not file_path:
        return False

    path = Path(file_path)
    # is_file() rather than exists(): a directory that happens to be named
    # like "run.mgf" must not be accepted as a spectra file.
    if not path.is_file():
        return False

    # The suffix is lower-cased first, so only lower-case spellings are listed.
    return path.suffix.lower() in ('.mgf', '.mzml')
|
|
|
|
|
|
|
|
def _convert_pil_to_base64(img, format='PNG'):
    """
    Convert a PIL Image to a base64 data URI

    Args:
        img: PIL Image object (anything exposing ``save(fp, format=, optimize=)``)
        format: Image format passed to ``img.save`` (default: 'PNG'); its
            lower-cased form is used as the MIME subtype

    Returns:
        str: ``data:image/<format>;base64,<payload>`` string
    """
    buffer = io.BytesIO()
    img.save(buffer, format=format, optimize=True)

    # b64encode returns ASCII-only bytes; decode them explicitly instead of
    # slicing the repr() of the bytes object.
    encoded = base64.b64encode(buffer.getvalue()).decode('ascii')
    return f"data:image/{format.lower()};base64,{encoded}"
|
|
|
|
|
|
|
|
def _crop_transparent_edges(img):
    """
    Crop fully transparent edges from a PIL Image

    Args:
        img: PIL Image object; converted to RGBA first if it is in another mode

    Returns:
        PIL Image: Image cropped to the bounding box of its non-transparent
            pixels, or the (RGBA) image unchanged if no such pixel exists
    """
    if img.mode != 'RGBA':
        img = img.convert('RGBA')

    # Take the bounding box from the alpha band only. A plain img.getbbox()
    # only ignores transparent-but-coloured pixels on Pillow versions that
    # support (and default to) alpha_only — NOTE(review): confirm against the
    # deployed Pillow version; using the alpha band is version-independent.
    bbox = img.getchannel('A').getbbox()
    if not bbox:
        # Entirely transparent image: nothing sensible to crop to.
        return img

    return img.crop(bbox)
|
|
|
|
|
|
|
|
def smiles_to_html_img(smiles, img_size=SMILES_IMG_SIZE):
    """
    Convert SMILES string to HTML image for display in Gradio dataframe
    Results (including failures) are cached per SMILES/size pair

    Args:
        smiles: SMILES string representation of molecule
        img_size: Width and height passed to the RDKit drawer (default: SMILES_IMG_SIZE)

    Returns:
        str: HTML img tag with base64 encoded image, or a red HTML message
            if the SMILES cannot be parsed or drawing fails
    """
    # Function-scope import so the file's top-level import block is untouched.
    from html import escape

    cache_key = f"{smiles}_{img_size}"
    if cache_key in _smiles_cache:
        return _smiles_cache[cache_key]

    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            result = "<div style='text-align: center; color: red;'>Invalid SMILES</div>"
        else:
            d2d = rdMolDraw2D.MolDraw2DCairo(img_size, img_size)
            opts = d2d.drawOptions()
            # Keep the background unpainted so the transparent margins can be
            # cropped away afterwards.
            opts.clearBackground = False
            opts.padding = 0.05
            opts.bondLineWidth = 1.5

            d2d.DrawMolecule(mol)
            d2d.FinishDrawing()

            img = Image.open(io.BytesIO(d2d.GetDrawingText()))
            img = _crop_transparent_edges(img)
            img_str = _convert_pil_to_base64(img)

            # The SMILES ends up inside a single-quoted attribute: escape it
            # so no character can terminate the attribute or the tag.
            title = escape(str(smiles), quote=True)
            result = f"<img src='{img_str}' style='max-width: 100%; height: auto;' title='{title}' />"
    except Exception as e:
        # Deliberately broad: one molecule that fails to render must not
        # break the whole results table. The message is escaped because
        # exception text may contain markup characters.
        result = f"<div style='text-align: center; color: red;'>Error: {escape(str(e))}</div>"

    _smiles_cache[cache_key] = result
    return result
|
|
|
|
|
|
|
|
def spectrum_to_html_img(spec1, spec2, img_size=SPECTRUM_IMG_SIZE):
    """
    Convert spectrum plot to HTML image for display in Gradio dataframe

    Args:
        spec1: First spectrum data
        spec2: Second spectrum data (for mirror plot)
        img_size: Accepted for interface compatibility; not used by the
            current implementation (the figure size and dpi are fixed below)

    Returns:
        str: HTML img tag with base64 encoded spectrum plot, or a red HTML
            error message if plotting or encoding fails
    """
    # Function-scope import so the file's top-level import block is untouched.
    from html import escape

    try:
        # Non-interactive backend: figures are only rendered to memory.
        matplotlib.use('Agg')

        try:
            su.plot_spectrum(spec=spec1, mirror_spec=spec2, figsize=(1.6, 0.8))

            buffered = BytesIO()
            plt.savefig(buffered, format='png', bbox_inches='tight', dpi=80, transparent=True)
            buffered.seek(0)

            img = Image.open(buffered)
            img = _crop_transparent_edges(img)
            img_str = _convert_pil_to_base64(img)
        finally:
            # Close the current figure on failure too; otherwise every
            # failed row would leave a figure registered with pyplot.
            plt.close()

        return f"<img src='{img_str}' style='max-width: 100%; height: auto;' title='Spectrum comparison' />"

    except Exception as e:
        # Deliberately broad: a single unplottable spectrum must not break
        # the whole results table. Exception text is escaped for HTML.
        return f"<div style='text-align: center; color: red;'>Error: {escape(str(e))}</div>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _download_file(url, target_path, description):
    """
    Download a file from URL if it doesn't exist

    The payload is first written to ``<target>.part`` and only renamed to the
    final name once the transfer has finished, so an interrupted download
    never leaves a truncated file that later runs would mistake for a
    complete one.

    Args:
        url: Source URL
        target_path: Target file path (pathlib.Path)
        description: Description for logging

    Raises:
        Exception: Whatever ``urllib.request.urlretrieve`` or the filesystem
            operations raise; the partial file is removed before propagating
    """
    if target_path.exists():
        return

    print(f"Downloading {description}...")
    target_path.parent.mkdir(parents=True, exist_ok=True)

    tmp_path = target_path.with_name(target_path.name + '.part')
    try:
        urllib.request.urlretrieve(url, tmp_path)
        # Same directory, so the rename stays on one filesystem.
        tmp_path.replace(target_path)
    finally:
        # Still present only if the download or the rename failed.
        if tmp_path.exists():
            tmp_path.unlink()

    print(f"Downloaded {description} to {target_path}")
|
|
|
|
|
|
|
|
def setup():
    """
    Prepare the application: fetch required data files and smoke-test DreaMS

    Fetches (each only if not already present):
        - MassSpecGym spectral library
        - Example MS/MS files for testing

    Afterwards embeddings are computed for the bundled MGF example as a
    sanity check.

    Raises:
        Exception: Re-raised unchanged if any download or the embedding test fails
    """
    banner = "=" * 60
    print(banner)
    print("Setting up DreaMS application...")
    print(banner)

    # Start from an empty molecule-image cache.
    clear_smiles_cache()

    try:
        mgf_example = EXAMPLE_PATH / 'example_5_spectra.mgf'
        downloads = (
            ('https://huggingface.co/datasets/roman-bushuiev/GeMS/resolve/main/data/auxiliary/MassSpecGym_DreaMS.hdf5',
             LIBRARY_PATH,
             "MassSpecGym spectral library"),
            ('https://huggingface.co/datasets/titodamiani/PiperNET/resolve/main/lcms/rawfiles/202312_147_P55-Leaf-r2_1uL.mzML',
             EXAMPLE_PATH / '202312_147_P55-Leaf-r2_1uL.mzML',
             "PiperNET example spectra"),
            ('https://raw.githubusercontent.com/pluskal-lab/DreaMS/refs/heads/main/data/examples/example_5_spectra.mgf',
             mgf_example,
             "DreaMS example spectra"),
        )
        # Library first, then the example inputs.
        for source_url, destination, label in downloads:
            _download_file(source_url, destination, label)

        print("\nTesting DreaMS embeddings...")
        test_embeddings = dreams_embeddings(mgf_example)
        print(f"✓ Setup complete - DreaMS embeddings test successful (shape: {test_embeddings.shape})")
        print(banner)

    except Exception as e:
        print(f"✗ Setup failed: {e}")
        print("The application may not work properly. Please check your internet connection and try again.")
        raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@spaces.GPU
def _predict_gpu(in_pth, progress):
    """
    Load the query spectra and compute their DreaMS embeddings

    Decorated with ``spaces.GPU``.

    Args:
        in_pth: Input file path
        progress: Gradio progress tracker (called with a fraction and ``desc``)

    Returns:
        DreaMS embeddings as returned by ``dreams_embeddings``
    """
    progress(0.2, desc="Loading spectra data...")
    query_data = MSData.load(in_pth)

    progress(0.3, desc="Computing DreaMS embeddings...")
    query_embeddings = dreams_embeddings(query_data)

    print(f'Shape of the query embeddings: {query_embeddings.shape}')
    return query_embeddings
|
|
|
|
|
|
|
|
def _create_result_row(i, j, n, msdata, msdata_lib, sims, cos_sim, embs, similarity_threshold, calculate_modified_cosine=False):
    """
    Assemble the record describing one (query spectrum, library hit) pair

    Args:
        i: Query spectrum index
        j: Library spectrum index
        n: Zero-based rank of the hit (stored one-based as 'topk')
        msdata: Query MS data
        msdata_lib: Library MS data
        sims: Similarity matrix, indexed as ``sims[i, j]``
        cos_sim: Callable computing modified cosine similarity
        embs: Query embeddings, indexed by ``i``
        similarity_threshold: Molecule/spectrum images are rendered only for
            hits whose DreaMS similarity is strictly above this value
        calculate_modified_cosine: Whether to calculate modified cosine similarity

    Returns:
        dict: Result row data
    """
    library_smiles = msdata_lib.get_smiles(j)
    query_spectrum = msdata.get_spectra(i)
    library_spectrum = msdata_lib.get_spectra(j)
    score = sims[i, j]

    def optional_value(column):
        # Metadata columns may be absent from the query file.
        if column in msdata.columns():
            return msdata.get_values(column, i)
        return None

    record = {}
    record['scan_number'] = optional_value(SCAN_NUMBER)
    record['rt'] = optional_value(RT)
    record['charge'] = optional_value(CHARGE)
    record['precursor_mz'] = msdata.get_prec_mzs(i)
    record['topk'] = n + 1
    record['library_j'] = j

    # Rendering is the expensive part, so it is skipped for weak hits.
    if score > similarity_threshold:
        record['library_SMILES'] = smiles_to_html_img(library_smiles)
    else:
        record['library_SMILES'] = None
    record['library_SMILES_raw'] = library_smiles

    if score > similarity_threshold:
        record['Spectrum'] = spectrum_to_html_img(query_spectrum, library_spectrum)
    else:
        record['Spectrum'] = None
    record['Spectrum_raw'] = su.unpad_peak_list(query_spectrum)

    record['library_ID'] = msdata_lib.get_values('IDENTIFIER', j)
    record['DreaMS_similarity'] = score
    record['i'] = i
    record['j'] = j
    record['DreaMS_embedding'] = embs[i]

    if not calculate_modified_cosine:
        return record

    record['Modified_cosine_similarity'] = cos_sim(
        spec1=query_spectrum,
        prec_mz1=msdata.get_prec_mzs(i),
        spec2=library_spectrum,
        prec_mz2=msdata_lib.get_prec_mzs(j),
    )
    return record
|
|
|
|
|
|
|
|
def _process_results_dataframe(df, in_pth, similarity_threshold, calculate_modified_cosine=False):
    """
    Clean the raw results, export them to CSV and build the table for display

    Args:
        df: Raw results DataFrame (one row per query/library pair)
        in_pth: Input file path; the CSV path is derived from it
        similarity_threshold: Only rows with DreaMS similarity strictly above
            this value are kept in the returned table
        calculate_modified_cosine: Whether modified cosine similarity was calculated

    Returns:
        tuple: (processed_df, csv_path)
    """
    # Internal indices are not part of the output.
    df = df.drop(columns=['i', 'j', 'library_j'])
    with_modified_cosine = calculate_modified_cosine and 'Modified_cosine_similarity' in df.columns

    # Column -> number of decimals, applied in this order.
    decimals = {'DreaMS_similarity': 4}
    if with_modified_cosine:
        decimals['Modified_cosine_similarity'] = 4
    decimals['precursor_mz'] = 4
    decimals['rt'] = 2
    for column, places in decimals.items():
        df[column] = df[column].astype(float).round(places)
    df['charge'] = df['charge'].astype(str)

    # Human-readable column titles.
    display_names = dict(
        topk='Top k',
        library_ID='Library ID',
        scan_number="Scan number",
        rt="Retention time",
        charge="Charge",
        precursor_mz="Precursor m/z",
        library_SMILES="Molecule",
        library_SMILES_raw="SMILES",
        Spectrum="Spectrum",
        Spectrum_raw="Input Spectrum",
        DreaMS_similarity="DreaMS similarity",
        DreaMS_embedding="DreaMS embedding",
    )
    if with_modified_cosine:
        display_names["Modified_cosine_similarity"] = "Modified cos similarity"
    df = df.rename(columns=display_names)

    # CSV export: everything except the rendered HTML columns and the rank.
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_path = dio.append_to_stem(in_pth, f"MassSpecGym_hits_{stamp}").with_suffix('.csv')
    df.drop(columns=['Molecule', 'Spectrum', 'Top k']).to_csv(csv_path, index=False)

    # Display table: best hit per spectrum, strongest first, above threshold.
    shown = (
        df.drop(columns=['DreaMS embedding', "SMILES", "Input Spectrum"])
        .loc[lambda frame: frame['Top k'] == 1]
        .sort_values('DreaMS similarity', ascending=False)
        .drop(columns=['Top k'])
        .loc[lambda frame: frame["DreaMS similarity"] > similarity_threshold]
    )
    shown.insert(0, 'Row', range(1, len(shown) + 1))

    return shown, str(csv_path)
|
|
|
|
|
|
|
|
def _predict_core(lib_pth, in_pth, similarity_threshold, calculate_modified_cosine, progress):
    """
    Core prediction function that orchestrates the entire prediction pipeline

    Works on temporary copies of the library and input files, which are
    removed again when the function exits (successfully or not).

    Args:
        lib_pth: Library file path
        in_pth: Input file path
        similarity_threshold: Similarity threshold for filtering results
        calculate_modified_cosine: Whether to calculate modified cosine similarity
        progress: Gradio progress tracker

    Returns:
        tuple: (results_dataframe, csv_file_path)
    """
    # Function-scope import so the file's top-level import block is untouched.
    import uuid

    in_pth = Path(in_pth)
    library_path = Path(lib_pth)

    clear_smiles_cache()

    progress(0, desc="Creating temporary file copies...")
    # A timestamp alone is only unique to the second: two requests started in
    # the same second would share (and delete) each other's copies. The random
    # token makes every call's temp names distinct.
    tag = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex}"
    temp_lib_path = library_path.parent / f"temp_{tag}_{library_path.name}"
    temp_in_path = in_pth.parent / f"temp_{tag}_{in_pth.name}"

    try:
        # Copies happen inside the try so that a failure of the second copy
        # still removes the first one in the finally clause.
        # NOTE(review): presumably the copies isolate this request from other
        # users of the same files — confirm before removing.
        shutil.copy2(library_path, temp_lib_path)
        shutil.copy2(in_pth, temp_in_path)

        progress(0.1, desc="Loading library data...")
        msdata_lib = MSData.load(temp_lib_path, in_mem=True)
        embs_lib = msdata_lib[DREAMS_EMBEDDING]
        print(f'Shape of the library embeddings: {embs_lib.shape}')

        embs = _predict_gpu(temp_in_path, progress)

        progress(0.4, desc="Computing similarity matrix...")
        sims = cosine_similarity(embs, embs_lib)
        print(f'Shape of the similarity matrix: {sims.shape}')

        # Indices of the k most similar library entries per query, best first.
        k = 1
        topk_cands = np.argsort(sims, axis=1)[:, -k:][:, ::-1]

        msdata = MSData.load(temp_in_path, in_mem=True)
        print(f'Available columns: {msdata.columns()}')

        progress(0.5, desc="Constructing results table...")
        rows = []
        cos_sim = su.PeakListModifiedCosine()
        total_spectra = len(topk_cands)

        for i, topk in enumerate(topk_cands):
            progress(0.5 + 0.4 * (i / total_spectra),
                     desc=f"Processing hits for spectrum {i+1}/{total_spectra}...")

            for n, j in enumerate(topk):
                rows.append(_create_result_row(
                    i, j, n, msdata, msdata_lib, sims, cos_sim, embs,
                    similarity_threshold, calculate_modified_cosine))

            # Bound the memory held by cached molecule images on large inputs.
            if (i + 1) % 100 == 0:
                clear_smiles_cache()

        df = pd.DataFrame(rows)

        progress(0.9, desc="Post-processing results...")
        df, csv_path = _process_results_dataframe(df, in_pth, similarity_threshold, calculate_modified_cosine)

        progress(1.0, desc=f"Predictions complete! Found {len(df)} high-confidence matches.")

        return df, csv_path

    finally:
        for leftover in (temp_lib_path, temp_in_path):
            if leftover.exists():
                leftover.unlink()
|
|
|
|
|
|
|
|
def predict(lib_pth, in_pth, similarity_threshold=0.75, calculate_modified_cosine=False, progress=gr.Progress(track_tqdm=True)):
    """
    Main prediction function with error handling

    Args:
        lib_pth: Library file path
        in_pth: Input file path
        similarity_threshold: Similarity threshold for filtering results
        calculate_modified_cosine: Whether to calculate modified cosine similarity
        progress: Gradio progress tracker

    Returns:
        tuple: (results_dataframe, csv_file_path)

    Raises:
        gr.Error: If prediction fails or input is invalid
    """
    try:
        if not _validate_input_file(in_pth):
            raise gr.Error("Invalid input file. Please provide a valid .mgf or .mzML file.")

        if not Path(lib_pth).exists():
            raise gr.Error("Spectral library not found. Please ensure the library file exists.")

        df, csv_path = _predict_core(lib_pth, in_pth, similarity_threshold, calculate_modified_cosine, progress)
        return df, csv_path

    except gr.Error:
        # Already user-facing; pass through untouched.
        raise
    except Exception as e:
        error_msg = str(e)
        if "cuda" in error_msg.lower():
            error_msg = f"GPU/CUDA error: {error_msg}. The app is falling back to CPU mode."
        # Classify by exception type: the message of a RuntimeError rarely
        # contains the word "RuntimeError" itself. The substring test is kept
        # so messages that matched before still match.
        elif isinstance(e, RuntimeError) or "RuntimeError" in error_msg:
            error_msg = f"Runtime error: {error_msg}. This may be due to memory or device issues."
        else:
            error_msg = f"Error: {error_msg}"

        print(f"Prediction failed: {error_msg}")
        # Chain the original exception so the traceback keeps the root cause.
        raise gr.Error(error_msg) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _create_gradio_interface():
    """
    Create and configure the Gradio interface

    Returns:
        gr.Blocks: Configured Gradio app
    """
    # Reload the page with ?__theme=light unless that parameter is already set.
    js_func = """
    function refresh() {
        const url = new URL(window.location);
        if (url.searchParams.get('__theme') !== 'light') {
            url.searchParams.set('__theme', 'light');
            window.location.href = url.href;
        }
    }
    """

    app = gr.Blocks(
        theme=gr.themes.Default(primary_hue="yellow", secondary_hue="pink"),
        js=js_func
    )

    with app:
        gr.Image("https://raw.githubusercontent.com/pluskal-lab/DreaMS/cc806fa6fea281c1e57dd81fc512f71de9290017/assets/dreams_background.png",
                 label="DreaMS")

        gr.Markdown(value="""
            DreaMS (Deep Representations Empowering the Annotation of Mass Spectra) is a transformer-based
            neural network designed to interpret tandem mass spectrometry (MS/MS) data (<a href="https://www.nature.com/articles/s41587-025-02663-3">Bushuiev et al., Nature Biotechnology, 2025</a>).
            This website provides an easy access to perform library matching with DreaMS against the <a href="https://huggingface.co/datasets/roman-bushuiev/MassSpecGym">MassSpecGym</a> spectral library (combination of GNPS, MoNA, and Pluskal lab data). Please upload
            your file with MS/MS data and click on the "Run DreaMS" button.
        """)

        with gr.Row(equal_height=True):
            in_pth = gr.File(
                file_count="single",
                label="Input MS/MS file (.mgf or .mzML)",
            )

        gr.Examples(
            examples=["./data/example_5_spectra.mgf", "./data/202312_147_P55-Leaf-r2_1uL.mzML"],
            inputs=[in_pth],
            label="Examples (click on a file to load as input)",
        )

        with gr.Accordion("⚙️ Settings", open=False):
            similarity_threshold = gr.Slider(
                minimum=-1.0,
                maximum=1.0,
                value=0.75,
                step=0.01,
                label="Similarity threshold",
                info="Only display library matches with DreaMS similarity above this threshold (rendering less results also makes calculation faster)"
            )
            calculate_modified_cosine = gr.Checkbox(
                label="Calculate modified cosine similarity",
                value=False,
                info="Enable to also calculate traditional modified cosine similarity scores between the input spectra and library hits (a bit slower)"
            )

        predict_button = gr.Button(value="Run DreaMS", variant="primary")

        gr.Markdown("## Predictions")
        df_file = gr.File(label="Download predictions as .csv", interactive=False, visible=True)

        # Column metadata, listed in the order in which
        # _process_results_dataframe emits the columns ("Library ID" comes
        # before "DreaMS similarity"), so that the positional datatype and
        # width entries line up with the data they describe.
        headers = ["Row", "Scan number", "Retention time", "Charge", "Precursor m/z", "Molecule", "Spectrum",
                   "Library ID", "DreaMS similarity"]
        datatype = ["number", "number", "number", "str", "number", "html", "html", "str", "number"]
        column_widths = ["20px", "30px", "30px", "25px", "30px", "40px", "40px", "50px", "40px"]

        df = gr.Dataframe(
            headers=headers,
            datatype=datatype,
            col_count=(len(headers), "fixed"),
            column_widths=column_widths,
            max_height=1000,
            show_fullscreen_button=True,
            show_row_numbers=False,
            show_search='filter',
        )

        inputs = [in_pth, similarity_threshold, calculate_modified_cosine]
        outputs = [df, df_file]

        def update_headers(show_cosine):
            """Add or remove the trailing modified-cosine column of the results table"""
            if show_cosine:
                # Same label as the column produced by
                # _process_results_dataframe, plus a datatype for it.
                return gr.update(headers=headers + ["Modified cos similarity"],
                                 datatype=datatype + ["number"],
                                 col_count=(len(headers) + 1, "fixed"),
                                 column_widths=column_widths + ["40px"])
            return gr.update(headers=headers,
                             datatype=datatype,
                             col_count=(len(headers), "fixed"),
                             column_widths=column_widths)

        calculate_modified_cosine.change(
            fn=update_headers,
            inputs=[calculate_modified_cosine],
            outputs=[df]
        )

        # The library path is fixed; the UI supplies the remaining arguments.
        predict_func = partial(predict, LIBRARY_PATH)
        predict_button.click(predict_func, inputs=inputs, outputs=outputs, show_progress="first")

    return app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Data files are prepared whether the module is executed or imported.
setup()

if __name__ == "__main__":
    # Direct execution: build the UI and start serving it.
    app = _create_gradio_interface()
    app.launch(allowed_paths=['./assets'])
|
|
|