Spaces:

TornikeO
/

SimMS

Sleeping

App Files Files Community

TornikeO committed on Dec 14, 2024

Commit

15498d2

1 Parent(s): 97989c2

Make filtering default, add warnings

Browse files

Files changed (1) hide show

app.py +9 -1

app.py CHANGED Viewed

@@ -57,9 +57,14 @@ def run(r_filepath:Path, q_filepath:Path,
     assert q_filepath is not None, "Query file is missing."
     refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
     if do_preprocess:
         refs = preprocess_spectra(refs)
         ques = preprocess_spectra(ques)
     # If we have small spectra, don't make a huge batch
     if batch_size > max(len(refs), len(ques)):
@@ -75,6 +80,7 @@ def run(r_filepath:Path, q_filepath:Path,
     if similarity_method == 'ModifiedCosine':
         kwargs.pop('shift')
     similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
     scores_obj = calculate_scores(
@@ -112,6 +118,8 @@ with gr.Blocks() as demo:
     Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
     This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
     """)
     with gr.Row():
         refs = gr.File(label="Upload REFERENCES.mgf",
@@ -135,7 +143,7 @@ with gr.Blocks() as demo:
         match_limit = gr.Number(value=2048, label="Match Limit",
                                 info="Consider this many pairs of m/z before stopping. "
                                     "In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
-        do_preprocess = gr.Checkbox(value=False, label="filter spectra",
                                     info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
     with gr.Row():
         array_type = gr.Radio(['numpy', 'sparse'],

     assert q_filepath is not None, "Query file is missing."
     refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
     if do_preprocess:
         refs = preprocess_spectra(refs)
         ques = preprocess_spectra(ques)
+        if not refs: gr.Error("References are empty after filtering")
+        if not ques: gr.Error("Queries are empty after filtering")
+    else:
+        gr.Warning("Filtering is skipped. Malformed spectra can cause errors.")
     # If we have small spectra, don't make a huge batch
     if batch_size > max(len(refs), len(ques)):
     if similarity_method == 'ModifiedCosine':
         kwargs.pop('shift')
     similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
     scores_obj = calculate_scores(
     Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
     This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
+    **In case of errors, check the "logs" above - malformed spectra will cause errors**
     """)
     with gr.Row():
         refs = gr.File(label="Upload REFERENCES.mgf",
         match_limit = gr.Number(value=2048, label="Match Limit",
                                 info="Consider this many pairs of m/z before stopping. "
                                     "In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
+        do_preprocess = gr.Checkbox(value=True, label="filter spectra",
                                     info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
     with gr.Row():
         array_type = gr.Radio(['numpy', 'sparse'],