Make filtering default, add warnings
Browse files
app.py
CHANGED
|
@@ -57,9 +57,14 @@ def run(r_filepath:Path, q_filepath:Path,
|
|
| 57 |
assert q_filepath is not None, "Query file is missing."
|
| 58 |
|
| 59 |
refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
|
|
|
|
| 60 |
if do_preprocess:
|
| 61 |
refs = preprocess_spectra(refs)
|
| 62 |
ques = preprocess_spectra(ques)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
# If we have small spectra, don't make a huge batch
|
| 65 |
if batch_size > max(len(refs), len(ques)):
|
|
@@ -75,6 +80,7 @@ def run(r_filepath:Path, q_filepath:Path,
|
|
| 75 |
if similarity_method == 'ModifiedCosine':
|
| 76 |
kwargs.pop('shift')
|
| 77 |
|
|
|
|
| 78 |
similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
|
| 79 |
|
| 80 |
scores_obj = calculate_scores(
|
|
@@ -112,6 +118,8 @@ with gr.Blocks() as demo:
|
|
| 112 |
|
| 113 |
Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
|
| 114 |
This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
|
|
|
|
|
|
|
| 115 |
""")
|
| 116 |
with gr.Row():
|
| 117 |
refs = gr.File(label="Upload REFERENCES.mgf",
|
|
@@ -135,7 +143,7 @@ with gr.Blocks() as demo:
|
|
| 135 |
match_limit = gr.Number(value=2048, label="Match Limit",
|
| 136 |
info="Consider this many pairs of m/z before stopping. "
|
| 137 |
"In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
|
| 138 |
-
do_preprocess = gr.Checkbox(value=False, label="filter spectra",
|
| 139 |
info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
|
| 140 |
with gr.Row():
|
| 141 |
array_type = gr.Radio(['numpy', 'sparse'],
|
|
|
|
| 57 |
assert q_filepath is not None, "Query file is missing."
|
| 58 |
|
| 59 |
refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
|
| 60 |
+
|
| 61 |
if do_preprocess:
|
| 62 |
refs = preprocess_spectra(refs)
|
| 63 |
ques = preprocess_spectra(ques)
|
| 64 |
+
if not refs: gr.Error("References are empty after filtering")
|
| 65 |
+
if not ques: gr.Error("Queries are empty after filtering")
|
| 66 |
+
else:
|
| 67 |
+
gr.Warning("Filtering is skipped. Malformed spectra can cause errors.")
|
| 68 |
|
| 69 |
# If we have small spectra, don't make a huge batch
|
| 70 |
if batch_size > max(len(refs), len(ques)):
|
|
|
|
| 80 |
if similarity_method == 'ModifiedCosine':
|
| 81 |
kwargs.pop('shift')
|
| 82 |
|
| 83 |
+
|
| 84 |
similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
|
| 85 |
|
| 86 |
scores_obj = calculate_scores(
|
|
|
|
| 118 |
|
| 119 |
Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
|
| 120 |
This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
|
| 121 |
+
|
| 122 |
+
**In case of errors, check the "logs" above - malformed spectra will cause errors**
|
| 123 |
""")
|
| 124 |
with gr.Row():
|
| 125 |
refs = gr.File(label="Upload REFERENCES.mgf",
|
|
|
|
| 143 |
match_limit = gr.Number(value=2048, label="Match Limit",
|
| 144 |
info="Consider this many pairs of m/z before stopping. "
|
| 145 |
"In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
|
| 146 |
+
do_preprocess = gr.Checkbox(value=True, label="filter spectra",
|
| 147 |
info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
|
| 148 |
with gr.Row():
|
| 149 |
array_type = gr.Radio(['numpy', 'sparse'],
|