Spaces:
Running
on
Zero
Running
on
Zero
docs: update Notes
Browse files
app.py
CHANGED
|
@@ -39,9 +39,6 @@ except ImportError as e:
|
|
| 39 |
TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
|
| 40 |
DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
|
| 41 |
KNAPSACK_DIR = Path("./knapsack_cache")
|
| 42 |
-
DEFAULT_CONFIG_PATH = Path(
|
| 43 |
-
"./configs/inference/default.yaml"
|
| 44 |
-
)
|
| 45 |
|
| 46 |
# Determine device
|
| 47 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -198,43 +195,38 @@ def create_inference_config(
|
|
| 198 |
output_path: str,
|
| 199 |
) -> DictConfig:
|
| 200 |
"""Creates a base OmegaConf DictConfig for prediction environment."""
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
"index_columns": [
|
| 234 |
-
"scan_number", "precursor_mz", "precursor_charge",
|
| 235 |
-
"retention_time", "spectrum_id", "experiment_name",
|
| 236 |
-
],
|
| 237 |
-
})
|
| 238 |
|
| 239 |
cfg_overrides = {
|
| 240 |
"data_path": input_path, "output_path": output_path,
|
|
@@ -524,7 +516,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
|
|
| 524 |
logger.info("--- New Prediction Request ---")
|
| 525 |
logger.info(f"Input File: {input_path}")
|
| 526 |
logger.info(f"Selected Mode: {mode_selection}")
|
| 527 |
-
if "
|
| 528 |
logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
|
| 529 |
|
| 530 |
# Create temp output file
|
|
@@ -630,7 +622,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
|
|
| 630 |
row_data["prediction"] = ""
|
| 631 |
results_data.append(row_data)
|
| 632 |
|
| 633 |
-
elif "
|
| 634 |
output_headers.extend([
|
| 635 |
"transformer_prediction", "transformer_log_probability",
|
| 636 |
"refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
|
|
@@ -713,8 +705,20 @@ with gr.Blocks(
|
|
| 713 |
Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
|
| 714 |
Choose your prediction method and decoding options.
|
| 715 |
|
| 716 |
-
|
| 717 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 718 |
)
|
| 719 |
with gr.Row():
|
| 720 |
with gr.Column(scale=1):
|
|
@@ -790,20 +794,6 @@ with gr.Blocks(
|
|
| 790 |
label="Example Usage:",
|
| 791 |
)
|
| 792 |
|
| 793 |
-
gr.Markdown(
|
| 794 |
-
f"""
|
| 795 |
-
**Notes:**
|
| 796 |
-
* Predictions use `{TRANSFORMER_MODEL_ID}` (Transformer) and `{DIFFUSION_MODEL_ID}` (Diffusion, Alpha release).
|
| 797 |
-
* **Refinement Mode:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
|
| 798 |
-
* **InstaNovo Only Mode:** Uses only the Transformer with the selected decoding method.
|
| 799 |
-
* **InstaNovo+ Only Mode:** Predicts directly using the Diffusion model (alpha version).
|
| 800 |
-
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
| 801 |
-
* Knapsack Beam Search requires a pre-computed knapsack file. If unavailable, the option will be disabled.
|
| 802 |
-
* Check logs for progress, especially for large files or slower methods.
|
| 803 |
-
""",
|
| 804 |
-
elem_classes="feedback"
|
| 805 |
-
)
|
| 806 |
-
|
| 807 |
with gr.Accordion("Application Logs", open=True):
|
| 808 |
log_display = Log(log_file, dark=True, height=300)
|
| 809 |
|
|
|
|
| 39 |
TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
|
| 40 |
DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
|
| 41 |
KNAPSACK_DIR = Path("./knapsack_cache")
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# Determine device
|
| 44 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 195 |
output_path: str,
|
| 196 |
) -> DictConfig:
|
| 197 |
"""Creates a base OmegaConf DictConfig for prediction environment."""
|
| 198 |
+
base_cfg = OmegaConf.create({
|
| 199 |
+
"data_path": None, "instanovo_model": TRANSFORMER_MODEL_ID,
|
| 200 |
+
"instanovoplus_model": DIFFUSION_MODEL_ID, "output_path": None,
|
| 201 |
+
"knapsack_path": str(KNAPSACK_DIR), "denovo": True, "refine": True,
|
| 202 |
+
"num_beams": 1, "max_length": 40, "max_charge": 10,
|
| 203 |
+
"isotope_error_range": [0, 1], "subset": 1.0, "use_knapsack": False,
|
| 204 |
+
"save_beams": False, "batch_size": 64, "device": DEVICE, "fp16": FP16,
|
| 205 |
+
"log_interval": 500, "use_basic_logging": True,
|
| 206 |
+
"filter_precursor_ppm": 20, "filter_confidence": 1e-4,
|
| 207 |
+
"filter_fdr_threshold": 0.05, "suppressed_residues": None,
|
| 208 |
+
"disable_terminal_residues_anywhere": True,
|
| 209 |
+
"residue_remapping": {
|
| 210 |
+
"M(ox)": "M[UNIMOD:35]", "M(+15.99)": "M[UNIMOD:35]",
|
| 211 |
+
"S(p)": "S[UNIMOD:21]", "T(p)": "T[UNIMOD:21]", "Y(p)": "Y[UNIMOD:21]",
|
| 212 |
+
"S(+79.97)": "S[UNIMOD:21]", "T(+79.97)": "T[UNIMOD:21]", "Y(+79.97)": "Y[UNIMOD:21]",
|
| 213 |
+
"Q(+0.98)": "Q[UNIMOD:7]", "N(+0.98)": "N[UNIMOD:7]",
|
| 214 |
+
"Q(+.98)": "Q[UNIMOD:7]", "N(+.98)": "N[UNIMOD:7]",
|
| 215 |
+
"C(+57.02)": "C[UNIMOD:4]", "(+42.01)": "[UNIMOD:1]",
|
| 216 |
+
"(+43.01)": "[UNIMOD:5]", "(-17.03)": "[UNIMOD:385]",
|
| 217 |
+
},
|
| 218 |
+
"column_map": {
|
| 219 |
+
"Modified sequence": "modified_sequence", "MS/MS m/z": "precursor_mz",
|
| 220 |
+
"Mass": "precursor_mass", "Charge": "precursor_charge",
|
| 221 |
+
"Mass values": "mz_array", "Mass spectrum": "mz_array",
|
| 222 |
+
"Intensity": "intensity_array", "Raw intensity spectrum": "intensity_array",
|
| 223 |
+
"Scan number": "scan_number"
|
| 224 |
+
},
|
| 225 |
+
"index_columns": [
|
| 226 |
+
"scan_number", "precursor_mz", "precursor_charge",
|
| 227 |
+
"retention_time", "spectrum_id", "experiment_name",
|
| 228 |
+
],
|
| 229 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
cfg_overrides = {
|
| 232 |
"data_path": input_path, "output_path": output_path,
|
|
|
|
| 516 |
logger.info("--- New Prediction Request ---")
|
| 517 |
logger.info(f"Input File: {input_path}")
|
| 518 |
logger.info(f"Selected Mode: {mode_selection}")
|
| 519 |
+
if "refinement" in mode_selection or "InstaNovo Only" in mode_selection:
|
| 520 |
logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
|
| 521 |
|
| 522 |
# Create temp output file
|
|
|
|
| 622 |
row_data["prediction"] = ""
|
| 623 |
results_data.append(row_data)
|
| 624 |
|
| 625 |
+
elif "refinement" in mode_selection:
|
| 626 |
output_headers.extend([
|
| 627 |
"transformer_prediction", "transformer_log_probability",
|
| 628 |
"refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
|
|
|
|
| 705 |
Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
|
| 706 |
Choose your prediction method and decoding options.
|
| 707 |
|
| 708 |
+
**Notes:**
|
| 709 |
+
* Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
|
| 710 |
+
* The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
|
| 711 |
+
* **Prediction Modes:**
|
| 712 |
+
* **InstaNovo with InstaNovo+ refinement:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
|
| 713 |
+
* **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
|
| 714 |
+
* **InstaNovo+ Only:** Predicts directly using the Diffusion model (alpha release).
|
| 715 |
+
* **Transformer Decoding Methods:**
|
| 716 |
+
* **Greedy Search:** use this for optimal performance; it has similar performance to Knapsack Beam Search at 5% FDR.
|
| 717 |
+
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but it is about 10x slower than Greedy Search.
|
| 718 |
+
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
| 719 |
+
* Check logs for progress, especially for large files or slower methods.
|
| 720 |
+
""",
|
| 721 |
+
elem_classes="feedback"
|
| 722 |
)
|
| 723 |
with gr.Row():
|
| 724 |
with gr.Column(scale=1):
|
|
|
|
| 794 |
label="Example Usage:",
|
| 795 |
)
|
| 796 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 797 |
with gr.Accordion("Application Logs", open=True):
|
| 798 |
log_display = Log(log_file, dark=True, height=300)
|
| 799 |
|