Spaces:
Sleeping
Sleeping
| """Tests that CSV data takes priority over parquet when both are available. | |
| This validates the fix for the issue where loading an event from parquet and | |
| then modifying the CSV text fields (e.g. removing tracks) was ignored because | |
| the code always re-loaded from the parquet file. | |
| """ | |
| import os | |
| import ast | |
| import textwrap | |
def _extract_source_priority_logic():
    """Return the full source text of ``app.py`` for static inspection.

    The file is looked up one directory above this test module. Nothing is
    parsed or verified here: the text is returned as-is and callers search it
    themselves (for example, to compare the positions of ``if use_csv:`` and
    ``elif use_parquet:``).

    Returns:
        str: The complete contents of ``app.py``.

    Raises:
        OSError: If ``app.py`` cannot be opened.
        UnicodeDecodeError: If ``app.py`` is not valid UTF-8.
    """
    app_path = os.path.join(os.path.dirname(__file__), "..", "app.py")
    # Python source files are UTF-8 by default, so decode explicitly rather
    # than relying on the platform's locale encoding (which would break on
    # non-ASCII characters when the locale is not UTF-8).
    with open(app_path, encoding="utf-8") as f:
        return f.read()
def test_csv_checked_before_parquet():
    """Statically check that app.py tests ``use_csv`` before ``use_parquet``.

    The source text of app.py is searched for the branching statements; the
    ``if use_csv:`` branch has to precede ``elif use_parquet:`` so that CSV
    edits are not silently ignored in favour of re-reading the parquet file.
    """
    app_source = _extract_source_priority_logic()

    # Offset of the first occurrence of each statement (-1 when absent).
    # NOTE: these are plain substring searches, so "if use_parquet:" is also
    # found inside "elif use_parquet:" (and "if use_csv:" inside
    # "elif use_csv:").
    csv_pos = app_source.find("if use_csv:")
    parquet_fallback_pos = app_source.find("elif use_parquet:")
    parquet_first_pos = app_source.find("if use_parquet:")

    # The CSV branch has to be present at all.
    assert csv_pos != -1, "Could not find 'if use_csv:' in app.py"

    # Parquet has to be handled as the fallback branch.
    assert parquet_fallback_pos != -1, (
        "Could not find 'elif use_parquet:' in app.py — parquet should be "
        "a fallback after CSV"
    )

    # The CSV branch has to come first.
    assert csv_pos < parquet_fallback_pos, (
        "'if use_csv:' must appear before 'elif use_parquet:' so that "
        "user CSV edits take priority over re-reading the parquet file"
    )

    # Nothing more to verify unless some "if use_parquet:" text shows up
    # ahead of the CSV branch.
    if parquet_first_pos == -1 or parquet_first_pos > csv_pos:
        return

    # The old buggy pattern: an "if use_parquet:" that dispatches to
    # load_event_from_parquet before the CSV branch is ever reached.
    text_before_csv_branch = app_source[parquet_first_pos:csv_pos]
    assert "load_event_from_parquet" not in text_before_csv_branch, (
        "Found 'if use_parquet:' with load_event_from_parquet before "
        "'if use_csv:' — this is the bug where parquet takes priority "
        "over CSV edits"
    )
def test_parse_csv_event_logic():
    """Check that CSV text is turned into event arrays of the expected shape.

    A local copy of the parsing logic is defined here (intended to match
    ``_parse_csv_event`` in app.py) so the test does not have to import the
    app module and its heavy dependencies such as gradio.
    """
    import io

    import numpy as np
    import pandas as pd

    def _to_array(text, min_cols=1):
        """Parse header-less CSV text into a float64 array.

        Empty or whitespace-only text gives an array of shape (0, min_cols).
        """
        if text and text.strip():
            frame = pd.read_csv(io.StringIO(text), header=None)
            return frame.values.astype(np.float64)
        return np.zeros((0, min_cols), dtype=np.float64)

    def _parse_csv_event(csv_hits, csv_tracks, csv_particles, csv_pandora=""):
        """Build the event dict from the four CSV text fields."""
        hits = _to_array(csv_hits, 11)
        tracks = _to_array(csv_tracks, 25)
        particles = _to_array(csv_particles, 18)
        pandora = _to_array(csv_pandora, 9)

        # Right-pad non-empty track rows with zeros up to 25 columns.
        n_tracks, n_track_cols = tracks.shape
        if n_track_cols < 25 and n_tracks > 0:
            filler = np.zeros((n_tracks, 25 - n_track_cols))
            tracks = np.concatenate([tracks, filler], axis=1)

        return {
            "X_hit": hits,
            "X_track": tracks,
            "X_gen": particles,
            "X_pandora": pandora,
            # Every hit / track starts out labelled -1.
            "ygen_hit": np.full(len(hits), -1, dtype=np.int64),
            "ygen_track": np.full(len(tracks), -1, dtype=np.int64),
        }

    # A single hit and no tracks.
    csv_hits = "0,0,0,0,0,1.23,1800.5,200.3,100.1,0,1"
    hits_only = _parse_csv_event(csv_hits, "", "", "")
    assert hits_only["X_hit"].shape == (1, 11)
    assert hits_only["X_track"].shape == (0, 25)
    assert np.isclose(hits_only["X_hit"][0, 5], 1.23)

    # Track text left empty (as after removing all tracks) gives zero rows.
    tracks_removed = _parse_csv_event(csv_hits, "", "", "")
    assert tracks_removed["X_track"].shape[0] == 0

    # The number of track rows follows the number of CSV lines.
    first_track_row = (
        "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2"
    )
    second_track_row = (
        "1,0,0,0,0,3.0,1.0,1.5,2.1,0,0,0,1700.0,100.0,80.0,10.0,6,0,0,0,0,0,0.9,1.4,2.0"
    )
    csv_tracks_two = "\n".join((first_track_row, second_track_row))
    csv_tracks_one = first_track_row

    with_two_tracks = _parse_csv_event(csv_hits, csv_tracks_two, "", "")
    with_one_track = _parse_csv_event(csv_hits, csv_tracks_one, "", "")
    assert with_two_tracks["X_track"].shape[0] == 2
    assert with_one_track["X_track"].shape[0] == 1
def test_input_source_decision_logic():
    """Check the CSV-over-parquet choice on a local copy of the decision logic.

    ``decide_source`` below is a stand-in meant to mirror the branching in
    ``run_inference_ui``; CSV has to win whenever CSV text is present, even
    if a parquet path pointing at an existing file is supplied too.
    """

    def decide_source(parquet_path, csv_hits):
        """Return "csv", "parquet" or "none" for the given inputs."""
        has_parquet = bool(parquet_path) and os.path.isfile(parquet_path)
        has_csv = bool(csv_hits and csv_hits.strip())
        if has_csv:
            return "csv"
        return "parquet" if has_parquet else "none"

    # This script doubles as "a parquet path that exists on disk".
    existing_file = os.path.abspath(__file__)

    # (parquet_path, csv_hits, expected source)
    cases = (
        (existing_file, "some,csv,data", "csv"),  # both present -> CSV wins
        ("", "some,csv,data", "csv"),  # CSV only
        (existing_file, "", "parquet"),  # parquet only -> fallback
        ("", "", "none"),  # nothing supplied
    )
    for parquet_path, csv_hits, expected in cases:
        assert decide_source(parquet_path, csv_hits) == expected
if __name__ == "__main__":
    # Allow running this file directly as a script: call each test in turn
    # and report success once none of them has raised.
    for _test in (
        test_csv_checked_before_parquet,
        test_parse_csv_event_logic,
        test_input_source_decision_logic,
    ):
        _test()
    print("All tests passed.")