"""Tests that CSV data takes priority over parquet when both are available. This validates the fix for the issue where loading an event from parquet and then modifying the CSV text fields (e.g. removing tracks) was ignored because the code always re-loaded from the parquet file. """ import os import ast import textwrap def _extract_source_priority_logic(): """Extract and verify the input-source priority logic from app.py. Reads the ``run_inference_ui`` function source and checks that CSV is tested *before* parquet, so that user edits to the CSV text fields are respected even when a parquet file path is present. """ app_path = os.path.join(os.path.dirname(__file__), "..", "app.py") with open(app_path) as f: source = f.read() return source def test_csv_checked_before_parquet(): """In run_inference_ui, the ``if use_csv`` branch must come before ``use_parquet`` so that CSV edits are not silently ignored.""" source = _extract_source_priority_logic() # Find positions of the key branching statements idx_csv = source.find("if use_csv:") idx_parquet_elif = source.find("elif use_parquet:") idx_parquet_if = source.find("if use_parquet:") # "if use_csv:" must exist assert idx_csv != -1, "Could not find 'if use_csv:' in app.py" # "elif use_parquet:" must exist (parquet is the fallback) assert idx_parquet_elif != -1, ( "Could not find 'elif use_parquet:' in app.py — parquet should be " "a fallback after CSV" ) # CSV check must come before the parquet fallback assert idx_csv < idx_parquet_elif, ( "'if use_csv:' must appear before 'elif use_parquet:' so that " "user CSV edits take priority over re-reading the parquet file" ) # There should NOT be a standalone "if use_parquet:" that would take # priority over CSV (the old buggy pattern) if idx_parquet_if != -1: # The only occurrence should be inside the guard for empty input # (not use_parquet and not use_csv). A standalone "if use_parquet:" # that dispatches to load_event_from_parquet before checking CSV is # the bug we fixed. # Make sure it's not followed by load_event_from_parquet before # "if use_csv:" appears assert idx_parquet_if > idx_csv or "load_event_from_parquet" not in source[idx_parquet_if:idx_csv], ( "Found 'if use_parquet:' with load_event_from_parquet before " "'if use_csv:' — this is the bug where parquet takes priority " "over CSV edits" ) def test_parse_csv_event_logic(): """_parse_csv_event should correctly build event dicts from CSV text. We inline the same parsing logic used by app.py to avoid importing the module (which requires heavy dependencies like gradio). """ import io import numpy as np import pandas as pd def _read(text, min_cols=1): if not text or not text.strip(): return np.zeros((0, min_cols), dtype=np.float64) df = pd.read_csv(io.StringIO(text), header=None) return df.values.astype(np.float64) def _parse_csv_event(csv_hits, csv_tracks, csv_particles, csv_pandora=""): hits_arr = _read(csv_hits, 11) tracks_arr = _read(csv_tracks, 25) particles_arr = _read(csv_particles, 18) pandora_arr = _read(csv_pandora, 9) if tracks_arr.shape[1] < 25 and tracks_arr.shape[0] > 0: pad = np.zeros((tracks_arr.shape[0], 25 - tracks_arr.shape[1])) tracks_arr = np.concatenate([tracks_arr, pad], axis=1) ygen_hit = np.full(len(hits_arr), -1, dtype=np.int64) ygen_track = np.full(len(tracks_arr), -1, dtype=np.int64) return { "X_hit": hits_arr, "X_track": tracks_arr, "X_gen": particles_arr, "X_pandora": pandora_arr, "ygen_hit": ygen_hit, "ygen_track": ygen_track, } # Basic parse csv_hits = "0,0,0,0,0,1.23,1800.5,200.3,100.1,0,1" event = _parse_csv_event(csv_hits, "", "", "") assert event["X_hit"].shape == (1, 11) assert event["X_track"].shape == (0, 25) assert np.isclose(event["X_hit"][0, 5], 1.23) # Empty tracks after removing them event2 = _parse_csv_event(csv_hits, "", "", "") assert event2["X_track"].shape[0] == 0 # Two tracks vs one track csv_tracks_two = ( "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2\n" "1,0,0,0,0,3.0,1.0,1.5,2.1,0,0,0,1700.0,100.0,80.0,10.0,6,0,0,0,0,0,0.9,1.4,2.0" ) csv_tracks_one = ( "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2" ) event_two = _parse_csv_event(csv_hits, csv_tracks_two, "", "") event_one = _parse_csv_event(csv_hits, csv_tracks_one, "", "") assert event_two["X_track"].shape[0] == 2 assert event_one["X_track"].shape[0] == 1 def test_input_source_decision_logic(): """Simulate the decision logic from run_inference_ui and verify that CSV is used even when a parquet path is present.""" def decide_source(parquet_path, csv_hits): """Mirrors the decision logic in run_inference_ui.""" use_parquet = parquet_path and os.path.isfile(parquet_path) use_csv = bool(csv_hits and csv_hits.strip()) if use_csv: return "csv" elif use_parquet: return "parquet" else: return "none" # CSV present + parquet path present → should use CSV # (use this script as a stand-in for an existing file) existing_file = os.path.abspath(__file__) assert decide_source(existing_file, "some,csv,data") == "csv" # CSV present + no parquet → should use CSV assert decide_source("", "some,csv,data") == "csv" # CSV empty + parquet present → should use parquet assert decide_source(existing_file, "") == "parquet" # Both empty → none assert decide_source("", "") == "none" if __name__ == "__main__": test_csv_checked_before_parquet() test_parse_csv_event_logic() test_input_source_decision_logic() print("All tests passed.")