# HitPF_demo/tests/test_csv_priority.py
# github-actions[bot]
# Sync from GitHub f6dbbfb
# cc0720f
"""Tests that CSV data takes priority over parquet when both are available.
This validates the fix for the issue where loading an event from parquet and
then modifying the CSV text fields (e.g. removing tracks) was ignored because
the code always re-loaded from the parquet file.
"""
import os
import ast
import textwrap
def _extract_source_priority_logic():
    """Return the full text of ``app.py`` from the directory above this file.

    Despite its name, this helper performs no extraction or verification
    itself: it only reads the file and hands the raw source back, leaving
    the caller to search it for the ``use_csv`` / ``use_parquet`` branches.

    Returns:
        The complete contents of ``app.py`` as a string.

    Raises:
        OSError: If ``app.py`` cannot be opened (e.g. it does not exist).
    """
    app_path = os.path.join(os.path.dirname(__file__), "..", "app.py")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(app_path, encoding="utf-8") as f:
        return f.read()
def test_csv_checked_before_parquet():
    """Verify by text search that app.py tests ``use_csv`` before ``use_parquet``.

    The check is purely positional: it looks up the first occurrence of each
    branching statement in the source text of app.py and compares offsets,
    so that CSV edits are not silently ignored in favour of the parquet file.
    """
    app_source = _extract_source_priority_logic()

    # First occurrence of each branching statement (-1 when absent).
    csv_pos = app_source.find("if use_csv:")
    parquet_elif_pos = app_source.find("elif use_parquet:")
    # NOTE: this pattern is a suffix of "elif use_parquet:", so it also
    # matches there when no earlier plain "if use_parquet:" exists.
    parquet_if_pos = app_source.find("if use_parquet:")

    assert csv_pos != -1, "Could not find 'if use_csv:' in app.py"

    # Parquet must be present as the fallback branch.
    assert parquet_elif_pos != -1, (
        "Could not find 'elif use_parquet:' in app.py — parquet should be "
        "a fallback after CSV"
    )

    # The CSV branch has to be the one that is tested first.
    assert csv_pos < parquet_elif_pos, (
        "'if use_csv:' must appear before 'elif use_parquet:' so that "
        "user CSV edits take priority over re-reading the parquet file"
    )

    # Nothing more to verify when there is no "if use_parquet:" at all, or
    # when its first occurrence comes after the CSV check.
    if parquet_if_pos == -1 or parquet_if_pos > csv_pos:
        return

    # An "if use_parquet:" ahead of the CSV check is only acceptable when it
    # does not dispatch to load_event_from_parquet before "if use_csv:"
    # appears (that dispatch is the old buggy pattern).
    text_before_csv_check = app_source[parquet_if_pos:csv_pos]
    assert "load_event_from_parquet" not in text_before_csv_check, (
        "Found 'if use_parquet:' with load_event_from_parquet before "
        "'if use_csv:' — this is the bug where parquet takes priority "
        "over CSV edits"
    )
def test_parse_csv_event_logic():
    """Check that the inlined ``_parse_csv_event`` builds event dicts from CSV text.

    The parsing helpers are re-implemented locally instead of being imported,
    to avoid importing the app module (which requires heavy dependencies
    like gradio).  The assertions cover a basic hit row, removal of all
    tracks, differing track counts, zero-padding of short track rows and the
    ``-1`` placeholder label arrays.
    """
    import io

    import numpy as np
    import pandas as pd

    def _read(text, min_cols=1):
        """Parse headerless CSV text into a float64 array.

        Empty or whitespace-only text yields an array of shape
        ``(0, min_cols)``.
        """
        if not text or not text.strip():
            return np.zeros((0, min_cols), dtype=np.float64)
        df = pd.read_csv(io.StringIO(text), header=None)
        return df.values.astype(np.float64)

    def _parse_csv_event(csv_hits, csv_tracks, csv_particles, csv_pandora=""):
        """Build the event dict from the four CSV text fields."""
        hits_arr = _read(csv_hits, 11)
        tracks_arr = _read(csv_tracks, 25)
        particles_arr = _read(csv_particles, 18)
        pandora_arr = _read(csv_pandora, 9)
        # Right-pad track rows with zeros up to 25 columns.
        if tracks_arr.shape[1] < 25 and tracks_arr.shape[0] > 0:
            pad = np.zeros((tracks_arr.shape[0], 25 - tracks_arr.shape[1]))
            tracks_arr = np.concatenate([tracks_arr, pad], axis=1)
        # Every hit and track gets a placeholder label of -1.
        ygen_hit = np.full(len(hits_arr), -1, dtype=np.int64)
        ygen_track = np.full(len(tracks_arr), -1, dtype=np.int64)
        return {
            "X_hit": hits_arr,
            "X_track": tracks_arr,
            "X_gen": particles_arr,
            "X_pandora": pandora_arr,
            "ygen_hit": ygen_hit,
            "ygen_track": ygen_track,
        }

    csv_hits = "0,0,0,0,0,1.23,1800.5,200.3,100.1,0,1"
    csv_tracks_two = (
        "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2\n"
        "1,0,0,0,0,3.0,1.0,1.5,2.1,0,0,0,1700.0,100.0,80.0,10.0,6,0,0,0,0,0,0.9,1.4,2.0"
    )
    csv_tracks_one = (
        "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2"
    )

    # Basic parse: one hit, every other field empty.
    event = _parse_csv_event(csv_hits, "", "", "")
    assert event["X_hit"].shape == (1, 11)
    assert event["X_track"].shape == (0, 25)
    assert event["X_gen"].shape == (0, 18)
    assert event["X_pandora"].shape == (0, 9)
    assert np.isclose(event["X_hit"][0, 5], 1.23)

    # Removing the tracks (empty or whitespace-only text) must leave no
    # track rows, compared against the same event parsed with a track.
    event_with_track = _parse_csv_event(csv_hits, csv_tracks_one, "", "")
    event_without_track = _parse_csv_event(csv_hits, "", "", "")
    event_blank_track = _parse_csv_event(csv_hits, "  \n", "", "")
    assert event_with_track["X_track"].shape == (1, 25)
    assert event_without_track["X_track"].shape == (0, 25)
    assert event_blank_track["X_track"].shape == (0, 25)
    assert len(event_without_track["ygen_track"]) == 0

    # Two tracks vs one track.
    event_two = _parse_csv_event(csv_hits, csv_tracks_two, "", "")
    event_one = _parse_csv_event(csv_hits, csv_tracks_one, "", "")
    assert event_two["X_track"].shape[0] == 2
    assert event_one["X_track"].shape[0] == 1

    # Short track rows are zero-padded on the right up to 25 columns.
    event_short = _parse_csv_event(csv_hits, "1,2,3", "", "")
    assert event_short["X_track"].shape == (1, 25)
    assert np.array_equal(event_short["X_track"][0, :3], [1.0, 2.0, 3.0])
    assert not event_short["X_track"][0, 3:].any()

    # Label arrays mirror the row counts and hold only the -1 placeholder.
    assert event_two["ygen_hit"].shape == (1,)
    assert event_two["ygen_track"].shape == (2,)
    assert (event_two["ygen_hit"] == -1).all()
    assert (event_two["ygen_track"] == -1).all()
def test_input_source_decision_logic():
    """Exercise a local copy of the input-source decision and confirm that
    CSV wins whenever CSV text is supplied, even alongside a parquet path."""

    def decide_source(parquet_path, csv_hits):
        """Return "csv", "parquet" or "none" for the given inputs."""
        has_parquet = parquet_path and os.path.isfile(parquet_path)
        has_csv = bool(csv_hits and csv_hits.strip())
        if has_csv:
            return "csv"
        return "parquet" if has_parquet else "none"

    # This very file serves as a stand-in for a parquet file that exists.
    this_file = os.path.abspath(__file__)

    # (parquet_path, csv_hits, expected source)
    scenarios = (
        (this_file, "some,csv,data", "csv"),   # both present -> CSV
        ("", "some,csv,data", "csv"),          # CSV only -> CSV
        (this_file, "", "parquet"),            # parquet only -> parquet
        ("", "", "none"),                      # nothing supplied -> none
    )
    for parquet_path, csv_hits, expected in scenarios:
        assert decide_source(parquet_path, csv_hits) == expected
if __name__ == "__main__":
    # Run every test in order when this file is executed as a script; a
    # failing assertion aborts before the success message is printed.
    for run_test in (
        test_csv_checked_before_parquet,
        test_parse_csv_event_logic,
        test_input_source_decision_logic,
    ):
        run_test()
    print("All tests passed.")