Spaces:

gregorkrzmanc
/

HitPF_demo

Sleeping

HitPF_demo / tests /test_csv_priority.py

github-actions[bot]

Sync from GitHub f6dbbfb

cc0720f about 1 month ago

6.19 kB

	"""Tests that CSV data takes priority over parquet when both are available.

	This validates the fix for the issue where loading an event from parquet and
	then modifying the CSV text fields (e.g. removing tracks) was ignored because
	the code always re-loaded from the parquet file.
	"""

	import os
	import ast
	import textwrap


	def _extract_source_priority_logic(app_path=None):
	    """Return the full text of ``app.py`` for source-level checks.

	    No parsing or verification happens here: the whole file is read and
	    returned unchanged, and the calling test searches the text for the
	    ``use_csv`` / ``use_parquet`` branches.

	    Args:
	        app_path: Optional path of the file to read. When omitted, the
	            ``app.py`` located one directory above this test file is used.

	    Returns:
	        The file contents as a string, decoded as UTF-8.

	    Raises:
	        OSError: If the file cannot be opened.
	    """
	    if app_path is None:
	        app_path = os.path.join(os.path.dirname(__file__), "..", "app.py")
	    # Decode explicitly: the default encoding of open() depends on the
	    # platform locale and can fail on non-ASCII characters in the source.
	    with open(app_path, encoding="utf-8") as f:
	        return f.read()


	def test_csv_checked_before_parquet():
	    """In run_inference_ui, the ``if use_csv`` branch must come before
	    ``use_parquet`` so that CSV edits are not silently ignored.

	    The check is textual: it looks at where the branching statements
	    appear in the text of app.py, it does not execute the application.
	    """
	    import re

	    source = _extract_source_priority_logic()

	    def first_statement(keyword, flag):
	        """Return the offset of the first ``<keyword> <flag>:`` statement.

	        Only statements that start a line (after optional indentation) are
	        matched, so searching for ``if use_parquet:`` cannot hit the tail
	        of ``elif use_parquet:``. Returns -1 when there is no match.
	        """
	        pattern = r"^[ \t]*(" + re.escape(keyword + " " + flag + ":") + r")"
	        match = re.search(pattern, source, flags=re.MULTILINE)
	        return match.start(1) if match else -1

	    # Find positions of the key branching statements
	    idx_csv = first_statement("if", "use_csv")
	    idx_parquet_elif = first_statement("elif", "use_parquet")
	    idx_parquet_if = first_statement("if", "use_parquet")

	    # "if use_csv:" must exist
	    assert idx_csv != -1, "Could not find 'if use_csv:' in app.py"

	    # "elif use_parquet:" must exist (parquet is the fallback)
	    assert idx_parquet_elif != -1, (
	        "Could not find 'elif use_parquet:' in app.py — parquet should be "
	        "a fallback after CSV"
	    )

	    # CSV check must come before the parquet fallback
	    assert idx_csv < idx_parquet_elif, (
	        "'if use_csv:' must appear before 'elif use_parquet:' so that "
	        "user CSV edits take priority over re-reading the parquet file"
	    )

	    # A standalone "if use_parquet:" placed before "if use_csv:" is only a
	    # problem when it dispatches to load_event_from_parquet before the CSV
	    # check is reached (the old buggy pattern). One that comes after the
	    # CSV check cannot take priority over it.
	    if idx_parquet_if != -1 and idx_parquet_if < idx_csv:
	        text_before_csv_check = source[idx_parquet_if:idx_csv]
	        assert "load_event_from_parquet" not in text_before_csv_check, (
	            "Found 'if use_parquet:' with load_event_from_parquet before "
	            "'if use_csv:' — this is the bug where parquet takes priority "
	            "over CSV edits"
	        )


	def test_parse_csv_event_logic():
	    """_parse_csv_event should correctly build event dicts from CSV text.

	    We inline the same parsing logic used by app.py to avoid importing
	    the module (which requires heavy dependencies like gradio).

	    Covered cases: a basic single-hit parse, empty inputs, one vs. two
	    tracks, tracks removed after editing, and track rows with fewer than
	    25 columns (which are zero-padded).
	    """
	    import io
	    import numpy as np
	    import pandas as pd

	    def _read(text, min_cols=1):
	        """Parse header-less CSV text into a float64 array.

	        Empty or whitespace-only text yields an empty ``(0, min_cols)``
	        array so that downstream code still sees the expected column count.
	        """
	        if not text or not text.strip():
	            return np.zeros((0, min_cols), dtype=np.float64)
	        df = pd.read_csv(io.StringIO(text), header=None)
	        return df.values.astype(np.float64)

	    def _parse_csv_event(csv_hits, csv_tracks, csv_particles, csv_pandora=""):
	        """Build the event dict from the four CSV text fields."""
	        hits_arr = _read(csv_hits, 11)
	        tracks_arr = _read(csv_tracks, 25)
	        particles_arr = _read(csv_particles, 18)
	        pandora_arr = _read(csv_pandora, 9)
	        # Track rows with fewer than 25 columns are right-padded with zeros.
	        if tracks_arr.shape[1] < 25 and tracks_arr.shape[0] > 0:
	            pad = np.zeros((tracks_arr.shape[0], 25 - tracks_arr.shape[1]))
	            tracks_arr = np.concatenate([tracks_arr, pad], axis=1)
	        # Every hit and track gets the label -1.
	        ygen_hit = np.full(len(hits_arr), -1, dtype=np.int64)
	        ygen_track = np.full(len(tracks_arr), -1, dtype=np.int64)
	        return {
	            "X_hit": hits_arr,
	            "X_track": tracks_arr,
	            "X_gen": particles_arr,
	            "X_pandora": pandora_arr,
	            "ygen_hit": ygen_hit,
	            "ygen_track": ygen_track,
	        }

	    # Basic parse
	    csv_hits = "0,0,0,0,0,1.23,1800.5,200.3,100.1,0,1"
	    event = _parse_csv_event(csv_hits, "", "", "")
	    assert event["X_hit"].shape == (1, 11)
	    assert event["X_track"].shape == (0, 25)
	    assert np.isclose(event["X_hit"][0, 5], 1.23)

	    # Empty inputs keep their expected column counts
	    assert event["X_gen"].shape == (0, 18)
	    assert event["X_pandora"].shape == (0, 9)

	    # One label per hit / track, all set to -1
	    assert event["ygen_hit"].tolist() == [-1]
	    assert event["ygen_track"].tolist() == []

	    # Two tracks vs one track
	    csv_tracks_two = (
	        "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2\n"
	        "1,0,0,0,0,3.0,1.0,1.5,2.1,0,0,0,1700.0,100.0,80.0,10.0,6,0,0,0,0,0,0.9,1.4,2.0"
	    )
	    csv_tracks_one = (
	        "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2"
	    )
	    event_two = _parse_csv_event(csv_hits, csv_tracks_two, "", "")
	    event_one = _parse_csv_event(csv_hits, csv_tracks_one, "", "")
	    assert event_two["X_track"].shape == (2, 25)
	    assert event_one["X_track"].shape == (1, 25)
	    assert event_two["ygen_track"].tolist() == [-1, -1]

	    # Empty tracks after removing them: the hits stay, the tracks are gone.
	    # A text box that only contains whitespace counts as empty as well.
	    for removed_tracks in ("", "  \n"):
	        event_removed = _parse_csv_event(csv_hits, removed_tracks, "", "")
	        assert event_removed["X_track"].shape == (0, 25)
	        assert np.array_equal(event_removed["X_hit"], event_two["X_hit"])

	    # Track rows with fewer than 25 columns are zero-padded on the right
	    event_short = _parse_csv_event(csv_hits, "1,2,3", "", "")
	    assert event_short["X_track"].shape == (1, 25)
	    assert event_short["X_track"][0, :3].tolist() == [1.0, 2.0, 3.0]
	    assert not event_short["X_track"][0, 3:].any()


	def test_input_source_decision_logic():
	    """Simulate the decision logic from run_inference_ui and verify that
	    CSV is used even when a parquet path is present.

	    Also covers the fallbacks: parquet when the CSV text is empty or only
	    whitespace, and no source at all when neither input is usable.
	    """

	    def decide_source(parquet_path, csv_hits):
	        """Mirrors the decision logic in run_inference_ui."""
	        use_parquet = parquet_path and os.path.isfile(parquet_path)
	        use_csv = bool(csv_hits and csv_hits.strip())

	        if use_csv:
	            return "csv"
	        elif use_parquet:
	            return "parquet"
	        else:
	            return "none"

	    # Use this script as a stand-in for an existing file, and a sibling
	    # name that is not expected to exist for the "missing file" cases.
	    existing_file = os.path.abspath(__file__)
	    missing_file = existing_file + ".does-not-exist"

	    # CSV present + parquet path present → should use CSV
	    assert decide_source(existing_file, "some,csv,data") == "csv"

	    # CSV present + no parquet → should use CSV
	    assert decide_source("", "some,csv,data") == "csv"

	    # CSV present + parquet path that is not a file → should use CSV
	    assert decide_source(missing_file, "some,csv,data") == "csv"

	    # CSV empty + parquet present → should use parquet
	    assert decide_source(existing_file, "") == "parquet"

	    # Whitespace-only CSV counts as empty → should use parquet
	    assert decide_source(existing_file, "  \n\t") == "parquet"

	    # CSV empty + parquet path that is not a file → none
	    assert decide_source(missing_file, "") == "none"

	    # Both empty → none
	    assert decide_source("", "") == "none"

	    # Unset (None) inputs behave like empty ones → none
	    assert decide_source(None, None) == "none"


	if __name__ == "__main__":
	    # Direct execution: run every check in order, without needing pytest.
	    # A failing check raises AssertionError and stops the run.
	    for _run_check in (
	        test_csv_checked_before_parquet,
	        test_parse_csv_event_logic,
	        test_input_source_decision_logic,
	    ):
	        _run_check()
	    print("All tests passed.")