File size: 6,193 Bytes
cc0720f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""Tests that CSV data takes priority over parquet when both are available.

This validates the fix for the issue where loading an event from parquet and
then modifying the CSV text fields (e.g. removing tracks) was ignored because
the code always re-loaded from the parquet file.
"""

import os
import ast
import textwrap


def _extract_source_priority_logic():
    """Return the full text of app.py for inspection by the tests.

    The ``run_inference_ui`` source is scanned to confirm that the CSV
    branch is evaluated *before* the parquet branch, so user edits to
    the CSV text fields win even when a parquet file path is present.
    """
    here = os.path.dirname(__file__)
    app_path = os.path.join(here, "..", "app.py")
    with open(app_path) as handle:
        return handle.read()


def test_csv_checked_before_parquet():
    """In run_inference_ui, the ``if use_csv`` branch must come before
    ``use_parquet`` so that CSV edits are not silently ignored.

    Uses a ``(?<!el)`` lookbehind so that ``if use_parquet:`` does NOT
    also match the substring inside ``elif use_parquet:`` (and likewise
    for ``if use_csv:``) — plain ``str.find`` did, which made the
    standalone-pattern detection below unreliable.
    """
    import re

    source = _extract_source_priority_logic()

    # Find the key branching statements; standalone "if ..." only.
    csv_if = re.search(r"(?<!el)if use_csv:", source)
    idx_parquet_elif = source.find("elif use_parquet:")
    parquet_if = re.search(r"(?<!el)if use_parquet:", source)

    # "if use_csv:" must exist
    assert csv_if is not None, "Could not find 'if use_csv:' in app.py"
    idx_csv = csv_if.start()

    # "elif use_parquet:" must exist (parquet is the fallback)
    assert idx_parquet_elif != -1, (
        "Could not find 'elif use_parquet:' in app.py — parquet should be "
        "a fallback after CSV"
    )

    # CSV check must come before the parquet fallback
    assert idx_csv < idx_parquet_elif, (
        "'if use_csv:' must appear before 'elif use_parquet:' so that "
        "user CSV edits take priority over re-reading the parquet file"
    )

    # There should NOT be a standalone "if use_parquet:" that dispatches to
    # load_event_from_parquet before CSV is checked (the old buggy pattern).
    if parquet_if is not None:
        idx_parquet_if = parquet_if.start()
        assert (
            idx_parquet_if > idx_csv
            or "load_event_from_parquet" not in source[idx_parquet_if:idx_csv]
        ), (
            "Found 'if use_parquet:' with load_event_from_parquet before "
            "'if use_csv:' — this is the bug where parquet takes priority "
            "over CSV edits"
        )


def test_parse_csv_event_logic():
    """_parse_csv_event should correctly build event dicts from CSV text.

    The parsing logic from app.py is duplicated inline so the test does
    not need to import the module (which pulls in heavy dependencies
    such as gradio).
    """
    import io
    import numpy as np
    import pandas as pd

    def _to_array(text, min_cols=1):
        # Empty/whitespace-only text becomes a (0, min_cols) float array.
        if not text or not text.strip():
            return np.zeros((0, min_cols), dtype=np.float64)
        frame = pd.read_csv(io.StringIO(text), header=None)
        return frame.values.astype(np.float64)

    def _parse_csv_event(csv_hits, csv_tracks, csv_particles, csv_pandora=""):
        hits = _to_array(csv_hits, 11)
        tracks = _to_array(csv_tracks, 25)
        particles = _to_array(csv_particles, 18)
        pandora = _to_array(csv_pandora, 9)
        # Right-pad track rows with zeros up to the expected 25 columns.
        n_rows, n_cols = tracks.shape
        if n_cols < 25 and n_rows > 0:
            pad = np.zeros((n_rows, 25 - n_cols))
            tracks = np.concatenate([tracks, pad], axis=1)
        return {
            "X_hit": hits,
            "X_track": tracks,
            "X_gen": particles,
            "X_pandora": pandora,
            "ygen_hit": np.full(len(hits), -1, dtype=np.int64),
            "ygen_track": np.full(len(tracks), -1, dtype=np.int64),
        }

    hits_row = "0,0,0,0,0,1.23,1800.5,200.3,100.1,0,1"

    # Basic parse: one hit row, everything else empty.
    parsed = _parse_csv_event(hits_row, "", "", "")
    assert parsed["X_hit"].shape == (1, 11)
    assert parsed["X_track"].shape == (0, 25)
    assert np.isclose(parsed["X_hit"][0, 5], 1.23)

    # Removing all tracks yields an empty track array.
    assert _parse_csv_event(hits_row, "", "", "")["X_track"].shape[0] == 0

    # Track row counts follow the CSV text exactly: two rows vs one row.
    one_track = (
        "1,0,0,0,0,5.0,3.0,2.0,3.3,0,0,0,1800.0,150.0,90.0,12.5,8,0,0,0,0,0,2.9,1.9,3.2"
    )
    two_tracks = (
        one_track
        + "\n"
        + "1,0,0,0,0,3.0,1.0,1.5,2.1,0,0,0,1700.0,100.0,80.0,10.0,6,0,0,0,0,0,0.9,1.4,2.0"
    )
    assert _parse_csv_event(hits_row, two_tracks, "", "")["X_track"].shape[0] == 2
    assert _parse_csv_event(hits_row, one_track, "", "")["X_track"].shape[0] == 1


def test_input_source_decision_logic():
    """Simulate the decision logic from run_inference_ui and verify that
    CSV is used even when a parquet path is present."""

    def pick_source(parquet_path, csv_hits):
        """Mirrors the decision logic in run_inference_ui."""
        have_parquet = parquet_path and os.path.isfile(parquet_path)
        have_csv = bool(csv_hits and csv_hits.strip())
        if have_csv:
            return "csv"
        if have_parquet:
            return "parquet"
        return "none"

    # This test file itself stands in for an existing parquet file.
    existing_file = os.path.abspath(__file__)

    cases = [
        # (parquet_path, csv_hits) -> expected source
        (existing_file, "some,csv,data", "csv"),   # both present → CSV wins
        ("", "some,csv,data", "csv"),              # CSV only → CSV
        (existing_file, "", "parquet"),            # parquet only → parquet
        ("", "", "none"),                          # neither → none
    ]
    for parquet_path, csv_hits, expected in cases:
        assert pick_source(parquet_path, csv_hits) == expected


if __name__ == "__main__":
    # Run all tests in order when invoked directly (without pytest).
    for _test in (
        test_csv_checked_before_parquet,
        test_parse_csv_event_logic,
        test_input_source_decision_logic,
    ):
        _test()
    print("All tests passed.")