File size: 13,227 Bytes
e00eceb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
import os
import uuid
from pathlib import Path

import pytest
import requests
from helpers import get_asset_filename, trigger_sync_seed_assets




@pytest.mark.parametrize("root", ["input", "output"])
def test_seed_asset_removed_when_file_is_deleted(
    root: str,
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
):
    """Asset without hash (seed) whose file disappears:
       after triggering sync_seed_assets, Asset + AssetInfo disappear.

    Runs once for each parametrized ``root`` ("input" and "output").
    """
    # Create a file directly under <root>/unit-tests/syncseed so tags include "unit-tests"
    case_dir = comfy_tmp_base_dir / root / "unit-tests" / "syncseed"
    case_dir.mkdir(parents=True, exist_ok=True)
    name = f"seed_{uuid.uuid4().hex[:8]}.bin"
    fp = case_dir / name
    fp.write_bytes(b"Z" * 2048)

    # The same listing query is issued before and after the file is removed.
    listing_params = {"include_tags": "unit-tests,syncseed", "name_contains": name}

    # Trigger a seed sync so DB sees this path (seed asset => hash is NULL)
    trigger_sync_seed_assets(http, api_base)

    # Verify it is visible via API and carries no hash (seed)
    r1 = http.get(api_base + "/api/assets", params=listing_params, timeout=120)
    body1 = r1.json()
    # Include the response body so a failing status is diagnosable from the report.
    assert r1.status_code == 200, body1
    # "name_contains" is a substring filter, so narrow down to the exact name;
    # only the first exact match is inspected below.
    matches = [a for a in body1.get("assets", []) if a.get("name") == name]
    assert matches, f"Seed asset {name} should be listed after sync"
    assert matches[0].get("asset_hash") is None, "Seed asset must not carry a hash"
    asset_info_id = matches[0]["id"]

    # Remove the underlying file and sync again
    if fp.exists():
        fp.unlink()

    trigger_sync_seed_assets(http, api_base)

    # It should disappear (AssetInfo and seed Asset gone)
    r2 = http.get(api_base + "/api/assets", params=listing_params, timeout=120)
    body2 = r2.json()
    assert r2.status_code == 200, body2
    matches2 = [a for a in body2.get("assets", []) if a.get("name") == name]
    assert not matches2, f"Seed asset {asset_info_id} should be gone after sync"


@pytest.mark.skip(reason="Requires computing hashes of files in directories to verify and clear missing tags")
def test_hashed_asset_missing_tag_added_then_removed_after_scan(
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
    asset_factory,
    make_asset_bytes,
):
    """Round trip of the 'missing' tag for a hashed asset backed by one file.

    Steps:
      1. delete the stored file -> sync must add 'missing'
      2. write the same bytes back -> sync must remove 'missing'
    """
    file_name = "missing_tag_test.png"
    payload = make_asset_bytes(file_name, 4096)
    asset = asset_factory(file_name, ["input", "unit-tests", "msync2"], {}, payload)

    def current_tags():
        # Fetch the asset detail and return its tags as a set.
        resp = http.get(f"{api_base}/api/assets/{asset['id']}", timeout=120)
        detail = resp.json()
        assert resp.status_code == 200, detail
        return set(detail.get("tags", []))

    # Locate the stored file on disk and delete it
    stored_dir = comfy_tmp_base_dir / "input" / "unit-tests" / "msync2"
    stored_file = stored_dir / get_asset_filename(asset["asset_hash"], ".png")
    assert stored_file.exists(), f"Expected asset file at {stored_file}"
    stored_file.unlink()

    # After a fast sync the AssetInfo is expected to be tagged 'missing'
    trigger_sync_seed_assets(http, api_base)
    assert "missing" in current_tags(), "Expected 'missing' tag after deletion"

    # Put identical content back at the same location, then sync once more
    stored_file.parent.mkdir(parents=True, exist_ok=True)
    stored_file.write_bytes(payload)
    trigger_sync_seed_assets(http, api_base)

    assert "missing" not in current_tags(), "Missing tag should be cleared after verify"


def test_hashed_asset_two_asset_infos_both_get_missing(
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
    asset_factory,
):
    """One hashed asset, one file on disk, two AssetInfo rows.

    Once the only file is deleted and a sync is triggered, both AssetInfo
    entries must carry the 'missing' tag, and the usage count reported for
    that tag must grow by exactly two.
    """

    def missing_usage():
        # Read the usage count of the 'missing' tag (0 when the tag is not listed).
        resp = http.get(api_base + "/api/tags", params={"limit": "1000", "include_zero": "false"}, timeout=120)
        listing = resp.json()
        assert resp.status_code == 200, listing
        by_name = {entry["name"]: entry for entry in listing.get("tags", [])}
        return int(by_name.get("missing", {}).get("count", 0))

    def tags_of(asset_info_id):
        # Fetch one AssetInfo and return its tags as a set.
        resp = http.get(f"{api_base}/api/assets/{asset_info_id}", timeout=120)
        detail = resp.json()
        assert resp.status_code == 200, detail
        return set(detail.get("tags", []))

    # First AssetInfo: a regular upload producing a hashed asset
    scope_tags = ["input", "unit-tests", "multiinfo"]
    first = asset_factory("two_infos_one_path.png", scope_tags, {}, b"A" * 2048)

    # Second AssetInfo: attach to the same Asset through the from-hash endpoint
    from_hash_body = {
        "hash": first["asset_hash"],
        "name": "two_infos_one_path_copy.png",
        "tags": scope_tags,  # same unit-tests scope, so cleanup covers it too
        "user_metadata": {"k": "v"},
    }
    created_resp = http.post(api_base + "/api/assets/from-hash", json=from_hash_body, timeout=120)
    second = created_resp.json()
    assert created_resp.status_code == 201, second
    second_id = second["id"]

    # Delete the one file that backs both AssetInfos
    backing_dir = comfy_tmp_base_dir / "input" / "unit-tests" / "multiinfo"
    backing_file = backing_dir / get_asset_filename(second["asset_hash"], ".png")
    assert backing_file.exists()
    backing_file.unlink()

    # Baseline usage of 'missing', taken before the sync
    usage_before = missing_usage()

    # The sync has to tag every AssetInfo that points at the asset
    trigger_sync_seed_assets(http, api_base)

    assert "missing" in tags_of(first["id"])
    assert "missing" in tags_of(second_id)

    # Two AssetInfos were tagged, so the usage count grows by exactly 2
    usage_after = missing_usage()
    assert usage_after == usage_before + 2


@pytest.mark.skip(reason="Requires computing hashes of files in directories to deduplicate into multiple cache states")
def test_hashed_asset_two_cache_states_partial_delete_then_full_delete(
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
    asset_factory,
    make_asset_bytes,
    run_scan_and_wait,
):
    """Hashed asset reachable through two files on disk.

    Steps:
      1. remove the first file -> sync must NOT add 'missing'
      2. remove the second file -> sync must add 'missing'
    """

    def missing_usage():
        # Read the usage count of the 'missing' tag (0 when the tag is not listed).
        resp = http.get(api_base + "/api/tags", params={"limit": "1000", "include_zero": "false"}, timeout=120)
        listing = resp.json()
        assert resp.status_code == 200, listing
        return int({entry["name"]: entry for entry in listing.get("tags", [])}.get("missing", {}).get("count", 0))

    def created_tags():
        # Fetch the uploaded AssetInfo and return its tags as a set.
        resp = http.get(f"{api_base}/api/assets/{uploaded['id']}", timeout=120)
        detail = resp.json()
        assert resp.status_code == 200, detail
        return set(detail.get("tags", []))

    file_name = "two_cache_states_partial_delete.png"
    payload = make_asset_bytes(file_name, 3072)

    uploaded = asset_factory(file_name, ["input", "unit-tests", "dual"], {}, payload)
    unit_tests_dir = comfy_tmp_base_dir / "input" / "unit-tests"
    first_copy = unit_tests_dir / "dual" / get_asset_filename(uploaded["asset_hash"], ".png")
    assert first_copy.exists()

    # Second copy of the same bytes: same root, different subfolder
    second_copy = unit_tests_dir / "dual_copy" / file_name
    second_copy.parent.mkdir(parents=True, exist_ok=True)
    second_copy.write_bytes(payload)

    # A fast seed registers the second path (initially as a seed)
    trigger_sync_seed_assets(http, api_base)

    # AssetInfo-s are not deduplicated here: the first has owner='default', the second an empty owner.
    run_scan_and_wait("input")

    # With only the first file gone the asset still has a valid path
    first_copy.unlink()
    trigger_sync_seed_assets(http, api_base)
    assert "missing" not in created_tags(), "Should not be missing while one valid path remains"

    # Usage of 'missing' right before the last file is removed
    usage_before = missing_usage()

    # Removing the last remaining file must flag this AssetInfo as 'missing'
    second_copy.unlink()
    trigger_sync_seed_assets(http, api_base)
    assert "missing" in created_tags(), "Missing must be set once no valid paths remain"

    # Usage of 'missing' grows by exactly 2 (two AssetInfo for one Asset)
    usage_after = missing_usage()
    assert usage_after == usage_before + 2


@pytest.mark.parametrize("root", ["input", "output"])
def test_missing_tag_clears_on_fastpass_when_mtime_and_size_match(
    root: str,
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
    asset_factory,
    make_asset_bytes,
):
    """
    The fast pass by itself drops 'missing' once size and mtime match again:
      1) upload a hashed asset and remember the file's mtime in nanoseconds
      2) delete the file -> fast pass tags the asset 'missing'
      3) write the same bytes back and reset mtime to the remembered value
      4) fast pass again -> 'missing' is gone, without any slow scan
    """

    def current_tags():
        # Fetch the asset detail and return its tags as a set.
        resp = http.get(f"{api_base}/api/assets/{asset_id}", timeout=120)
        detail = resp.json()
        assert resp.status_code == 200, detail
        return set(detail.get("tags", []))

    scope = f"fastclear-{uuid.uuid4().hex[:6]}"
    file_name = "fastpass_clear.bin"
    payload = make_asset_bytes(file_name, 3072)

    uploaded = asset_factory(file_name, [root, "unit-tests", scope], {}, payload)
    asset_id = uploaded["id"]
    scope_dir = comfy_tmp_base_dir / root / "unit-tests" / scope
    stored_file = scope_dir / get_asset_filename(uploaded["asset_hash"], ".bin")
    initial_stat = stored_file.stat()
    orig_mtime_ns = getattr(initial_stat, "st_mtime_ns", int(initial_stat.st_mtime * 1_000_000_000))

    # Step 2: file removed, fast pass must add 'missing'
    stored_file.unlink()
    trigger_sync_seed_assets(http, api_base)
    assert "missing" in current_tags()

    # Step 3: identical bytes back in place, timestamps reset to the recorded value
    stored_file.parent.mkdir(parents=True, exist_ok=True)
    stored_file.write_bytes(payload)
    # atime and mtime are both given in ns so the mtime matches exactly
    os.utime(stored_file, ns=(orig_mtime_ns, orig_mtime_ns))

    # Step 4: the fast pass alone is expected to clear the tag
    trigger_sync_seed_assets(http, api_base)
    assert "missing" not in current_tags(), "Fast pass should clear 'missing' when size+mtime match"


@pytest.mark.skip(reason="Requires computing hashes of files in directories to deduplicate into multiple cache states")
@pytest.mark.parametrize("root", ["input", "output"])
def test_fastpass_removes_stale_state_row_no_missing(
    root: str,
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
    asset_factory,
    make_asset_bytes,
    run_scan_and_wait,
):
    """
    Hashed asset backed by two files; one of them is deleted and only the
    fast pass is run.

    Expected outcome:
      - the asset remains healthy (no 'missing' tag)
      - the stale AssetCacheState row of the deleted path is dropped.
        This is checked through behavior: the deleted path is recreated and
        the fast pass is run again; a new *seed* AssetInfo showing up proves
        that the old state row was not reused.
    """
    scope = f"stale-{uuid.uuid4().hex[:6]}"
    file_name = "two_states.bin"
    payload = make_asset_bytes(file_name, 2048)

    # First location: regular upload of a hashed asset
    uploaded = asset_factory(file_name, [root, "unit-tests", scope], {}, payload)
    scope_dir = comfy_tmp_base_dir / root / "unit-tests" / scope
    stored_name = get_asset_filename(uploaded["asset_hash"], ".bin")
    first_path = scope_dir / stored_name
    assert first_path.exists()

    asset_id, asset_hash = uploaded["id"], uploaded["asset_hash"]

    # Second location: same bytes; seed + scan should dedupe it into the same Asset
    second_path = scope_dir / "copy" / file_name
    second_path.parent.mkdir(parents=True, exist_ok=True)
    second_path.write_bytes(payload)
    trigger_sync_seed_assets(http, api_base)
    run_scan_and_wait(root)

    # Drop the first file, fast pass only: asset must not be tagged 'missing'
    first_path.unlink()
    trigger_sync_seed_assets(http, api_base)
    detail_resp = http.get(f"{api_base}/api/assets/{asset_id}", timeout=120)
    detail = detail_resp.json()
    assert detail_resp.status_code == 200, detail
    assert "missing" not in set(detail.get("tags", []))

    # Bring the first file back and run the fast pass once more.
    # A NEW seed AssetInfo for this path means the stale state row had been removed.
    first_path.write_bytes(payload)
    trigger_sync_seed_assets(http, api_base)

    listing_resp = http.get(
        api_base + "/api/assets",
        params={"include_tags": f"unit-tests,{scope}"},
        timeout=120,
    )
    listing = listing_resp.json()
    assert listing_resp.status_code == 200, listing

    # Expect one hashed AssetInfo (asset_hash == asset_hash) plus one seed AssetInfo (asset_hash is null)
    relevant_names = (file_name, stored_name)
    seen_hashes = [
        entry.get("asset_hash")
        for entry in listing.get("assets", [])
        if entry.get("name") in relevant_names
    ]
    assert asset_hash in seen_hashes
    assert any(value is None for value in seen_hashes), "Expected a new seed AssetInfo for the recreated path"

    # The asset must still be reachable by its hash
    head_resp = http.head(f"{api_base}/api/assets/hash/{asset_hash}", timeout=120)
    assert head_resp.status_code == 200