File size: 2,884 Bytes
5412d82
842d4e9
5412d82
9b33910
 
 
 
5412d82
 
 
842d4e9
5412d82
 
 
9b33910
 
 
d265d8e
 
 
 
 
 
9b33910
d265d8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b33910
 
 
842d4e9
9b33910
842d4e9
 
 
 
 
9b33910
 
 
 
 
 
 
 
 
 
 
 
842d4e9
9b33910
 
 
 
842d4e9
9b33910
 
 
 
 
 
 
 
 
 
 
 
 
5412d82
842d4e9
5412d82
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Hugging Face Spaces entry point (ZeroGPU compatible).

On ZeroGPU, CUDA is available at import time AND inside @spaces.GPU
decorated functions.  Models are loaded once at import so the first
request does not spend minutes downloading weights inside a short GPU
slice (which caused every call to fall back to CPU).
"""

import os
import sys

sys.path.insert(0, os.path.dirname(__file__))

# ---------------------------------------------------------------------------
# Defensive patch: gradio_client.utils.get_type crashes on bool schemas.
# ---------------------------------------------------------------------------
try:
    from gradio_client import utils as _gc_utils

    _orig_get_type = _gc_utils.get_type

    def _safe_get_type(schema):
        """Return ``"Any"`` for a non-dict schema, else defer to gradio_client.

        Args:
            schema: The schema value gradio_client passes to ``get_type``.

        Returns:
            ``"Any"`` when *schema* is not a dict; otherwise whatever the
            original ``get_type`` returns.
        """
        # A bool is never a dict, so this one check also covers the bool
        # schemas this patch exists for.
        if not isinstance(schema, dict):
            return "Any"
        return _orig_get_type(schema)

    _gc_utils.get_type = _safe_get_type

    _orig_json_to_py = _gc_utils._json_schema_to_python_type

    def _safe_json_to_py(schema, defs=None):
        """Convert *schema* to a type string, falling back to ``"Any"``.

        Args:
            schema: The schema value to convert.
            defs: Forwarded unchanged to the original converter.

        Returns:
            ``"Any"`` when *schema* is not a dict or the original converter
            raises ``TypeError``/``KeyError``; otherwise its result.
        """
        if not isinstance(schema, dict):
            return "Any"
        try:
            return _orig_json_to_py(schema, defs)
        except (TypeError, KeyError):
            return "Any"

    _gc_utils._json_schema_to_python_type = _safe_json_to_py
except Exception as _patch_err:
    # The patch stays best-effort (startup must not fail because of it), but
    # a skipped patch is reported instead of being swallowed silently so a
    # later schema crash can be traced back to it.
    print(
        f"[startup] gradio_client schema patch not applied: {_patch_err!r}",
        file=sys.stderr,
    )

# ---------------------------------------------------------------------------
# ZeroGPU import + pipeline warm-up
# ---------------------------------------------------------------------------
# The ``spaces`` package is optional: record whether it imported so the
# GPU wrapping further down can be skipped when it is absent.
try:
    import spaces
except ImportError:
    _HAS_SPACES = False
else:
    _HAS_SPACES = True

from src.ui import gradio_app as _ga
from src.pipeline import Pipeline

# Warm up every model during import.  On ZeroGPU torch.cuda is already
# usable at this point, and the loaded tensors stay on GPU across
# @spaces.GPU calls (the runtime re-attaches the slice transparently).
print("Pre-loading pipeline (VLM + Depth + YOLO)...")
_pipeline = Pipeline()
for _loader_name in ("_get_vlm", "_get_depth", "_get_detector"):
    # Same loaders, same order: VLM first, then depth, then the detector.
    getattr(_pipeline, _loader_name)()
del _loader_name  # do not leave the loop variable in the module namespace
_ga._PIPELINE = _pipeline
print("Pipeline ready.")

# ---------------------------------------------------------------------------
# Wrap inference entry points so ZeroGPU keeps a GPU slice for the call.
# 300 s covers first-call weight migration + heaviest Stage 3 inference.
# ---------------------------------------------------------------------------
if _HAS_SPACES:
    # Imported here because only the ZeroGPU branch needs them.
    import functools
    import inspect

    def _gpu_wrap(handler, duration=300):
        """Wrap *handler* in a pass-through decorated with ``spaces.GPU``.

        Args:
            handler: The inference callable taken from ``_ga``.
            duration: Seconds forwarded to ``spaces.GPU(duration=...)``.

        Returns:
            Whatever ``spaces.GPU(duration=duration)`` returns for the
            pass-through wrapper.
        """
        # functools.wraps copies __name__/__doc__ and sets __wrapped__, so
        # anything that introspects the handler (inspect.signature follows
        # __wrapped__) sees the original rather than a bare
        # (*args, **kwargs) shim.
        # NOTE(review): Gradio is presumed to introspect handlers this way
        # (names, parameter counts) - confirm.
        if inspect.isgeneratorfunction(handler):
            # A generator handler must remain a generator function; a plain
            # wrapper would return an unconsumed generator object as soon
            # as the decorated call finishes.
            # NOTE(review): spaces.GPU is presumed to treat generator
            # functions specially - confirm against the ZeroGPU docs.
            @functools.wraps(handler)
            def _gpu_handler(*args, **kwargs):
                yield from handler(*args, **kwargs)
        else:
            @functools.wraps(handler)
            def _gpu_handler(*args, **kwargs):
                return handler(*args, **kwargs)

        return spaces.GPU(duration=duration)(_gpu_handler)

    # Keep references to the undecorated entry points.
    _orig_run = _ga.run_inference
    _orig_live = _ga.live_run_inference

    _gpu_run_inference = _gpu_wrap(_orig_run)
    _gpu_live_inference = _gpu_wrap(_orig_live)

    # Rebind on the UI module so later lookups of these names on ``_ga``
    # get the GPU-wrapped versions.
    _ga.run_inference = _gpu_run_inference
    _ga.live_run_inference = _gpu_live_inference

# Build the UI only after the entry points on ``_ga`` have been (optionally)
# rebound above.
# NOTE(review): presumably build_ui() resolves run_inference /
# live_run_inference at call time and so picks up the wrapped versions -
# confirm in src/ui/gradio_app.py.
demo = _ga.build_ui()

# Start the app only when this file is executed directly; on import the
# module just exposes ``demo``.
if __name__ == "__main__":
    demo.launch()