File size: 7,778 Bytes
8e9c5be
 
 
 
 
2971610
8e9c5be
044d0aa
8e9c5be
 
 
 
5482a01
8e9c5be
67e72f7
044d0aa
 
 
 
 
 
 
 
 
 
 
67e72f7
3023aa7
 
 
 
 
 
 
b0da3be
 
 
4a652b7
b0da3be
 
4a652b7
b0da3be
 
 
 
4a652b7
b0da3be
390df97
b0da3be
 
 
 
 
 
 
4a652b7
b0da3be
390df97
b0da3be
 
390df97
b0da3be
 
 
 
 
 
 
 
 
4a652b7
b0da3be
 
 
 
 
4a652b7
b0da3be
 
 
4a652b7
2971610
 
 
 
 
 
67e72f7
 
2971610
b0da3be
 
 
 
 
 
38c755c
b0da3be
4a652b7
8e9c5be
5482a01
 
 
 
 
 
 
 
 
044d0aa
 
2971610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e9c5be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2971610
f3e3f11
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# --- Imports ---------------------------------------------------------------
# Standard library
from pathlib import Path
from collections.abc import Mapping, Sequence
import importlib
import importlib.util
# Third-party: UI, model hub, and inference stack
import gradio as gr
from rich import _console  # NOTE(review): private name from rich, unused below — confirm and remove
from transformers import AutoModel, AutoTokenizer, AutoConfig, pipeline
import torch 
from huggingface_hub import snapshot_download
import sys, pathlib
import os
# NOTE(review): placeholder API key hard-coded in source — presumably to satisfy
# a client that requires the variable to exist; verify before deploying.
os.environ["OPENAI_API_KEY"] = "test"
# Cap OpenMP threads used by native libs (BLAS/torch CPU ops).
os.environ["OMP_NUM_THREADS"] = "4"

# Report interpreter / framework versions and CUDA capabilities at startup.
_report_lines = [
    "All imports finished",
    f"Python version: {sys.version}",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    f"CUDA version: {torch.version.cuda}",
    f"cuDNN version: {torch.backends.cudnn.version()}",
    f"Number of GPUs: {torch.cuda.device_count()}",
]
for _line in _report_lines:
    print(_line)

# Per-GPU details (name + total memory in GB) when CUDA is present.
if torch.cuda.is_available():
    for _gpu in range(torch.cuda.device_count()):
        print(f"GPU {_gpu}: {torch.cuda.get_device_name(_gpu)}")
        print(f"  Memory: {torch.cuda.get_device_properties(_gpu).total_memory / 1e9:.2f} GB")

# Force full float32 precision: disable TF32 fast paths on Ampere+ GPUs and
# advertise the dtype choice via the environment.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
os.environ['TORCH_DTYPE'] = 'float32'

# Make float32 the default dtype for newly created tensors.
torch.set_default_dtype(torch.float32)

# # 1) Download the repo to a local cache dir
# print("Downloading remote vine repo...")
# repo_dir = snapshot_download(repo_id="KevinX-Penn28/testing", revision="main")

# # 2) Register the snapshot as an importable package
# VINE_PACKAGE = "vine_remote_repo"

# # Drop stale modules in case the script reloads
# for module_name in list(sys.modules):
#     if module_name == VINE_PACKAGE or module_name.startswith(f"{VINE_PACKAGE}."):
#         del sys.modules[module_name]

# print("Dropped stale modules and registering vine package...")

# package_spec = importlib.util.spec_from_file_location(
#     VINE_PACKAGE,
#     Path(repo_dir) / "__init__.py",
#     submodule_search_locations=[str(repo_dir)],
# )
# if not package_spec or not package_spec.loader:
#     raise ImportError(f"Cannot create package spec for {VINE_PACKAGE} at {repo_dir}")

# print("Created package spec, loading module...")

# package_module = importlib.util.module_from_spec(package_spec)
# sys.modules[VINE_PACKAGE] = package_module

# try:
#     print("Executing module...")
#     package_spec.loader.exec_module(package_module)
#     print("Module executed successfully!")
# except Exception as e:
#     print(f"ERROR during module execution: {e}")
#     import traceback
#     traceback.print_exc()
#     raise

# # 3) Import and use via the registered package
# print("Importing vine modules...")
# vine_config_module = importlib.import_module(f"{VINE_PACKAGE}.vine_config")
# vine_model_module = importlib.import_module(f"{VINE_PACKAGE}.vine_model")
# vine_pipeline_module = importlib.import_module(f"{VINE_PACKAGE}.vine_pipeline")

# VineConfig = vine_config_module.VineConfig              # your config class
# VineModel = vine_model_module.VineModel                 # your model class
# VinePipeline = vine_pipeline_module.VinePipeline        

# Resolve model/config asset paths next to this script.
# NOTE(review): current_dir is already absolute, so the prepended "/" produces a
# double leading slash (e.g. "//home/..."); POSIX resolves that to the same
# path, but confirm the prefix is intentional before simplifying.
current_dir = Path(__file__).resolve().parent

def _abs_asset(filename):
    """Return the absolute path string for an asset located beside this file."""
    return "/" + str(current_dir / filename)

sam_config_path = _abs_asset("sam2_hiera_t.yaml")
sam_checkpoint_path = _abs_asset("sam2_hiera_tiny.pt")
gd_config_path = _abs_asset("GroundingDINO_SwinT_OGC.py")
gd_checkpoint_path = _abs_asset("groundingdino_swint_ogc.pth")
visualization_dir = _abs_asset("outputs")
print(f"Setting up paths: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")


# # current_dir = Path.cwd()
# # sam_config_path = "/" + str(current_dir / "sam2_hiera_t.yaml")
# # sam_checkpoint_path = "/" + str(current_dir / "sam2_hiera_tiny.pt")
# # gd_config_path = "/" + str(current_dir / "GroundingDINO_SwinT_OGC.py")
# # gd_checkpoint_path = "/" + str(current_dir / "groundingdino_swint_ogc.pth")
# # visualization_dir = "/" + str(current_dir / "outputs")

# print(f"Setting up paths done: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")

def _parse_keywords(raw):
    """Split a comma-separated keyword string into stripped, non-empty items."""
    if not raw:
        return []
    return [kw.strip() for kw in raw.split(",") if kw.strip()]


def _parse_object_pairs(raw):
    """Parse a string like "0-1,0-2" into a list of (int, int) index tuples."""
    if not raw:
        return []
    return [tuple(map(int, pair.split("-"))) for pair in raw.split(",") if pair.strip()]


def process_video(video_file, categorical_keywords, unary_keywords, binary_keywords, object_pairs, output_fps):
    """Run the VINE detection pipeline on a video and return its outputs.

    Args:
        video_file: Path to the input video (as handed over by the Gradio widget).
        categorical_keywords: Comma-separated object categories, e.g. "dog, cat".
        unary_keywords: Comma-separated single-object events, e.g. "running".
        binary_keywords: Comma-separated pairwise events, e.g. "chasing".
        object_pairs: Comma-separated index pairs, e.g. "0-1,0-2".
        output_fps: Target FPS the pipeline processes at.

    Returns:
        Tuple of (annotated video path or None, summary dict of detected events).

    Raises:
        Exception: re-raised when the `vine_hf` package fails to import.
    """
    # Import lazily so the UI can come up before the heavy model package is
    # ready; failures are printed with a traceback and then re-raised.
    print("Starting vine_hf imports...")
    try:
        from vine_hf import VineConfig, VineModel, VinePipeline
        print("vine_hf imports successful!")
    except Exception as e:
        print(f"ERROR importing vine_hf: {e}")
        import traceback
        traceback.print_exc()
        raise

    # Normalize free-text widget inputs; empty entries ("a,,b") are dropped.
    categorical_keywords = _parse_keywords(categorical_keywords)
    unary_keywords = _parse_keywords(unary_keywords)
    binary_keywords = _parse_keywords(binary_keywords)
    object_pairs = _parse_object_pairs(object_pairs)

    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        model_name="openai/clip-vit-base-patch32",
        # Weights come from the HF repo; set use_hf_repo=False plus
        # local_dir/local_filename to load from a local path instead.
        use_hf_repo=True,
        model_repo="KevinX-Penn28/testing",
        box_threshold=0.35,
        text_threshold=0.25,
        target_fps=output_fps,
        topk_cate=5,
        visualization_dir=visualization_dir,
        visualize=True,
        debug_visualizations=False,
        device="cuda",
    )
    model = VineModel(config)

    vine_pipe = VinePipeline(
        model=model,
        tokenizer=None,
        sam_config_path=sam_config_path,
        sam_checkpoint_path=sam_checkpoint_path,
        gd_config_path=gd_config_path,
        gd_checkpoint_path=gd_checkpoint_path,
        device="cuda",
        trust_remote_code=True,
    )

    results = vine_pipe(
        inputs=video_file,
        categorical_keywords=categorical_keywords,
        unary_keywords=unary_keywords,
        binary_keywords=binary_keywords,
        object_pairs=object_pairs,
        segmentation_method="grounding_dino_sam2",
        return_top_k=5,
        include_visualizations=True,
        debug_visualizations=False,
        device="cuda",
    )

    # The pipeline may return a mapping or a sequence of mappings; normalize
    # to a single dict (empty when the shape is unexpected).
    if isinstance(results, Mapping):
        results_dict = results
    elif isinstance(results, Sequence) and results and isinstance(results[0], Mapping):
        results_dict = results[0]
    else:
        results_dict = {}

    # Drill into visualizations -> vine -> all -> video_path, tolerating
    # missing keys at every level.
    visualizations = results_dict.get("visualizations") or {}
    vine = visualizations.get("vine") or {}
    all_vis = vine.get("all") or {}
    result_video_path = all_vis.get("video_path")
    summary = results_dict.get("summary") or {}
    return result_video_path, summary

# Gradio UI: one video plus keyword text fields in; annotated video and a
# JSON event summary out.
_input_widgets = [
    gr.Video(label="Input Video"),
    gr.Textbox(label="Categorical Keywords (comma-separated)", placeholder="e.g., dog, cat, car"),
    gr.Textbox(label="Unary Keywords (comma-separated)", placeholder="e.g., running, jumping"),
    gr.Textbox(label="Binary Keywords (comma-separated)", placeholder="e.g., chasing, carrying"),
    gr.Textbox(label="Object Pairs (comma-separated indices)", placeholder="e.g., 0-1,0-2 for pairs of objects"),
    gr.Number(label="Output FPS (affects processing speed)", placeholder="5"),
]
_output_widgets = [
    gr.Video(label="Output Video with Annotations"),
    gr.JSON(label="Summary of Detected Events"),
]
demo = gr.Interface(fn=process_video, inputs=_input_widgets, outputs=_output_widgets)

# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    print("Got to main")
    demo.launch()
    
    
    
        


# Input: a video file path plus keyword strings.
# Output: the annotated video and a summary of detected events.