"""Gradio front-end for the VINE video-understanding pipeline.

Wires GroundingDINO + SAM2 segmentation together with a CLIP-based VINE
model (loaded from a Hugging Face repo) and exposes the whole thing as a
simple Gradio interface: upload a video plus keyword lists, get back an
annotated video and a summary of detected events.
"""

import importlib
import importlib.util
import os
import pathlib
import sys
from collections.abc import Mapping, Sequence
from pathlib import Path

import gradio as gr
import torch
# Kept for the (removed) optional remote-repo loading path; see note below.
from huggingface_hub import snapshot_download
# NOTE(review): `_console` is a private rich internal and appears unused —
# confirm nothing depends on the import side effect before removing.
from rich import _console
from transformers import AutoConfig, AutoModel, AutoTokenizer, pipeline

# --- Environment setup ----------------------------------------------------
# NOTE(review): dummy placeholder so libraries that require the env var do
# not crash at import time; replace with real secret management before
# deploying anywhere shared.
os.environ["OPENAI_API_KEY"] = "test"
os.environ["OMP_NUM_THREADS"] = "4"  # cap OpenMP threads for CPU-side ops

# --- Startup diagnostics --------------------------------------------------
print("All imports finished")
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"cuDNN version: {torch.backends.cudnn.version()}")
print(f"Number of GPUs: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f" Memory: {torch.cuda.get_device_properties(i).total_memory / 1e9:.2f} GB")

# Disable TF32 and pin the default dtype to float32 so results are
# bit-reproducible across GPU generations (TF32 silently lowers matmul
# precision on Ampere+).
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
os.environ['TORCH_DTYPE'] = 'float32'
torch.set_default_dtype(torch.float32)

# NOTE(review): a large commented-out alternative loading path (downloading
# the VINE repo via huggingface_hub.snapshot_download and registering the
# snapshot as an importable package named "vine_remote_repo") was removed
# from here; `vine_hf` is imported directly inside process_video() instead.
# Recover it from version control if the remote-repo path is ever needed.

# --- Model asset paths ----------------------------------------------------
# The leading "/" prepended to an already-absolute path produces "//...",
# which POSIX resolves the same as "/...". Presumably required by a
# downstream (hydra-style) config loader — TODO confirm before simplifying.
current_dir = Path(__file__).resolve().parent
sam_config_path = "/" + str(current_dir / "sam2_hiera_t.yaml")
sam_checkpoint_path = "/" + str(current_dir / "sam2_hiera_tiny.pt")
gd_config_path = "/" + str(current_dir / "GroundingDINO_SwinT_OGC.py")
gd_checkpoint_path = "/" + str(current_dir / "groundingdino_swint_ogc.pth")
visualization_dir = "/" + str(current_dir / "outputs")
print(f"Setting up paths: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")


def _split_csv(text):
    """Parse a comma-separated string into a list of stripped tokens.

    Returns [] for an empty/None input.
    """
    return [tok.strip() for tok in text.split(",")] if text else []


def _parse_pairs(text):
    """Parse a string like "0-1,0-2" into integer index pairs [(0, 1), (0, 2)].

    Returns [] for an empty/None input. Raises ValueError on malformed pairs.
    """
    return [tuple(map(int, pair.split("-"))) for pair in text.split(",")] if text else []


def process_video(video_file, categorical_keywords, unary_keywords,
                  binary_keywords, object_pairs, output_fps):
    """Run the VINE pipeline on one uploaded video.

    Args:
        video_file: Path to the input video (as handed over by gr.Video).
        categorical_keywords: Comma-separated object categories, e.g. "dog, cat".
        unary_keywords: Comma-separated unary predicates, e.g. "running".
        binary_keywords: Comma-separated binary predicates, e.g. "chasing".
        object_pairs: Comma-separated index pairs, e.g. "0-1,0-2".
        output_fps: Target FPS for processing; falls back to 5 when empty.

    Returns:
        Tuple of (annotated video path or None, summary dict).

    Raises:
        ImportError: if the `vine_hf` package is not installed.
    """
    print("Starting vine_hf imports...")
    try:
        # Imported lazily so the Gradio UI can come up even while the heavy
        # vine_hf dependency chain is still being debugged.
        from vine_hf import VineConfig, VineModel, VinePipeline
        print("vine_hf imports successful!")
    except Exception as e:
        print(f"ERROR importing vine_hf: {e}")
        import traceback
        traceback.print_exc()
        raise

    categorical_keywords = _split_csv(categorical_keywords)
    unary_keywords = _split_csv(unary_keywords)
    binary_keywords = _split_csv(binary_keywords)
    object_pairs = _parse_pairs(object_pairs)

    # gr.Number returns None when the field is left blank; fall back to the
    # advertised placeholder default rather than crashing downstream.
    if not output_fps:
        output_fps = 5

    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        model_name="openai/clip-vit-base-patch32",
        # Example: load from HF repo
        use_hf_repo=True,
        model_repo="KevinX-Penn28/testing",
        # Alternatively use a local path by setting use_hf_repo=False and
        # local_dir/local_filename.
        box_threshold=0.35,
        text_threshold=0.25,
        target_fps=output_fps,
        topk_cate=5,
        visualization_dir=visualization_dir,
        visualize=True,
        debug_visualizations=False,
        device="cuda",
    )
    model = VineModel(config)
    vine_pipe = VinePipeline(
        model=model,
        tokenizer=None,
        sam_config_path=sam_config_path,
        sam_checkpoint_path=sam_checkpoint_path,
        gd_config_path=gd_config_path,
        gd_checkpoint_path=gd_checkpoint_path,
        device="cuda",
        trust_remote_code=True,
    )
    results = vine_pipe(
        inputs=video_file,
        categorical_keywords=categorical_keywords,
        unary_keywords=unary_keywords,
        binary_keywords=binary_keywords,
        object_pairs=object_pairs,
        segmentation_method="grounding_dino_sam2",
        return_top_k=5,
        include_visualizations=True,
        debug_visualizations=False,
        device="cuda",
    )

    # Normalize the pipeline output: it may come back as a mapping or as a
    # sequence whose first element is the mapping.
    if isinstance(results, Mapping):
        results_dict = results
    elif isinstance(results, Sequence) and results and isinstance(results[0], Mapping):
        results_dict = results[0]
    else:
        results_dict = {}

    # Dig out the annotated-video path; every level defaults to {} so a
    # missing key degrades to (None, {}) instead of raising.
    visualizations = results_dict.get("visualizations") or {}
    vine = visualizations.get("vine") or {}
    all_vis = vine.get("all") or {}
    result_video_path = all_vis.get("video_path")
    summary = results_dict.get("summary") or {}
    return result_video_path, summary


# Input: a video file plus keyword strings; output: the annotated video and
# a JSON summary of detected events.
demo = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Input Video"),
        gr.Textbox(label="Categorical Keywords (comma-separated)", placeholder="e.g., dog, cat, car"),
        gr.Textbox(label="Unary Keywords (comma-separated)", placeholder="e.g., running, jumping"),
        gr.Textbox(label="Binary Keywords (comma-separated)", placeholder="e.g., chasing, carrying"),
        gr.Textbox(label="Object Pairs (comma-separated indices)", placeholder="e.g., 0-1,0-2 for pairs of objects"),
        gr.Number(label="Output FPS (affects processing speed)", placeholder="5"),
    ],
    outputs=[
        gr.Video(label="Output Video with Annotations"),
        gr.JSON(label="Summary of Detected Events"),
    ],
)

if __name__ == "__main__":
    print("Got to main")
    demo.launch()