# Hugging Face Space header (scrape residue) — "Spaces: Paused, Paused"
# --- Imports and runtime environment setup ---------------------------------
import os
import sys

# OMP_NUM_THREADS only reliably affects OpenMP-backed libraries (torch, numpy)
# when it is exported *before* they are imported and initialise their thread
# pools — so set it ahead of the heavyweight imports below.
os.environ["OMP_NUM_THREADS"] = "4"
# NOTE(review): hard-coded placeholder key — presumably a dependency only
# checks that the variable exists. Replace with a real secret mechanism
# (e.g. Space secrets) before production use.
os.environ["OPENAI_API_KEY"] = "test"

import importlib
import importlib.util
import pathlib
from collections.abc import Mapping, Sequence
from pathlib import Path

import gradio as gr
import torch
from huggingface_hub import snapshot_download
from rich import _console  # NOTE(review): private name, unused here — confirm it is needed
from transformers import AutoConfig, AutoModel, AutoTokenizer, pipeline

# Startup diagnostics: interpreter / CUDA environment summary.
print("All imports finished")
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")  # None on CPU-only builds
print(f"cuDNN version: {torch.backends.cudnn.version()}")  # None on CPU-only builds
print(f"Number of GPUs: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f" Memory: {torch.cuda.get_device_properties(i).total_memory / 1e9:.2f} GB")

# Force strict float32 numerics: disable TF32 fast paths so GPU matmul/cuDNN
# results match full-precision float32.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
os.environ["TORCH_DTYPE"] = "float32"
# Set default dtype
torch.set_default_dtype(torch.float32)
| # # 1) Download the repo to a local cache dir | |
| # print("Downloading remote vine repo...") | |
| # repo_dir = snapshot_download(repo_id="KevinX-Penn28/testing", revision="main") | |
| # # 2) Register the snapshot as an importable package | |
| # VINE_PACKAGE = "vine_remote_repo" | |
| # # Drop stale modules in case the script reloads | |
| # for module_name in list(sys.modules): | |
| # if module_name == VINE_PACKAGE or module_name.startswith(f"{VINE_PACKAGE}."): | |
| # del sys.modules[module_name] | |
| # print("Dropped stale modules and registering vine package...") | |
| # package_spec = importlib.util.spec_from_file_location( | |
| # VINE_PACKAGE, | |
| # Path(repo_dir) / "__init__.py", | |
| # submodule_search_locations=[str(repo_dir)], | |
| # ) | |
| # if not package_spec or not package_spec.loader: | |
| # raise ImportError(f"Cannot create package spec for {VINE_PACKAGE} at {repo_dir}") | |
| # print("Created package spec, loading module...") | |
| # package_module = importlib.util.module_from_spec(package_spec) | |
| # sys.modules[VINE_PACKAGE] = package_module | |
| # try: | |
| # print("Executing module...") | |
| # package_spec.loader.exec_module(package_module) | |
| # print("Module executed successfully!") | |
| # except Exception as e: | |
| # print(f"ERROR during module execution: {e}") | |
| # import traceback | |
| # traceback.print_exc() | |
| # raise | |
| # # 3) Import and use via the registered package | |
| # print("Importing vine modules...") | |
| # vine_config_module = importlib.import_module(f"{VINE_PACKAGE}.vine_config") | |
| # vine_model_module = importlib.import_module(f"{VINE_PACKAGE}.vine_model") | |
| # vine_pipeline_module = importlib.import_module(f"{VINE_PACKAGE}.vine_pipeline") | |
| # VineConfig = vine_config_module.VineConfig # your config class | |
| # VineModel = vine_model_module.VineModel # your model class | |
| # VinePipeline = vine_pipeline_module.VinePipeline | |
# Resolve model/config asset paths relative to this script's directory.
# NOTE(review): current_dir is already absolute, so the prepended "/" yields a
# double slash ("//..."), which POSIX tolerates — presumably required by the
# downstream (hydra-style) config loader; confirm before "fixing".
current_dir = Path(__file__).resolve().parent


def _asset(name):
    """Return the path of *name* under the script directory, with a leading '/'."""
    return "/" + str(current_dir / name)


sam_config_path = _asset("sam2_hiera_t.yaml")
sam_checkpoint_path = _asset("sam2_hiera_tiny.pt")
gd_config_path = _asset("GroundingDINO_SwinT_OGC.py")
gd_checkpoint_path = _asset("groundingdino_swint_ogc.pth")
visualization_dir = _asset("outputs")
print(f"Setting up paths: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")
| # # current_dir = Path.cwd() | |
| # # sam_config_path = "/" + str(current_dir / "sam2_hiera_t.yaml") | |
| # # sam_checkpoint_path = "/" + str(current_dir / "sam2_hiera_tiny.pt") | |
| # # gd_config_path = "/" + str(current_dir / "GroundingDINO_SwinT_OGC.py") | |
| # # gd_checkpoint_path = "/" + str(current_dir / "groundingdino_swint_ogc.pth") | |
| # # visualization_dir = "/" + str(current_dir / "outputs") | |
| # print(f"Setting up paths done: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}") | |
def _split_csv(raw):
    """Split a comma-separated string into stripped, non-empty keywords."""
    return [kw.strip() for kw in raw.split(",") if kw.strip()] if raw else []


def _parse_pairs(raw):
    """Parse '0-1,0-2' into [(0, 1), (0, 2)]; empty/None input -> []."""
    if not raw:
        return []
    return [tuple(map(int, pair.split("-"))) for pair in raw.split(",")]


def process_video(video_file, categorical_keywords, unary_keywords, binary_keywords, object_pairs, output_fps):
    """Run the VINE pipeline on a video and return the annotated video + summary.

    Parameters
    ----------
    video_file : str
        Path to the input video (as delivered by the Gradio ``Video`` widget).
    categorical_keywords, unary_keywords, binary_keywords : str
        Comma-separated keyword lists; blank entries are ignored.
    object_pairs : str
        Comma-separated ``i-j`` index pairs, e.g. ``"0-1,0-2"``.
    output_fps : float
        Target processing FPS passed to the pipeline config.

    Returns
    -------
    tuple
        ``(result_video_path, summary)`` — the annotated video path (or None
        if the pipeline produced no visualization) and a summary dict.

    Raises
    ------
    Exception
        Re-raises any failure from importing ``vine_hf``.
    """
    # vine_hf is imported lazily so the Gradio UI can start even while the
    # heavyweight model stack is still being provisioned.
    print("Starting vine_hf imports...")
    try:
        from vine_hf import VineConfig, VineModel, VinePipeline
        print("vine_hf imports successful!")
    except Exception as e:
        print(f"ERROR importing vine_hf: {e}")
        import traceback
        traceback.print_exc()
        raise

    categorical_keywords = _split_csv(categorical_keywords)
    unary_keywords = _split_csv(unary_keywords)
    binary_keywords = _split_csv(binary_keywords)
    object_pairs = _parse_pairs(object_pairs)
    # (A previously-built `inputs` dict was dead code — the pipeline is called
    # with `inputs=video_file` below — so it has been removed.)

    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        model_name="openai/clip-vit-base-patch32",
        # Example: load from HF repo
        use_hf_repo=True,
        model_repo="KevinX-Penn28/testing",
        # Alternatively use a local path by setting use_hf_repo=False and local_dir/local_filename
        box_threshold=0.35,
        text_threshold=0.25,
        target_fps=output_fps,
        topk_cate=5,
        visualization_dir=visualization_dir,
        visualize=True,
        debug_visualizations=False,
        device="cuda",
    )
    model = VineModel(config)
    vine_pipe = VinePipeline(
        model=model,
        tokenizer=None,
        sam_config_path=sam_config_path,
        sam_checkpoint_path=sam_checkpoint_path,
        gd_config_path=gd_config_path,
        gd_checkpoint_path=gd_checkpoint_path,
        device="cuda",
        trust_remote_code=True,
    )
    results = vine_pipe(
        inputs=video_file,
        categorical_keywords=categorical_keywords,
        unary_keywords=unary_keywords,
        binary_keywords=binary_keywords,
        object_pairs=object_pairs,
        segmentation_method="grounding_dino_sam2",
        return_top_k=5,
        include_visualizations=True,
        debug_visualizations=False,
        device="cuda",
    )

    # The pipeline may return a mapping or a sequence of mappings; normalise
    # to a single dict (empty if the shape is unexpected).
    if isinstance(results, Mapping):
        results_dict = results
    elif isinstance(results, Sequence) and results and isinstance(results[0], Mapping):
        results_dict = results[0]
    else:
        results_dict = {}

    # Dig out the "all predicates" visualization video path, defensively —
    # any missing level collapses to {} / None rather than raising.
    visualizations = results_dict.get("visualizations") or {}
    vine = visualizations.get("vine") or {}
    all_vis = vine.get("all") or {}
    result_video_path = all_vis.get("video_path")
    summary = results_dict.get("summary") or {}
    return result_video_path, summary
# Gradio UI: one video plus keyword/FPS controls in; annotated video + JSON out.
_input_widgets = [
    gr.Video(label="Input Video"),
    gr.Textbox(label="Categorical Keywords (comma-separated)", placeholder="e.g., dog, cat, car"),
    gr.Textbox(label="Unary Keywords (comma-separated)", placeholder="e.g., running, jumping"),
    gr.Textbox(label="Binary Keywords (comma-separated)", placeholder="e.g., chasing, carrying"),
    gr.Textbox(label="Object Pairs (comma-separated indices)", placeholder="e.g., 0-1,0-2 for pairs of objects"),
    gr.Number(label="Output FPS (affects processing speed)", placeholder="5"),
]
_output_widgets = [
    gr.Video(label="Output Video with Annotations"),
    gr.JSON(label="Summary of Detected Events"),
]
demo = gr.Interface(fn=process_video, inputs=_input_widgets, outputs=_output_widgets)
# Script entry point: launch the Gradio server.
# Input: a video file path and keyword strings; output: annotated video + summary.
if __name__ == "__main__":
    print("Got to main")
    demo.launch()