File size: 7,778 Bytes
8e9c5be
 
 
 
 
2971610
8e9c5be
044d0aa
8e9c5be
 
 
 
5482a01
8e9c5be
67e72f7
044d0aa
 
 
 
 
 
 
 
 
 
 
67e72f7
3023aa7
 
 
 
 
 
 
b0da3be
 
 
4a652b7
b0da3be
 
4a652b7
b0da3be
 
 
 
4a652b7
b0da3be
390df97
b0da3be
 
 
 
 
 
 
4a652b7
b0da3be
390df97
b0da3be
 
390df97
b0da3be
 
 
 
 
 
 
 
 
4a652b7
b0da3be
 
 
 
 
4a652b7
b0da3be
 
 
4a652b7
2971610
 
 
 
 
 
67e72f7
 
2971610
b0da3be
 
 
 
 
 
38c755c
b0da3be
4a652b7
8e9c5be
5482a01
 
 
 
 
 
 
 
 
044d0aa
 
2971610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e9c5be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2971610
f3e3f11
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# --- Imports ---------------------------------------------------------------
# Standard library
from pathlib import Path
from collections.abc import Mapping, Sequence
import importlib
import importlib.util
# Third-party: UI, model hub, and inference stack
import gradio as gr
from rich import _console  # NOTE(review): private name from rich, unused below — confirm and remove
from transformers import AutoModel, AutoTokenizer, AutoConfig, pipeline
import torch 
from huggingface_hub import snapshot_download
import sys, pathlib
import os
# NOTE(review): placeholder API key hard-coded in source — presumably to satisfy
# a client that requires the variable to exist; verify before deploying.
os.environ["OPENAI_API_KEY"] = "test"
# Cap OpenMP threads used by native libs (BLAS/torch CPU ops).
os.environ["OMP_NUM_THREADS"] = "4"

# Report interpreter / framework versions and CUDA capabilities at startup.
_report_lines = [
    "All imports finished",
    f"Python version: {sys.version}",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    f"CUDA version: {torch.version.cuda}",
    f"cuDNN version: {torch.backends.cudnn.version()}",
    f"Number of GPUs: {torch.cuda.device_count()}",
]
for _line in _report_lines:
    print(_line)

# Per-GPU details (name + total memory in GB) when CUDA is present.
if torch.cuda.is_available():
    for _gpu in range(torch.cuda.device_count()):
        print(f"GPU {_gpu}: {torch.cuda.get_device_name(_gpu)}")
        print(f"  Memory: {torch.cuda.get_device_properties(_gpu).total_memory / 1e9:.2f} GB")

# Force full float32 precision: disable TF32 fast paths on Ampere+ GPUs and
# advertise the dtype choice via the environment.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
os.environ['TORCH_DTYPE'] = 'float32'

# Make float32 the default dtype for newly created tensors.
torch.set_default_dtype(torch.float32)

# # 1) Download the repo to a local cache dir
# print("Downloading remote vine repo...")
# repo_dir = snapshot_download(repo_id="KevinX-Penn28/testing", revision="main")

# # 2) Register the snapshot as an importable package
# VINE_PACKAGE = "vine_remote_repo"

# # Drop stale modules in case the script reloads
# for module_name in list(sys.modules):
#     if module_name == VINE_PACKAGE or module_name.startswith(f"{VINE_PACKAGE}."):
#         del sys.modules[module_name]

# print("Dropped stale modules and registering vine package...")

# package_spec = importlib.util.spec_from_file_location(
#     VINE_PACKAGE,
#     Path(repo_dir) / "__init__.py",
#     submodule_search_locations=[str(repo_dir)],
# )
# if not package_spec or not package_spec.loader:
#     raise ImportError(f"Cannot create package spec for {VINE_PACKAGE} at {repo_dir}")

# print("Created package spec, loading module...")

# package_module = importlib.util.module_from_spec(package_spec)
# sys.modules[VINE_PACKAGE] = package_module

# try:
#     print("Executing module...")
#     package_spec.loader.exec_module(package_module)
#     print("Module executed successfully!")
# except Exception as e:
#     print(f"ERROR during module execution: {e}")
#     import traceback
#     traceback.print_exc()
#     raise

# # 3) Import and use via the registered package
# print("Importing vine modules...")
# vine_config_module = importlib.import_module(f"{VINE_PACKAGE}.vine_config")
# vine_model_module = importlib.import_module(f"{VINE_PACKAGE}.vine_model")
# vine_pipeline_module = importlib.import_module(f"{VINE_PACKAGE}.vine_pipeline")

# VineConfig = vine_config_module.VineConfig              # your config class
# VineModel = vine_model_module.VineModel                 # your model class
# VinePipeline = vine_pipeline_module.VinePipeline        

# Resolve model/config asset paths next to this script.
# NOTE(review): current_dir is already absolute, so the prepended "/" produces a
# double leading slash (e.g. "//home/..."); POSIX resolves that to the same
# path, but confirm the prefix is intentional before simplifying.
current_dir = Path(__file__).resolve().parent

def _abs_asset(filename):
    """Return the absolute path string for an asset located beside this file."""
    return "/" + str(current_dir / filename)

sam_config_path = _abs_asset("sam2_hiera_t.yaml")
sam_checkpoint_path = _abs_asset("sam2_hiera_tiny.pt")
gd_config_path = _abs_asset("GroundingDINO_SwinT_OGC.py")
gd_checkpoint_path = _abs_asset("groundingdino_swint_ogc.pth")
visualization_dir = _abs_asset("outputs")
print(f"Setting up paths: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")


# # current_dir = Path.cwd()
# # sam_config_path = "/" + str(current_dir / "sam2_hiera_t.yaml")
# # sam_checkpoint_path = "/" + str(current_dir / "sam2_hiera_tiny.pt")
# # gd_config_path = "/" + str(current_dir / "GroundingDINO_SwinT_OGC.py")
# # gd_checkpoint_path = "/" + str(current_dir / "groundingdino_swint_ogc.pth")
# # visualization_dir = "/" + str(current_dir / "outputs")

# print(f"Setting up paths done: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")

def _parse_keywords(raw):
    """Split a comma-separated keyword string into stripped, non-empty items."""
    if not raw:
        return []
    return [kw.strip() for kw in raw.split(",") if kw.strip()]


def _parse_object_pairs(raw):
    """Parse a string like "0-1,0-2" into a list of (int, int) index tuples."""
    if not raw:
        return []
    return [tuple(map(int, pair.split("-"))) for pair in raw.split(",") if pair.strip()]


def process_video(video_file, categorical_keywords, unary_keywords, binary_keywords, object_pairs, output_fps):
    """Run the VINE detection pipeline on a video and return its outputs.

    Args:
        video_file: Path to the input video (as handed over by the Gradio widget).
        categorical_keywords: Comma-separated object categories, e.g. "dog, cat".
        unary_keywords: Comma-separated single-object events, e.g. "running".
        binary_keywords: Comma-separated pairwise events, e.g. "chasing".
        object_pairs: Comma-separated index pairs, e.g. "0-1,0-2".
        output_fps: Target FPS the pipeline processes at.

    Returns:
        Tuple of (annotated video path or None, summary dict of detected events).

    Raises:
        Exception: re-raised when the `vine_hf` package fails to import.
    """
    # Import lazily so the UI can come up before the heavy model package is
    # ready; failures are printed with a traceback and then re-raised.
    print("Starting vine_hf imports...")
    try:
        from vine_hf import VineConfig, VineModel, VinePipeline
        print("vine_hf imports successful!")
    except Exception as e:
        print(f"ERROR importing vine_hf: {e}")
        import traceback
        traceback.print_exc()
        raise

    # Normalize free-text widget inputs; empty entries ("a,,b") are dropped.
    categorical_keywords = _parse_keywords(categorical_keywords)
    unary_keywords = _parse_keywords(unary_keywords)
    binary_keywords = _parse_keywords(binary_keywords)
    object_pairs = _parse_object_pairs(object_pairs)

    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        model_name="openai/clip-vit-base-patch32",
        # Weights come from the HF repo; set use_hf_repo=False plus
        # local_dir/local_filename to load from a local path instead.
        use_hf_repo=True,
        model_repo="KevinX-Penn28/testing",
        box_threshold=0.35,
        text_threshold=0.25,
        target_fps=output_fps,
        topk_cate=5,
        visualization_dir=visualization_dir,
        visualize=True,
        debug_visualizations=False,
        device="cuda",
    )
    model = VineModel(config)

    vine_pipe = VinePipeline(
        model=model,
        tokenizer=None,
        sam_config_path=sam_config_path,
        sam_checkpoint_path=sam_checkpoint_path,
        gd_config_path=gd_config_path,
        gd_checkpoint_path=gd_checkpoint_path,
        device="cuda",
        trust_remote_code=True,
    )

    results = vine_pipe(
        inputs=video_file,
        categorical_keywords=categorical_keywords,
        unary_keywords=unary_keywords,
        binary_keywords=binary_keywords,
        object_pairs=object_pairs,
        segmentation_method="grounding_dino_sam2",
        return_top_k=5,
        include_visualizations=True,
        debug_visualizations=False,
        device="cuda",
    )

    # The pipeline may return a mapping or a sequence of mappings; normalize
    # to a single dict (empty when the shape is unexpected).
    if isinstance(results, Mapping):
        results_dict = results
    elif isinstance(results, Sequence) and results and isinstance(results[0], Mapping):
        results_dict = results[0]
    else:
        results_dict = {}

    # Drill into visualizations -> vine -> all -> video_path, tolerating
    # missing keys at every level.
    visualizations = results_dict.get("visualizations") or {}
    vine = visualizations.get("vine") or {}
    all_vis = vine.get("all") or {}
    result_video_path = all_vis.get("video_path")
    summary = results_dict.get("summary") or {}
    return result_video_path, summary

# Gradio UI: one video plus keyword text fields in; annotated video and a
# JSON event summary out.
_input_widgets = [
    gr.Video(label="Input Video"),
    gr.Textbox(label="Categorical Keywords (comma-separated)", placeholder="e.g., dog, cat, car"),
    gr.Textbox(label="Unary Keywords (comma-separated)", placeholder="e.g., running, jumping"),
    gr.Textbox(label="Binary Keywords (comma-separated)", placeholder="e.g., chasing, carrying"),
    gr.Textbox(label="Object Pairs (comma-separated indices)", placeholder="e.g., 0-1,0-2 for pairs of objects"),
    gr.Number(label="Output FPS (affects processing speed)", placeholder="5"),
]
_output_widgets = [
    gr.Video(label="Output Video with Annotations"),
    gr.JSON(label="Summary of Detected Events"),
]
demo = gr.Interface(fn=process_video, inputs=_input_widgets, outputs=_output_widgets)

# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    print("Got to main")
    demo.launch()
    
    
    
        


# Input: a video file path plus keyword strings.
# Output: the annotated video and a summary of detected events.