# testingVine / app.py
# KevinX-Penn28's picture
# Update app.py
# 3023aa7 verified
from pathlib import Path
from collections.abc import Mapping, Sequence
import importlib
import importlib.util
import gradio as gr
from rich import _console
from transformers import AutoModel, AutoTokenizer, AutoConfig, pipeline
import torch
from huggingface_hub import snapshot_download
import sys, pathlib
import os
# --- Process environment ------------------------------------------------------
# Export a placeholder OpenAI key and an OpenMP thread count.
# NOTE(review): torch is already imported above this point, so OMP_NUM_THREADS
# may be read too late to influence it -- confirm.
os.environ.update({"OPENAI_API_KEY": "test", "OMP_NUM_THREADS": "4"})
print("All imports finished")

# --- Startup diagnostics ------------------------------------------------------
# Log interpreter, PyTorch and CUDA details once at import time.
print("Python version: {}".format(sys.version))
print("PyTorch version: {}".format(torch.__version__))
print("CUDA available: {}".format(torch.cuda.is_available()))
print("CUDA version: {}".format(torch.version.cuda))
print("cuDNN version: {}".format(torch.backends.cudnn.version()))
print("Number of GPUs: {}".format(torch.cuda.device_count()))
if torch.cuda.is_available():
    # One name line and one memory line (in GB, i.e. bytes / 1e9) per device.
    for i in range(torch.cuda.device_count()):
        device_name = torch.cuda.get_device_name(i)
        print("GPU {}: {}".format(i, device_name))
        total_gb = torch.cuda.get_device_properties(i).total_memory / 1e9
        print(" Memory: {:.2f} GB".format(total_gb))

# --- Numeric precision --------------------------------------------------------
# Turn TF32 off for both CUDA matmul and cuDNN, and make float32 the default
# dtype for newly created floating-point tensors.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
os.environ['TORCH_DTYPE'] = 'float32'
torch.set_default_dtype(torch.float32)
# # 1) Download the repo to a local cache dir
# print("Downloading remote vine repo...")
# repo_dir = snapshot_download(repo_id="KevinX-Penn28/testing", revision="main")
# # 2) Register the snapshot as an importable package
# VINE_PACKAGE = "vine_remote_repo"
# # Drop stale modules in case the script reloads
# for module_name in list(sys.modules):
# if module_name == VINE_PACKAGE or module_name.startswith(f"{VINE_PACKAGE}."):
# del sys.modules[module_name]
# print("Dropped stale modules and registering vine package...")
# package_spec = importlib.util.spec_from_file_location(
# VINE_PACKAGE,
# Path(repo_dir) / "__init__.py",
# submodule_search_locations=[str(repo_dir)],
# )
# if not package_spec or not package_spec.loader:
# raise ImportError(f"Cannot create package spec for {VINE_PACKAGE} at {repo_dir}")
# print("Created package spec, loading module...")
# package_module = importlib.util.module_from_spec(package_spec)
# sys.modules[VINE_PACKAGE] = package_module
# try:
# print("Executing module...")
# package_spec.loader.exec_module(package_module)
# print("Module executed successfully!")
# except Exception as e:
# print(f"ERROR during module execution: {e}")
# import traceback
# traceback.print_exc()
# raise
# # 3) Import and use via the registered package
# print("Importing vine modules...")
# vine_config_module = importlib.import_module(f"{VINE_PACKAGE}.vine_config")
# vine_model_module = importlib.import_module(f"{VINE_PACKAGE}.vine_model")
# vine_pipeline_module = importlib.import_module(f"{VINE_PACKAGE}.vine_pipeline")
# VineConfig = vine_config_module.VineConfig # your config class
# VineModel = vine_model_module.VineModel # your model class
# VinePipeline = vine_pipeline_module.VinePipeline
# Model assets and the output folder are addressed relative to the directory
# that contains this script.
current_dir = Path(__file__).resolve().parent

# Every path string is the resolved location with one extra "/" prepended.
# NOTE(review): current_dir is already absolute, so on POSIX these strings
# start with two slashes; presumably intentional for a downstream config
# loader -- confirm before simplifying.
sam_config_path = f"/{current_dir / 'sam2_hiera_t.yaml'}"
sam_checkpoint_path = f"/{current_dir / 'sam2_hiera_tiny.pt'}"
gd_config_path = f"/{current_dir / 'GroundingDINO_SwinT_OGC.py'}"
gd_checkpoint_path = f"/{current_dir / 'groundingdino_swint_ogc.pth'}"
visualization_dir = f"/{current_dir / 'outputs'}"
print(f"Setting up paths: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")
# # current_dir = Path.cwd()
# # sam_config_path = "/" + str(current_dir / "sam2_hiera_t.yaml")
# # sam_checkpoint_path = "/" + str(current_dir / "sam2_hiera_tiny.pt")
# # gd_config_path = "/" + str(current_dir / "GroundingDINO_SwinT_OGC.py")
# # gd_checkpoint_path = "/" + str(current_dir / "groundingdino_swint_ogc.pth")
# # visualization_dir = "/" + str(current_dir / "outputs")
# print(f"Setting up paths done: {sam_config_path}, {sam_checkpoint_path}, {gd_config_path}, {gd_checkpoint_path}")
def _split_keywords(raw):
    """Split comma-separated text into a list of stripped, non-empty tokens."""
    if not raw:
        return []
    stripped = (token.strip() for token in raw.split(","))
    return [token for token in stripped if token]


def _parse_object_pairs(raw):
    """Parse text such as ``"0-1,0-2"`` into a list of ``(int, int)`` tuples.

    Raises:
        ValueError: if an entry is not exactly two integers joined by ``-``.
    """
    pairs = []
    for entry in _split_keywords(raw):
        first, dash, second = entry.partition("-")
        try:
            if not dash:
                raise ValueError(entry)
            pairs.append((int(first), int(second)))
        except ValueError:
            raise ValueError(
                f"Invalid object pair {entry!r}; expected two integer indices such as '0-1'."
            ) from None
    return pairs


def process_video(video_file, categorical_keywords, unary_keywords, binary_keywords, object_pairs, output_fps):
    """Run the VINE pipeline on one video and return its visualization and summary.

    Args:
        video_file: Path of the input video handed over by the UI.
        categorical_keywords: Comma-separated keyword text (may be empty/None).
        unary_keywords: Comma-separated keyword text (may be empty/None).
        binary_keywords: Comma-separated keyword text (may be empty/None).
        object_pairs: Comma-separated index pairs such as ``"0-1,0-2"``
            (may be empty/None).
        output_fps: Forwarded unchanged to ``VineConfig`` as ``target_fps``.

    Returns:
        A ``(result_video_path, summary)`` tuple. ``result_video_path`` is read
        from ``results["visualizations"]["vine"]["all"]["video_path"]`` and is
        ``None`` when any level of that structure is missing; ``summary`` is
        ``results["summary"]`` or an empty dict.

    Raises:
        ValueError: if no video was supplied or ``object_pairs`` is malformed.
        Exception: whatever the ``vine_hf`` import raises is logged and
            re-raised unchanged.
    """
    # Validate and normalise user input first so that bad requests fail fast,
    # before the vine_hf import and the model construction below.
    if not video_file:
        raise ValueError("No input video was provided.")
    categorical_keywords = _split_keywords(categorical_keywords)
    unary_keywords = _split_keywords(unary_keywords)
    binary_keywords = _split_keywords(binary_keywords)
    object_pairs = _parse_object_pairs(object_pairs)

    # vine_hf is imported lazily, on the first request rather than at startup.
    print("Starting vine_hf imports...")
    try:
        from vine_hf import VineConfig, VineModel, VinePipeline
        print("vine_hf imports successful!")
    except Exception as e:
        # Log the full traceback to the console, then propagate the failure.
        print(f"ERROR importing vine_hf: {e}")
        import traceback
        traceback.print_exc()
        raise

    config = VineConfig(
        segmentation_method="grounding_dino_sam2",
        model_name="openai/clip-vit-base-patch32",
        # Example: load from HF repo
        use_hf_repo=True,
        model_repo="KevinX-Penn28/testing",
        # Alternatively use a local path by setting use_hf_repo=False and local_dir/local_filename
        box_threshold=0.35,
        text_threshold=0.25,
        target_fps=output_fps,
        topk_cate=5,
        visualization_dir=visualization_dir,
        visualize=True,
        debug_visualizations=False,
        device="cuda",
    )
    # NOTE(review): the model and pipeline are rebuilt on every call; caching
    # them may be worthwhile if construction is expensive -- confirm.
    model = VineModel(config)
    vine_pipe = VinePipeline(
        model=model,
        tokenizer=None,
        sam_config_path=sam_config_path,
        sam_checkpoint_path=sam_checkpoint_path,
        gd_config_path=gd_config_path,
        gd_checkpoint_path=gd_checkpoint_path,
        device="cuda",
        trust_remote_code=True,
    )
    results = vine_pipe(
        inputs=video_file,
        categorical_keywords=categorical_keywords,
        unary_keywords=unary_keywords,
        binary_keywords=binary_keywords,
        object_pairs=object_pairs,
        segmentation_method="grounding_dino_sam2",
        return_top_k=5,
        include_visualizations=True,
        debug_visualizations=False,
        device="cuda",
    )

    # The pipeline result is accepted either as a mapping or as a non-empty
    # sequence whose first element is a mapping; anything else counts as empty.
    if isinstance(results, Mapping):
        results_dict = results
    elif isinstance(results, Sequence) and results and isinstance(results[0], Mapping):
        results_dict = results[0]
    else:
        results_dict = {}

    # Walk the nested visualization structure defensively: each level may be
    # absent or None, in which case the video path ends up as None.
    visualizations = results_dict.get("visualizations") or {}
    vine = visualizations.get("vine") or {}
    all_vis = vine.get("all") or {}
    result_video_path = all_vis.get("video_path")
    summary = results_dict.get("summary") or {}
    return result_video_path, summary
# Input widgets, listed in the same order as process_video's parameters:
# video, three keyword text boxes, the object-pair text box, and the FPS number.
_demo_inputs = [
    gr.Video(label="Input Video"),
    gr.Textbox(
        label="Categorical Keywords (comma-separated)",
        placeholder="e.g., dog, cat, car",
    ),
    gr.Textbox(
        label="Unary Keywords (comma-separated)",
        placeholder="e.g., running, jumping",
    ),
    gr.Textbox(
        label="Binary Keywords (comma-separated)",
        placeholder="e.g., chasing, carrying",
    ),
    gr.Textbox(
        label="Object Pairs (comma-separated indices)",
        placeholder="e.g., 0-1,0-2 for pairs of objects",
    ),
    gr.Number(label="Output FPS (affects processing speed)", placeholder="5"),
]

# Output widgets, matching process_video's (video path, summary) return value.
_demo_outputs = [
    gr.Video(label="Output Video with Annotations"),
    gr.JSON(label="Summary of Detected Events"),
]

# The Gradio interface that exposes process_video as a web form.
demo = gr.Interface(fn=process_video, inputs=_demo_inputs, outputs=_demo_outputs)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    print("Got to main")
    demo.launch()
# Input: a video file path plus the keyword strings.
# Output: the annotated video and a summary of detected events.