Spaces:
Sleeping
Sleeping
Commit ·
d0dc8d3
1
Parent(s): b7dcf66
fix: update Dockerfile and requirements for video processing dependencies; enhance preprocess_video function
Browse files
- Dockerfile +9 -1
- model.py +16 -19
- requirements.txt +3 -2
Dockerfile
CHANGED
|
@@ -1,9 +1,17 @@
|
|
| 1 |
FROM python:3.10
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
COPY requirements.txt .
|
| 6 |
-
|
|
|
|
| 7 |
|
| 8 |
COPY . .
|
| 9 |
|
|
|
|
| 1 |
FROM python:3.10
|
| 2 |
|
| 3 |
+
# System dependencies for video/image processing
|
| 4 |
+
RUN apt-get update && apt-get install -y \
|
| 5 |
+
libgl1-mesa-glx \
|
| 6 |
+
libglib2.0-0 \
|
| 7 |
+
ffmpeg \
|
| 8 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
+
|
| 10 |
WORKDIR /app
|
| 11 |
|
| 12 |
COPY requirements.txt .
|
| 13 |
+
# Use --upgrade so pip installs the newest releases that requirements.txt allows (numpy<2.0.0, torch>=2.4.0)
|
| 14 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 15 |
|
| 16 |
COPY . .
|
| 17 |
|
model.py
CHANGED
|
@@ -65,17 +65,12 @@ def load_model():
|
|
| 65 |
model.eval()
|
| 66 |
return model
|
| 67 |
|
| 68 |
-
def preprocess_video(video_bytes: bytes):
|
| 69 |
-
"""Preprocessing logic utilizing VivitImageProcessor and Decord"""
|
| 70 |
set_bridge("torch")
|
| 71 |
-
|
| 72 |
-
# Save bytes to temporary file for decord VideoReader
|
| 73 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
|
| 74 |
f.write(video_bytes)
|
| 75 |
tmp_path = f.name
|
| 76 |
-
|
| 77 |
try:
|
| 78 |
-
# Manual processor configuration from your notebook
|
| 79 |
image_processor = VivitImageProcessor(
|
| 80 |
do_resize=True,
|
| 81 |
size={"shortest_edge": 224},
|
|
@@ -87,24 +82,26 @@ def preprocess_video(video_bytes: bytes):
|
|
| 87 |
image_mean=[0.5, 0.5, 0.5],
|
| 88 |
image_std=[0.5, 0.5, 0.5],
|
| 89 |
)
|
| 90 |
-
|
| 91 |
vr = VideoReader(tmp_path)
|
| 92 |
-
# Ensure we get exactly CLIP_LENGTH frames
|
| 93 |
total_frames = len(vr)
|
| 94 |
-
indices = list(range(min(total_frames,
|
| 95 |
-
if len(indices) <
|
| 96 |
-
|
| 97 |
-
indices += [indices[-1]] * (CLIP_LENGTH - len(indices))
|
| 98 |
|
| 99 |
-
video
|
| 100 |
-
|
| 101 |
-
video =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
pixel_values =
|
| 105 |
-
pixel_values = pixel_values.permute(1, 0, 2, 3) # Permute to (C, T, H, W)
|
| 106 |
|
| 107 |
-
return pixel_values.unsqueeze(0)
|
| 108 |
finally:
|
| 109 |
if os.path.exists(tmp_path):
|
| 110 |
os.remove(tmp_path)
|
|
|
|
| 65 |
model.eval()
|
| 66 |
return model
|
| 67 |
|
| 68 |
+
def preprocess_video(video_bytes: bytes, clip_length: int = 16):
|
|
|
|
| 69 |
set_bridge("torch")
|
|
|
|
|
|
|
| 70 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
|
| 71 |
f.write(video_bytes)
|
| 72 |
tmp_path = f.name
|
|
|
|
| 73 |
try:
|
|
|
|
| 74 |
image_processor = VivitImageProcessor(
|
| 75 |
do_resize=True,
|
| 76 |
size={"shortest_edge": 224},
|
|
|
|
| 82 |
image_mean=[0.5, 0.5, 0.5],
|
| 83 |
image_std=[0.5, 0.5, 0.5],
|
| 84 |
)
|
|
|
|
| 85 |
vr = VideoReader(tmp_path)
|
|
|
|
| 86 |
total_frames = len(vr)
|
| 87 |
+
indices = list(range(min(total_frames, clip_length)))
|
| 88 |
+
if len(indices) < clip_length:
|
| 89 |
+
indices += [indices[-1]] * (clip_length - len(indices))
|
|
|
|
| 90 |
|
| 91 |
+
# Load the selected frames, then reorder them to (Frames, Channels, Height, Width)
|
| 92 |
+
video = vr.get_batch(indices)
|
| 93 |
+
video = video.permute(0, 3, 1, 2).float() # Convert to Float for the processor
|
| 94 |
+
|
| 95 |
+
# Pass as a list of Tensors
|
| 96 |
+
processed = image_processor(
|
| 97 |
+
list(video),
|
| 98 |
+
return_tensors='pt'
|
| 99 |
+
)
|
| 100 |
|
| 101 |
+
pixel_values = processed['pixel_values'].squeeze(0) # (T, C, H, W)
|
| 102 |
+
pixel_values = pixel_values.permute(1, 0, 2, 3) # (C, T, H, W) for Swin3D
|
|
|
|
| 103 |
|
| 104 |
+
return pixel_values.unsqueeze(0)
|
| 105 |
finally:
|
| 106 |
if os.path.exists(tmp_path):
|
| 107 |
os.remove(tmp_path)
|
requirements.txt
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
fastapi
|
| 2 |
uvicorn
|
| 3 |
-
|
| 4 |
-
|
|
|
|
| 5 |
transformers
|
| 6 |
decord
|
| 7 |
huggingface_hub
|
|
|
|
| 1 |
fastapi
|
| 2 |
uvicorn
|
| 3 |
+
numpy<2.0.0
|
| 4 |
+
torch>=2.4.0
|
| 5 |
+
torchvision
|
| 6 |
transformers
|
| 7 |
decord
|
| 8 |
huggingface_hub
|