Upload app.py
Browse files
app.py
CHANGED
|
@@ -39,8 +39,8 @@ VOID_MODEL_ID = os.environ.get("VOID_MODEL_ID", "your-hf-username/VOID")
|
|
| 39 |
VOID_CKPT_FILE = "void_pass1.safetensors"
|
| 40 |
|
| 41 |
SAMPLE_SIZE = (384, 672) # H × W
|
| 42 |
-
MAX_VID_LEN =
|
| 43 |
-
TEMPORAL_WIN =
|
| 44 |
FPS = 12
|
| 45 |
WEIGHT_DTYPE = torch.bfloat16
|
| 46 |
NEG_PROMPT = (
|
|
@@ -120,8 +120,8 @@ def load_quadmask_tensor(path: str) -> torch.Tensor:
|
|
| 120 |
|
| 121 |
Quadmask pixel values:
|
| 122 |
0 → primary object (to erase)
|
| 123 |
-
63 → overlap
|
| 124 |
-
127 → affected region (shadows, reflections
|
| 125 |
255 → background (keep)
|
| 126 |
|
| 127 |
After quantisation the mask is inverted so 255 = "erase", 0 = "keep",
|
|
@@ -216,23 +216,41 @@ Use the **VLM-Mask-Reasoner** pipeline included in the repo to generate quadmask
|
|
| 216 |
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "sample")
|
| 217 |
EXAMPLES = [
|
| 218 |
[
|
| 219 |
-
os.path.join(SAMPLE_DIR, "lime",
|
| 220 |
-
os.path.join(SAMPLE_DIR, "lime",
|
| 221 |
"A lime falls on the table.",
|
| 222 |
30, 1.0, 42,
|
| 223 |
],
|
| 224 |
[
|
| 225 |
-
os.path.join(SAMPLE_DIR, "moving_ball",
|
| 226 |
-
os.path.join(SAMPLE_DIR, "moving_ball",
|
| 227 |
"A ball rolls off the table.",
|
| 228 |
30, 1.0, 42,
|
| 229 |
],
|
| 230 |
[
|
| 231 |
-
os.path.join(SAMPLE_DIR, "pillow",
|
| 232 |
-
os.path.join(SAMPLE_DIR, "pillow",
|
| 233 |
"Two pillows placed on the table.",
|
| 234 |
30, 1.0, 42,
|
| 235 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
]
|
| 237 |
|
| 238 |
with gr.Blocks(title="VOID — Video Object & Interaction Deletion") as demo:
|
|
@@ -243,7 +261,7 @@ with gr.Blocks(title="VOID — Video Object & Interaction Deletion") as demo:
|
|
| 243 |
[🌐 Project Page](https://void-model.github.io/) | [💻 GitHub](https://github.com/void-model/VOID)
|
| 244 |
|
| 245 |
Upload a video and its **quadmask**, enter a prompt describing the scene *after* removal,
|
| 246 |
-
and VOID will erase the object along with its physical interactions.
|
| 247 |
|
| 248 |
> Built on **CogVideoX-Fun-V1.5-5B** fine-tuned for interaction-aware video inpainting.
|
| 249 |
"""
|
|
|
|
| 39 |
VOID_CKPT_FILE = "void_pass1.safetensors"
|
| 40 |
|
| 41 |
SAMPLE_SIZE = (384, 672) # H × W
|
| 42 |
+
MAX_VID_LEN = 197
|
| 43 |
+
TEMPORAL_WIN = 85
|
| 44 |
FPS = 12
|
| 45 |
WEIGHT_DTYPE = torch.bfloat16
|
| 46 |
NEG_PROMPT = (
|
|
|
|
| 120 |
|
| 121 |
Quadmask pixel values:
|
| 122 |
0 → primary object (to erase)
|
| 123 |
+
63 → overlap / interaction zone
|
| 124 |
+
127 → affected region (shadows, reflections …)
|
| 125 |
255 → background (keep)
|
| 126 |
|
| 127 |
After quantisation the mask is inverted so 255 = "erase", 0 = "keep",
|
|
|
|
| 216 |
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "sample")
|
| 217 |
EXAMPLES = [
|
| 218 |
[
|
| 219 |
+
os.path.join(SAMPLE_DIR, "lime", "input_video.mp4"),
|
| 220 |
+
os.path.join(SAMPLE_DIR, "lime", "quadmask_0.mp4"),
|
| 221 |
"A lime falls on the table.",
|
| 222 |
30, 1.0, 42,
|
| 223 |
],
|
| 224 |
[
|
| 225 |
+
os.path.join(SAMPLE_DIR, "moving_ball", "input_video.mp4"),
|
| 226 |
+
os.path.join(SAMPLE_DIR, "moving_ball", "quadmask_0.mp4"),
|
| 227 |
"A ball rolls off the table.",
|
| 228 |
30, 1.0, 42,
|
| 229 |
],
|
| 230 |
[
|
| 231 |
+
os.path.join(SAMPLE_DIR, "pillow", "input_video.mp4"),
|
| 232 |
+
os.path.join(SAMPLE_DIR, "pillow", "quadmask_0.mp4"),
|
| 233 |
"Two pillows placed on the table.",
|
| 234 |
30, 1.0, 42,
|
| 235 |
],
|
| 236 |
+
[
|
| 237 |
+
os.path.join(SAMPLE_DIR, "bowling", "input_video.mp4"),
|
| 238 |
+
os.path.join(SAMPLE_DIR, "bowling", "quadmask_0.mp4"),
|
| 239 |
+
"Bowling pins standing on the grass.",
|
| 240 |
+
30, 1.0, 42,
|
| 241 |
+
],
|
| 242 |
+
[
|
| 243 |
+
os.path.join(SAMPLE_DIR, "crush-can", "input_video.mp4"),
|
| 244 |
+
os.path.join(SAMPLE_DIR, "crush-can", "quadmask_0.mp4"),
|
| 245 |
+
"An empty table.",
|
| 246 |
+
30, 1.0, 42,
|
| 247 |
+
],
|
| 248 |
+
[
|
| 249 |
+
os.path.join(SAMPLE_DIR, "toast-shmello", "input_video.mp4"),
|
| 250 |
+
os.path.join(SAMPLE_DIR, "toast-shmello", "quadmask_0.mp4"),
|
| 251 |
+
"A plate on the table.",
|
| 252 |
+
30, 1.0, 42,
|
| 253 |
+
],
|
| 254 |
]
|
| 255 |
|
| 256 |
with gr.Blocks(title="VOID — Video Object & Interaction Deletion") as demo:
|
|
|
|
| 261 |
[🌐 Project Page](https://void-model.github.io/) | [💻 GitHub](https://github.com/void-model/VOID)
|
| 262 |
|
| 263 |
Upload a video and its **quadmask**, enter a prompt describing the scene *after* removal,
|
| 264 |
+
and VOID will erase the object along with its physical interactions (shadows, deformations, secondary motion).
|
| 265 |
|
| 266 |
> Built on **CogVideoX-Fun-V1.5-5B** fine-tuned for interaction-aware video inpainting.
|
| 267 |
"""
|