sam-motamed committed on
Commit
a608b1a
·
verified ·
1 Parent(s): dad59bc

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -11
app.py CHANGED
@@ -39,8 +39,8 @@ VOID_MODEL_ID = os.environ.get("VOID_MODEL_ID", "your-hf-username/VOID")
39
  VOID_CKPT_FILE = "void_pass1.safetensors"
40
 
41
  SAMPLE_SIZE = (384, 672) # H × W
42
- MAX_VID_LEN = 85
43
- TEMPORAL_WIN = 60
44
  FPS = 12
45
  WEIGHT_DTYPE = torch.bfloat16
46
  NEG_PROMPT = (
@@ -120,8 +120,8 @@ def load_quadmask_tensor(path: str) -> torch.Tensor:
120
 
121
  Quadmask pixel values:
122
  0 → primary object (to erase)
123
- 63 → overlap of affected region/ primary object
124
- 127 → affected region (shadows, reflections, new trajectory)
125
  255 → background (keep)
126
 
127
  After quantisation the mask is inverted so 255 = "erase", 0 = "keep",
@@ -216,23 +216,41 @@ Use the **VLM-Mask-Reasoner** pipeline included in the repo to generate quadmask
216
  SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "sample")
217
  EXAMPLES = [
218
  [
219
- os.path.join(SAMPLE_DIR, "lime", "input_video.mp4"),
220
- os.path.join(SAMPLE_DIR, "lime", "quadmask_0.mp4"),
221
  "A lime falls on the table.",
222
  30, 1.0, 42,
223
  ],
224
  [
225
- os.path.join(SAMPLE_DIR, "moving_ball", "input_video.mp4"),
226
- os.path.join(SAMPLE_DIR, "moving_ball", "quadmask_0.mp4"),
227
  "A ball rolls off the table.",
228
  30, 1.0, 42,
229
  ],
230
  [
231
- os.path.join(SAMPLE_DIR, "pillow", "input_video.mp4"),
232
- os.path.join(SAMPLE_DIR, "pillow", "quadmask_0.mp4"),
233
  "Two pillows placed on the table.",
234
  30, 1.0, 42,
235
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  ]
237
 
238
  with gr.Blocks(title="VOID – Video Object & Interaction Deletion") as demo:
@@ -243,7 +261,7 @@ with gr.Blocks(title="VOID – Video Object & Interaction Deletion") as demo:
243
  [🌐 Project Page](https://void-model.github.io/) | [💻 GitHub](https://github.com/void-model/VOID)
244
 
245
  Upload a video and its **quadmask**, enter a prompt describing the scene *after* removal,
246
- and VOID will erase the object along with its physical interactions.
247
 
248
  > Built on **CogVideoX-Fun-V1.5-5B** fine-tuned for interaction-aware video inpainting.
249
  """
 
39
  VOID_CKPT_FILE = "void_pass1.safetensors"
40
 
41
  SAMPLE_SIZE = (384, 672) # H × W
42
+ MAX_VID_LEN = 197
43
+ TEMPORAL_WIN = 85
44
  FPS = 12
45
  WEIGHT_DTYPE = torch.bfloat16
46
  NEG_PROMPT = (
 
120
 
121
  Quadmask pixel values:
122
  0 → primary object (to erase)
123
+ 63 → overlap / interaction zone
124
+ 127 → affected region (shadows, reflections …)
125
  255 → background (keep)
126
 
127
  After quantisation the mask is inverted so 255 = "erase", 0 = "keep",
 
216
  SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "sample")
217
  EXAMPLES = [
218
  [
219
+ os.path.join(SAMPLE_DIR, "lime", "input_video.mp4"),
220
+ os.path.join(SAMPLE_DIR, "lime", "quadmask_0.mp4"),
221
  "A lime falls on the table.",
222
  30, 1.0, 42,
223
  ],
224
  [
225
+ os.path.join(SAMPLE_DIR, "moving_ball", "input_video.mp4"),
226
+ os.path.join(SAMPLE_DIR, "moving_ball", "quadmask_0.mp4"),
227
  "A ball rolls off the table.",
228
  30, 1.0, 42,
229
  ],
230
  [
231
+ os.path.join(SAMPLE_DIR, "pillow", "input_video.mp4"),
232
+ os.path.join(SAMPLE_DIR, "pillow", "quadmask_0.mp4"),
233
  "Two pillows placed on the table.",
234
  30, 1.0, 42,
235
  ],
236
+ [
237
+ os.path.join(SAMPLE_DIR, "bowling", "input_video.mp4"),
238
+ os.path.join(SAMPLE_DIR, "bowling", "quadmask_0.mp4"),
239
+ "Bowling pins standing on the grass.",
240
+ 30, 1.0, 42,
241
+ ],
242
+ [
243
+ os.path.join(SAMPLE_DIR, "crush-can", "input_video.mp4"),
244
+ os.path.join(SAMPLE_DIR, "crush-can", "quadmask_0.mp4"),
245
+ "An empty table.",
246
+ 30, 1.0, 42,
247
+ ],
248
+ [
249
+ os.path.join(SAMPLE_DIR, "toast-shmello", "input_video.mp4"),
250
+ os.path.join(SAMPLE_DIR, "toast-shmello", "quadmask_0.mp4"),
251
+ "A plate on the table.",
252
+ 30, 1.0, 42,
253
+ ],
254
  ]
255
 
256
  with gr.Blocks(title="VOID – Video Object & Interaction Deletion") as demo:
 
261
  [🌐 Project Page](https://void-model.github.io/) | [💻 GitHub](https://github.com/void-model/VOID)
262
 
263
  Upload a video and its **quadmask**, enter a prompt describing the scene *after* removal,
264
+ and VOID will erase the object along with its physical interactions (shadows, deformations, secondary motion).
265
 
266
  > Built on **CogVideoX-Fun-V1.5-5B** fine-tuned for interaction-aware video inpainting.
267
  """