Clara211111 committed on
Commit
225a205
·
1 Parent(s): fb4b0cb
Files changed (4) hide show
  1. README.md +1 -1
  2. app copy.py +0 -740
  3. app.py +0 -7
  4. requirements.txt +1 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👁
4
  colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.17.1
8
  app_file: app.py
9
  pinned: false
10
  ---
 
4
  colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 5.37.0
8
  app_file: app.py
9
  pinned: false
10
  ---
app copy.py DELETED
@@ -1,740 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- import os, time, sys
7
- print("### SPACE BOOT: file =", __file__, flush=True)
8
- print("### argv =", sys.argv, flush=True)
9
- print("### cwd =", os.getcwd(), flush=True)
10
- print("### gradio_ssr =", os.environ.get("GRADIO_SSR_MODE"), "zero =", os.environ.get("SPACES_ZERO_GPU"), flush=True)
11
-
12
-
13
- import os
14
- import cv2
15
- import torch
16
- import numpy as np
17
- import gradio as gr
18
- import sys
19
- import shutil
20
- from datetime import datetime
21
- import glob
22
- import gc
23
- import time
24
- import trimesh
25
- import matplotlib
26
- import spaces
27
-
28
- from flow3r.models.flow3r import Flow3r
29
- from flow3r.utils.basic import load_images_as_tensor
30
- from flow3r.utils.geometry import depth_edge
31
-
32
- from scipy.spatial.transform import Rotation
33
- from huggingface_hub import hf_hub_download
34
-
35
- # sys.path.append("flow3r/")
36
-
37
- device = "cuda" if torch.cuda.is_available() else "cpu"
38
-
39
- print("Initializing and loading Flow3r model...")
40
-
41
- model = Flow3r()
42
- ckpt_path = hf_hub_download(repo_id="Clara211111/flow3r", filename="flow3r.bin")
43
- checkpoint = torch.load(ckpt_path, weights_only=False, map_location='cpu')
44
- model.load_state_dict(checkpoint, strict=True)
45
-
46
- model.eval()
47
- model = model.to(device)
48
-
49
- # -------------------------------------------------------------------------
50
- # Utils
51
- # -------------------------------------------------------------------------
52
- def predictions_to_glb(
53
- predictions,
54
- conf_thres=50.0,
55
- filter_by_frames="all",
56
- show_cam=True,
57
- ) -> trimesh.Scene:
58
- """
59
- Converts predictions to a 3D scene represented as a GLB file.
60
-
61
- Args:
62
- predictions (dict): Dictionary containing model predictions with keys:
63
- - world_points: 3D point coordinates (S, H, W, 3)
64
- - world_points_conf: Confidence scores (S, H, W)
65
- - images: Input images (S, H, W, 3)
66
- - extrinsic: Camera extrinsic matrices (S, 3, 4)
67
- conf_thres (float): Percentage of low-confidence points to filter out (default: 50.0)
68
- filter_by_frames (str): Frame filter specification (default: "all")
69
- show_cam (bool): Include camera visualization (default: True)
70
-
71
- Returns:
72
- trimesh.Scene: Processed 3D scene containing point cloud and cameras
73
-
74
- Raises:
75
- ValueError: If input predictions structure is invalid
76
- """
77
- if not isinstance(predictions, dict):
78
- raise ValueError("predictions must be a dictionary")
79
-
80
- if conf_thres is None:
81
- conf_thres = 10
82
-
83
- print("Building GLB scene")
84
- selected_frame_idx = None
85
- if filter_by_frames != "all" and filter_by_frames != "All":
86
- try:
87
- # Extract the index part before the colon
88
- selected_frame_idx = int(filter_by_frames.split(":")[0])
89
- except (ValueError, IndexError):
90
- pass
91
-
92
- pred_world_points = predictions["points"]
93
- pred_world_points_conf = predictions.get("conf", np.ones_like(pred_world_points[..., 0]))
94
-
95
- # Get images from predictions
96
- images = predictions["images"]
97
- # Use extrinsic matrices instead of pred_extrinsic_list
98
- camera_poses = predictions["camera_poses"]
99
-
100
- if selected_frame_idx is not None:
101
- pred_world_points = pred_world_points[selected_frame_idx][None]
102
- pred_world_points_conf = pred_world_points_conf[selected_frame_idx][None]
103
- images = images[selected_frame_idx][None]
104
- camera_poses = camera_poses[selected_frame_idx][None]
105
-
106
- vertices_3d = pred_world_points.reshape(-1, 3)
107
- # Handle different image formats - check if images need transposing
108
- if images.ndim == 4 and images.shape[1] == 3: # NCHW format
109
- colors_rgb = np.transpose(images, (0, 2, 3, 1))
110
- else: # Assume already in NHWC format
111
- colors_rgb = images
112
- colors_rgb = (colors_rgb.reshape(-1, 3) * 255).astype(np.uint8)
113
-
114
- conf = pred_world_points_conf.reshape(-1)
115
- # Convert percentage threshold to actual confidence value
116
- if conf_thres == 0.0:
117
- conf_threshold = 0.0
118
- else:
119
- # conf_threshold = np.percentile(conf, conf_thres)
120
- conf_threshold = conf_thres / 100
121
-
122
- conf_mask = (conf >= conf_threshold) & (conf > 1e-5)
123
-
124
- vertices_3d = vertices_3d[conf_mask]
125
- colors_rgb = colors_rgb[conf_mask]
126
-
127
- if vertices_3d is None or np.asarray(vertices_3d).size == 0:
128
- vertices_3d = np.array([[1, 0, 0]])
129
- colors_rgb = np.array([[255, 255, 255]])
130
- scene_scale = 1
131
- else:
132
- # Calculate the 5th and 95th percentiles along each axis
133
- lower_percentile = np.percentile(vertices_3d, 5, axis=0)
134
- upper_percentile = np.percentile(vertices_3d, 95, axis=0)
135
-
136
- # Calculate the diagonal length of the percentile bounding box
137
- scene_scale = np.linalg.norm(upper_percentile - lower_percentile)
138
-
139
- colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
140
-
141
- # Initialize a 3D scene
142
- scene_3d = trimesh.Scene()
143
-
144
- # Add point cloud data to the scene
145
- point_cloud_data = trimesh.PointCloud(vertices=vertices_3d, colors=colors_rgb)
146
-
147
- scene_3d.add_geometry(point_cloud_data)
148
-
149
- # Prepare 4x4 matrices for camera extrinsics
150
- num_cameras = len(camera_poses)
151
-
152
- if show_cam:
153
- # Add camera models to the scene
154
- for i in range(num_cameras):
155
- camera_to_world = camera_poses[i]
156
- rgba_color = colormap(i / num_cameras)
157
- current_color = tuple(int(255 * x) for x in rgba_color[:3])
158
-
159
- # integrate_camera_into_scene(scene_3d, camera_to_world, current_color, scene_scale)
160
- integrate_camera_into_scene(scene_3d, camera_to_world, current_color, 1.) # fixed camera size
161
-
162
- # Rotate scene for better visualize
163
- align_rotation = np.eye(4)
164
- align_rotation[:3, :3] = Rotation.from_euler("y", 100, degrees=True).as_matrix() # plane rotate
165
- align_rotation[:3, :3] = align_rotation[:3, :3] @ Rotation.from_euler("x", 155, degrees=True).as_matrix() # roll
166
- scene_3d.apply_transform(align_rotation)
167
-
168
- print("GLB Scene built")
169
- return scene_3d
170
-
171
- def get_opengl_conversion_matrix() -> np.ndarray:
172
- """
173
- Constructs and returns the OpenGL conversion matrix.
174
-
175
- Returns:
176
- numpy.ndarray: A 4x4 OpenGL conversion matrix.
177
- """
178
- # Create an identity matrix
179
- matrix = np.identity(4)
180
-
181
- # Flip the y and z axes
182
- matrix[1, 1] = -1
183
- matrix[2, 2] = -1
184
-
185
- return matrix
186
-
187
- def integrate_camera_into_scene(scene: trimesh.Scene, transform: np.ndarray, face_colors: tuple, scene_scale: float):
188
- """
189
- Integrates a fake camera mesh into the 3D scene.
190
-
191
- Args:
192
- scene (trimesh.Scene): The 3D scene to add the camera model.
193
- transform (np.ndarray): Transformation matrix for camera positioning.
194
- face_colors (tuple): Color of the camera face.
195
- scene_scale (float): Scale of the scene.
196
- """
197
-
198
- cam_width = scene_scale * 0.05
199
- cam_height = scene_scale * 0.1
200
-
201
- # Create cone shape for camera
202
- rot_45_degree = np.eye(4)
203
- rot_45_degree[:3, :3] = Rotation.from_euler("z", 45, degrees=True).as_matrix()
204
- rot_45_degree[2, 3] = -cam_height
205
-
206
- opengl_transform = get_opengl_conversion_matrix()
207
- # Combine transformations
208
- complete_transform = transform @ opengl_transform @ rot_45_degree
209
- camera_cone_shape = trimesh.creation.cone(cam_width, cam_height, sections=4)
210
-
211
- # Generate mesh for the camera
212
- slight_rotation = np.eye(4)
213
- slight_rotation[:3, :3] = Rotation.from_euler("z", 2, degrees=True).as_matrix()
214
-
215
- vertices_combined = np.concatenate(
216
- [
217
- camera_cone_shape.vertices,
218
- 0.95 * camera_cone_shape.vertices,
219
- transform_points(slight_rotation, camera_cone_shape.vertices),
220
- ]
221
- )
222
- vertices_transformed = transform_points(complete_transform, vertices_combined)
223
-
224
- mesh_faces = compute_camera_faces(camera_cone_shape)
225
-
226
- # Add the camera mesh to the scene
227
- camera_mesh = trimesh.Trimesh(vertices=vertices_transformed, faces=mesh_faces)
228
- camera_mesh.visual.face_colors[:, :3] = face_colors
229
- scene.add_geometry(camera_mesh)
230
-
231
- def transform_points(transformation: np.ndarray, points: np.ndarray, dim: int = None) -> np.ndarray:
232
- """
233
- Applies a 4x4 transformation to a set of points.
234
-
235
- Args:
236
- transformation (np.ndarray): Transformation matrix.
237
- points (np.ndarray): Points to be transformed.
238
- dim (int, optional): Dimension for reshaping the result.
239
-
240
- Returns:
241
- np.ndarray: Transformed points.
242
- """
243
- points = np.asarray(points)
244
- initial_shape = points.shape[:-1]
245
- dim = dim or points.shape[-1]
246
-
247
- # Apply transformation
248
- transformation = transformation.swapaxes(-1, -2) # Transpose the transformation matrix
249
- points = points @ transformation[..., :-1, :] + transformation[..., -1:, :]
250
-
251
- # Reshape the result
252
- result = points[..., :dim].reshape(*initial_shape, dim)
253
- return result
254
-
255
- def compute_camera_faces(cone_shape: trimesh.Trimesh) -> np.ndarray:
256
- """
257
- Computes the faces for the camera mesh.
258
-
259
- Args:
260
- cone_shape (trimesh.Trimesh): The shape of the camera cone.
261
-
262
- Returns:
263
- np.ndarray: Array of faces for the camera mesh.
264
- """
265
- # Create pseudo cameras
266
- faces_list = []
267
- num_vertices_cone = len(cone_shape.vertices)
268
-
269
- for face in cone_shape.faces:
270
- if 0 in face:
271
- continue
272
- v1, v2, v3 = face
273
- v1_offset, v2_offset, v3_offset = face + num_vertices_cone
274
- v1_offset_2, v2_offset_2, v3_offset_2 = face + 2 * num_vertices_cone
275
-
276
- faces_list.extend(
277
- [
278
- (v1, v2, v2_offset),
279
- (v1, v1_offset, v3),
280
- (v3_offset, v2, v3),
281
- (v1, v2, v2_offset_2),
282
- (v1, v1_offset_2, v3),
283
- (v3_offset_2, v2, v3),
284
- ]
285
- )
286
-
287
- faces_list += [(v3, v2, v1) for v1, v2, v3 in faces_list]
288
- return np.array(faces_list)
289
-
290
- # -------------------------------------------------------------------------
291
- # 1) Core model inference
292
- # -------------------------------------------------------------------------
293
- @spaces.GPU(duration=120)
294
- def run_model(target_dir, model) -> dict:
295
- print(f"Processing images from {target_dir}")
296
-
297
- # Device check
298
- device = "cuda" if torch.cuda.is_available() else "cpu"
299
- if not torch.cuda.is_available():
300
- raise ValueError("CUDA is not available. Check your environment.")
301
-
302
- # Move model to device
303
- model = model.to(device)
304
- model.eval()
305
-
306
- # Load and preprocess images
307
- image_names = glob.glob(os.path.join(target_dir, "images", "*"))
308
- image_names = sorted(image_names)
309
- print(f"Found {len(image_names)} images")
310
- if len(image_names) == 0:
311
- raise ValueError("No images found. Check your upload.")
312
-
313
- # interval = 10 if target_dir.endswith('.mp4') else 1
314
- interval = 1
315
- imgs = load_images_as_tensor(os.path.join(target_dir, "images"), interval=interval).to(device) # (N, 3, H, W)
316
-
317
- # Run inference
318
- print("Running inference...")
319
- dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
320
-
321
- with torch.no_grad():
322
- with torch.cuda.amp.autocast(dtype=dtype):
323
- predictions = model(imgs[None]) # Add batch dimension
324
- predictions['images'] = imgs[None].permute(0, 1, 3, 4, 2)
325
- predictions['conf'] = torch.sigmoid(predictions['conf'])
326
- edge = depth_edge(predictions['local_points'][..., 2], rtol=0.03)
327
- predictions['conf'][edge] = 0.0
328
- del predictions['local_points']
329
-
330
- # Convert tensors to numpy
331
- for key in predictions.keys():
332
- if isinstance(predictions[key], torch.Tensor):
333
- predictions[key] = predictions[key].cpu().numpy().squeeze(0) # remove batch dimension
334
-
335
- # Clean up
336
- torch.cuda.empty_cache()
337
- return predictions
338
-
339
-
340
- # -------------------------------------------------------------------------
341
- # 2) Handle uploaded video/images --> produce target_dir + images
342
- # -------------------------------------------------------------------------
343
- def handle_uploads(input_video, input_images):
344
- """
345
- Create a new 'target_dir' + 'images' subfolder, and place user-uploaded
346
- images or extracted frames from video into it. Return (target_dir, image_paths).
347
- """
348
- start_time = time.time()
349
- gc.collect()
350
- torch.cuda.empty_cache()
351
-
352
- # Create a unique folder name
353
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
354
- target_dir = f"input_images_{timestamp}"
355
- target_dir_images = os.path.join(target_dir, "images")
356
-
357
- # Clean up if somehow that folder already exists
358
- if os.path.exists(target_dir):
359
- shutil.rmtree(target_dir)
360
- os.makedirs(target_dir)
361
- os.makedirs(target_dir_images)
362
-
363
- image_paths = []
364
-
365
- # --- Handle images ---
366
- if input_images is not None:
367
- for file_data in input_images:
368
- if isinstance(file_data, dict) and "name" in file_data:
369
- file_path = file_data["name"]
370
- else:
371
- file_path = file_data
372
- dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
373
- shutil.copy(file_path, dst_path)
374
- image_paths.append(dst_path)
375
-
376
- # --- Handle video ---
377
- if input_video is not None:
378
- if isinstance(input_video, dict) and "name" in input_video:
379
- video_path = input_video["name"]
380
- else:
381
- video_path = input_video
382
-
383
- vs = cv2.VideoCapture(video_path)
384
- fps = vs.get(cv2.CAP_PROP_FPS)
385
- frame_interval = int(fps * 1) # 1 frame/sec
386
-
387
- count = 0
388
- video_frame_num = 0
389
- while True:
390
- gotit, frame = vs.read()
391
- if not gotit:
392
- break
393
- count += 1
394
- if count % frame_interval == 0:
395
- image_path = os.path.join(target_dir_images, f"{video_frame_num:06}.png")
396
- cv2.imwrite(image_path, frame)
397
- image_paths.append(image_path)
398
- video_frame_num += 1
399
-
400
- # Sort final images for gallery
401
- image_paths = sorted(image_paths)
402
-
403
- end_time = time.time()
404
- print(f"Files copied to {target_dir_images}; took {end_time - start_time:.3f} seconds")
405
- return target_dir, image_paths
406
-
407
-
408
- # -------------------------------------------------------------------------
409
- # 3) Update gallery on upload
410
- # -------------------------------------------------------------------------
411
- def update_gallery_on_upload(input_video, input_images):
412
- """
413
- Whenever user uploads or changes files, immediately handle them
414
- and show in the gallery. Return (target_dir, image_paths).
415
- If nothing is uploaded, returns "None" and empty list.
416
- """
417
- if not input_video and not input_images:
418
- return None, None, None, None
419
- target_dir, image_paths = handle_uploads(input_video, input_images)
420
- return None, target_dir, image_paths, "Upload complete. Click 'Reconstruct' to begin 3D processing."
421
-
422
-
423
- # -------------------------------------------------------------------------
424
- # 4) Reconstruction: uses the target_dir plus any viz parameters
425
- # -------------------------------------------------------------------------
426
- @spaces.GPU(duration=120)
427
- def gradio_demo(
428
- target_dir,
429
- conf_thres=3.0,
430
- frame_filter="All",
431
- show_cam=True,
432
- ):
433
- """
434
- Perform reconstruction using the already-created target_dir/images.
435
- """
436
- if not os.path.isdir(target_dir) or target_dir == "None":
437
- return None, "No valid target directory found. Please upload first.", None, None
438
-
439
- start_time = time.time()
440
- gc.collect()
441
- torch.cuda.empty_cache()
442
-
443
- # Prepare frame_filter dropdown
444
- target_dir_images = os.path.join(target_dir, "images")
445
- all_files = sorted(os.listdir(target_dir_images)) if os.path.isdir(target_dir_images) else []
446
- all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
447
- frame_filter_choices = ["All"] + all_files
448
-
449
- print("Running run_model...")
450
- with torch.no_grad():
451
- predictions = run_model(target_dir, model)
452
-
453
- # Save predictions
454
- prediction_save_path = os.path.join(target_dir, "predictions.npz")
455
- np.savez(prediction_save_path, **predictions)
456
-
457
- # Handle None frame_filter
458
- if frame_filter is None:
459
- frame_filter = "All"
460
-
461
- # Build a GLB file name
462
- glbfile = os.path.join(
463
- target_dir,
464
- f"glbscene_{conf_thres}_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}.glb",
465
- )
466
-
467
- # Convert predictions to GLB
468
- glbscene = predictions_to_glb(
469
- predictions,
470
- conf_thres=conf_thres,
471
- filter_by_frames=frame_filter,
472
- show_cam=show_cam,
473
- # mask_sky=mask_sky,
474
- )
475
- glbscene.export(file_obj=glbfile)
476
-
477
- # Cleanup
478
- del predictions
479
- gc.collect()
480
- torch.cuda.empty_cache()
481
-
482
- end_time = time.time()
483
- print(f"Total time: {end_time - start_time:.2f} seconds (including IO)")
484
- log_msg = f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."
485
-
486
- return glbfile, log_msg, gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True)
487
-
488
-
489
- # -------------------------------------------------------------------------
490
- # 5) Helper functions for UI resets + re-visualization
491
- # -------------------------------------------------------------------------
492
- def clear_fields():
493
- """
494
- Clears the 3D viewer, the stored target_dir, and empties the gallery.
495
- """
496
- return None
497
-
498
-
499
- def update_log():
500
- """
501
- Display a quick log message while waiting.
502
- """
503
- return "Loading and Reconstructing..."
504
-
505
-
506
- def update_visualization(
507
- target_dir, conf_thres, frame_filter, show_cam, is_example
508
- ):
509
- """
510
- Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
511
- and return it for the 3D viewer. If is_example == "True", skip.
512
- """
513
-
514
- # If it's an example click, skip as requested
515
- if is_example == "True":
516
- return None, "No reconstruction available. Please click the Reconstruct button first."
517
-
518
- if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
519
- return None, "No reconstruction available. Please click the Reconstruct button first."
520
-
521
- predictions_path = os.path.join(target_dir, "predictions.npz")
522
- if not os.path.exists(predictions_path):
523
- return None, f"No reconstruction available at {predictions_path}. Please run 'Reconstruct' first."
524
-
525
- key_list = [
526
- "images",
527
- "points",
528
- "conf",
529
- "camera_poses",
530
- ]
531
-
532
- loaded = np.load(predictions_path)
533
- predictions = {key: np.array(loaded[key]) for key in key_list}
534
-
535
- glbfile = os.path.join(
536
- target_dir,
537
- f"glbscene_{conf_thres}_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}.glb",
538
- )
539
-
540
- if not os.path.exists(glbfile):
541
- glbscene = predictions_to_glb(
542
- predictions,
543
- conf_thres=conf_thres,
544
- filter_by_frames=frame_filter,
545
- show_cam=show_cam,
546
- # mask_sky=mask_sky,
547
- )
548
- glbscene.export(file_obj=glbfile)
549
-
550
- return glbfile, "Updating Visualization"
551
-
552
-
553
- # -------------------------------------------------------------------------
554
- # Example images
555
- # -------------------------------------------------------------------------
556
-
557
- great_wall_video = "examples/videos/great_wall.mp4"
558
- colosseum_video = "examples/videos/Colosseum.mp4"
559
- room_video = "examples/videos/room.mp4"
560
- kitchen_video = "examples/videos/kitchen.mp4"
561
- fern_video = "examples/videos/fern.mp4"
562
- single_cartoon_video = "examples/videos/single_cartoon.mp4"
563
- single_oil_painting_video = "examples/videos/single_oil_painting.mp4"
564
- pyramid_video = "examples/videos/pyramid.mp4"
565
-
566
-
567
- # -------------------------------------------------------------------------
568
- # 6) Build Gradio UI
569
- # -------------------------------------------------------------------------
570
- theme = gr.themes.Ocean()
571
- theme.set(
572
- checkbox_label_background_fill_selected="*button_primary_background_fill",
573
- checkbox_label_text_color_selected="*button_primary_text_color",
574
- )
575
-
576
- with gr.Blocks(
577
- theme=theme,
578
- css="""
579
- .custom-log * {
580
- font-style: italic;
581
- font-size: 22px !important;
582
- background-image: linear-gradient(120deg, #0ea5e9 0%, #6ee7b7 60%, #34d399 100%);
583
- -webkit-background-clip: text;
584
- background-clip: text;
585
- font-weight: bold !important;
586
- color: transparent !important;
587
- text-align: center !important;
588
- }
589
-
590
- .example-log * {
591
- font-style: italic;
592
- font-size: 16px !important;
593
- background-image: linear-gradient(120deg, #0ea5e9 0%, #6ee7b7 60%, #34d399 100%);
594
- -webkit-background-clip: text;
595
- background-clip: text;
596
- color: transparent !important;
597
- }
598
-
599
- #my_radio .wrap {
600
- display: flex;
601
- flex-wrap: nowrap;
602
- justify-content: center;
603
- align-items: center;
604
- }
605
-
606
- #my_radio .wrap label {
607
- display: flex;
608
- width: 50%;
609
- justify-content: center;
610
- align-items: center;
611
- margin: 0;
612
- padding: 10px 0;
613
- box-sizing: border-box;
614
- }
615
- """,
616
- ) as demo:
617
- # Instead of gr.State, we use a hidden Textbox:
618
- is_example = gr.Textbox(label="is_example", visible=False, value="None")
619
- num_images = gr.Textbox(label="num_images", visible=False, value="None")
620
-
621
- gr.HTML(
622
- """
623
- <h1>Flow3r: Factored Flow Prediction for Visual Geometry Learning</h1>
624
- <p>
625
- <a href="https://github.com/Kidrauh/flow3r">GitHub Repository</a> |
626
- <a href="https://flow3r-project.github.io/">Project Page</a>
627
- </p>
628
-
629
- <div style="font-size: 16px; line-height: 1.5;">
630
- <p>Upload a video or a set of images to create a 3D reconstruction of a scene or object. Flow3r takes these images and generates a 3D point cloud, along with estimated camera poses.</p>
631
-
632
- </div>
633
- """
634
- )
635
-
636
- target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
637
-
638
- with gr.Row():
639
- with gr.Column(scale=2):
640
- input_video = gr.Video(label="Upload Video", interactive=True)
641
- input_images = gr.File(file_count="multiple", label="Upload Images", interactive=True)
642
-
643
- image_gallery = gr.Gallery(
644
- label="Preview",
645
- columns=4,
646
- height="300px",
647
- # show_download_button=True,
648
- object_fit="contain",
649
- preview=True,
650
- )
651
-
652
- with gr.Column(scale=4):
653
- with gr.Column():
654
- gr.Markdown("**3D Reconstruction (Point Cloud and Camera Poses)**")
655
- log_output = gr.Markdown(
656
- "Please upload a video or images, then click Reconstruct.", elem_classes=["custom-log"]
657
- )
658
- reconstruction_output = gr.Model3D(height=520, zoom_speed=0.5, pan_speed=0.5)
659
-
660
- with gr.Row():
661
- submit_btn = gr.Button("Reconstruct", scale=1, variant="primary")
662
- clear_btn = gr.ClearButton(
663
- [input_video, input_images, reconstruction_output, log_output, target_dir_output, image_gallery],
664
- scale=1,
665
- )
666
-
667
- with gr.Row():
668
- conf_thres = gr.Slider(minimum=0, maximum=100, value=0, step=0.1, label="Confidence Threshold (%)")
669
- frame_filter = gr.Dropdown(choices=["All"], value="All", label="Show Points from Frame")
670
- with gr.Column():
671
- show_cam = gr.Checkbox(label="Show Camera", value=True)
672
-
673
-
674
- submit_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then(
675
- fn=update_log, inputs=[], outputs=[log_output]
676
- ).then(
677
- fn=gradio_demo,
678
- inputs=[
679
- target_dir_output,
680
- conf_thres,
681
- frame_filter,
682
- show_cam,
683
- ],
684
- outputs=[reconstruction_output, log_output, frame_filter],
685
- ).then(
686
- fn=lambda: "False", inputs=[], outputs=[is_example] # set is_example to "False"
687
- )
688
-
689
- # -------------------------------------------------------------------------
690
- # Real-time Visualization Updates
691
- # -------------------------------------------------------------------------
692
- conf_thres.change(
693
- update_visualization,
694
- [
695
- target_dir_output,
696
- conf_thres,
697
- frame_filter,
698
- show_cam,
699
- is_example,
700
- ],
701
- [reconstruction_output, log_output],
702
- )
703
- frame_filter.change(
704
- update_visualization,
705
- [
706
- target_dir_output,
707
- conf_thres,
708
- frame_filter,
709
- show_cam,
710
- is_example,
711
- ],
712
- [reconstruction_output, log_output],
713
- )
714
-
715
- show_cam.change(
716
- update_visualization,
717
- [
718
- target_dir_output,
719
- conf_thres,
720
- frame_filter,
721
- show_cam,
722
- is_example,
723
- ],
724
- [reconstruction_output, log_output],
725
- )
726
- # -------------------------------------------------------------------------
727
- # Auto-update gallery whenever user uploads or changes their files
728
- # -------------------------------------------------------------------------
729
- input_video.change(
730
- fn=update_gallery_on_upload,
731
- inputs=[input_video, input_images],
732
- outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
733
- )
734
- input_images.change(
735
- fn=update_gallery_on_upload,
736
- inputs=[input_video, input_images],
737
- outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
738
- )
739
- print("### ABOUT TO LAUNCH", flush=True)
740
- demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -4,12 +4,6 @@
4
  # This source code is licensed under the license found in the
5
  # LICENSE file in the root directory of this source tree.
6
  import os, time, sys
7
- print("### SPACE BOOT: file =", __file__, flush=True)
8
- print("### argv =", sys.argv, flush=True)
9
- print("### cwd =", os.getcwd(), flush=True)
10
- print("### gradio_ssr =", os.environ.get("GRADIO_SSR_MODE"), "zero =", os.environ.get("SPACES_ZERO_GPU"), flush=True)
11
-
12
-
13
  import os
14
  import cv2
15
  import torch
@@ -736,5 +730,4 @@ with gr.Blocks(
736
  inputs=[input_video, input_images],
737
  outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
738
  )
739
- print("### ABOUT TO LAUNCH", flush=True)
740
  demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
 
4
  # This source code is licensed under the license found in the
5
  # LICENSE file in the root directory of this source tree.
6
  import os, time, sys
 
 
 
 
 
 
7
  import os
8
  import cv2
9
  import torch
 
730
  inputs=[input_video, input_images],
731
  outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
732
  )
 
733
  demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
requirements.txt CHANGED
@@ -6,6 +6,7 @@ opencv-python
6
  plyfile
7
  huggingface_hub
8
  safetensors
 
9
 
10
  # below for gradio
11
  gradio
 
6
  plyfile
7
  huggingface_hub
8
  safetensors
9
+ pydantic==2.10.6
10
 
11
  # below for gradio
12
  gradio