Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +59 -10
- examples/depth_normal/depth/0036.png +0 -0
- examples/depth_normal/depth/0125.png +0 -0
- examples/depth_normal/depth/0166.png +0 -0
- examples/depth_normal/depth/0168.png +0 -0
- examples/depth_normal/depth/0211.png +0 -0
- examples/depth_normal/depth/0278.png +0 -0
- examples/depth_normal/depth/0282.png +0 -0
- examples/depth_normal/depth/0331.png +0 -0
- examples/depth_normal/depth/0384.png +0 -0
- examples/depth_normal/depth/0432.png +0 -0
- examples/depth_normal/depth/0444.png +0 -0
- examples/depth_normal/depth/0475.png +0 -0
- examples/depth_normal/depth/0476.png +0 -0
- examples/depth_normal/depth/0517.png +0 -0
- examples/depth_normal/depth/0523.png +0 -0
- examples/depth_normal/depth/0524.png +0 -0
- examples/depth_normal/depth/0536.png +0 -0
- examples/depth_normal/depth/0561.png +0 -0
- examples/depth_normal/depth/0565.png +0 -0
- examples/depth_normal/depth/0590.png +0 -0
- examples/depth_normal/depth/0618.png +0 -0
- examples/depth_normal/depth/0716.png +0 -0
- examples/depth_normal/depth/0724.png +0 -0
- examples/depth_normal/depth/0758.png +0 -0
- examples/depth_normal/depth/0759.png +0 -0
- examples/depth_normal/depth/0767.png +0 -0
- examples/depth_normal/depth/0840.png +0 -0
- examples/depth_normal/depth/0849.png +0 -0
- examples/depth_normal/depth/0857.png +0 -0
- examples/depth_normal/depth/0870.png +0 -0
- examples/depth_normal/depth/0905.png +0 -0
- examples/depth_normal/depth/0993.png +0 -0
- examples/depth_normal/depth/1038.png +0 -0
- examples/depth_normal/depth/1074.png +0 -0
- examples/depth_normal/depth/1099.png +0 -0
- examples/depth_normal/depth/1101.png +0 -0
- examples/depth_normal/depth/1146.png +0 -0
- examples/depth_normal/depth/1148.png +0 -0
- examples/depth_normal/depth/1165.png +0 -0
- examples/depth_normal/depth/1173.png +0 -0
- examples/depth_normal/depth/1193.png +0 -0
- examples/depth_normal/depth/1225.png +0 -0
- examples/depth_normal/depth/1257.png +0 -0
- examples/depth_normal/depth/1291.png +0 -0
- examples/depth_normal/depth/1294.png +0 -0
- examples/depth_normal/depth/1346.png +0 -0
- examples/depth_normal/depth/1389.png +0 -0
- examples/depth_normal/depth/1398.png +0 -0
- examples/depth_normal/depth/1407.png +0 -0
app.py
CHANGED
|
@@ -25,7 +25,7 @@ import plotly.graph_objects as go
|
|
| 25 |
from data.fintune_dataset import pc_norm
|
| 26 |
from functools import partial
|
| 27 |
import glob
|
| 28 |
-
|
| 29 |
|
| 30 |
T_random_resized_crop = transforms.Compose([
|
| 31 |
transforms.RandomResizedCrop(size=(224, 224), scale=(0.9, 1.0), ratio=(0.75, 1.3333), interpolation=3,
|
|
@@ -33,6 +33,23 @@ T_random_resized_crop = transforms.Compose([
|
|
| 33 |
transforms.ToTensor(),
|
| 34 |
transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
def load_audio(audio_path):
|
| 38 |
fbank = make_audio_features(audio_path, mel_bins=128)
|
|
@@ -55,6 +72,17 @@ def load_fmri(fmri_path):
|
|
| 55 |
data = torch.tensor(data[None])
|
| 56 |
return data
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def model_worker(
|
| 59 |
rank: int, args: argparse.Namespace, barrier: mp.Barrier,
|
| 60 |
request_queue: mp.Queue, response_queue: Optional[mp.Queue] = None,
|
|
@@ -107,7 +135,7 @@ def model_worker(
|
|
| 107 |
barrier.wait()
|
| 108 |
|
| 109 |
while True:
|
| 110 |
-
img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get()
|
| 111 |
if 'image' in modality and img_path is not None:
|
| 112 |
image = Image.open(img_path).convert('RGB')
|
| 113 |
inputs = T_random_resized_crop(image)
|
|
@@ -119,6 +147,10 @@ def model_worker(
|
|
| 119 |
inputs = load_point(point_path)
|
| 120 |
elif 'fmri' in modality and fmri_path is not None:
|
| 121 |
inputs = load_fmri(fmri_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
else:
|
| 123 |
inputs = None
|
| 124 |
|
|
@@ -184,9 +216,9 @@ def gradio_worker(
|
|
| 184 |
def show_user_input(msg, chatbot):
|
| 185 |
return "", chatbot + [[msg, None]]
|
| 186 |
|
| 187 |
-
def stream_model_output(img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality):
|
| 188 |
for queue in request_queues:
|
| 189 |
-
queue.put((img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality))
|
| 190 |
while True:
|
| 191 |
content_piece = response_queue.get()
|
| 192 |
chatbot[-1][1] = content_piece["text"]
|
|
@@ -293,10 +325,25 @@ def gradio_worker(
|
|
| 293 |
examples_per_page=3,
|
| 294 |
)
|
| 295 |
with gr.Tab('Depth Map') as depth_tab:
|
| 296 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
with gr.Tab('Normal Map') as normal_tab:
|
| 298 |
-
gr.
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
with gr.Column(scale=2):
|
| 301 |
chatbot = gr.Chatbot(elem_id="chatbot")
|
| 302 |
msg = gr.Textbox()
|
|
@@ -304,7 +351,7 @@ def gradio_worker(
|
|
| 304 |
with gr.Row():
|
| 305 |
submit_button = gr.Button("Submit", variant="primary")
|
| 306 |
undo_button = gr.Button("Undo")
|
| 307 |
-
clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path, point_path, fmri_path, point_vis])
|
| 308 |
with gr.Row():
|
| 309 |
max_gen_len = gr.Slider(
|
| 310 |
minimum=1, maximum=args.model_max_seq_len // 2,
|
|
@@ -325,16 +372,18 @@ def gradio_worker(
|
|
| 325 |
audio_tab.select(partial(change_modality, 'audio'), [], [modality])
|
| 326 |
point_tab.select(partial(change_modality, 'point'), [], [modality])
|
| 327 |
fmri_tab.select(partial(change_modality, 'fmri'), [], [modality])
|
|
|
|
|
|
|
| 328 |
|
| 329 |
msg.submit(
|
| 330 |
show_user_input, [msg, chatbot], [msg, chatbot],
|
| 331 |
).then(
|
| 332 |
-
stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
|
| 333 |
)
|
| 334 |
submit_button.click(
|
| 335 |
show_user_input, [msg, chatbot], [msg, chatbot],
|
| 336 |
).then(
|
| 337 |
-
stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
|
| 338 |
)
|
| 339 |
undo_button.click(undo, chatbot, chatbot)
|
| 340 |
# img_path.change(clear, [], [chatbot, msg])
|
|
|
|
| 25 |
from data.fintune_dataset import pc_norm
|
| 26 |
from functools import partial
|
| 27 |
import glob
|
| 28 |
+
import torchvision.transforms.functional as F
|
| 29 |
|
| 30 |
T_random_resized_crop = transforms.Compose([
|
| 31 |
transforms.RandomResizedCrop(size=(224, 224), scale=(0.9, 1.0), ratio=(0.75, 1.3333), interpolation=3,
|
|
|
|
| 33 |
transforms.ToTensor(),
|
| 34 |
transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
|
| 35 |
|
| 36 |
+
class PairRandomResizedCrop(transforms.RandomResizedCrop):
    """RandomResizedCrop variant that applies one shared crop to several images.

    The crop rectangle is sampled a single time, using the first image, and is
    then reused for every image passed in, so all outputs are cropped and
    resized identically.
    """

    def forward(self, imgs):
        """Crop and resize every image in ``imgs`` with the same rectangle.

        Args:
            imgs: Sequence of images. ``imgs[0]`` is handed to ``get_params``
                to sample the crop; presumably all images share one size
                (the caller is expected to guarantee this -- TODO confirm).

        Returns:
            A list holding one cropped-and-resized image per input, in order.
        """
        # Sample the rectangle once so every image receives the same crop.
        top, left, height, width = self.get_params(imgs[0], self.scale, self.ratio)

        def _crop(img):
            return F.resized_crop(
                img, top, left, height, width,
                self.size, self.interpolation, antialias=self.antialias,
            )

        return [_crop(img) for img in imgs]
|
| 40 |
+
|
| 41 |
+
class PairToTensor(transforms.ToTensor):
    """ToTensor variant that converts a sequence of images instead of one."""

    def __call__(self, pics):
        """Convert each element of ``pics`` with ``F.to_tensor``.

        Args:
            pics: Iterable of images accepted by ``F.to_tensor``.

        Returns:
            A list of the converted tensors, in input order.
        """
        return list(map(F.to_tensor, pics))
|
| 44 |
+
|
| 45 |
+
class PairNormalize(transforms.Normalize):
    """Normalize variant that applies the same mean/std to several tensors."""

    def forward(self, tensors):
        """Normalize each tensor in ``tensors`` with this transform's settings.

        Args:
            tensors: Iterable of image tensors accepted by ``F.normalize``.

        Returns:
            A list of the normalized tensors, in input order.
        """
        mean, std, inplace = self.mean, self.std, self.inplace
        return [F.normalize(item, mean, std, inplace) for item in tensors]
|
| 48 |
+
|
| 49 |
+
# Preprocessing pipeline for an image pair: one shared random-resized crop,
# then tensor conversion, then per-channel normalization. Every stage takes
# and returns a list of images so both members of the pair stay in step.
transform_pairimg_train = transforms.Compose([
    PairRandomResizedCrop(
        size=(224, 224),
        scale=(0.99, 1.0),
        ratio=(0.75, 1.3333),
        interpolation=3,  # 3 is bicubic
        antialias=None,
    ),
    PairToTensor(),
    PairNormalize(
        mean=[0.48145466, 0.4578275, 0.40821073],
        std=[0.26862954, 0.26130258, 0.27577711],
    ),
])
|
| 53 |
|
| 54 |
def load_audio(audio_path):
|
| 55 |
fbank = make_audio_features(audio_path, mel_bins=128)
|
|
|
|
| 72 |
data = torch.tensor(data[None])
|
| 73 |
return data
|
| 74 |
|
| 75 |
+
def load_rgbx(image_path, x_image_path):
    """Load an RGB image and its companion "X" image as one stacked tensor.

    Both files are opened, converted to RGB, and the companion image is
    resized to the RGB image's size. The pair is then run through
    ``transform_pairimg_train`` and stacked.

    Args:
        image_path: Path to the RGB image file.
        x_image_path: Path to the companion image file (the callers visible
            in this file pass a depth map or a normal map).

    Returns:
        The two transformed images stacked along a new leading dimension,
        i.e. a tensor laid out as ``[2, 3, H, W]``.
    """
    # ``convert`` loads the pixel data into a new image, so the file handles
    # can be released as soon as each ``with`` block exits.
    with Image.open(image_path) as rgb_file:
        image = rgb_file.convert('RGB')
    with Image.open(x_image_path) as x_file:
        x_image = x_file.convert('RGB')

    # PIL's ``size`` is already a (width, height) pair, so it is used directly.
    x_image = x_image.resize(image.size)

    image, x_image = transform_pairimg_train([image, x_image])

    # [2, 3, H, W]
    return torch.stack([image, x_image], dim=0)
|
| 85 |
+
|
| 86 |
def model_worker(
|
| 87 |
rank: int, args: argparse.Namespace, barrier: mp.Barrier,
|
| 88 |
request_queue: mp.Queue, response_queue: Optional[mp.Queue] = None,
|
|
|
|
| 135 |
barrier.wait()
|
| 136 |
|
| 137 |
while True:
|
| 138 |
+
img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get()
|
| 139 |
if 'image' in modality and img_path is not None:
|
| 140 |
image = Image.open(img_path).convert('RGB')
|
| 141 |
inputs = T_random_resized_crop(image)
|
|
|
|
| 147 |
inputs = load_point(point_path)
|
| 148 |
elif 'fmri' in modality and fmri_path is not None:
|
| 149 |
inputs = load_fmri(fmri_path)
|
| 150 |
+
elif 'rgbd' in modality and depth_path is not None and depth_rgb_path is not None:
|
| 151 |
+
inputs = load_rgbx(depth_rgb_path, depth_path)
|
| 152 |
+
elif 'rgbn' in modality and normal_path is not None and normal_rgb_path is not None:
|
| 153 |
+
inputs = load_rgbx(normal_rgb_path, normal_path)
|
| 154 |
else:
|
| 155 |
inputs = None
|
| 156 |
|
|
|
|
| 216 |
def show_user_input(msg, chatbot):
|
| 217 |
return "", chatbot + [[msg, None]]
|
| 218 |
|
| 219 |
+
def stream_model_output(img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality):
|
| 220 |
for queue in request_queues:
|
| 221 |
+
queue.put((img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality))
|
| 222 |
while True:
|
| 223 |
content_piece = response_queue.get()
|
| 224 |
chatbot[-1][1] = content_piece["text"]
|
|
|
|
| 325 |
examples_per_page=3,
|
| 326 |
)
|
| 327 |
with gr.Tab('Depth Map') as depth_tab:
|
| 328 |
+
depth_path = gr.Image(label='Depth Map', type='filepath')
|
| 329 |
+
depth_rgb_path = gr.Image(label='RGB Image', type='filepath')
|
| 330 |
+
gr.Examples(
|
| 331 |
+
examples=[
|
| 332 |
+
[rgb_image.replace('rgb', 'depth'), rgb_image]
|
| 333 |
+
for rgb_image in glob.glob("examples/depth_normal/rgb/*.png")[:9]
|
| 334 |
+
],
|
| 335 |
+
inputs=[depth_path, depth_rgb_path]
|
| 336 |
+
)
|
| 337 |
with gr.Tab('Normal Map') as normal_tab:
|
| 338 |
+
normal_path = gr.Image(label='Normal Map', type='filepath')
|
| 339 |
+
normal_rgb_path = gr.Image(label='RGB Image', type='filepath')
|
| 340 |
+
gr.Examples(
|
| 341 |
+
examples=[
|
| 342 |
+
[rgb_image.replace('rgb', 'normal'), rgb_image]
|
| 343 |
+
for rgb_image in glob.glob("examples/depth_normal/rgb/*.png")[-9:]
|
| 344 |
+
],
|
| 345 |
+
inputs=[normal_path, normal_rgb_path]
|
| 346 |
+
)
|
| 347 |
with gr.Column(scale=2):
|
| 348 |
chatbot = gr.Chatbot(elem_id="chatbot")
|
| 349 |
msg = gr.Textbox()
|
|
|
|
| 351 |
with gr.Row():
|
| 352 |
submit_button = gr.Button("Submit", variant="primary")
|
| 353 |
undo_button = gr.Button("Undo")
|
| 354 |
+
clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, point_vis])
|
| 355 |
with gr.Row():
|
| 356 |
max_gen_len = gr.Slider(
|
| 357 |
minimum=1, maximum=args.model_max_seq_len // 2,
|
|
|
|
| 372 |
audio_tab.select(partial(change_modality, 'audio'), [], [modality])
|
| 373 |
point_tab.select(partial(change_modality, 'point'), [], [modality])
|
| 374 |
fmri_tab.select(partial(change_modality, 'fmri'), [], [modality])
|
| 375 |
+
depth_tab.select(partial(change_modality, 'rgbd'), [], [modality])
|
| 376 |
+
normal_tab.select(partial(change_modality, 'rgbn'), [], [modality])
|
| 377 |
|
| 378 |
msg.submit(
|
| 379 |
show_user_input, [msg, chatbot], [msg, chatbot],
|
| 380 |
).then(
|
| 381 |
+
stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
|
| 382 |
)
|
| 383 |
submit_button.click(
|
| 384 |
show_user_input, [msg, chatbot], [msg, chatbot],
|
| 385 |
).then(
|
| 386 |
+
stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, depth_path, depth_rgb_path, normal_path, normal_rgb_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot,
|
| 387 |
)
|
| 388 |
undo_button.click(undo, chatbot, chatbot)
|
| 389 |
# img_path.change(clear, [], [chatbot, msg])
|
examples/depth_normal/depth/0036.png
ADDED
|
examples/depth_normal/depth/0125.png
ADDED
|
examples/depth_normal/depth/0166.png
ADDED
|
examples/depth_normal/depth/0168.png
ADDED
|
examples/depth_normal/depth/0211.png
ADDED
|
examples/depth_normal/depth/0278.png
ADDED
|
examples/depth_normal/depth/0282.png
ADDED
|
examples/depth_normal/depth/0331.png
ADDED
|
examples/depth_normal/depth/0384.png
ADDED
|
examples/depth_normal/depth/0432.png
ADDED
|
examples/depth_normal/depth/0444.png
ADDED
|
examples/depth_normal/depth/0475.png
ADDED
|
examples/depth_normal/depth/0476.png
ADDED
|
examples/depth_normal/depth/0517.png
ADDED
|
examples/depth_normal/depth/0523.png
ADDED
|
examples/depth_normal/depth/0524.png
ADDED
|
examples/depth_normal/depth/0536.png
ADDED
|
examples/depth_normal/depth/0561.png
ADDED
|
examples/depth_normal/depth/0565.png
ADDED
|
examples/depth_normal/depth/0590.png
ADDED
|
examples/depth_normal/depth/0618.png
ADDED
|
examples/depth_normal/depth/0716.png
ADDED
|
examples/depth_normal/depth/0724.png
ADDED
|
examples/depth_normal/depth/0758.png
ADDED
|
examples/depth_normal/depth/0759.png
ADDED
|
examples/depth_normal/depth/0767.png
ADDED
|
examples/depth_normal/depth/0840.png
ADDED
|
examples/depth_normal/depth/0849.png
ADDED
|
examples/depth_normal/depth/0857.png
ADDED
|
examples/depth_normal/depth/0870.png
ADDED
|
examples/depth_normal/depth/0905.png
ADDED
|
examples/depth_normal/depth/0993.png
ADDED
|
examples/depth_normal/depth/1038.png
ADDED
|
examples/depth_normal/depth/1074.png
ADDED
|
examples/depth_normal/depth/1099.png
ADDED
|
examples/depth_normal/depth/1101.png
ADDED
|
examples/depth_normal/depth/1146.png
ADDED
|
examples/depth_normal/depth/1148.png
ADDED
|
examples/depth_normal/depth/1165.png
ADDED
|
examples/depth_normal/depth/1173.png
ADDED
|
examples/depth_normal/depth/1193.png
ADDED
|
examples/depth_normal/depth/1225.png
ADDED
|
examples/depth_normal/depth/1257.png
ADDED
|
examples/depth_normal/depth/1291.png
ADDED
|
examples/depth_normal/depth/1294.png
ADDED
|
examples/depth_normal/depth/1346.png
ADDED
|
examples/depth_normal/depth/1389.png
ADDED
|
examples/depth_normal/depth/1398.png
ADDED
|
examples/depth_normal/depth/1407.png
ADDED
|