Vo Minh Vu committed
Commit 3ed799f · 1 Parent(s): d661d73

convert into fastapi
Files changed (2)
  1. app.py +163 -346
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,123 +1,57 @@
-# ————————————————————————————————————————————————————————————————————
-# Monkey-patch for diffusers<=0.19.3 which still does
-#     from huggingface_hub import cached_download
-#
-# New HF-Hub versions (>=0.14.0) removed cached_download, so we alias it.
-# This must appear before any imports of diffusers / transformers.
-# ————————————————————————————————————————————————————————————————————
 import huggingface_hub
 huggingface_hub.cached_download = huggingface_hub.hf_hub_download
-
-
-import spaces
 
 import os
-import imageio
 import numpy as np
 import torch
-import rembg
 from PIL import Image
 from torchvision.transforms import v2
 from pytorch_lightning import seed_everything
 from omegaconf import OmegaConf
-from einops import rearrange, repeat
-from tqdm import tqdm
 from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
 
 from src.utils.train_util import instantiate_from_config
 from src.utils.camera_util import (
-    FOV_to_intrinsics,
     get_zero123plus_input_cameras,
     get_circular_camera_poses,
 )
 from src.utils.mesh_util import save_obj, save_glb
-from src.utils.infer_util import remove_background, resize_foreground, images_to_video
-
-import tempfile
-from functools import partial
-
-from huggingface_hub import hf_hub_download
-
-import gradio as gr
-
-
-def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
-    """
-    Get the rendering camera parameters.
-    """
-    c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
-    if is_flexicubes:
-        cameras = torch.linalg.inv(c2ws)
-        cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
-    else:
-        extrinsics = c2ws.flatten(-2)
-        intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
-        cameras = torch.cat([extrinsics, intrinsics], dim=-1)
-        cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
-    return cameras
-
-
-def images_to_video(images, output_path, fps=30):
-    # images: (N, C, H, W)
-    os.makedirs(os.path.dirname(output_path), exist_ok=True)
-    frames = []
-    for i in range(images.shape[0]):
-        frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8).clip(0, 255)
-        assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
-            f"Frame shape mismatch: {frame.shape} vs {images.shape}"
-        assert frame.min() >= 0 and frame.max() <= 255, \
-            f"Frame value out of range: {frame.min()} ~ {frame.max()}"
-        frames.append(frame)
-    imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264')
-
 
-###############################################################################
-# Configuration.
-###############################################################################
-
-import shutil
-
-def find_cuda():
-    # Check if CUDA_HOME or CUDA_PATH environment variables are set
-    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
-
-    if cuda_home and os.path.exists(cuda_home):
-        return cuda_home
-
-    # Search for the nvcc executable in the system's PATH
-    nvcc_path = shutil.which('nvcc')
-
-    if nvcc_path:
-        # Remove the 'bin/nvcc' part to get the CUDA installation path
-        cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
-        return cuda_path
-
-    return None
-
-cuda_path = find_cuda()
-
-if cuda_path:
-    print(f"CUDA installation found at: {cuda_path}")
-else:
-    print("CUDA installation not found")
 
 config_path = 'configs/instant-mesh-large.yaml'
 config = OmegaConf.load(config_path)
-config_name = os.path.basename(config_path).replace('.yaml', '')
 model_config = config.model_config
 infer_config = config.infer_config
 
-IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
-
-# pick GPU if available, else CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 if device.type == 'cpu':
-    print("⚠️ No CUDA GPU detected. Falling back to CPU (this will be very slow!)")
 
-# load diffusion model
-print('Loading diffusion model ...')
 pipeline = DiffusionPipeline.from_pretrained(
-    "sudo-ai/zero123plus-v1.2",
     custom_pipeline="zero123plus",
     torch_dtype=torch.float16,
 )
@@ -125,280 +59,163 @@ pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
     pipeline.scheduler.config, timestep_spacing='trailing'
 )
 
-# load custom white-background UNet
-unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
-state_dict = torch.load(unet_ckpt_path, map_location='cpu')
-pipeline.unet.load_state_dict(state_dict, strict=True)
-
 pipeline = pipeline.to(device)
 
-# load reconstruction model
-print('Loading reconstruction model ...')
-model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_mesh_large.ckpt", repo_type="model")
 model = instantiate_from_config(model_config)
-state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
-state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
-model.load_state_dict(state_dict, strict=True)
-
-model = model.to(device)
-
-print('Loading Finished!')
-
-
-def check_input_image(input_image):
-    if input_image is None:
-        raise gr.Error("No image uploaded!")
-
 
-def preprocess(input_image, do_remove_background):
-    rembg_session = rembg.new_session() if do_remove_background else None
     if do_remove_background:
-        input_image = remove_background(input_image, rembg_session)
         input_image = resize_foreground(input_image, 0.85)
-
     return input_image
 
-
-@spaces.GPU
-def generate_mvs(input_image, sample_steps, sample_seed):
-
     seed_everything(sample_seed)
-
-    # sampling
-    z123_image = pipeline(
-        input_image,
-        num_inference_steps=sample_steps
-    ).images[0]
-
-    show_image = np.asarray(z123_image, dtype=np.uint8)
-    show_image = torch.from_numpy(show_image)     # (960, 640, 3)
-    show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
-    show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
-    show_image = Image.fromarray(show_image.numpy())
-
-    return z123_image, show_image
-
-
-@spaces.GPU
-def make3d(images):
-
-    global model
     if IS_FLEXICUBES:
         model.init_flexicubes_geometry(device, use_renderer=False)
-    model = model.eval()
 
-    images = np.asarray(images, dtype=np.float32) / 255.0
-    images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()     # (3, 960, 640)
-    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)        # (6, 3, 320, 320)
 
-    input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)
-    render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
 
-    images = images.unsqueeze(0).to(device)
-    images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
 
-    mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
-    print(mesh_fpath)
-    mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
-    mesh_dirname = os.path.dirname(mesh_fpath)
-    video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4")
-    mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")
 
     with torch.no_grad():
-        # get triplane
-        planes = model.forward_planes(images, input_cameras)
-
-        # # get video
-        # chunk_size = 20 if IS_FLEXICUBES else 1
-        # render_size = 384
-
-        # frames = []
-        # for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
-        #     if IS_FLEXICUBES:
-        #         frame = model.forward_geometry(
-        #             planes,
-        #             render_cameras[:, i:i+chunk_size],
-        #             render_size=render_size,
-        #         )['img']
-        #     else:
-        #         frame = model.synthesizer(
-        #             planes,
-        #             cameras=render_cameras[:, i:i+chunk_size],
-        #             render_size=render_size,
-        #         )['images_rgb']
-        #     frames.append(frame)
-        # frames = torch.cat(frames, dim=1)
-
-        # images_to_video(
-        #     frames[0],
-        #     video_fpath,
-        #     fps=30,
-        # )
-
-        # print(f"Video saved to {video_fpath}")
-
-        # get mesh
-        mesh_out = model.extract_mesh(
-            planes,
-            use_texture_map=False,
-            **infer_config,
         )
-
-    vertices, faces, vertex_colors = mesh_out
-    vertices = vertices[:, [1, 2, 0]]
-
-    save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
-    save_obj(vertices, faces, vertex_colors, mesh_fpath)
-
-    print(f"Mesh saved to {mesh_fpath}")
-
-    return mesh_fpath, mesh_glb_fpath
-
-
-_HEADER_ = '''
-<h2><b>Official 🤗 Gradio Demo</b></h2><h2><a href='https://github.com/TencentARC/InstantMesh' target='_blank'><b>InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models</b></a></h2>
-
-**InstantMesh** is a feed-forward framework for efficient 3D mesh generation from a single image based on the LRM/Instant3D architecture.
-
-Code: <a href='https://github.com/TencentARC/InstantMesh' target='_blank'>GitHub</a>. Technical report: <a href='https://arxiv.org/abs/2404.07191' target='_blank'>ArXiv</a>.
-
-❗️❗️❗️**Important Notes:**
-- Our demo can export a .obj mesh with vertex colors or a .glb mesh now. If you prefer to export a .obj mesh with a **texture map**, please refer to our <a href='https://github.com/TencentARC/InstantMesh?tab=readme-ov-file#running-with-command-line' target='_blank'>Github Repo</a>.
-- The 3D mesh generation results highly depend on the quality of generated multi-view images. Please try a different **seed value** if the result is unsatisfying (Default: 42).
-'''
-
-_CITE_ = r"""
-If InstantMesh is helpful, please help to ⭐ the <a href='https://github.com/TencentARC/InstantMesh' target='_blank'>Github Repo</a>. Thanks! [![GitHub Stars](https://img.shields.io/github/stars/TencentARC/InstantMesh?style=social)](https://github.com/TencentARC/InstantMesh)
----
-📝 **Citation**
-
-If you find our work useful for your research or applications, please cite using this bibtex:
-```bibtex
-@article{xu2024instantmesh,
-  title={InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models},
-  author={Xu, Jiale and Cheng, Weihao and Gao, Yiming and Wang, Xintao and Gao, Shenghua and Shan, Ying},
-  journal={arXiv preprint arXiv:2404.07191},
-  year={2024}
-}
-```
-
-📋 **License**
-
-Apache-2.0 LICENSE. Please refer to the [LICENSE file](https://huggingface.co/spaces/TencentARC/InstantMesh/blob/main/LICENSE) for details.
-
-📧 **Contact**
-
-If you have any questions, feel free to open a discussion or contact us at <b>bluestyle928@gmail.com</b>.
-"""
-
-
-with gr.Blocks() as demo:
-    gr.Markdown(_HEADER_)
-    with gr.Row(variant="panel"):
-        with gr.Column():
-            with gr.Row():
-                input_image = gr.Image(
-                    label="Input Image",
-                    image_mode="RGBA",
-                    sources="upload",
-                    #width=256,
-                    #height=256,
-                    type="pil",
-                    elem_id="content_image",
-                )
-                processed_image = gr.Image(
-                    label="Processed Image",
-                    image_mode="RGBA",
-                    #width=256,
-                    #height=256,
-                    type="pil",
-                    interactive=False
-                )
-            with gr.Row():
-                with gr.Group():
-                    do_remove_background = gr.Checkbox(
-                        label="Remove Background", value=True
-                    )
-                    sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
-
-                    sample_steps = gr.Slider(
-                        label="Sample Steps",
-                        minimum=30,
-                        maximum=75,
-                        value=75,
-                        step=5
-                    )
-
-            with gr.Row():
-                submit = gr.Button("Generate", elem_id="generate", variant="primary")
-
-            with gr.Row(variant="panel"):
-                gr.Examples(
-                    examples=[
-                        os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
-                    ],
-                    inputs=[input_image],
-                    label="Examples",
-                    cache_examples=False,
-                    examples_per_page=16
-                )
-
-        with gr.Column():
-
-            with gr.Row():
-
-                with gr.Column():
-                    mv_show_images = gr.Image(
-                        label="Generated Multi-views",
-                        type="pil",
-                        width=379,
-                        interactive=False
-                    )
-
-                # with gr.Column():
-                #     output_video = gr.Video(
-                #         label="video", format="mp4",
-                #         width=379,
-                #         autoplay=True,
-                #         interactive=False
-                #     )
-
-            with gr.Row():
-                with gr.Tab("OBJ"):
-                    output_model_obj = gr.Model3D(
-                        label="Output Model (OBJ Format)",
-                        interactive=False,
-                    )
-                    gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
-                with gr.Tab("GLB"):
-                    output_model_glb = gr.Model3D(
-                        label="Output Model (GLB Format)",
-                        interactive=False,
-                    )
-                    gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
-
-    with gr.Row():
-        gr.Markdown('''Try a different <b>seed value</b> if the result is unsatisfying (Default: 42).''')
-
-    gr.Markdown(_CITE_)
-
-    mv_images = gr.State()
-
-    submit.click(fn=check_input_image, inputs=[input_image]).success(
-        fn=preprocess,
-        inputs=[input_image, do_remove_background],
-        outputs=[processed_image],
-    ).success(
-        fn=generate_mvs,
-        inputs=[processed_image, sample_steps, sample_seed],
-        outputs=[mv_images, mv_show_images]
-
-    ).success(
-        fn=make3d,
-        inputs=[mv_images],
-        outputs=[output_model_obj, output_model_glb]
     )
 
-demo.launch()
 import huggingface_hub
 huggingface_hub.cached_download = huggingface_hub.hf_hub_download
+from huggingface_hub import hf_hub_download
 
 import os
+import io
+import base64
+import tempfile
 import numpy as np
 import torch
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.responses import JSONResponse
 from PIL import Image
 from torchvision.transforms import v2
 from pytorch_lightning import seed_everything
 from omegaconf import OmegaConf
+from einops import rearrange
 from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
 
+# Monkey-patch for diffusers<=0.19.3 which still does
+#     from huggingface_hub import cached_download
+# New HF-Hub versions (>=0.14.0) removed cached_download, so we alias it.
+
+
+# your util functions & model loaders
 from src.utils.train_util import instantiate_from_config
 from src.utils.camera_util import (
+    FOV_to_intrinsics,
     get_zero123plus_input_cameras,
     get_circular_camera_poses,
 )
 from src.utils.mesh_util import save_obj, save_glb
+from src.utils.infer_util import remove_background, resize_foreground
 
+# —————————————————————————————————————————————————————————————
+# 1) CONFIGURATION & MODEL LOADING
+# —————————————————————————————————————————————————————————————
 
+# Load our YAML config
 config_path = 'configs/instant-mesh-large.yaml'
 config = OmegaConf.load(config_path)
 model_config = config.model_config
 infer_config = config.infer_config
+IS_FLEXICUBES = os.path.basename(config_path).startswith('instant-mesh')
 
+# pick device
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 if device.type == 'cpu':
+    print("⚠️ No CUDA GPU detected. Falling back to CPU (very slow!)")
 
+# ——— Load diffusion (Zero123) pipeline ———
+print("Loading diffusion model …")
 pipeline = DiffusionPipeline.from_pretrained(
+    "sudo-ai/zero123plus-v1.2",
     custom_pipeline="zero123plus",
     torch_dtype=torch.float16,
 )
 pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
     pipeline.scheduler.config, timestep_spacing='trailing'
 )
 
+# patch UNet to white-background version
+unet_ckpt = hf_hub_download(
+    repo_id="TencentARC/InstantMesh",
+    filename="diffusion_pytorch_model.bin",
+    repo_type="model",
+)
+sd = torch.load(unet_ckpt, map_location='cpu')
+pipeline.unet.load_state_dict(sd, strict=True)
 pipeline = pipeline.to(device)
 
+# ——— Load reconstruction (InstantMesh) model ———
+print("Loading reconstruction model …")
+model_ckpt = hf_hub_download(
+    repo_id="TencentARC/InstantMesh",
+    filename="instant_mesh_large.ckpt",
+    repo_type="model",
+)
 model = instantiate_from_config(model_config)
+full_sd = torch.load(model_ckpt, map_location='cpu')['state_dict']
+# strip the "lrm_generator." prefix & unwanted keys
+sd = {
+    k[len("lrm_generator."):]: v
+    for k, v in full_sd.items()
+    if k.startswith("lrm_generator.") and "source_camera" not in k
+}
+model.load_state_dict(sd, strict=True)
+model = model.to(device).eval()
+print("Models loaded ✅")
 
+# —————————————————————————————————————————————————————————————
+# 2) HELPERS & INFERENCE LOGIC
+# —————————————————————————————————————————————————————————————
 
+def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
+    c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
+    if is_flexicubes:
+        cams = torch.linalg.inv(c2ws)
+        return cams.unsqueeze(0).repeat(batch_size, 1, 1, 1)
+    else:
+        ext = c2ws.flatten(-2)
+        intr = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
+        cams = torch.cat([ext, intr], dim=-1)
+        return cams.unsqueeze(0).repeat(batch_size, 1, 1)
 
+def preprocess(input_image: Image.Image, do_remove_background: bool):
+    rembg_sess = None
     if do_remove_background:
+        # lazy import: rembg is only needed when background removal is on
+        rembg_sess = __import__("rembg").new_session()
+        input_image = remove_background(input_image, rembg_sess)
         input_image = resize_foreground(input_image, 0.85)
     return input_image
 
+def generate_mvs(
+    input_image: Image.Image, sample_steps: int, sample_seed: int
+) -> tuple[Image.Image, Image.Image]:
+    """Return (raw_multi_view, preview_image)."""
     seed_everything(sample_seed)
+    out = pipeline(input_image, num_inference_steps=sample_steps)
+    mv = out.images[0]  # PIL image, 960×640×3
+
+    # create a tiled preview (re-tile the 3×2 grid of views as 2×3)
+    arr = np.asarray(mv, dtype=np.uint8)
+    t = torch.from_numpy(arr)
+    t = rearrange(t, "(n h) (m w) c -> (n m) h w c", n=3, m=2)
+    t = rearrange(t, "(n m) h w c -> (n h) (m w) c", n=2, m=3)
+    preview = Image.fromarray(t.numpy())
+    return mv, preview
+
+def make3d(
+    mv: Image.Image,
+) -> tuple[str, str]:
+    """Return (path_to_obj, path_to_glb)."""
+    # initialize flexicubes if needed
     if IS_FLEXICUBES:
         model.init_flexicubes_geometry(device, use_renderer=False)
 
+    # normalize & reshape
+    imgs = np.asarray(mv, dtype=np.float32) / 255.0
+    t = torch.from_numpy(imgs).permute(2, 0, 1).contiguous().float()
+    t = rearrange(t, "c (n h) (m w) -> (n m) c h w", n=3, m=2)
 
+    cam_in = get_zero123plus_input_cameras(1, radius=4.0).to(device)
+    # currently unused: the video-rendering path was dropped in this port
+    cam_render = get_render_cameras(
+        1, radius=2.5, is_flexicubes=IS_FLEXICUBES
+    ).to(device)
 
+    t = t.unsqueeze(0).to(device)
+    t = v2.functional.resize(t, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
 
+    # temp file names
+    obj_f = tempfile.NamedTemporaryFile(suffix=".obj", delete=False).name
+    base = os.path.splitext(obj_f)[0]
+    glb_f = base + ".glb"
 
     with torch.no_grad():
+        planes = model.forward_planes(t, cam_in)
+        mesh = model.extract_mesh(
+            planes, use_texture_map=False, **infer_config
        )
+    verts, faces, colors = mesh
+    verts = verts[:, [1, 2, 0]]
+    save_obj(verts, faces, colors, obj_f)
+    save_glb(verts, faces, colors, glb_f)
+
+    return obj_f, glb_f
+
+def _pil_to_b64(img: Image.Image, fmt: str = "PNG") -> str:
+    buf = io.BytesIO()
+    img.save(buf, fmt)
+    return base64.b64encode(buf.getvalue()).decode()
+
+# —————————————————————————————————————————————————————————————
+# 3) FASTAPI APP
+# —————————————————————————————————————————————————————————————
+
+app = FastAPI(title="InstantMesh FastAPI Demo")
+
+@app.post("/infer")
+async def infer(
+    file: UploadFile = File(...),
+    remove_background: bool = Form(True),
+    sample_steps: int = Form(75, ge=1, le=100),
+    sample_seed: int = Form(42),
+):
+    # 1) load the RGBA image
+    data = await file.read()
+    try:
+        img = Image.open(io.BytesIO(data)).convert("RGBA")
+    except Exception:
+        raise HTTPException(400, detail="Invalid image")
+
+    # 2) run through pipeline
+    proc = preprocess(img, remove_background)
+    mv_raw, mv_preview = generate_mvs(proc, sample_steps, sample_seed)
+    obj_path, glb_path = make3d(mv_raw)
+
+    # 3) read back the mesh bytes
+    with open(obj_path, "rb") as f:
+        obj_b = f.read()
+    with open(glb_path, "rb") as f:
+        glb_b = f.read()
+
+    return JSONResponse(
+        {
+            "preview_png": _pil_to_b64(mv_preview),
+            "multi_views_png": _pil_to_b64(mv_raw),
+            "obj_data_b64": base64.b64encode(obj_b).decode(),
+            "glb_data_b64": base64.b64encode(glb_b).decode(),
+        }
     )
 
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(
+        "app:app",  # this module is app.py, so the import string is "app:app"
+        host="0.0.0.0",
+        port=int(os.environ.get("PORT", 8000)),
+        reload=True,
+    )
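
For reference, a minimal client sketch for the new `/infer` endpoint. The host, port, and `input.png` path are assumptions; the form fields and response keys mirror the handler above.

```python
# Hypothetical client for the /infer endpoint; assumes the server is running
# locally, e.g. started with `uvicorn app:app --host 0.0.0.0 --port 8000`.
import base64

import requests

with open("input.png", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/infer",
        files={"file": ("input.png", f, "image/png")},
        data={"remove_background": "true", "sample_steps": 75, "sample_seed": 42},
        timeout=600,  # generation can take minutes, especially on CPU
    )
resp.raise_for_status()
out = resp.json()

# Every artifact comes back base64-encoded inside one JSON body.
for key, path in [
    ("preview_png", "preview.png"),
    ("multi_views_png", "multi_views.png"),
    ("obj_data_b64", "mesh.obj"),
    ("glb_data_b64", "mesh.glb"),
]:
    with open(path, "wb") as out_f:
        out_f.write(base64.b64decode(out[key]))
```

Because the endpoint inlines the meshes as base64, responses can reach tens of megabytes; serving the files via `FileResponse` or download URLs would be a natural refinement if that becomes a problem.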
requirements.txt CHANGED
@@ -21,4 +21,5 @@ plyfile
 xformers==0.0.22.post7
 git+https://github.com/NVlabs/nvdiffrast/
 huggingface-hub
-onnxruntime
+onnxruntime
+fastapi
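
As committed, `fastapi` alone is not enough to serve the new app: the `__main__` block imports `uvicorn`, and FastAPI requires the `python-multipart` package to parse `UploadFile`/`Form` fields. A plausible follow-up addition to requirements.txt (not part of this commit):

```
uvicorn
python-multipart
```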