bhatanerohan committed on
Commit
678e148
·
verified ·
1 Parent(s): 78c14df

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +532 -0
app.py ADDED
@@ -0,0 +1,532 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text-to-3D Pipeline with Editing: Gemini + SAM-3D
3
+ MCP Server + Gradio UI for MCP Hackathon
4
+ """
5
+
6
+ import os
7
+ import io
8
+ import json
9
+ import tempfile
10
+ import gradio as gr
11
+ from google import genai
12
+ from google.genai import types
13
+ from PIL import Image
14
+ import modal
15
+
16
# Shared Gemini client; stays None until init_gemini() succeeds.
client = None


def init_gemini():
    """Create the module-level Gemini client from the GEMINI_API_KEY env var.

    Returns:
        True if a non-empty GEMINI_API_KEY was found and ``client`` was
        created; False otherwise, in which case ``client`` is left unchanged.
    """
    global client
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return False
    # Hand the key to the SDK explicitly. The previous code wrote the value
    # straight back into os.environ (a no-op) and relied on the SDK's own
    # environment lookup.
    client = genai.Client(api_key=api_key)
    return True
27
+
28
def image_to_bytes(image):
    """Encode an image as PNG and return the raw bytes.

    Args:
        image: Object exposing ``save(fp, format=...)`` (a PIL Image here).

    Returns:
        The PNG-encoded content as ``bytes``.
    """
    with io.BytesIO() as png_stream:
        image.save(png_stream, format='PNG')
        return png_stream.getvalue()
33
+
34
def run_sam3d(image, mask):
    """Run SAM-3D reconstruction on Modal for an image/mask pair.

    Args:
        image: PIL image of the object; converted to RGB before upload.
        mask: PIL image used as the mask; sent as-is.

    Returns:
        Tuple ``(ply_bytes, glb_bytes)`` as returned by the remote
        ``reconstruct`` call.
    """
    # Both inputs are shipped to the remote function as PNG-encoded bytes.
    rgb_png = image_to_bytes(image.convert("RGB"))
    mask_png = image_to_bytes(mask)

    # Look up the deployed Modal class by app name and class name.
    sam3d_cls = modal.Cls.from_name("sam3d-objects-inference", "SAM3DModel")
    remote_model = sam3d_cls()
    result = remote_model.reconstruct.remote(rgb_png, mask_png)

    ply_bytes, glb_bytes = result
    return ply_bytes, glb_bytes
44
+
45
+
46
+ # ============================================================
47
+ # MCP TOOLS - These functions are exposed as MCP tools
48
+ # ============================================================
49
+
50
def generate_3d_model(prompt: str) -> str:
    """
    Generate a 3D model from a text description.

    Steps: generate an image with Gemini, ask Gemini to remove the
    background, derive a grayscale mask from that result, run SAM-3D, and
    write every artifact into a fresh temporary directory.

    Args:
        prompt: Text description of the object to generate (e.g., "a red sports car", "a wooden chair")

    Returns:
        JSON string with paths to generated files, or a JSON object with a
        single "error" key if any step fails
    """
    if not client:
        if not init_gemini():
            return json.dumps({"error": "GEMINI_API_KEY not configured"})

    try:
        # STEP 1: Generate image
        initial_prompt = f"{prompt}, three-quarter front view angle, natural daylight, soft shadows showing depth and contours, clean simple background, full object visible, photorealistic"

        response_gen = client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[initial_prompt],
        )

        initial_image = None
        # `parts` may be None when the response carries no content (e.g. a
        # blocked request); treat that as "no image" instead of letting the
        # loop raise a TypeError.
        for part in response_gen.parts or []:
            if part.inline_data:
                image_bytes = part.inline_data.data
                initial_image = Image.open(io.BytesIO(image_bytes))
                break

        if initial_image is None:
            return json.dumps({"error": "Image generation failed"})

        # STEP 2: Remove background
        edit_prompt = "Remove the background completely, make the background transparent. Preserve the object's shadow for realism."
        image_part = types.Part.from_bytes(
            data=image_to_bytes(initial_image),
            mime_type="image/png"
        )

        response_edit = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=[edit_prompt, image_part],
        )

        final_image = None
        for part in response_edit.parts or []:
            if part.inline_data:
                edited_bytes = part.inline_data.data
                final_image = Image.open(io.BytesIO(edited_bytes))
                break

        if final_image is None:
            return json.dumps({"error": "Background removal failed"})

        # STEP 3: Create grayscale mask
        gray = final_image.convert("L")

        # STEP 4: Run SAM-3D
        ply_bytes, glb_bytes = run_sam3d(final_image, gray)

        # Save all outputs. The directory is intentionally not removed here:
        # the returned JSON hands its file paths back to the caller.
        temp_dir = tempfile.mkdtemp()

        original_path = os.path.join(temp_dir, "original.png")
        nobg_path = os.path.join(temp_dir, "transparent.png")
        mask_path = os.path.join(temp_dir, "mask.png")
        ply_path = os.path.join(temp_dir, "model.ply")

        initial_image.save(original_path)
        final_image.save(nobg_path)
        gray.save(mask_path)

        with open(ply_path, 'wb') as f:
            f.write(ply_bytes)

        # The GLB output is optional; "glb_model" stays None when absent.
        glb_path = None
        if glb_bytes:
            glb_path = os.path.join(temp_dir, "model.glb")
            with open(glb_path, 'wb') as f:
                f.write(glb_bytes)

        return json.dumps({
            "success": True,
            "prompt": prompt,
            "original_image": original_path,
            "transparent_image": nobg_path,
            "mask_image": mask_path,
            "ply_model": ply_path,
            "glb_model": glb_path,
            "message": f"Successfully generated 3D model for: {prompt}"
        })

    except Exception as e:
        # Tool boundary: report any failure to the caller as JSON rather
        # than propagating the exception.
        return json.dumps({"error": str(e)})
145
+
146
+
147
def edit_3d_model(edit_prompt: str, transparent_image_path: str) -> str:
    """
    Edit an existing 3D model by modifying its transparent image and regenerating.

    The image at ``transparent_image_path`` is sent to Gemini together with
    the edit instruction; the edited image is then masked and passed through
    SAM-3D, and the results are written to a fresh temporary directory.

    Args:
        edit_prompt: Description of the edit to apply (e.g., "remove the wings", "change color to blue")
        transparent_image_path: Path to the transparent PNG image from a previous generation

    Returns:
        JSON string with paths to the new edited files, or a JSON object
        with a single "error" key if any step fails
    """
    if not client:
        if not init_gemini():
            return json.dumps({"error": "GEMINI_API_KEY not configured"})

    try:
        # NOTE(review): the path is supplied by the tool caller and opened
        # as-is; consider restricting it to directories this app created.
        # The context manager closes the underlying file once the image has
        # been re-encoded, instead of leaving the handle open.
        with Image.open(transparent_image_path) as current_image:
            current_png = image_to_bytes(current_image)

        image_part = types.Part.from_bytes(
            data=current_png,
            mime_type="image/png"
        )

        full_edit_prompt = f"{edit_prompt}. Keep the background transparent. Maintain image quality and lighting."

        response_edit = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=[full_edit_prompt, image_part],
        )

        edited_image = None
        # `parts` may be None when the response carries no content; treat
        # that as "no image" instead of letting the loop raise a TypeError.
        for part in response_edit.parts or []:
            if part.inline_data:
                edited_bytes = part.inline_data.data
                edited_image = Image.open(io.BytesIO(edited_bytes))
                break

        if edited_image is None:
            return json.dumps({"error": "Edit failed"})

        gray = edited_image.convert("L")
        ply_bytes, glb_bytes = run_sam3d(edited_image, gray)

        # Not cleaned up here: the returned JSON hands these paths back.
        temp_dir = tempfile.mkdtemp()

        nobg_path = os.path.join(temp_dir, "edited.png")
        mask_path = os.path.join(temp_dir, "mask.png")
        ply_path = os.path.join(temp_dir, "model.ply")

        edited_image.save(nobg_path)
        gray.save(mask_path)

        with open(ply_path, 'wb') as f:
            f.write(ply_bytes)

        # The GLB output is optional; "glb_model" stays None when absent.
        glb_path = None
        if glb_bytes:
            glb_path = os.path.join(temp_dir, "model.glb")
            with open(glb_path, 'wb') as f:
                f.write(glb_bytes)

        return json.dumps({
            "success": True,
            "edit_prompt": edit_prompt,
            "transparent_image": nobg_path,
            "mask_image": mask_path,
            "ply_model": ply_path,
            "glb_model": glb_path,
            "message": f"Successfully applied edit: {edit_prompt}"
        })

    except Exception as e:
        # Tool boundary: report any failure to the caller as JSON rather
        # than propagating the exception.
        return json.dumps({"error": str(e)})
220
+
221
+
222
+ # ============================================================
223
+ # GRADIO UI FUNCTIONS
224
+ # ============================================================
225
+
226
def generate_3d_ui(prompt, progress=gr.Progress()):
    """UI wrapper with progress updates.

    Runs the text -> image -> background removal -> mask -> SAM-3D pipeline
    and reports each stage through ``progress``.

    Args:
        prompt: Text description of the object to generate.
        progress: Gradio progress tracker.

    Returns:
        Tuple of (original image path, transparent image path, mask path,
        path for the 3D viewer (GLB if available, else PLY), GLB path or
        None, PLY path, transparent PIL image for later edits, edit count 1).

    Raises:
        gr.Error: If the API key is missing or any pipeline stage fails.
    """
    if not client:
        if not init_gemini():
            raise gr.Error("GEMINI_API_KEY not set in Space secrets")

    progress(0.1, desc="Generating image...")

    initial_prompt = f"{prompt}, three-quarter front view angle, natural daylight, soft shadows showing depth and contours, clean simple background, full object visible, photorealistic"

    try:
        response_gen = client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[initial_prompt],
        )

        initial_image = None
        # `parts` may be None when the response carries no content.
        for part in response_gen.parts or []:
            if part.inline_data:
                image_bytes = part.inline_data.data
                initial_image = Image.open(io.BytesIO(image_bytes))
                break

        if initial_image is None:
            raise gr.Error("Image generation failed")

    except gr.Error:
        # Already a user-facing error; re-raise untouched so the message is
        # not wrapped a second time by the generic handler below.
        raise
    except Exception as e:
        raise gr.Error(f"Image generation failed: {e}") from e

    progress(0.3, desc="Removing background...")

    try:
        image_part = types.Part.from_bytes(
            data=image_to_bytes(initial_image),
            mime_type="image/png"
        )

        response_edit = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=["Remove the background completely, make the background transparent. Preserve the object's shadow for realism.", image_part],
        )

        final_image = None
        for part in response_edit.parts or []:
            if part.inline_data:
                edited_bytes = part.inline_data.data
                final_image = Image.open(io.BytesIO(edited_bytes))
                break

        if final_image is None:
            raise gr.Error("Background removal failed")

    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"Background removal failed: {e}") from e

    progress(0.4, desc="Creating mask...")
    gray = final_image.convert("L")

    progress(0.5, desc="Running SAM-3D (1-2 min, first run may take longer)...")

    try:
        ply_bytes, glb_bytes = run_sam3d(final_image, gray)
    except Exception as e:
        raise gr.Error(f"SAM-3D failed: {e}") from e

    progress(0.9, desc="Saving outputs...")

    # Kept on disk after returning: the UI components display these paths.
    temp_dir = tempfile.mkdtemp()

    original_path = os.path.join(temp_dir, "original.png")
    nobg_path = os.path.join(temp_dir, "no_background.png")
    mask_path = os.path.join(temp_dir, "mask.png")
    ply_path = os.path.join(temp_dir, "model.ply")

    initial_image.save(original_path)
    final_image.save(nobg_path)
    gray.save(mask_path)

    with open(ply_path, 'wb') as f:
        f.write(ply_bytes)

    # The GLB output is optional.
    glb_path = None
    if glb_bytes:
        glb_path = os.path.join(temp_dir, "model.glb")
        with open(glb_path, 'wb') as f:
            f.write(glb_bytes)

    progress(1.0, desc="Done!")

    return (
        original_path,
        nobg_path,
        mask_path,
        glb_path if glb_path else ply_path,  # viewer falls back to the PLY
        glb_path,
        ply_path,
        final_image,
        1,
    )
325
+
326
+
327
def edit_3d_ui(edit_prompt, current_image, edit_count, progress=gr.Progress()):
    """UI wrapper for editing.

    Sends the current transparent image plus the edit instruction to Gemini,
    rebuilds the mask, re-runs SAM-3D and saves the new outputs.

    Args:
        edit_prompt: Description of the edit to apply.
        current_image: PIL image from the previous generation/edit, or None.
        edit_count: Counter value before this edit; incremented by one here.
        progress: Gradio progress tracker.

    Returns:
        Tuple of (edited image path, mask path, path for the 3D viewer (GLB
        if available, else PLY), GLB path or None, PLY path, edited PIL
        image, new edit count).

    Raises:
        gr.Error: If there is no image to edit, the API key is missing, or
            any pipeline stage fails.
    """
    if current_image is None:
        raise gr.Error("No image to edit. Generate a 3D model first!")

    if not client:
        if not init_gemini():
            raise gr.Error("GEMINI_API_KEY not set")

    progress(0.1, desc=f"Applying edit: {edit_prompt}...")

    try:
        image_part = types.Part.from_bytes(
            data=image_to_bytes(current_image),
            mime_type="image/png"
        )

        full_edit_prompt = f"{edit_prompt}. Keep the background transparent. Maintain image quality and lighting."

        response_edit = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=[full_edit_prompt, image_part],
        )

        edited_image = None
        # `parts` may be None when the response carries no content.
        for part in response_edit.parts or []:
            if part.inline_data:
                edited_bytes = part.inline_data.data
                edited_image = Image.open(io.BytesIO(edited_bytes))
                break

        if edited_image is None:
            raise gr.Error("Edit failed")

    except gr.Error:
        # Already a user-facing error; re-raise untouched so the message is
        # not wrapped a second time by the generic handler below.
        raise
    except Exception as e:
        raise gr.Error(f"Edit failed: {e}") from e

    progress(0.3, desc="Creating new mask...")
    gray = edited_image.convert("L")

    progress(0.4, desc="Running SAM-3D (1-2 min)...")

    try:
        ply_bytes, glb_bytes = run_sam3d(edited_image, gray)
    except Exception as e:
        raise gr.Error(f"SAM-3D failed: {e}") from e

    progress(0.9, desc="Saving outputs...")

    # Kept on disk after returning: the UI components display these paths.
    temp_dir = tempfile.mkdtemp()

    nobg_path = os.path.join(temp_dir, "edited.png")
    mask_path = os.path.join(temp_dir, "mask.png")
    ply_path = os.path.join(temp_dir, "model.ply")

    edited_image.save(nobg_path)
    gray.save(mask_path)

    with open(ply_path, 'wb') as f:
        f.write(ply_bytes)

    # The GLB output is optional.
    glb_path = None
    if glb_bytes:
        glb_path = os.path.join(temp_dir, "model.glb")
        with open(glb_path, 'wb') as f:
            f.write(glb_bytes)

    new_edit_count = edit_count + 1
    progress(1.0, desc=f"Edit #{new_edit_count} complete!")

    return (
        nobg_path,
        mask_path,
        glb_path if glb_path else ply_path,  # viewer falls back to the PLY
        glb_path,
        ply_path,
        edited_image,
        new_edit_count,
    )
406
+
407
+
408
+ # ============================================================
409
+ # MCP TOOL INTERFACES
410
+ # ============================================================
411
+
412
# Text-in / JSON-text-out interface around generate_3d_model, published
# under the API name "generate_3d".
generate_tool = gr.Interface(
    fn=generate_3d_model,
    inputs=gr.Textbox(placeholder="A red sports car", label="Prompt"),
    outputs=gr.Textbox(label="Result (JSON)"),
    title="Generate 3D Model",
    description="Generate a 3D model from a text description",
    api_name="generate_3d",
)
420
+
421
# Interface around edit_3d_model, published under the API name "edit_3d".
# The two text inputs map positionally to (edit_prompt, transparent_image_path).
edit_tool = gr.Interface(
    fn=edit_3d_model,
    inputs=[
        gr.Textbox(placeholder="Remove the wings", label="Edit Prompt"),
        gr.Textbox(placeholder="/path/to/transparent.png", label="Transparent Image Path"),
    ],
    outputs=gr.Textbox(label="Result (JSON)"),
    title="Edit 3D Model",
    description="Edit an existing 3D model",
    api_name="edit_3d",
)
432
+
433
+
434
+ # ============================================================
435
+ # MAIN UI
436
+ # ============================================================
437
+
438
# Interactive UI: generate a model from text, then apply successive edits.
with gr.Blocks() as main_ui:

    # Per-session state: the latest transparent image (input to the next
    # edit) and the value shown by the edit counter.
    current_image_state = gr.State(None)
    edit_count_state = gr.State(0)

    gr.Markdown("""
    # 🎨 Text to 3D Model (MCP Server)
    ### Powered by Gemini + SAM-3D Objects

    **This app is also an MCP Server!** Claude Desktop, Cursor, and other MCP clients can use the `generate_3d` and `edit_3d` tools.

    ⏱️ *Generation takes 1-2 minutes. First run may take longer as the model warms up.*
    """)

    gr.Markdown("## 1️⃣ Generate Initial 3D Model")

    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(label="Text Prompt", placeholder="A plane with eagle wings", lines=2)
        with gr.Column(scale=1):
            generate_btn = gr.Button("🚀 Generate", variant="primary", size="lg")

    gr.Examples(
        examples=["A plane with eagle wings", "A wooden chair", "A red sports car", "A ceramic coffee mug", "A robot dog"],
        inputs=prompt_input
    )

    gr.Markdown("## 2️⃣ Edit Your Model")

    with gr.Row():
        with gr.Column(scale=2):
            edit_input = gr.Textbox(label="Edit Prompt", placeholder="Remove the wings", lines=2)
        with gr.Column(scale=1):
            edit_btn = gr.Button("✏️ Apply Edit", variant="secondary", size="lg")
            edit_counter = gr.Markdown("*No edits yet*")

    gr.Examples(
        examples=["Remove the wings", "Change color to blue", "Add racing stripes", "Make it larger", "Add wheels"],
        inputs=edit_input
    )

    gr.Markdown("## 📸 Images")
    with gr.Row():
        original_output = gr.Image(label="1. Original", type="filepath")
        nobg_output = gr.Image(label="2. Transparent", type="filepath")
        mask_output = gr.Image(label="3. Mask", type="filepath")

    gr.Markdown("## 🎮 3D Model")
    model_output = gr.Model3D(label="Interactive 3D Model (drag to rotate)", clear_color=[0.1, 0.1, 0.1, 1.0])

    gr.Markdown("## 📥 Downloads")
    with gr.Row():
        glb_download = gr.File(label="GLB (mesh)")
        ply_download = gr.File(label="PLY (splat)")

    gr.Markdown("""
    ---
    ## 🔌 MCP Server Info

    This app exposes two MCP tools: `generate_3d` and `edit_3d`

    **Connect via:** `https://YOUR-SPACE.hf.space/gradio_api/mcp/sse`

    ---
    **Built for [MCP 1st Birthday Hackathon](https://huggingface.co/MCP-1st-Birthday)** 🎂
    """)

    def update_counter(count):
        """Return the Markdown text for the edit counter."""
        return "*No edits yet*" if count == 0 else f"**Edits applied: {count}**"

    # Output order must match the tuple returned by generate_3d_ui; the
    # counter label is refreshed once the state has been updated.
    generate_btn.click(
        fn=generate_3d_ui,
        inputs=[prompt_input],
        outputs=[original_output, nobg_output, mask_output, model_output, glb_download, ply_download, current_image_state, edit_count_state]
    ).then(fn=update_counter, inputs=[edit_count_state], outputs=[edit_counter])

    # Output order must match the tuple returned by edit_3d_ui (there is no
    # "original" image output for edits).
    edit_btn.click(
        fn=edit_3d_ui,
        inputs=[edit_input, current_image_state, edit_count_state],
        outputs=[nobg_output, mask_output, model_output, glb_download, ply_download, current_image_state, edit_count_state]
    ).then(fn=update_counter, inputs=[edit_count_state], outputs=[edit_counter])
519
+
520
+
521
+ # ============================================================
522
+ # COMBINE UI + MCP TOOLS
523
+ # ============================================================
524
+
525
# One app with three tabs: the interactive UI plus the two tool interfaces.
demo = gr.TabbedInterface(
    interface_list=[main_ui, generate_tool, edit_tool],
    tab_names=["🎨 Interactive UI", "🔧 Generate Tool", "✏️ Edit Tool"],
    title="Text to 3D | MCP Server"
)

if __name__ == "__main__":
    # mcp_server=True launches the app with Gradio's MCP server enabled.
    demo.launch(mcp_server=True)