# dcr / app.py — DCR Vehicle Studio (Hugging Face Space)
# Last commit: "Increase GLM Polling Timeout" (c4c32f6) by Desarrollo33
import gradio as gr
import numpy as np
from PIL import Image
from transformers import pipeline
from gradio_client import Client, handle_file
import tempfile
import os
import json
import torch
# --- CONFIGURATION ---
# Hugging Face Space IDs used as remote 3D-generation backends.
SPACE_HUNYUAN = "tencent/Hunyuan3D-2"
SPACE_TRELLIS = "JeffreyXiang/TRELLIS"
# --- LOCAL PIPELINES ---
# hy3dgen is optional (GPU-only install); when it is missing the app falls
# back to the remote Spaces above.
try:
    from hy3dgen.texgen import Hunyuan3DPaintPipeline
    from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
    LOCAL_PIPELINE_AVAILABLE = True
except ImportError:
    LOCAL_PIPELINE_AVAILABLE = False
# --- LOAD LOCAL MODELS ---
# Background-removal model is loaded eagerly at import time, so the first
# request does not pay the download/initialization cost.
print("Loading RMBG-1.4 model...")
rmbg_pipe = pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True)
print("Model loaded!")
# Global pipelines (lazy loaded) — populated on demand by load_local_pipelines().
dit_pipeline = None
paint_pipeline = None
def load_local_pipelines():
    """Lazily initialize the local Hunyuan3D shape and texture pipelines.

    No-op when the hy3dgen package could not be imported. Each pipeline is
    created at most once and cached in the module-level globals.
    """
    global dit_pipeline, paint_pipeline
    if not LOCAL_PIPELINE_AVAILABLE:
        return
    if dit_pipeline is None:
        print("Loading Hunyuan3DDiTFlowMatchingPipeline...")
        dit_pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained('tencent/Hunyuan3D-2')
    if paint_pipeline is None:
        print("Loading Hunyuan3DPaintPipeline...")
        paint_pipeline = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2')
# --- IMAGE PROCESSING ---
def remove_background(image: Image.Image) -> Image.Image:
    """Stage 1: strip the background from *image* via the RMBG-1.4 pipeline."""
    # The trust_remote_code pipeline returns the cutout image directly.
    return rmbg_pipe(image)
def add_studio_background(image_no_bg: Image.Image, bg_type: str) -> Image.Image:
    """Stage 2: composite the cutout over a solid studio backdrop.

    "transparent" returns the input untouched; any unrecognized *bg_type*
    falls back to a white backdrop. The result is flattened to RGB.
    """
    if bg_type == "transparent":
        return image_no_bg
    palette = {
        "white": (255, 255, 255),
        "dcr_dark": (10, 10, 10),
        "dcr_gradient": (10, 10, 10),
    }
    rgb = palette.get(bg_type, (255, 255, 255))
    # alpha_composite requires both layers to be RGBA.
    cutout = image_no_bg if image_no_bg.mode == "RGBA" else image_no_bg.convert("RGBA")
    backdrop = Image.new("RGBA", cutout.size, rgb + (255,))
    return Image.alpha_composite(backdrop, cutout).convert("RGB")
def upscale_lanczos(image: Image.Image, scale: float = 2.0) -> Image.Image:
    """Stage 3: enlarge *image* by *scale* using Lanczos resampling.

    The target size is clamped so neither side exceeds 4096 px; the clamp
    preserves the aspect ratio of the already-scaled dimensions.
    """
    limit = 4096
    width = int(image.width * scale)
    height = int(image.height * scale)
    if width > limit or height > limit:
        shrink = min(limit / width, limit / height)
        width = int(width * shrink)
        height = int(height * shrink)
    return image.resize((width, height), Image.LANCZOS)
def process_vehicle(image: Image.Image, background_type: str = "white", upscale_factor: float = 2.0) -> tuple[Image.Image, Image.Image, Image.Image]:
    """Run the full photo pipeline: background removal → studio backdrop → upscale.

    Returns the intermediate result of every stage so the UI can show all
    three. Raises gr.Error when no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image first")
    cutout = remove_background(image)
    staged = add_studio_background(cutout, background_type)
    enlarged = upscale_lanczos(staged, upscale_factor)
    return cutout, staged, enlarged
# --- 3D GENERATION ---
def generate_with_trellis(image_path: str, logs: list) -> str:
    """Attempt textured generation using Trellis Space.

    Two-step remote protocol against SPACE_TRELLIS: /image_to_3d renders the
    asset (returns a preview video), then /extract_glb pulls the GLB out of
    the SAME client session. Progress lines are appended to *logs* in place.

    Raises ValueError when no GLB path can be recovered; gradio_client errors
    propagate to the caller, which handles the fallback.
    """
    logs.append("🔵 ENGINE: TRELLIS (Textured)")
    client = Client(SPACE_TRELLIS)
    logs.append("1. Generating 3D Asset (Video)...")
    # Trellis requires an upload.
    # Note: We use try/except in the calling function, but here we expect 'gradio_client' errors
    result_video = client.predict(
        image=handle_file(image_path),
        multiimages=[],
        seed=0,
        ss_guidance_strength=7.5,
        ss_sampling_steps=12,
        slat_guidance_strength=3.0,
        slat_sampling_steps=12,
        multiimage_algo="stochastic",
        api_name="/image_to_3d"
    )
    logs.append(f"Video generated: {result_video}")
    logs.append("2. Extracting GLB Model...")
    # This call relies on the session state from step 1 — do not create a new
    # Client between the two predict() calls.
    result_glb_tuple = client.predict(
        mesh_simplify=0.95,
        texture_size=1024,
        api_name="/extract_glb"
    )
    # Trellis returns (LitModel3D_file, Download_Path).
    # Usually index 1 is the .glb file path for download.
    logs.append(f"Extraction result type: {type(result_glb_tuple)}")
    final_path = None
    if isinstance(result_glb_tuple, (list, tuple)):
        # Inspect items; the last string ending in '.glb' wins.
        for i, item in enumerate(result_glb_tuple):
            logs.append(f"Item {i}: {item}")
            if isinstance(item, str) and item.endswith('.glb'):
                final_path = item
        if not final_path and len(result_glb_tuple) > 0:
            # Fallback to the LAST item when nothing matched '.glb'
            # (despite not being verified as a file path).
            final_path = result_glb_tuple[-1]
    else:
        final_path = result_glb_tuple
    # Handle dictionary return if any (gradio FileData-style payloads).
    if isinstance(final_path, dict) and 'value' in final_path:
        final_path = final_path['value']
    if not final_path:
        raise ValueError("Trellis did not return a valid GLB path.")
    return final_path
def generate_with_hunyuan_remote(image_path: str, logs: list) -> str:
    """Fallback generation using Hunyuan Remote (Geometry Only).

    Single call to SPACE_HUNYUAN's /shape_generation endpoint; the returned
    mesh is untextured. Progress lines are appended to *logs* in place.
    Remote errors propagate to the caller.
    """
    logs.append("🟠 ENGINE: HUNYUAN REMOTE (Geometry Only)")
    client = Client(SPACE_HUNYUAN)
    logs.append("Calling /shape_generation...")
    result = client.predict(
        caption="A 3D model of a vehicle",
        image=handle_file(image_path),
        # Multi-view inputs are unused for single-image generation.
        mv_image_front=None,
        mv_image_back=None,
        mv_image_left=None,
        mv_image_right=None,
        steps=30,
        guidance_scale=5.0,
        seed=1234,
        octree_resolution=256,
        check_box_rembg=True,
        num_chunks=8000,
        randomize_seed=True,
        api_name="/shape_generation"
    )
    # The endpoint may return a bare path or a tuple whose first element is
    # the mesh file.
    final_path = None
    if isinstance(result, tuple):
        final_path = result[0]
    else:
        final_path = result
    # Handle dictionary return (gradio FileData-style payload).
    if isinstance(final_path, dict) and 'value' in final_path:
        final_path = final_path['value']
    return final_path
def generate_3d_model(image: Image.Image, engine_choice: str = "Trellis (Textured)", use_local: bool = False) -> tuple[str, str, str]:
    """Master 3D Generation Function.

    Tries engines in order of preference, falling through on failure:
      1. Local Hunyuan pipelines (only when *use_local* and hy3dgen imported)
      2. Remote Trellis Space   (only when *engine_choice* mentions "Trellis")
      3. Remote Hunyuan Space   (geometry-only fallback for everything)

    Returns (viewer_path, download_path, debug_log_text); the two paths point
    at the same GLB file, or are None when every engine failed.

    Raises gr.Error when no image was supplied.
    """
    debug_logs = []
    if image is None:
        raise gr.Error("Please upload an image first")
    # Persist the PIL image to disk: both gradio_client uploads and the local
    # pipelines consume a file path, not an in-memory image.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
        image.save(f.name)
        temp_path = f.name
    debug_logs.append(f"Input image saved to {temp_path}")
    try:
        final_model_path = None
        # --- STRATEGY: LOCAL (GPU) ---
        if use_local and LOCAL_PIPELINE_AVAILABLE:
            try:
                debug_logs.append("🟣 ENGINE: LOCAL HUNYUAN (Textured)")
                load_local_pipelines()
                mesh = dit_pipeline(image=temp_path)[0]
                mesh = paint_pipeline(mesh, image=temp_path)
                # NamedTemporaryFile instead of the deprecated, race-prone
                # tempfile.mktemp: the file is created atomically and only
                # its name is reused for the export.
                with tempfile.NamedTemporaryFile(suffix=".glb", delete=False) as out:
                    output_path = out.name
                mesh.export(output_path)
                debug_logs.append(f"Local success: {output_path}")
                return output_path, output_path, "\n".join(debug_logs)
            except Exception as e:
                debug_logs.append(f"❌ Local Failed: {str(e)}")
                # Fall through to remote strategies
        # --- STRATEGY: REMOTE TRELLIS ---
        if "Trellis" in engine_choice:
            try:
                final_model_path = generate_with_trellis(temp_path, debug_logs)
                debug_logs.append(f"✅ Trellis Success: {final_model_path}")
                return final_model_path, final_model_path, "\n".join(debug_logs)
            except Exception as e:
                debug_logs.append(f"❌ Trellis Failed: {str(e)}")
                debug_logs.append("⚠️ Falling back to Hunyuan (Geometry Only)...")
                # Fall through to Hunyuan
        # --- STRATEGY: REMOTE HUNYUAN (Fallback for everything) ---
        try:
            final_model_path = generate_with_hunyuan_remote(temp_path, debug_logs)
            debug_logs.append(f"✅ Hunyuan Success: {final_model_path}")
            return final_model_path, final_model_path, "\n".join(debug_logs)
        except Exception as e:
            debug_logs.append(f"❌ Hunyuan Failed: {str(e)}")
            debug_logs.append("💀 All engines failed.")
            return None, None, "\n".join(debug_logs)
    finally:
        # The input PNG was previously leaked on every call; remove it once
        # all strategies have run (the GLB output is a separate file).
        try:
            os.remove(temp_path)
        except OSError:
            pass
# --- GLM-IMAGE EDITING ---
import requests
import io
import traceback
import json
import time
def edit_image_with_glm(image: Image.Image, prompt: str, strength: float = 0.8) -> tuple[Image.Image, str]:
    """
    Edit image using zai-org/GLM-Image via Custom HF Router.
    Handles Async API polling.
    Endpoint: https://router.huggingface.co/zai-org/api/paas/v4/async/images/generations

    Returns (edited_image_or_None, debug_log_text). Every failure path is
    reported through the log string rather than raised, so the Gradio UI
    always receives output. Requires the HF_TOKEN environment secret.
    """
    logs = []
    # --- input validation ---
    if image is None:
        logs.append("Error: No image provided")
        return None, "\n".join(logs)
    if not prompt:
        logs.append("Error: No prompt provided")
        return None, "\n".join(logs)
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        logs.append("Warning: HF_TOKEN not found in environment")
        return None, "Error: HF_TOKEN secret is missing"
    base_url = "https://router.huggingface.co/zai-org/api/paas/v4"
    create_url = f"{base_url}/async/images/generations"
    headers = {
        "Authorization": f"Bearer {hf_token}",
        "Content-Type": "application/json"
    }
    # Encode the input image inline as a base64 data URL.
    import base64
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    payload = {
        "model": "glm-image",
        "prompt": prompt,
        "image_url": {"url": f"data:image/png;base64,{img_str}"},
        # "image": f"data:image/png;base64,{img_str}", # Using standard Zhipu/OpenAI format preference
        "parameters": {
            "strength": strength
        }
    }
    logs.append(f"1. Sending Async Request to {create_url}")
    logs.append(f"Prompt: {prompt}")
    try:
        # --- task creation ---
        response = requests.post(create_url, headers=headers, json=payload, timeout=60)
        logs.append(f"Status: {response.status_code}")
        if response.status_code != 200:
            logs.append(f"Error Response: {response.text}")
            return None, "\n".join(logs)
        resp_json = response.json()
        logs.append(f"Create Response: {json.dumps(resp_json, indent=2)}")
        # The API has returned the task id under either key in practice.
        task_id = resp_json.get("id") or resp_json.get("request_id")
        task_status = resp_json.get("task_status")  # NOTE(review): currently unused
        if not task_id:
            logs.append("Error: No Task ID returned")
            return None, "\n".join(logs)
        # POLLING LOOP
        logs.append(f"2. Polling for Task ID: {task_id}")
        max_retries = 90  # Increased wait time to ~3 minutes (90 polls x 2s sleep)
        for i in range(max_retries):
            time.sleep(2)  # Wait 2s between checks
            result_url = f"{base_url}/async-result/{task_id}"
            poll_resp = requests.get(result_url, headers=headers, timeout=30)
            if poll_resp.status_code != 200:
                logs.append(f"Poll Failed ({poll_resp.status_code}): {poll_resp.text}")
                continue  # Retry? — transient poll errors consume a retry slot
            poll_data = poll_resp.json()
            status = poll_data.get("task_status")
            logs.append(f"Poll {i+1}: {status}")
            if status == "SUCCESS":
                logs.append("Task Completed Successfully!")
                # Extract image
                # Typical legacy response: { "image_result": [ { "url": ... } ] }
                # Or standard: { "data": [ { "url": ... } ] }
                img_url = None
                # Check known keys — the response shape has varied, so probe
                # each candidate container in turn.
                items = poll_data.get('items') or poll_data.get('data') or poll_data.get('choices') or poll_data.get('image_result')
                if items and len(items) > 0:
                    first = items[0]
                    img_url = first.get('url') or first.get('image')
                    b64 = first.get('b64_json')
                    if img_url:
                        logs.append(f"Downloading Result: {img_url}")
                        return Image.open(requests.get(img_url, stream=True).raw), "\n".join(logs)
                    elif b64:
                        logs.append("Decoding Base64 Result...")
                        return Image.open(io.BytesIO(base64.b64decode(b64))), "\n".join(logs)
                # Task succeeded but no recognizable image payload — dump it.
                logs.append(f"Success but no image found in keys: {poll_data.keys()}")
                logs.append(f"Full Dump: {json.dumps(poll_data)}")
                return None, "\n".join(logs)
            elif status == "FAIL" or status == "FAILED":
                logs.append(f"Task Failed: {poll_data}")
                return None, "\n".join(logs)
            # If PROCESSING, continue loop
        logs.append("Timeout: Task did not complete in time.")
        return None, "\n".join(logs)
    except Exception as e:
        # Catch-all boundary: network/JSON/PIL errors become log output.
        logs.append(f"Exception: {str(e)}")
        logs.append(traceback.format_exc())
        return None, "\n".join(logs)
# --- UI ---
# Three-tab Gradio interface wiring the processing functions above.
with gr.Blocks(title="DCR Vehicle Studio", theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# 🚗 DCR Vehicle Studio")
    gr.Markdown("Remove background, add studio backgrounds, upscale, and generate 3D models")
    with gr.Tabs():
        # Tab 1: 3-stage photo pipeline (process_vehicle).
        with gr.Tab("📸 Image Processing"):
            # ... (Existing Image Tab Code) ...
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(type="pil", label="Input Vehicle Photo")
                    bg_type = gr.Dropdown(
                        choices=["white", "dcr_dark", "dcr_gradient", "transparent"],
                        value="white",
                        label="Background Type"
                    )
                    upscale_factor = gr.Slider(minimum=1.0, maximum=4.0, value=2.0, step=0.5, label="Upscale Factor")
                    process_btn = gr.Button("🎨 Process Image", variant="primary")
                with gr.Column():
                    out_no_bg = gr.Image(label="Stage 1: Background Removed")
                    out_with_bg = gr.Image(label="Stage 2: Studio Background")
                    out_upscaled = gr.Image(label="Stage 3: Upscaled")
            process_btn.click(
                fn=process_vehicle,
                inputs=[input_image, bg_type, upscale_factor],
                outputs=[out_no_bg, out_with_bg, out_upscaled]
            )
        # Tab 2: prompt-driven editing via the GLM-Image async API.
        with gr.Tab("✨ GLM Image Editor"):
            gr.Markdown("### Experimental: Z.AI GLM-Image Editor")
            gr.Markdown("Use `zai-org/GLM-Image` to modify your vehicle photos.")
            with gr.Row():
                with gr.Column():
                    glm_input = gr.Image(type="pil", label="Input Image")
                    glm_prompt = gr.Textbox(label="Editing Prompt", placeholder="e.g. Change the car color to red, add snow on the ground")
                    glm_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, label="Transformation Strength (0.1 = subtle, 1.0 = heavy)")
                    glm_btn = gr.Button("✨ Re-Imagine", variant="primary")
                    glm_debug = gr.Textbox(label="API Debug Log", lines=10, interactive=False)
                with gr.Column():
                    glm_output = gr.Image(label="GLM Result")
            glm_btn.click(
                fn=edit_image_with_glm,
                inputs=[glm_input, glm_prompt, glm_strength],
                outputs=[glm_output, glm_debug]
            )
        # Tab 3: image-to-3D generation with engine fallback.
        with gr.Tab("🎮 3D Generation"):
            gr.Markdown("### Generate 3D Model")
            gr.Markdown("Choose your engine. **Trellis** provides textures but is experimental. **Hunyuan** is geometry only.")
            with gr.Row():
                with gr.Column(scale=1):
                    input_3d = gr.Image(type="pil", label="Vehicle Image")
                    engine_choice = gr.Dropdown(
                        choices=["Trellis (Textured)", "Hunyuan (Geometry Only)"],
                        value="Trellis (Textured)",
                        label="Generation Engine"
                    )
                    use_local_cb = gr.Checkbox(label="Use Local Pipeline (Requires GPU)", value=False)
                    generate_3d_btn = gr.Button("🎮 Generate 3D Model", variant="primary")
                    output_file = gr.File(label="Download GLB")
                    debug_text = gr.Textbox(label="Debug Logs", lines=15, interactive=False)
                with gr.Column(scale=2):
                    output_3d_viewer = gr.Model3D(
                        label="3D Model Viewer",
                        clear_color=[0.1, 0.1, 0.1, 1.0],
                        height=500
                    )
            generate_3d_btn.click(
                fn=generate_3d_model,
                inputs=[input_3d, engine_choice, use_local_cb],
                outputs=[output_3d_viewer, output_file, debug_text]
            )
demo.launch()