# dcr / app.py — DCR Vehicle Studio (Hugging Face Space)
# Last commit: "Increase GLM Polling Timeout" (c4c32f6) by Desarrollo33
import gradio as gr
import numpy as np
from PIL import Image
from transformers import pipeline
from gradio_client import Client, handle_file
import tempfile
import os
import json
import torch
# --- CONFIGURATION ---
# Hugging Face Space IDs used as remote 3D-generation backends.
SPACE_HUNYUAN = "tencent/Hunyuan3D-2"
SPACE_TRELLIS = "JeffreyXiang/TRELLIS"
# --- LOCAL PIPELINES ---
# hy3dgen is optional (GPU-only install); when it is missing the app falls
# back to the remote Spaces above.
try:
    from hy3dgen.texgen import Hunyuan3DPaintPipeline
    from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
    LOCAL_PIPELINE_AVAILABLE = True
except ImportError:
    LOCAL_PIPELINE_AVAILABLE = False
# --- LOAD LOCAL MODELS ---
# Background-removal model is loaded eagerly at import time, so the first
# request does not pay the download/initialization cost.
print("Loading RMBG-1.4 model...")
rmbg_pipe = pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True)
print("Model loaded!")
# Global pipelines (lazy loaded) — populated on demand by load_local_pipelines().
dit_pipeline = None
paint_pipeline = None
def load_local_pipelines():
    """Lazily initialize the local Hunyuan3D shape and texture pipelines.

    No-op when the hy3dgen package could not be imported. Each pipeline is
    created at most once and cached in the module-level globals.
    """
    global dit_pipeline, paint_pipeline
    if not LOCAL_PIPELINE_AVAILABLE:
        return
    if dit_pipeline is None:
        print("Loading Hunyuan3DDiTFlowMatchingPipeline...")
        dit_pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained('tencent/Hunyuan3D-2')
    if paint_pipeline is None:
        print("Loading Hunyuan3DPaintPipeline...")
        paint_pipeline = Hunyuan3DPaintPipeline.from_pretrained('tencent/Hunyuan3D-2')
# --- IMAGE PROCESSING ---
def remove_background(image: Image.Image) -> Image.Image:
    """Stage 1: strip the background from *image* via the RMBG-1.4 pipeline."""
    # The trust_remote_code pipeline returns the cutout image directly.
    return rmbg_pipe(image)
def add_studio_background(image_no_bg: Image.Image, bg_type: str) -> Image.Image:
    """Stage 2: composite the cutout over a solid studio backdrop.

    "transparent" returns the input untouched; any unrecognized *bg_type*
    falls back to a white backdrop. The result is flattened to RGB.
    """
    if bg_type == "transparent":
        return image_no_bg
    palette = {
        "white": (255, 255, 255),
        "dcr_dark": (10, 10, 10),
        "dcr_gradient": (10, 10, 10),
    }
    rgb = palette.get(bg_type, (255, 255, 255))
    # alpha_composite requires both layers to be RGBA.
    cutout = image_no_bg if image_no_bg.mode == "RGBA" else image_no_bg.convert("RGBA")
    backdrop = Image.new("RGBA", cutout.size, rgb + (255,))
    return Image.alpha_composite(backdrop, cutout).convert("RGB")
def upscale_lanczos(image: Image.Image, scale: float = 2.0) -> Image.Image:
    """Stage 3: enlarge *image* by *scale* using Lanczos resampling.

    The target size is clamped so neither side exceeds 4096 px; the clamp
    preserves the aspect ratio of the already-scaled dimensions.
    """
    limit = 4096
    width = int(image.width * scale)
    height = int(image.height * scale)
    if width > limit or height > limit:
        shrink = min(limit / width, limit / height)
        width = int(width * shrink)
        height = int(height * shrink)
    return image.resize((width, height), Image.LANCZOS)
def process_vehicle(image: Image.Image, background_type: str = "white", upscale_factor: float = 2.0) -> tuple[Image.Image, Image.Image, Image.Image]:
    """Run the full photo pipeline: background removal → studio backdrop → upscale.

    Returns the intermediate result of every stage so the UI can show all
    three. Raises gr.Error when no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image first")
    cutout = remove_background(image)
    staged = add_studio_background(cutout, background_type)
    enlarged = upscale_lanczos(staged, upscale_factor)
    return cutout, staged, enlarged
# --- 3D GENERATION ---
def generate_with_trellis(image_path: str, logs: list) -> str:
    """Attempt textured generation using Trellis Space.

    Two-step remote protocol against SPACE_TRELLIS: /image_to_3d renders the
    asset (returns a preview video), then /extract_glb pulls the GLB out of
    the SAME client session. Progress lines are appended to *logs* in place.

    Raises ValueError when no GLB path can be recovered; gradio_client errors
    propagate to the caller, which handles the fallback.
    """
    logs.append("🔵 ENGINE: TRELLIS (Textured)")
    client = Client(SPACE_TRELLIS)
    logs.append("1. Generating 3D Asset (Video)...")
    # Trellis requires an upload.
    # Note: We use try/except in the calling function, but here we expect 'gradio_client' errors
    result_video = client.predict(
        image=handle_file(image_path),
        multiimages=[],
        seed=0,
        ss_guidance_strength=7.5,
        ss_sampling_steps=12,
        slat_guidance_strength=3.0,
        slat_sampling_steps=12,
        multiimage_algo="stochastic",
        api_name="/image_to_3d"
    )
    logs.append(f"Video generated: {result_video}")
    logs.append("2. Extracting GLB Model...")
    # This call relies on the session state from step 1 — do not create a new
    # Client between the two predict() calls.
    result_glb_tuple = client.predict(
        mesh_simplify=0.95,
        texture_size=1024,
        api_name="/extract_glb"
    )
    # Trellis returns (LitModel3D_file, Download_Path).
    # Usually index 1 is the .glb file path for download.
    logs.append(f"Extraction result type: {type(result_glb_tuple)}")
    final_path = None
    if isinstance(result_glb_tuple, (list, tuple)):
        # Inspect items; the last string ending in '.glb' wins.
        for i, item in enumerate(result_glb_tuple):
            logs.append(f"Item {i}: {item}")
            if isinstance(item, str) and item.endswith('.glb'):
                final_path = item
        if not final_path and len(result_glb_tuple) > 0:
            # Fallback to the LAST item when nothing matched '.glb'
            # (despite not being verified as a file path).
            final_path = result_glb_tuple[-1]
    else:
        final_path = result_glb_tuple
    # Handle dictionary return if any (gradio FileData-style payloads).
    if isinstance(final_path, dict) and 'value' in final_path:
        final_path = final_path['value']
    if not final_path:
        raise ValueError("Trellis did not return a valid GLB path.")
    return final_path
def generate_with_hunyuan_remote(image_path: str, logs: list) -> str:
    """Fallback generation using Hunyuan Remote (Geometry Only).

    Single call to SPACE_HUNYUAN's /shape_generation endpoint; the returned
    mesh is untextured. Progress lines are appended to *logs* in place.
    Remote errors propagate to the caller.
    """
    logs.append("🟠 ENGINE: HUNYUAN REMOTE (Geometry Only)")
    client = Client(SPACE_HUNYUAN)
    logs.append("Calling /shape_generation...")
    result = client.predict(
        caption="A 3D model of a vehicle",
        image=handle_file(image_path),
        # Multi-view inputs are unused for single-image generation.
        mv_image_front=None,
        mv_image_back=None,
        mv_image_left=None,
        mv_image_right=None,
        steps=30,
        guidance_scale=5.0,
        seed=1234,
        octree_resolution=256,
        check_box_rembg=True,
        num_chunks=8000,
        randomize_seed=True,
        api_name="/shape_generation"
    )
    # The endpoint may return a bare path or a tuple whose first element is
    # the mesh file.
    final_path = None
    if isinstance(result, tuple):
        final_path = result[0]
    else:
        final_path = result
    # Handle dictionary return (gradio FileData-style payload).
    if isinstance(final_path, dict) and 'value' in final_path:
        final_path = final_path['value']
    return final_path
def generate_3d_model(image: Image.Image, engine_choice: str = "Trellis (Textured)", use_local: bool = False) -> tuple[str, str, str]:
    """Master 3D Generation Function.

    Tries engines in order of preference, falling through on failure:
      1. Local Hunyuan pipelines (only when *use_local* and hy3dgen imported)
      2. Remote Trellis Space   (only when *engine_choice* mentions "Trellis")
      3. Remote Hunyuan Space   (geometry-only fallback for everything)

    Returns (viewer_path, download_path, debug_log_text); the two paths point
    at the same GLB file, or are None when every engine failed.

    Raises gr.Error when no image was supplied.
    """
    debug_logs = []
    if image is None:
        raise gr.Error("Please upload an image first")
    # Persist the PIL image to disk: both gradio_client uploads and the local
    # pipelines consume a file path, not an in-memory image.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
        image.save(f.name)
        temp_path = f.name
    debug_logs.append(f"Input image saved to {temp_path}")
    try:
        final_model_path = None
        # --- STRATEGY: LOCAL (GPU) ---
        if use_local and LOCAL_PIPELINE_AVAILABLE:
            try:
                debug_logs.append("🟣 ENGINE: LOCAL HUNYUAN (Textured)")
                load_local_pipelines()
                mesh = dit_pipeline(image=temp_path)[0]
                mesh = paint_pipeline(mesh, image=temp_path)
                # NamedTemporaryFile instead of the deprecated, race-prone
                # tempfile.mktemp: the file is created atomically and only
                # its name is reused for the export.
                with tempfile.NamedTemporaryFile(suffix=".glb", delete=False) as out:
                    output_path = out.name
                mesh.export(output_path)
                debug_logs.append(f"Local success: {output_path}")
                return output_path, output_path, "\n".join(debug_logs)
            except Exception as e:
                debug_logs.append(f"❌ Local Failed: {str(e)}")
                # Fall through to remote strategies
        # --- STRATEGY: REMOTE TRELLIS ---
        if "Trellis" in engine_choice:
            try:
                final_model_path = generate_with_trellis(temp_path, debug_logs)
                debug_logs.append(f"✅ Trellis Success: {final_model_path}")
                return final_model_path, final_model_path, "\n".join(debug_logs)
            except Exception as e:
                debug_logs.append(f"❌ Trellis Failed: {str(e)}")
                debug_logs.append("⚠️ Falling back to Hunyuan (Geometry Only)...")
                # Fall through to Hunyuan
        # --- STRATEGY: REMOTE HUNYUAN (Fallback for everything) ---
        try:
            final_model_path = generate_with_hunyuan_remote(temp_path, debug_logs)
            debug_logs.append(f"✅ Hunyuan Success: {final_model_path}")
            return final_model_path, final_model_path, "\n".join(debug_logs)
        except Exception as e:
            debug_logs.append(f"❌ Hunyuan Failed: {str(e)}")
            debug_logs.append("💀 All engines failed.")
            return None, None, "\n".join(debug_logs)
    finally:
        # The input PNG was previously leaked on every call; remove it once
        # all strategies have run (the GLB output is a separate file).
        try:
            os.remove(temp_path)
        except OSError:
            pass
# --- GLM-IMAGE EDITING ---
import requests
import io
import traceback
import json
import time
def edit_image_with_glm(image: Image.Image, prompt: str, strength: float = 0.8) -> tuple[Image.Image, str]:
    """
    Edit image using zai-org/GLM-Image via Custom HF Router.
    Handles Async API polling.
    Endpoint: https://router.huggingface.co/zai-org/api/paas/v4/async/images/generations

    Returns (edited_image_or_None, debug_log_text). Every failure path is
    reported through the log string rather than raised, so the Gradio UI
    always receives output. Requires the HF_TOKEN environment secret.
    """
    logs = []
    # --- input validation ---
    if image is None:
        logs.append("Error: No image provided")
        return None, "\n".join(logs)
    if not prompt:
        logs.append("Error: No prompt provided")
        return None, "\n".join(logs)
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        logs.append("Warning: HF_TOKEN not found in environment")
        return None, "Error: HF_TOKEN secret is missing"
    base_url = "https://router.huggingface.co/zai-org/api/paas/v4"
    create_url = f"{base_url}/async/images/generations"
    headers = {
        "Authorization": f"Bearer {hf_token}",
        "Content-Type": "application/json"
    }
    # Encode the input image inline as a base64 data URL.
    import base64
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    payload = {
        "model": "glm-image",
        "prompt": prompt,
        "image_url": {"url": f"data:image/png;base64,{img_str}"},
        # "image": f"data:image/png;base64,{img_str}", # Using standard Zhipu/OpenAI format preference
        "parameters": {
            "strength": strength
        }
    }
    logs.append(f"1. Sending Async Request to {create_url}")
    logs.append(f"Prompt: {prompt}")
    try:
        # --- task creation ---
        response = requests.post(create_url, headers=headers, json=payload, timeout=60)
        logs.append(f"Status: {response.status_code}")
        if response.status_code != 200:
            logs.append(f"Error Response: {response.text}")
            return None, "\n".join(logs)
        resp_json = response.json()
        logs.append(f"Create Response: {json.dumps(resp_json, indent=2)}")
        # The API has returned the task id under either key in practice.
        task_id = resp_json.get("id") or resp_json.get("request_id")
        task_status = resp_json.get("task_status")  # NOTE(review): currently unused
        if not task_id:
            logs.append("Error: No Task ID returned")
            return None, "\n".join(logs)
        # POLLING LOOP
        logs.append(f"2. Polling for Task ID: {task_id}")
        max_retries = 90  # Increased wait time to ~3 minutes (90 polls x 2s sleep)
        for i in range(max_retries):
            time.sleep(2)  # Wait 2s between checks
            result_url = f"{base_url}/async-result/{task_id}"
            poll_resp = requests.get(result_url, headers=headers, timeout=30)
            if poll_resp.status_code != 200:
                logs.append(f"Poll Failed ({poll_resp.status_code}): {poll_resp.text}")
                continue  # Retry? — transient poll errors consume a retry slot
            poll_data = poll_resp.json()
            status = poll_data.get("task_status")
            logs.append(f"Poll {i+1}: {status}")
            if status == "SUCCESS":
                logs.append("Task Completed Successfully!")
                # Extract image
                # Typical legacy response: { "image_result": [ { "url": ... } ] }
                # Or standard: { "data": [ { "url": ... } ] }
                img_url = None
                # Check known keys — the response shape has varied, so probe
                # each candidate container in turn.
                items = poll_data.get('items') or poll_data.get('data') or poll_data.get('choices') or poll_data.get('image_result')
                if items and len(items) > 0:
                    first = items[0]
                    img_url = first.get('url') or first.get('image')
                    b64 = first.get('b64_json')
                    if img_url:
                        logs.append(f"Downloading Result: {img_url}")
                        return Image.open(requests.get(img_url, stream=True).raw), "\n".join(logs)
                    elif b64:
                        logs.append("Decoding Base64 Result...")
                        return Image.open(io.BytesIO(base64.b64decode(b64))), "\n".join(logs)
                # Task succeeded but no recognizable image payload — dump it.
                logs.append(f"Success but no image found in keys: {poll_data.keys()}")
                logs.append(f"Full Dump: {json.dumps(poll_data)}")
                return None, "\n".join(logs)
            elif status == "FAIL" or status == "FAILED":
                logs.append(f"Task Failed: {poll_data}")
                return None, "\n".join(logs)
            # If PROCESSING, continue loop
        logs.append("Timeout: Task did not complete in time.")
        return None, "\n".join(logs)
    except Exception as e:
        # Catch-all boundary: network/JSON/PIL errors become log output.
        logs.append(f"Exception: {str(e)}")
        logs.append(traceback.format_exc())
        return None, "\n".join(logs)
# --- UI ---
# Three-tab Gradio interface wiring the processing functions above.
with gr.Blocks(title="DCR Vehicle Studio", theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# 🚗 DCR Vehicle Studio")
    gr.Markdown("Remove background, add studio backgrounds, upscale, and generate 3D models")
    with gr.Tabs():
        # Tab 1: 3-stage photo pipeline (process_vehicle).
        with gr.Tab("📸 Image Processing"):
            # ... (Existing Image Tab Code) ...
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(type="pil", label="Input Vehicle Photo")
                    bg_type = gr.Dropdown(
                        choices=["white", "dcr_dark", "dcr_gradient", "transparent"],
                        value="white",
                        label="Background Type"
                    )
                    upscale_factor = gr.Slider(minimum=1.0, maximum=4.0, value=2.0, step=0.5, label="Upscale Factor")
                    process_btn = gr.Button("🎨 Process Image", variant="primary")
                with gr.Column():
                    out_no_bg = gr.Image(label="Stage 1: Background Removed")
                    out_with_bg = gr.Image(label="Stage 2: Studio Background")
                    out_upscaled = gr.Image(label="Stage 3: Upscaled")
            process_btn.click(
                fn=process_vehicle,
                inputs=[input_image, bg_type, upscale_factor],
                outputs=[out_no_bg, out_with_bg, out_upscaled]
            )
        # Tab 2: prompt-driven editing via the GLM-Image async API.
        with gr.Tab("✨ GLM Image Editor"):
            gr.Markdown("### Experimental: Z.AI GLM-Image Editor")
            gr.Markdown("Use `zai-org/GLM-Image` to modify your vehicle photos.")
            with gr.Row():
                with gr.Column():
                    glm_input = gr.Image(type="pil", label="Input Image")
                    glm_prompt = gr.Textbox(label="Editing Prompt", placeholder="e.g. Change the car color to red, add snow on the ground")
                    glm_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, label="Transformation Strength (0.1 = subtle, 1.0 = heavy)")
                    glm_btn = gr.Button("✨ Re-Imagine", variant="primary")
                    glm_debug = gr.Textbox(label="API Debug Log", lines=10, interactive=False)
                with gr.Column():
                    glm_output = gr.Image(label="GLM Result")
            glm_btn.click(
                fn=edit_image_with_glm,
                inputs=[glm_input, glm_prompt, glm_strength],
                outputs=[glm_output, glm_debug]
            )
        # Tab 3: image-to-3D generation with engine fallback.
        with gr.Tab("🎮 3D Generation"):
            gr.Markdown("### Generate 3D Model")
            gr.Markdown("Choose your engine. **Trellis** provides textures but is experimental. **Hunyuan** is geometry only.")
            with gr.Row():
                with gr.Column(scale=1):
                    input_3d = gr.Image(type="pil", label="Vehicle Image")
                    engine_choice = gr.Dropdown(
                        choices=["Trellis (Textured)", "Hunyuan (Geometry Only)"],
                        value="Trellis (Textured)",
                        label="Generation Engine"
                    )
                    use_local_cb = gr.Checkbox(label="Use Local Pipeline (Requires GPU)", value=False)
                    generate_3d_btn = gr.Button("🎮 Generate 3D Model", variant="primary")
                    output_file = gr.File(label="Download GLB")
                    debug_text = gr.Textbox(label="Debug Logs", lines=15, interactive=False)
                with gr.Column(scale=2):
                    output_3d_viewer = gr.Model3D(
                        label="3D Model Viewer",
                        clear_color=[0.1, 0.1, 0.1, 1.0],
                        height=500
                    )
            generate_3d_btn.click(
                fn=generate_3d_model,
                inputs=[input_3d, engine_choice, use_local_cb],
                outputs=[output_3d_viewer, output_file, debug_text]
            )
demo.launch()