# unity-3d-assets / app.py
# Author: prajwaluppoor
# History: fallback to v1 stable (commit fa50451, verified)
"""Unity 3D AI Tools Hub for Hugging Face Spaces."""
# Disable SSR mode for better Hugging Face Spaces stability.
import os
os.environ["GRADIO_SSR_MODE"] = "false"
import io
import tempfile
from pathlib import Path
import gradio as gr
import requests
from PIL import Image, ImageDraw
def _patch_gradio_schema_bug():
"""Patch Gradio client schema parsing for boolean JSON schema fragments.
HF Spaces with Gradio 5.12.x can intermittently throw:
`TypeError: argument of type 'bool' is not iterable`
while rendering `/` when API info is generated. The root cause is a bool
JSON schema node (e.g., `additionalProperties: true`) being passed into
`gradio_client.utils.get_type`, which expects a dict.
"""
try:
from gradio_client import utils as client_utils
except Exception:
return
original_get_type = getattr(client_utils, "get_type", None)
if original_get_type is None or getattr(original_get_type, "_hf_space_patched", False):
return
def safe_get_type(schema):
if isinstance(schema, bool):
return "Any"
return original_get_type(schema)
safe_get_type._hf_space_patched = True
client_utils.get_type = safe_get_type
_patch_gradio_schema_bug()
# --- UI copy and service configuration -------------------------------------
TITLE = "Unity 3D AI Tools Hub"
# FIX: the em dash below was mojibake ("โ€”", UTF-8 bytes E2 80 94 misdecoded).
DESCRIPTION = """
### AI-powered toolkit to accelerate Unity game development.
*Designed by Prajwal Uppoor — SWE III @ Walmart*
This hub proxies heavy AI tasks to existing Hugging Face Spaces and APIs, providing a lightweight, fast, and free experience.
"""
# Server-side fallback token; a token typed into the UI takes precedence.
HF_TOKEN = os.getenv("HF_TOKEN", "")
# All model calls are routed through the HF inference router.
HF_INFERENCE_BASE = "https://router.huggingface.co/hf-inference/models"
REQUEST_TIMEOUT_SECONDS = 120
# Prompt templates; "{prompt}" is substituted with the user's text.
STYLE_PRESETS = {
    "Game Texture": "seamless tileable game texture, {prompt}, pbr, 4k",
    "Concept Art": "concept art, {prompt}, fantasy, high detail, artstation",
    "UI/Icon": "game ui icon, {prompt}, flat, clean, vector",
}
# Facebook MMS text-to-speech checkpoints, keyed by UI language name.
VOICE_MODELS = {
    "English": "facebook/mms-tts-eng",
    "Hindi": "facebook/mms-tts-hin",
    "Spanish": "facebook/mms-tts-spa",
}
GUIDE_MD = """
# LoRA Fine-Tuning Guide
1. **Prepare**: Collect at least 10 images that match your target style.
2. **Train**: Use [AutoTrain Advanced](https://huggingface.co/spaces/Caramelily/autotrain-advanced) to fine-tune a LoRA.
3. **Export**: Download LoRA weights and config.
4. **Integrate**: Generate assets in this hub and import them into Unity.
"""
def hf_inference(model_id, *, payload=None, data=None, token=""):
    """Send an inference request through the HF router.

    Exactly one of *payload* (JSON body) or *data* (raw bytes) should be
    provided; *data* wins when both are set, matching the original behavior.
    Falls back to the module-level HF_TOKEN when *token* is empty.
    """
    effective_token = token or HF_TOKEN
    headers = {}
    if effective_token:
        headers["Authorization"] = f"Bearer {effective_token}"
    endpoint = f"{HF_INFERENCE_BASE}/{model_id}"
    # Raw-bytes uploads (e.g. PNG for detection) go as the request body;
    # everything else is posted as JSON.
    body_kwargs = {"data": data} if data is not None else {"json": payload}
    return requests.post(
        endpoint,
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
        **body_kwargs,
    )
def _format_http_error(response, fallback_message="Request failed"):
try:
details = response.json()
except ValueError:
details = response.text.strip()
return f"{fallback_message} ({response.status_code}): {details or 'No details returned'}"
def generate_3d_asset(image, seed, guidance, steps, token):
    """Proxy image-to-GLB generation to the TRELLIS Space.

    Args:
        image: PIL reference image (None short-circuits with a user message).
        seed: integer RNG seed forwarded to TRELLIS.
        guidance: sparse-structure guidance strength.
        steps: sparse-structure sampling step count.
        token: optional HF token; falls back to HF_TOKEN. FIX: this argument
            was previously accepted (and wired from the UI) but never used —
            it is now forwarded to the gradio client for authentication.

    Returns:
        (glb_file_path_or_None, status_message)
    """
    if image is None:
        return None, "Please upload a reference image first."
    tmp_path = None
    try:
        from gradio_client import Client, handle_file

        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
            # FIX: write through the open handle instead of reopening by
            # name — reopening an open NamedTemporaryFile fails on Windows.
            image.save(tmp_img, format="PNG")
            tmp_path = Path(tmp_img.name)

        api_token = token or HF_TOKEN
        # hf_token=None means anonymous access, matching the old behavior
        # when no token is supplied.
        client = Client("trellis-community/TRELLIS", hf_token=api_token or None)
        result = client.predict(
            image=handle_file(str(tmp_path)),
            multiimages=[],
            is_multiimage=False,
            seed=int(seed),
            ss_guidance_strength=float(guidance),
            ss_sampling_steps=int(steps),
            slat_guidance_strength=7.5,
            slat_sampling_steps=12,
            multiimage_algo="stochastic",
            mesh_simplify=0.95,
            texture_size=1024,
            api_name="/generate_and_extract_glb",
        )
        # Endpoint returns multiple outputs; index 1 is the extracted GLB.
        return result[1], "3D model generated! Download the .GLB and import it into Unity."
    except Exception as exc:
        # Best-effort proxy: surface the failure in the status box.
        return None, f"3D generation failed: {exc}"
    finally:
        # Always remove the temporary PNG we created.
        if tmp_path and tmp_path.exists():
            tmp_path.unlink()
def generate_texture(prompt, style_preset, token):
    """Generate textures or concept art using FLUX Schnell.

    Returns a (PIL image or None, status message) pair suitable for the
    Gradio outputs.
    """
    # Guard clauses: require a prompt and some form of auth token.
    if not prompt:
        return None, "Please enter a prompt."
    api_token = token or HF_TOKEN
    if not api_token:
        return None, "Auth required: provide a Hugging Face token."

    # Unknown presets degrade to the raw prompt via the identity template.
    template = STYLE_PRESETS.get(style_preset, "{prompt}")
    styled_prompt = template.format(prompt=prompt)
    try:
        response = hf_inference(
            "black-forest-labs/FLUX.1-schnell",
            payload={"inputs": styled_prompt},
            token=api_token,
        )
        if response.status_code != 200:
            return None, _format_http_error(response, "Texture generation failed")
        generated = Image.open(io.BytesIO(response.content))
        return generated, "Texture generated successfully."
    except Exception as exc:
        return None, f"Texture generation failed: {exc}"
def detect_objects(image, confidence_threshold, token):
    """Run object detection via DETR and draw bounding boxes.

    Returns (annotated image, text report). On failure, the untouched input
    image comes back with an error message so the UI never loses the upload.
    """
    if image is None:
        return None, "Please upload an image."
    try:
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        response = hf_inference(
            "facebook/detr-resnet-50",
            data=buffer.getvalue(),
            token=token or HF_TOKEN,
        )
        if response.status_code != 200:
            return image, _format_http_error(response, "Object detection failed")

        annotated = image.copy()
        drawer = ImageDraw.Draw(annotated)
        report = ["Found objects:"]
        for item in response.json():
            confidence = item.get("score", 0)
            # Drop detections below the user-selected threshold.
            if confidence < confidence_threshold:
                continue
            coords = item["box"]
            drawer.rectangle(
                [coords["xmin"], coords["ymin"], coords["xmax"], coords["ymax"]],
                outline="red",
                width=3,
            )
            report.append(f"- {item['label']} ({confidence:.1%})")
        # Only the header present => nothing survived the threshold.
        if len(report) == 1:
            report.append("- No objects matched the confidence threshold.")
        return annotated, "\n".join(report)
    except Exception as exc:
        return image, f"Object detection failed: {exc}"
def generate_voice(text, language, token):
    """Generate a speech audio clip from dialogue text via MMS TTS.

    Returns (audio file path or None, status message).
    """
    if not text:
        return None, "Please enter dialogue text."
    # Unknown languages fall back to the English voice model.
    model_id = VOICE_MODELS.get(language, VOICE_MODELS["English"])
    try:
        response = hf_inference(model_id, payload={"inputs": text}, token=token or HF_TOKEN)
        if response.status_code != 200:
            return None, _format_http_error(response, "Voice generation failed")
        # Persist the audio bytes; delete=False so Gradio can serve the file.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
            audio_file.write(response.content)
            audio_path = audio_file.name
        return audio_path, "Voice clip generated successfully."
    except Exception as exc:
        return None, f"Voice generation failed: {exc}"
# --- UI definition: four tool tabs plus a guide, sharing one token box ------
# FIX: the five tab-label emoji below were mojibake (UTF-8 emoji bytes
# misdecoded, e.g. "๐ŸŽฎ" for U+1F3AE); restored to the intended characters.
with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {TITLE}\n{DESCRIPTION}")
    # One token field feeds every tool; it overrides the HF_TOKEN fallback.
    token_input = gr.Textbox(
        label="Hugging Face Token (optional, required for some models)",
        type="password",
        placeholder="hf_xxx...",
    )
    with gr.Tabs():
        # Image -> GLB mesh via the TRELLIS Space proxy.
        with gr.Tab("🎮 3D Asset Generator"):
            with gr.Row():
                with gr.Column():
                    img_3d = gr.Image(type="pil", label="Reference Image")
                    with gr.Row():
                        seed_3d = gr.Number(value=42, label="Seed", precision=0)
                        guidance_3d = gr.Slider(1, 15, 7.5, label="Guidance")
                        steps_3d = gr.Slider(8, 32, 12, step=1, label="Sampling Steps")
                    btn_3d = gr.Button("Generate .GLB", variant="primary")
                with gr.Column():
                    out_3d = gr.File(label="Download Mesh (.glb)")
                    status_3d = gr.Textbox(label="Status")
            btn_3d.click(
                fn=generate_3d_asset,
                inputs=[img_3d, seed_3d, guidance_3d, steps_3d, token_input],
                outputs=[out_3d, status_3d],
            )
        # Text -> texture / concept art via FLUX Schnell.
        with gr.Tab("🎨 Texture Generator"):
            with gr.Row():
                with gr.Column():
                    prompt_texture = gr.Textbox(
                        label="Prompt",
                        placeholder="Stone wall texture for a fantasy dungeon",
                    )
                    style_texture = gr.Dropdown(
                        choices=list(STYLE_PRESETS.keys()),
                        value="Game Texture",
                        label="Style Preset",
                    )
                    btn_texture = gr.Button("Generate Texture", variant="primary")
                with gr.Column():
                    out_texture = gr.Image(label="Generated Image")
                    status_texture = gr.Textbox(label="Status")
            btn_texture.click(
                fn=generate_texture,
                inputs=[prompt_texture, style_texture, token_input],
                outputs=[out_texture, status_texture],
            )
        # Bounding-box detection via DETR.
        with gr.Tab("🔍 Object Detector"):
            with gr.Row():
                with gr.Column():
                    image_detect = gr.Image(type="pil", label="Reference Scene")
                    confidence_detect = gr.Slider(0.1, 0.9, 0.7, label="Confidence Threshold")
                    btn_detect = gr.Button("Detect Objects", variant="primary")
                with gr.Column():
                    out_detect = gr.Image(label="Detection Result")
                    status_detect = gr.Textbox(label="Results")
            btn_detect.click(
                fn=detect_objects,
                inputs=[image_detect, confidence_detect, token_input],
                outputs=[out_detect, status_detect],
            )
        # Text -> speech via MMS TTS.
        with gr.Tab("🎤 Voice Generator"):
            with gr.Row():
                with gr.Column():
                    text_voice = gr.Textbox(label="Dialogue", placeholder="Welcome to Factory XR Lab.")
                    language_voice = gr.Dropdown(
                        choices=list(VOICE_MODELS.keys()),
                        value="English",
                        label="Language",
                    )
                    btn_voice = gr.Button("Generate Voice", variant="primary")
                with gr.Column():
                    out_voice = gr.Audio(label="Generated Audio", type="filepath")
                    status_voice = gr.Textbox(label="Status")
            btn_voice.click(
                fn=generate_voice,
                inputs=[text_voice, language_voice, token_input],
                outputs=[out_voice, status_voice],
            )
        # Static fine-tuning walkthrough.
        with gr.Tab("📚 LoRA Guide"):
            gr.Markdown(GUIDE_MD)
    gr.Markdown("--- Built by **Prajwal Uppoor**")

# NOTE(review): share=True is ignored (with a warning) on HF Spaces but opens
# a public tunnel when run locally — confirm that is intended.
demo.queue().launch(share=True, show_api=False)