# InstantTexture

# Sleeping

# File size: 6,752 Bytes

import os
import sys
import logging

# -----------------------------------------------------------------------------
# SEÇÃO 1: COMPATIBILIDADE DE SISTEMA (Polyfill para Python 3.13+)
# -----------------------------------------------------------------------------
# Python 3.13 removed the stdlib 'audioop' module; per the original note, Gradio and
# Pydub depend on it. Prefer the regular module and, only when that import fails,
# try the 'audioop_lts' import name and register it in sys.modules as 'audioop'
# so that later `import audioop` statements resolve to it.
# NOTE(review): the 'audioop-lts' distribution appears to install its module under
# the name 'audioop' itself, in which case the first import already succeeds and
# the 'audioop_lts' fallback never resolves — confirm.
try:
    import audioop
except ImportError:
    try:
        import audioop_lts as audioop
    except ImportError:
        # Neither module is importable: warn and carry on.
        print("AVISO CRÍTICO: 'audioop' não encontrado. Recursos de áudio falharão em Python 3.13+.")
    else:
        sys.modules["audioop"] = audioop
        print("Módulo 'audioop-lts' carregado com sucesso para compatibilidade Python 3.13.")

# -----------------------------------------------------------------------------
# SEÇÃO 2: IMPORTAÇÕES E CONFIGURAÇÃO DE AMBIENTE
# -----------------------------------------------------------------------------
import gradio as gr
import spaces  # Biblioteca essencial para gerenciamento de GPU em HF Spaces
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from PIL import Image

# Logging setup for easier diagnostics; the lines below record the runtime
# environment once at start-up.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# pydantic is only inspected if some other import already loaded it. Log its
# version string rather than the module object, whose repr would be
# "<module 'pydantic' from ...>".
_pydantic = sys.modules.get("pydantic")
_pydantic_version = (
    "Not Loaded" if _pydantic is None else getattr(_pydantic, "__version__", "unknown")
)

logger.info(f"Python Version: {sys.version}")
logger.info(f"Gradio Version: {gr.__version__}")
logger.info(f"Pydantic Version: {_pydantic_version}")
logger.info(f"CUDA Available: {torch.cuda.is_available()}")

# -----------------------------------------------------------------------------
# SEÇÃO 3: CARREGAMENTO DO MODELO (Cache Global)
# -----------------------------------------------------------------------------
# Load the models once at import time, outside the inference function, so they
# are not reloaded on every request. The Hub IDs can be overridden through the
# MODEL_ID and CONTROLNET_ID environment variables; when those are unset or
# empty the defaults below are used.

MODEL_ID = os.environ.get("MODEL_ID") or "runwayml/stable-diffusion-v1-5"
CONTROLNET_ID = os.environ.get("CONTROLNET_ID") or "lllyasviel/sd-controlnet-canny"

# Pre-bind both names so that a failed load leaves them as None rather than
# undefined; code that runs later can then test for None instead of hitting a
# NameError.
controlnet = None
pipe = None

try:
    if torch.cuda.is_available():
        logger.info("Inicializando modelos na GPU...")
        controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=torch.float16)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            MODEL_ID, controlnet=controlnet, torch_dtype=torch.float16
        )
        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
        # NOTE(review): the diffusers documentation advises against moving the
        # pipeline to CUDA before enable_model_cpu_offload(); both calls are
        # kept in their original order — confirm which one is intended here.
        pipe.to("cuda")
        pipe.enable_model_cpu_offload()  # Optimisation for Spaces with limited VRAM
    else:
        logger.warning("GPU não detectada. O modelo rodará em CPU (extremamente lento).")
        controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(MODEL_ID, controlnet=controlnet)
except Exception as e:
    # Deliberately not re-raised: the script keeps going so the UI can still
    # start and surface the error. logger.exception keeps the traceback.
    logger.exception(f"Erro fatal ao carregar modelos: {e}")

# -----------------------------------------------------------------------------
# SEÇÃO 4: LÓGICA DE INFERÊNCIA COM DECORADOR ZERO-GPU
# -----------------------------------------------------------------------------

def _make_canny_control_image(input_image, low_threshold=100, high_threshold=200):
    """Return a 3-channel Canny edge map of *input_image* as a PIL image."""
    # Imported lazily: the caller invokes this helper inside its try block, so a
    # missing OpenCV/NumPy install is reported as a generation error.
    import cv2
    import numpy as np

    # Normalise PIL inputs to RGB so RGBA / palette / other modes do not reach
    # cv2.Canny; non-PIL inputs (e.g. arrays) are passed through unchanged.
    if isinstance(input_image, Image.Image):
        input_image = input_image.convert("RGB")

    edges = cv2.Canny(np.array(input_image), low_threshold, high_threshold)
    # Canny yields a single-channel map; replicate it into three channels.
    edges = edges[:, :, None]
    return Image.fromarray(np.concatenate([edges, edges, edges], axis=2))


@spaces.GPU(duration=120)  # Requests a GPU for this function for up to 120 seconds
def process_image(input_image, prompt, negative_prompt, num_steps, guidance_scale):
    """
    Generate one image from a reference image and text prompts.

    The reference image is turned into a Canny edge map, which is passed to the
    module-level ControlNet pipeline together with the prompts. Per the original
    note, the @spaces.GPU decorator handles queueing and hardware allocation on
    Hugging Face Spaces.

    Args:
        input_image: Reference image (a PIL image when called from the UI).
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        num_steps: Number of inference steps; cast to int.
        guidance_scale: Guidance scale; cast to float.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no input image is given, CUDA is unavailable, the pipeline
            was not loaded, or anything fails during preprocessing/inference.
    """
    if input_image is None:
        raise gr.Error("Por favor, forneça uma imagem de entrada.")

    if not torch.cuda.is_available():
        raise gr.Error("GPU não disponível neste ambiente. Impossível gerar imagem.")

    # Looked up dynamically so a failed model load (name missing or None) gives
    # a clear message instead of a NameError/TypeError wrapped in a generic one.
    pipeline = globals().get("pipe")
    if pipeline is None:
        raise gr.Error("Modelo não carregado. Verifique os logs de inicialização.")

    try:
        canny_image = _make_canny_control_image(input_image)

        result = pipeline(
            prompt,
            image=canny_image,
            negative_prompt=negative_prompt,
            num_inference_steps=int(num_steps),
            guidance_scale=float(guidance_scale),
        )
        # `.images` is a list, while the click handler feeds a single gr.Image
        # output component, so hand back only the first generated image.
        return result.images[0]
    except Exception as e:
        logger.exception(f"Erro durante a inferência: {e}")
        raise gr.Error(f"Falha na geração: {str(e)}") from e

# -----------------------------------------------------------------------------
# SEÇÃO 5: CONSTRUÇÃO DA INTERFACE (Gradio Blocks)
# -----------------------------------------------------------------------------

# Declarative UI: components are created inside nested layout context managers,
# so the order of the statements below determines the rendered layout.
with gr.Blocks(title="ControlNet Stable Diffusion", theme=gr.themes.Base()) as demo:
    gr.Markdown("# 🎨 Stable Diffusion com ControlNet (Canny)")
    gr.Markdown("Gere variações artísticas baseadas na estrutura de uma imagem de entrada.")
    
    with gr.Row():
        # First column: reference image, prompts, advanced settings and the trigger button.
        with gr.Column():
            input_img = gr.Image(label="Imagem de Referência", type="pil", sources=["upload", "webcam"])
            prompt_txt = gr.Textbox(label="Prompt Positivo", value="cyberpunk city, neon lights, highly detailed", lines=2)
            neg_prompt_txt = gr.Textbox(label="Prompt Negativo", value="low quality, blurred, ugly, disfigured", lines=2)
            
            # Sampler settings, collapsed by default (open=False).
            with gr.Accordion("Configurações Avançadas", open=False):
                steps_slider = gr.Slider(label="Passos de Inferência", minimum=10, maximum=50, value=25, step=1)
                cfg_slider = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=20.0, value=7.5, step=0.5)
            
            generate_btn = gr.Button("Gerar Imagem", variant="primary", size="lg")
        
        # Second column: read-only image component that receives the result.
        with gr.Column():
            output_img = gr.Image(label="Resultado Gerado", interactive=False)

    # Event wiring: clicking the button calls process_image with the five inputs
    # (in the same order as its parameters) and sends the result to output_img.
    generate_btn.click(
        fn=process_image,
        inputs=[input_img, prompt_txt, neg_prompt_txt, steps_slider, cfg_slider],
        outputs=output_img
    )

# -----------------------------------------------------------------------------
# SEÇÃO 6: LANÇAMENTO DO SERVIDOR
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    # share=False is crucial: per the original note, share=True tries to create
    # SSH tunnels that fail on Spaces and produce the error
    # "ValueError: When localhost is not accessible".
    # NOTE(review): allowed_paths=["."] permits Gradio to serve files from the
    # working directory — confirm that this exposure is intended.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "allowed_paths": ["."],
    }
    demo.launch(**launch_options)