# Spaces: spac333 / Spac3imag3 (Sleeping)
# File size: 12,904 Bytes

import gradio as gr
import torch
from diffusers import (
    StableDiffusionPipeline,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler
)
from PIL import Image
import random

print("🎨 Initializing Stable Diffusion pipeline...")

# Configuration
# NOTE(review): confirm this repository id is still served by the Hub.
MODEL_ID = "runwayml/stable-diffusion-v1-5"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
USE_SAFETENSORS = True

# Load the pipeline: float16 weights on GPU, float32 on CPU.
print(f"📦 Loading model: {MODEL_ID}")

pipe = StableDiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    use_safetensors=USE_SAFETENSORS,
    safety_checker=None,  # Disabled for speed (optional)
)

# Default scheduler, rebuilt from the loaded pipeline's scheduler config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Device placement and optimizations
if DEVICE == "cuda":
    # Model CPU offload handles device placement itself, so the pipeline is
    # deliberately NOT moved to the GPU with pipe.to("cuda") first: doing so
    # only uploads every sub-model to VRAM to have it moved back right away.
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_slicing()
    print("✅ GPU optimizations enabled")
else:
    pipe.to(DEVICE)

print(f"✅ Pipeline loaded on {DEVICE}")


def generate_image(
    prompt,
    negative_prompt="",
    width=512,
    height=512,
    num_inference_steps=25,
    guidance_scale=7.5,
    num_images=1,
    seed=-1,
    scheduler_type="DPM++ 2M",
    progress=gr.Progress()
):
    """
    Generate images from a text prompt.

    Args:
        prompt: Description of the image to generate
        negative_prompt: What to avoid
        width: Image width (multiple of 8)
        height: Image height (multiple of 8)
        num_inference_steps: Quality steps (15-50)
        guidance_scale: Adherence to the prompt (5-15)
        num_images: Number of images to generate (1-4)
        seed: Random seed (-1 or empty for random)
        scheduler_type: Scheduler name ("DPM++ 2M" or "Euler a"); any other
            value leaves the current scheduler untouched
        progress: Progress tracker

    Returns:
        A 3-tuple, one value per wired output component:
        (first image or None, gallery update, Markdown status text).
        The gallery update carries all images and makes the gallery visible
        only when more than one image was generated.
    """

    # Every exit path must return exactly three values, because the handler
    # is bound to three output components.
    if not prompt or not prompt.strip():
        return None, gr.update(value=None, visible=False), "❌ Inserisci un prompt!"

    try:
        progress(0, desc="🎨 Initializing generation...")

        # UI widgets may deliver numbers as floats; the pipeline and
        # torch.Generator.manual_seed need real ints.
        width = int(width)
        height = int(height)
        num_inference_steps = int(num_inference_steps)
        num_images = int(num_images)
        guidance_scale = float(guidance_scale)

        # Pick a seed: an empty field (None) or -1 means "random".
        if seed is None or seed == -1:
            seed = random.randint(0, 2147483647)
        seed = int(seed)

        generator = torch.Generator(device=DEVICE).manual_seed(seed)

        print(f"📝 Prompt: {prompt}")
        print(f"🎲 Seed: {seed}")
        print(f"📐 Size: {width}x{height}")
        print(f"🎨 Steps: {num_inference_steps}")

        # Switch scheduler if requested; unknown names keep the current one.
        scheduler_classes = {
            "Euler a": EulerAncestralDiscreteScheduler,
            "DPM++ 2M": DPMSolverMultistepScheduler,
        }
        scheduler_cls = scheduler_classes.get(scheduler_type)
        if scheduler_cls is not None:
            pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)

        progress(0.2, desc="🖼️ Generating image...")

        # Generate the images
        with torch.no_grad():
            result = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt if negative_prompt else None,
                width=width,
                height=height,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                num_images_per_prompt=num_images,
                generator=generator,
            )

        images = result.images

        progress(1.0, desc="✅ Complete!")

        # Info
        info = f"""
        ✅ **Immagine generata con successo!**
        
        📊 **Dettagli:**
        - Prompt: "{prompt}"
        - Negative: "{negative_prompt if negative_prompt else 'None'}"
        - Risoluzione: {width}x{height}
        - Steps: {num_inference_steps}
        - Guidance Scale: {guidance_scale}
        - Seed: {seed}
        - Scheduler: {scheduler_type}
        - Device: {DEVICE.upper()}
        - Immagini generate: {len(images)}
        
        💡 **Tip:** Salva il seed per ricreare immagini simili!
        """

        # Return the first image plus a gallery update. The gallery component
        # starts hidden, so it has to be made visible explicitly when there
        # are several variations to show.
        has_variations = len(images) > 1
        gallery = gr.update(
            value=images if has_variations else None,
            visible=has_variations,
        )
        return images[0], gallery, info

    except Exception as e:
        # Top-level UI boundary: report the failure in the status panel
        # instead of letting the request crash, and log the traceback.
        error_msg = f"""
        ❌ **Errore durante la generazione:**
        
        {str(e)}
        
        💡 **Possibili soluzioni:**
        - Riduci risoluzione (512x512 consigliato)
        - Riduci inference steps (20-25)
        - Semplifica il prompt
        - Verifica che width e height siano multipli di 8
        """
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return None, gr.update(value=None, visible=False), error_msg


# Predefined examples. Each row lists, in order: prompt, negative prompt,
# width, height, inference steps, guidance scale, number of images, seed,
# scheduler name.
EXAMPLES = [
    list(example)
    for example in (
        (
            "a beautiful landscape with mountains and a lake at sunset, highly detailed, 4k",
            "blurry, low quality, distorted, ugly",
            512,
            512,
            25,
            7.5,
            1,
            42,
            "DPM++ 2M",
        ),
        (
            "portrait of a cute cat wearing a wizard hat, digital art, detailed fur",
            "low quality, blurry",
            512,
            512,
            30,
            8.0,
            1,
            123,
            "DPM++ 2M",
        ),
        (
            "futuristic city with flying cars, neon lights, cyberpunk style, detailed",
            "blurry, low quality",
            768,
            512,
            25,
            7.5,
            1,
            456,
            "DPM++ 2M",
        ),
        (
            "medieval castle on a hill, dramatic lighting, fantasy art, intricate details",
            "modern, contemporary",
            512,
            768,
            30,
            7.5,
            1,
            789,
            "Euler a",
        ),
    )
]


# Gradio interface: input form on the left, results on the right, clickable
# examples below, and a static help footer.
with gr.Blocks(
    title="🎨 Stable Diffusion Generator",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="cyan"
    ),
    css="""
    .gradio-container {max-width: 1400px !important}
    """
) as demo:
    
    # Header text names the model actually loaded via MODEL_ID (SD v1.5).
    gr.Markdown("""
    # 🎨 Stable Diffusion Image Generator
    ### Create Stunning AI Art from Text Descriptions
    
    Powered by **Stable Diffusion v1.5** - State-of-the-art text-to-image generation
    
    💡 **Tips for better results:**
    - Be specific and descriptive
    - Mention style, lighting, and details
    - Use negative prompts to avoid unwanted elements
    - Experiment with different seeds and settings
    """)
    
    with gr.Row():
        # Left column - inputs
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="✨ Prompt (Describe what you want to create)",
                placeholder="Example: a serene japanese garden with cherry blossoms, koi pond, soft lighting, highly detailed, 4k",
                lines=4,
                value="a beautiful landscape with mountains and a lake at sunset, highly detailed, 4k"
            )
            
            negative_prompt_input = gr.Textbox(
                label="🚫 Negative Prompt (What to avoid)",
                placeholder="Example: blurry, low quality, distorted, ugly, deformed",
                lines=2,
                value="blurry, low quality, distorted, ugly"
            )
            
            with gr.Row():
                width = gr.Slider(
                    minimum=256,
                    maximum=1024,
                    value=512,
                    step=64,
                    label="📐 Width",
                    info="Must be multiple of 64"
                )
                
                height = gr.Slider(
                    minimum=256,
                    maximum=1024,
                    value=512,
                    step=64,
                    label="📐 Height",
                    info="Must be multiple of 64"
                )
            
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                num_inference_steps = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=25,
                    step=5,
                    label="🎨 Inference Steps",
                    info="More = better quality but slower"
                )
                
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=20.0,
                    value=7.5,
                    step=0.5,
                    label="🎯 Guidance Scale",
                    info="How closely to follow the prompt (7-10 recommended)"
                )
                
                num_images = gr.Slider(
                    minimum=1,
                    maximum=4,
                    value=1,
                    step=1,
                    label="🖼️ Number of Images",
                    info="Generate multiple variations"
                )
                
                scheduler_type = gr.Dropdown(
                    choices=["DPM++ 2M", "Euler a"],
                    value="DPM++ 2M",
                    label="🔧 Scheduler",
                    info="Different sampling methods"
                )
                
                seed = gr.Number(
                    value=-1,
                    label="🎲 Seed (-1 for random)",
                    info="Use same seed for consistent results",
                    precision=0
                )
            
            generate_btn = gr.Button(
                "🎨 Generate Image",
                variant="primary",
                size="lg"
            )
            
            gr.Markdown("""
            ### 📊 Performance Guide
            
            **CPU (Free tier):**
            - Resolution: 512x512
            - Steps: 15-20
            - Time: ~2-5 min
            
            **GPU T4 ($0.60/h):**
            - Resolution: 768x768
            - Steps: 25-35
            - Time: ~10-30 sec
            """)
        
        # Right column - outputs
        with gr.Column(scale=1):
            image_output = gr.Image(
                label="🖼️ Generated Image",
                type="pil",
                height=512
            )
            
            gallery_output = gr.Gallery(
                label="🎨 Image Variations",
                columns=2,
                rows=2,
                height=400,
                visible=False
            )
            
            info_output = gr.Markdown(
                value="👆 Write a prompt and click 'Generate' to create your image!",
                label="ℹ️ Generation Info"
            )
    
    # Component lists shared by the examples table and the button handler.
    # The input order must match both the positional parameters of
    # generate_image and the column order of each EXAMPLES row.
    generation_inputs = [
        prompt_input,
        negative_prompt_input,
        width,
        height,
        num_inference_steps,
        guidance_scale,
        num_images,
        seed,
        scheduler_type,
    ]
    generation_outputs = [image_output, gallery_output, info_output]
    
    # Examples section
    gr.Markdown("### 🎨 Example Prompts - Click to try")
    
    gr.Examples(
        examples=EXAMPLES,
        inputs=generation_inputs,
        outputs=generation_outputs,
        fn=generate_image,
        cache_examples=False,
    )
    
    # Event handler
    generate_btn.click(
        fn=generate_image,
        inputs=generation_inputs,
        outputs=generation_outputs,
    )
    
    # Footer; "{device}" is substituted with the upper-cased DEVICE below.
    gr.Markdown("""
    ---
    ### 📚 Prompt Engineering Guide
    
    **Structure:** `[Subject] + [Style] + [Lighting] + [Details] + [Quality]`
    
    **Good Prompt Examples:**
    - "a serene japanese garden with cherry blossoms, koi pond, soft golden hour lighting, highly detailed, 4k, photorealistic"
    - "portrait of an astronaut floating in space, cinematic lighting, digital art, trending on artstation"
    - "fantasy castle on a floating island, dramatic storm clouds, epic scale, concept art, octane render"
    
    **Style Keywords:**
    - photorealistic, digital art, oil painting, watercolor, anime, concept art
    - cinematic, dramatic, ethereal, vibrant, muted, pastel
    
    **Quality Modifiers:**
    - highly detailed, 4k, 8k, ultra detailed, intricate, sharp focus
    - trending on artstation, award winning, masterpiece
    
    **Common Negative Prompts:**
    - blurry, low quality, distorted, ugly, deformed, duplicate
    - bad anatomy, poorly drawn, amateur, watermark, signature
    
    ---
    
    ### 🔧 Technical Details
    
    - **Model**: Stable Diffusion v1.5 (512px base model)
    - **Scheduler**: DPM++ 2M / Euler Ancestral
    - **Device**: {device}
    - **VRAM**: CPU offload and VAE slicing (enabled on GPU only)
    
    ### 💡 Tips
    
    - **Square images** (512x512) are fastest
    - **Portrait** (512x768) or **Landscape** (768x512) for specific ratios
    - Start with **guidance scale 7-8**, adjust if needed
    - Use **20-25 steps** for good quality
    - Save your **seed** to recreate variations
    
    ---
    
    **Made with ❤️ using HuggingFace Diffusers & Stable Diffusion**
    """.replace("{device}", DEVICE.upper()))

# Script entry point: enable the request queue, then start the server.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.queue(max_size=20)
    demo.launch(**launch_options)