# Source: Hugging Face Spaces page for "alexander00001 / New.Space" (status: Paused).
# File size: 22,701 Bytes

try:
    import spaces
    SPACES_AVAILABLE = True
    print("✅ Spaces available - ZeroGPU mode")
except ImportError:
    SPACES_AVAILABLE = False
    print("⚠️ Spaces not available - running in regular mode")

import gradio as gr
import torch
from diffusers import DiffusionPipeline, StableDiffusionXLPipeline
from PIL import Image
import datetime
import io
import json
import os
import re
from typing import Optional, List, Dict
import numpy as np

# ======================
# Configuration Section (Modify here to expand)
# ======================

# 1. Base Model - Illustrious XL v1.0
# Hugging Face repo id passed to StableDiffusionXLPipeline.from_pretrained().
BASE_MODEL = "OnomaAIResearch/Illustrious-XL-v1.0"

# 2. Fixed LoRAs (Auto-loaded, not user-selectable)
# Every entry with a truthy "repo_id" is added to each generation with its
# "weight", and its "trigger_words" are appended to the final prompt.
# The "filename" key is not read anywhere in the visible code.
# NOTE(review): "quality_enhancer" points at the SDXL base repo and a file named
# like a full checkpoint rather than a LoRA - confirm this is intended.
# NOTE(review): "pose_control" uses an LCM LoRA repo; presumably that expects a
# matching scheduler/step count - verify against the model card.
FIXED_LORAS = {
    "quality_enhancer": {
        "repo_id": "stabilityai/stable-diffusion-xl-base-1.0",
        "filename": "sd_xl_base_1.0.safetensors",
        "weight": 0.8,
        "trigger_words": "high quality, detailed, sharp"
    },
    "pose_control": {
        "repo_id": "latent-consistency/lcm-lora-sdxl",
        "filename": None,
        "weight": 0.7,
        "trigger_words": "perfect anatomy, natural pose"
    }
}

# 3. Style Templates (Auto-prepended to user prompts)
# Keys are the choices of the "Style Template" radio; the value is concatenated
# directly in front of the user prompt, which is why each non-empty template
# ends with ", ". Unknown style names fall back to an empty prefix.
STYLE_PROMPTS = {
    "None": "",
    "Realistic": "photorealistic, ultra-detailed skin, natural lighting, 8k uhd, professional photography, DSLR, soft lighting, high quality, film grain, Fujifilm XT3, masterpiece, ",
    "Anime": "anime style, cel shading, vibrant colors, detailed eyes, studio ghibli style, manga style, trending on pixiv, masterpiece, ",
    "Comic": "comic book style, bold outlines, dynamic angles, comic panel, Marvel DC style, inked lines, pop art, masterpiece, ",
    "Watercolor": "watercolor painting, soft brush strokes, translucent layers, artistic, painterly, paper texture, traditional art, masterpiece, ",
}

# 4. Optional LoRAs (User-selectable via dropdown, can select multiple)
# Keys are the dropdown choices. For each selected entry with a truthy
# "repo_id", the effective adapter weight is "weight" multiplied by the
# "LoRA Scale" slider, and "trigger_words" are appended to the final prompt.
# "description" is only used by the LoRA information text.
# The "None" entry is a placeholder and is skipped during generation.
# NOTE(review): several repo ids do not obviously match their labels (e.g.
# "Vintage Photo" -> a logo LoRA, "Cinematic Style" -> cyborg_style_xl) -
# confirm each repo actually provides the advertised effect.
OPTIONAL_LORAS = {
    "None": {
        "repo_id": None,
        "weight": 0.0,
        "trigger_words": "",
        "description": "No additional LoRA"
    },
    "Detail Enhancement": {
        "repo_id": "ByteDance/SDXL-Lightning",
        "weight": 0.8,
        "trigger_words": "extremely detailed, intricate details, hyperdetailed",
        "description": "Enhances fine details and textures"
    },
    "Portrait Master": {
        "repo_id": "ostris/face-helper-sdxl-lora",
        "weight": 0.9,
        "trigger_words": "perfect face, beautiful eyes, detailed skin texture",
        "description": "Specializes in realistic portraits"
    },
    "Cinematic Style": {
        "repo_id": "goofyai/cyborg_style_xl",
        "weight": 0.7,
        "trigger_words": "cinematic lighting, dramatic shadows, film noir",
        "description": "Adds cinematic atmosphere"
    },
    "Vintage Photo": {
        "repo_id": "artificialguybr/LogoRedmond-LogoLoraForSDXL-V2",
        "weight": 0.6,
        "trigger_words": "vintage photo, retro style, film photography",
        "description": "Vintage photography effects"
    },
    "Art Nouveau": {
        "repo_id": "ostris/super-cereal-sdxl-lora",
        "weight": 0.8,
        "trigger_words": "art nouveau style, ornate decorations, flowing lines",
        "description": "Art Nouveau artistic style"
    }
}

# Default Parameters
# Initial values of the UI sliders and the targets of the "Reset" buttons.
DEFAULT_SEED = -1  # -1 means "draw a random seed" at generation time
DEFAULT_WIDTH = 1024
DEFAULT_HEIGHT = 1024
DEFAULT_LORA_SCALE = 0.8  # multiplier applied to optional LoRA weights
DEFAULT_STEPS = 30
DEFAULT_CFG = 7.5

# Supported Languages (for future expansion)
# Keys populate a currently hidden dropdown; the selected code is only
# recorded in the generation metadata.
SUPPORTED_LANGUAGES = {
    "en": "English",
    "zh": "中文",
    "ja": "日本語",
    "ko": "한국어"
}

# ======================
# Global Variables: Lazy Loading
# ======================
# Shared pipeline instance; stays None until load_pipeline() creates it.
pipe = None
# Maps each loaded LoRA repo_id to the adapter name it was registered under.
current_loras = {}
# Torch device string, decided once at import time.
device = "cuda" if torch.cuda.is_available() else "cpu"

def load_pipeline():
    """Return the shared Illustrious XL pipeline, building it on first use.

    The pipeline is created once and cached in the module-level ``pipe``;
    later calls return the cached object untouched. The global is only
    assigned after setup has finished, so a failure part-way through leaves
    ``pipe`` as ``None`` and the next call retries from scratch.

    Returns:
        The configured ``StableDiffusionXLPipeline`` instance.

    Raises:
        Exception: Whatever ``from_pretrained`` or the mandatory memory
            optimizations raise (e.g. download failure).
    """
    global pipe
    if pipe is not None:
        return pipe

    print("🚀 Loading Illustrious XL base model...")
    on_gpu = device == "cuda"
    new_pipe = StableDiffusionXLPipeline.from_pretrained(
        BASE_MODEL,
        # Half precision is only practical on GPU; on CPU the fp16 weights
        # are upcast to float32 while loading.
        torch_dtype=torch.float16 if on_gpu else torch.float32,
        use_safetensors=True,
        variant="fp16",
    )

    # Enable memory optimizations for ZeroGPU
    new_pipe.enable_attention_slicing()
    new_pipe.enable_vae_slicing()

    if on_gpu:
        # Model offload moves sub-models to the GPU on demand, so the pipeline
        # is deliberately not moved with .to("cuda") beforehand.
        new_pipe.enable_model_cpu_offload()
        try:
            # xformers is an optional dependency; its absence must not make
            # the whole pipeline unusable.
            new_pipe.enable_xformers_memory_efficient_attention()
        except Exception as exc:
            print(f"⚠️ xformers attention not enabled: {exc}")
    else:
        new_pipe = new_pipe.to(device)

    pipe = new_pipe
    print("✅ Illustrious XL model loaded successfully.")
    return pipe

def unload_pipeline():
    """Drop the cached pipeline and release the memory it holds.

    Does nothing when no pipeline is loaded. Otherwise LoRA weights are
    unloaded on a best-effort basis, the module-level ``pipe`` is reset to
    ``None`` and ``current_loras`` to an empty dict, and cached CUDA memory is
    returned to the driver when CUDA is available.
    """
    import gc

    global pipe, current_loras
    if pipe is None:
        return

    # Clear any loaded LoRAs; a failure here must not prevent the unload.
    try:
        pipe.unload_lora_weights()
    except Exception as exc:
        print(f"⚠️ Could not unload LoRA weights: {exc}")

    pipe = None
    current_loras = {}

    # Collect first so the pipeline's tensors are actually freed before the
    # CUDA caching allocator is asked to release its blocks.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("🗑️ Pipeline unloaded.")

def load_lora_weights(lora_configs: List[Dict]):
    """Make the pipeline's active LoRA adapters match ``lora_configs``.

    Args:
        lora_configs: Dicts with the keys ``'name'`` (adapter name),
            ``'repo_id'`` (Hugging Face repo, falsy entries are ignored) and
            ``'weight'`` (adapter weight). An empty or ``None`` list means
            "no LoRAs" and unloads whatever is currently loaded.

    Side effects:
        Updates the module-level ``current_loras`` (repo_id -> adapter name)
        and loads/unloads adapters on the module-level ``pipe``. Failures to
        load an individual LoRA are reported and skipped, never raised.
    """
    global pipe, current_loras

    if pipe is None:
        print("⚠️ Cannot load LoRAs: pipeline is not loaded.")
        return

    wanted = [cfg for cfg in (lora_configs or []) if cfg.get('repo_id')]
    wanted_ids = {cfg['repo_id'] for cfg in wanted}

    # Unload existing LoRAs if the requested set differs. This also covers an
    # empty request, so adapters from a previous generation never linger.
    if set(current_loras) != wanted_ids:
        try:
            pipe.unload_lora_weights()
            current_loras = {}
        except Exception as exc:
            print(f"⚠️ Warning unloading LoRA weights: {exc}")

    # Load new LoRAs
    adapter_names = []
    adapter_weights = []

    for cfg in wanted:
        repo_id = cfg['repo_id']
        if repo_id not in current_loras:
            try:
                pipe.load_lora_weights(repo_id, adapter_name=cfg['name'])
            except Exception as exc:
                print(f"❌ Failed to load LoRA {cfg['name']}: {exc}")
                continue
            current_loras[repo_id] = cfg['name']
            print(f"✅ Loaded LoRA: {cfg['name']}")

        # Activate under the name the adapter was actually registered with.
        adapter_names.append(current_loras[repo_id])
        adapter_weights.append(cfg['weight'])

    # Set adapter weights
    if adapter_names:
        try:
            pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
        except Exception as exc:
            print(f"⚠️ Warning setting adapter weights: {exc}")

def process_long_prompt(prompt: str, max_length: int = 77) -> str:
    """Shorten ``prompt`` to at most ``max_length`` whitespace-separated words.

    Prompts that already fit are returned unchanged. Longer prompts are split
    into sentences on ``.``, ``!`` and ``?``; the first sentence is kept (cut
    to the word budget if needed), following sentences are appended whole
    while they fit, and the first one that does not fit contributes only its
    words longer than three characters, up to the remaining budget.

    Args:
        prompt: Prompt text to shorten.
        max_length: Maximum number of words to keep; negative values are
            treated as 0.

    Returns:
        The shortened prompt, with kept sentences joined by ``". "``.
    """
    limit = max(max_length, 0)
    words = prompt.split()
    if len(words) <= limit:
        return prompt

    # Split into sentences and prioritize
    sentences = [s.strip() for s in re.split(r'[.!?]+', prompt) if s.strip()]
    if not sentences:
        return " ".join(words[:limit])

    # The first sentence has priority but must respect the budget too;
    # comma-separated tag prompts are typically one single "sentence".
    first_words = sentences[0].split()
    if len(first_words) <= limit:
        head = sentences[0]
    else:
        head = " ".join(first_words[:limit])
    parts = [head]
    remaining = limit - min(len(first_words), limit)

    for sentence in sentences[1:]:
        if remaining <= 0:
            break
        sentence_words = sentence.split()
        if len(sentence_words) <= remaining:
            parts.append(sentence)
            remaining -= len(sentence_words)
            continue
        # Add partial sentence with most important (longer) words
        important_words = [w for w in sentence_words if len(w) > 3][:remaining]
        if important_words:
            parts.append(" ".join(important_words))
        break

    return ". ".join(parts)

# ======================
# Main Generation Function
# ======================
def _collect_lora_configs(selected_loras, lora_scale):
    """Return (lora_configs, trigger_words) for the fixed and selected LoRAs."""
    lora_configs = []
    trigger_words = []

    # Fixed LoRAs are always applied with their configured weight.
    for name, config in FIXED_LORAS.items():
        if not config["repo_id"]:
            continue
        lora_configs.append({
            'name': name,
            'repo_id': config["repo_id"],
            'weight': config["weight"]
        })
        if config["trigger_words"]:
            trigger_words.append(config["trigger_words"])

    # Optional LoRAs are scaled by the user-controlled LoRA scale.
    for lora_name in selected_loras:
        if lora_name == "None" or lora_name not in OPTIONAL_LORAS:
            continue
        config = OPTIONAL_LORAS[lora_name]
        if not config["repo_id"]:
            continue
        lora_configs.append({
            'name': lora_name,
            'repo_id': config["repo_id"],
            'weight': config["weight"] * lora_scale
        })
        if config["trigger_words"]:
            trigger_words.append(config["trigger_words"])

    return lora_configs, trigger_words


@spaces.GPU(duration=60) if SPACES_AVAILABLE else lambda x: x
def generate_image(
    prompt: str,
    negative_prompt: str,
    style: str,
    seed: int,
    width: int,
    height: int,
    selected_loras: List[str],
    lora_scale: float,
    steps: int,
    cfg_scale: float,
    language: str = "en"
):
    """Generate one image and its metadata (ZeroGPU entry point).

    Args:
        prompt: Positive prompt; the selected style template is prepended.
        negative_prompt: Negative prompt.
        style: Key of ``STYLE_PROMPTS``; unknown keys add no prefix.
        seed: RNG seed, ``-1`` draws a random one.
        width: Image width in pixels.
        height: Image height in pixels.
        selected_loras: Keys of ``OPTIONAL_LORAS`` to apply (``None`` or empty
            means none).
        lora_scale: Multiplier applied to the optional LoRA weights.
        steps: Number of inference steps.
        cfg_scale: Classifier-free guidance scale.
        language: Language code, recorded in the metadata only.

    Returns:
        ``(image, metadata_json, image_filename, metadata_filename)`` on
        success, or ``(None, error_message, "", "")`` when anything fails.
        Exceptions are reported through the return value, never raised.
    """
    global pipe

    try:
        # Load pipeline
        pipe = load_pipeline()

        # UI components may deliver None for cleared fields and floats for
        # sliders; normalize before the values reach torch/diffusers.
        prompt = prompt or ""
        negative_prompt = negative_prompt or ""
        selected_loras = list(selected_loras or [])
        seed = int(seed)
        width = int(width)
        height = int(height)
        steps = int(steps)

        # Handle seed
        if seed == -1:
            seed = torch.randint(0, 2**32, (1,)).item()
        generator = torch.Generator(device=device).manual_seed(seed)

        # Process prompts
        style_prefix = STYLE_PROMPTS.get(style, "")
        processed_prompt = process_long_prompt(style_prefix + prompt, max_length=150)
        processed_negative = process_long_prompt(negative_prompt, max_length=100)

        # Prepare and load LoRAs
        lora_configs, active_trigger_words = _collect_lora_configs(
            selected_loras, lora_scale
        )
        load_lora_weights(lora_configs)

        # Combine trigger words with prompt
        if active_trigger_words:
            trigger_text = ", ".join(active_trigger_words)
            final_prompt = f"{processed_prompt}, {trigger_text}"
        else:
            final_prompt = processed_prompt

        # Generate image. The pipeline already runs in its own dtype, so no
        # torch.autocast wrapper is used (diffusers advises against it).
        image = pipe(
            prompt=final_prompt,
            negative_prompt=processed_negative,
            num_inference_steps=steps,
            guidance_scale=cfg_scale,
            width=width,
            height=height,
            generator=generator,
        ).images[0]

        # Generate metadata
        timestamp = datetime.datetime.now()
        metadata = {
            "prompt": final_prompt,
            "original_prompt": prompt,
            "negative_prompt": processed_negative,
            "base_model": BASE_MODEL,
            "style": style,
            "fixed_loras": list(FIXED_LORAS),
            "selected_loras": [name for name in selected_loras if name != "None"],
            "lora_scale": lora_scale,
            "seed": seed,
            "steps": steps,
            "cfg_scale": cfg_scale,
            "width": width,
            "height": height,
            "language": language,
            "timestamp": timestamp.isoformat(),
            "trigger_words": active_trigger_words
        }

        # Generate filenames
        timestamp_str = timestamp.strftime("%y%m%d%H%M")
        filename_base = f"{seed}-{timestamp_str}"

        # Serialize metadata as JSON. The image itself is returned as a PIL
        # object; encoding it is left to whoever writes the download file.
        metadata_str = json.dumps(metadata, indent=2, ensure_ascii=False)

        return (
            image,
            metadata_str,
            f"{filename_base}.webp",
            f"{filename_base}.txt"
        )

    except Exception as e:
        error_msg = f"Generation failed: {str(e)}"
        print(f"❌ {error_msg}")
        return None, error_msg, "", ""

# ======================
# Gradio Interface
# ======================
def create_interface():
    """Build the Gradio Blocks UI and wire up its event handlers.

    The left column holds the prompt, style, size, LoRA and sampling controls;
    the right column shows the generated image, two download buttons and the
    metadata JSON. Each successful generation writes the image (WebP) and the
    metadata (text) into a fresh temporary directory so the download buttons
    can serve them by file path.

    Returns:
        The constructed ``gr.Blocks`` app (not yet queued or launched).
    """
    import tempfile

    with gr.Blocks(
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="green",
            neutral_hue="slate",
        ).set(
            body_background_fill="linear-gradient(135deg, #1e40af, #059669)",
            button_primary_background_fill="white",
            button_primary_text_color="#1e40af",
            input_background_fill="rgba(255,255,255,0.9)",
            block_background_fill="rgba(255,255,255,0.1)",
        ),
        css="""
        body { 
            font-family: 'Segoe UI', 'Arial', sans-serif; 
            background: linear-gradient(135deg, #1e40af, #059669);
        }
        .gr-button { 
            font-family: 'Segoe UI', 'Arial', sans-serif; 
            font-weight: 600;
            border-radius: 8px;
        }
        .gr-textbox { 
            font-family: 'Consolas', 'Monaco', 'Courier New', monospace; 
            border-radius: 8px;
        }
        .gr-dropdown, .gr-slider, .gr-radio { 
            border-radius: 8px;
        }
        .gr-form { 
            background: rgba(255,255,255,0.05);
            border-radius: 16px;
            padding: 20px;
            margin: 10px;
        }
        """,
        title="AI Photo Generator - Illustrious XL"
    ) as demo:

        gr.Markdown("""
        # 🎨 AI Photo Generator (Illustrious XL + Multi-LoRA)
        **PRO + ZeroGPU Optimized | Multi-LoRA Support | Style Templates | Metadata Export | 1536x1536 Native Resolution**
        """)

        with gr.Row():
            # Left Column - Controls
            with gr.Column(scale=3, elem_classes=["gr-form"]):

                # a. Prompt Input
                prompt_input = gr.Textbox(
                    label="Prompt (Positive)",
                    placeholder="A beautiful woman with flowing hair, golden hour lighting, cinematic composition, high detail...",
                    lines=6,
                    max_lines=20,
                    elem_classes=["gr-textbox"]
                )

                # b. Negative Prompt Input
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value="blurry, low quality, deformed, cartoon, anime, text, watermark, signature, username, worst quality, low res, bad anatomy, bad hands, error, missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, bad feet, extra fingers, mutated hands, poorly drawn hands, bad proportions, extra limbs, disfigured, ugly, gross proportions, malformed limbs",
                    lines=4,
                    max_lines=15,
                    elem_classes=["gr-textbox"]
                )

                # c. Style Selection
                style_radio = gr.Radio(
                    choices=list(STYLE_PROMPTS.keys()),
                    label="Style Template",
                    value="Realistic",
                    elem_classes=["gr-radio"]
                )

                # Multi-row controls
                with gr.Row():
                    # d. Seed Control
                    with gr.Column():
                        seed_input = gr.Slider(
                            minimum=-1,
                            maximum=99999999,
                            step=1,
                            value=DEFAULT_SEED,
                            label="Seed (-1 = Random)"
                        )
                        seed_reset = gr.Button("Reset Seed", size="sm")

                with gr.Row():
                    # e. Width Control
                    with gr.Column():
                        width_input = gr.Slider(
                            minimum=512,
                            maximum=1536,
                            step=64,
                            value=DEFAULT_WIDTH,
                            label="Width"
                        )
                        width_reset = gr.Button("Reset Width", size="sm")

                    # f. Height Control
                    with gr.Column():
                        height_input = gr.Slider(
                            minimum=512,
                            maximum=1536,
                            step=64,
                            value=DEFAULT_HEIGHT,
                            label="Height"
                        )
                        height_reset = gr.Button("Reset Height", size="sm")

                # g. LoRA Selection (Multi-select)
                lora_dropdown = gr.Dropdown(
                    choices=list(OPTIONAL_LORAS.keys()),
                    label="Optional LoRAs (Multi-select)",
                    value=["None"],
                    multiselect=True,
                    elem_classes=["gr-dropdown"]
                )

                # Read-only description of the current LoRA selection. It is a
                # named, visible component so the change handler below has
                # somewhere the user can actually see its output.
                lora_info_output = gr.Textbox(
                    label="LoRA Information",
                    value="No LoRAs selected",
                    interactive=False,
                    lines=3,
                    elem_classes=["gr-textbox"]
                )

                # h. LoRA Scale Control
                with gr.Row():
                    lora_scale_slider = gr.Slider(
                        minimum=0.0,
                        maximum=1.5,
                        step=0.05,
                        value=DEFAULT_LORA_SCALE,
                        label="LoRA Scale"
                    )
                    lora_reset = gr.Button("Reset LoRA", size="sm")

                # i. Generation Controls
                with gr.Row():
                    steps_slider = gr.Slider(
                        minimum=10,
                        maximum=100,
                        step=1,
                        value=DEFAULT_STEPS,
                        label="Steps"
                    )
                    cfg_slider = gr.Slider(
                        minimum=1.0,
                        maximum=20.0,
                        step=0.5,
                        value=DEFAULT_CFG,
                        label="CFG Scale"
                    )
                    gen_reset = gr.Button("Reset Generation", size="sm")

                # Language Selection (Optional)
                language_dropdown = gr.Dropdown(
                    choices=list(SUPPORTED_LANGUAGES.keys()),
                    label="Language (Optional)",
                    value="en",
                    visible=False  # Hidden for now, can be enabled later
                )

                # m. Generate Button
                generate_btn = gr.Button(
                    "✨ Generate Image",
                    variant="primary",
                    size="lg",
                    elem_classes=["gr-button"]
                )

            # Right Column - Outputs
            with gr.Column(scale=2):
                # j. Image Display
                image_output = gr.Image(
                    label="Generated Image",
                    height=600,
                    format="webp"
                )

                # l. Download Buttons (between image and metadata)
                with gr.Row():
                    download_img_btn = gr.DownloadButton(
                        "⬇️ Download Image (WebP)",
                        variant="secondary"
                    )
                    download_meta_btn = gr.DownloadButton(
                        "⬇️ Download Metadata (TXT)",
                        variant="secondary"
                    )

                # k. Metadata Display
                metadata_output = gr.Textbox(
                    label="Generation Metadata (JSON)",
                    lines=15,
                    max_lines=25,
                    elem_classes=["gr-textbox"]
                )

        # ======================
        # Event Handlers
        # ======================

        # Reset buttons
        seed_reset.click(fn=lambda: -1, outputs=seed_input)
        width_reset.click(fn=lambda: DEFAULT_WIDTH, outputs=width_input)
        height_reset.click(fn=lambda: DEFAULT_HEIGHT, outputs=height_input)
        lora_reset.click(fn=lambda: DEFAULT_LORA_SCALE, outputs=lora_scale_slider)
        gen_reset.click(
            fn=lambda: (DEFAULT_STEPS, DEFAULT_CFG),
            outputs=[steps_slider, cfg_slider]
        )

        # Main generation function
        def generate_and_prepare_downloads(*args):
            """Run generate_image and expose its results as downloadable files."""
            image, metadata, img_filename, meta_filename = generate_image(*args)
            if image is None:  # Error: metadata carries the error message
                return None, metadata, None, None

            # A download button serves a file path, not raw bytes, so both
            # artifacts are written into a per-generation temporary directory
            # under their intended file names.
            # NOTE(review): these directories are never cleaned up here.
            out_dir = tempfile.mkdtemp(prefix="generation_")

            img_path = os.path.join(out_dir, img_filename)
            image.save(img_path, format="WEBP", quality=95)

            meta_path = os.path.join(out_dir, meta_filename)
            with open(meta_path, "w", encoding="utf-8") as meta_file:
                meta_file.write(metadata)

            return (
                image,
                metadata,
                gr.DownloadButton(value=img_path),
                gr.DownloadButton(value=meta_path)
            )

        # Generate button click
        generate_btn.click(
            fn=generate_and_prepare_downloads,
            inputs=[
                prompt_input, negative_prompt_input, style_radio,
                seed_input, width_input, height_input,
                lora_dropdown, lora_scale_slider,
                steps_slider, cfg_slider, language_dropdown
            ],
            outputs=[
                image_output, metadata_output,
                download_img_btn, download_meta_btn
            ]
        )

        # Show LoRA descriptions
        def show_lora_info(selected_loras):
            """Describe the selected optional LoRAs as plain text."""
            if not selected_loras or selected_loras == ["None"]:
                return "No LoRAs selected"

            lines = ["Selected LoRAs:\n"]
            for lora_name in selected_loras:
                config = OPTIONAL_LORAS.get(lora_name)
                if config is None:
                    continue
                lines.append(f"• {lora_name}: {config['description']}\n")
                if config['trigger_words']:
                    lines.append(f"  Triggers: {config['trigger_words']}\n")
            return "".join(lines)

        lora_dropdown.change(
            fn=show_lora_info,
            inputs=[lora_dropdown],
            outputs=[lora_info_output]
        )

    return demo

# ======================
# Launch Application
# ======================
if __name__ == "__main__":
    # Build the UI, enable request queuing (at most 20 waiting jobs), then
    # serve on all interfaces at port 7860 without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo = create_interface()
    demo.queue(max_size=20)
    demo.launch(**launch_options)