File size: 2,480 Bytes
8f61bf1
 
 
 
 
 
1ef3684
6b6ca07
8f61bf1
1975373
 
 
 
8f61bf1
 
5810306
 
 
8f61bf1
 
 
5d02f57
 
 
 
 
 
8f61bf1
34f1bac
5d02f57
 
 
 
 
 
 
 
 
 
bd84758
34f1bac
8f61bf1
 
34f1bac
8f61bf1
 
 
bd84758
34f1bac
5d02f57
34f1bac
5d02f57
 
 
 
 
34f1bac
5d02f57
 
8f61bf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import io
from PIL import Image
import base64 

import torch

import spaces
from transformers import pipeline

from diffusers import EulerDiscreteScheduler
from diffusers import StableDiffusionPipeline

import gradio as gr

# Select the compute device: CUDA if a GPU is available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face access token (needed for gated models); None when the env var is unset.
hf_api_key = os.environ.get('HF_API_KEY')

# Stable Diffusion checkpoint to load.
model_id = "sd-legacy/stable-diffusion-v1-5"

# Use the Euler scheduler here instead of the checkpoint's default.
# Pass the token here too so the (potentially gated) repo is readable consistently.
scheduler = EulerDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    token=hf_api_key,
)

# Load the image-to-text pipeline with the BLIP captioning model.
get_completion = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

# Load the Stable Diffusion pipeline.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    # float16 halves memory on GPU; CPU kernels require float32.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    scheduler=scheduler,
    token=hf_api_key,  # `use_auth_token` is deprecated in recent diffusers; `token` replaces it
)
pipe = pipe.to(device)

# Caption generate function
@spaces.GPU(duration=120)  # No-op outside ZeroGPU environments, keeping the app portable.
def captioner(image):
    """Return a text caption for *image* (a PIL image) via the BLIP pipeline."""
    candidates = get_completion(image)
    # The pipeline yields a list of dicts; take the text of the first candidate.
    return candidates[0]['generated_text']

# Image generate function
@spaces.GPU(duration=120) # Designed to be effect-free in non-ZeroGPU environments, ensuring compatibility across different setups.
def generate(prompt, steps=25):
    """Generate an image from *prompt* with Stable Diffusion.

    Args:
        prompt: Text prompt describing the desired image.
        steps: Number of denoising steps (default 25). The default also makes
            the function callable with a single argument, matching the Gradio
            wiring that only supplies the prompt.

    Returns:
        The first generated image (PIL format).
    """
    output = pipe(
        prompt,
        negative_prompt=None,  # Handle empty negative prompt
        num_inference_steps=steps,  # bug fix: was hard-coded to 25, silently ignoring `steps`
    )

    return output.images[0]


# Assemble the Gradio UI: upload an image, caption it, then regenerate from the caption.
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game 🖍️")

    uploaded_image = gr.Image(label="Your first image", type="pil")

    caption_button = gr.Button("Generate caption")
    caption_box = gr.Textbox(label="Generated caption")

    image_button = gr.Button("Generate image")
    generated_image = gr.Image(label="Generated Image")

    # Wire the buttons: captioner fills the textbox; its text then seeds generate().
    caption_button.click(fn=captioner, inputs=[uploaded_image], outputs=[caption_box])
    image_button.click(fn=generate, inputs=[caption_box], outputs=[generated_image])


# Launch the app
demo.launch(
    share=True,
    #server_port=int(os.environ['PORT4'])
)