# Action2Vision / app.py
# yutengz's picture
# Update app.py
# 15c51f4 verified
import gradio as gr
from PIL import Image
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
# Load the fine-tuned InstructPix2Pix checkpoint from the Hugging Face Hub.
model_id = "yutengz/Action2Vision"

# Choose the execution device once at startup: half precision on a CUDA GPU
# when one is available, otherwise full precision on the CPU (half-precision
# inference is not generally usable on CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    safety_checker=None,  # load the pipeline without its safety checker
).to(device)

# Replace the checkpoint's default scheduler with Euler-ancestral sampling,
# built from the configuration of the scheduler that was just loaded.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
def predict(image: Image.Image, prompt: str) -> Image.Image:
    """Generate the predicted frame for a source image and an instruction.

    The source image is converted to RGB and resized to 256x256 before it is
    handed, together with ``prompt``, to the module-level ``pipe``.

    Args:
        image: Source frame supplied by the UI as a PIL image.
        prompt: Text instruction describing the action to apply.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no source image was provided.
    """
    if image is None:
        # The image input yields None when the form is submitted without an
        # upload; surface a readable message instead of an AttributeError.
        raise gr.Error("Please upload a source image before submitting.")

    source = image.convert("RGB").resize((256, 256))
    return pipe(image=source, prompt=prompt).images[0]
# Web UI: one image input and one text input feed `predict`; the generated
# image is shown as the single output.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Source Image"),
        gr.Textbox(label="Instruction Prompt", placeholder="e.g., stack the blocks"),
    ],
    outputs=gr.Image(label="Predicted Image"),
    title="🧠 Action2Vision",
    description="A fine-tuned InstructPix2Pix model for robotic action frame prediction.",
)

# Start the server only when this file is executed as a script, so importing
# the module (e.g. to reuse `demo` or `predict`) does not launch it.
if __name__ == "__main__":
    demo.launch()