File size: 1,065 Bytes
671ddad
 
 
e350fb5
671ddad
b1b9f09
3f19484
b1b9f09
e350fb5
b1b9f09
15c51f4
 
b1b9f09
15c51f4
 
b1b9f09
671ddad
 
b1b9f09
 
 
 
671ddad
b1b9f09
 
 
 
 
 
 
3f19484
b1b9f09
 
671ddad
b1b9f09
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
from PIL import Image
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Hugging Face Hub repository that hosts the fine-tuned pipeline weights.
model_id = "yutengz/Action2Vision"

# Pick the execution device at start-up instead of hard-coding it: use CUDA
# with half precision when a GPU is visible, otherwise fall back to CPU with
# full precision (the configuration this script previously always used).
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    safety_checker=None,  # load the pipeline without a safety-checker component
).to(device)

# Swap the pipeline's scheduler for an Euler-ancestral one built from the
# configuration of the scheduler that shipped with the checkpoint.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

def predict(image: Image.Image, prompt: str) -> Image.Image:
    """Generate the predicted frame for a source image and an instruction.

    Args:
        image: Source image supplied by the Gradio image input. Gradio
            passes ``None`` when the form is submitted without an image.
        prompt: Text instruction forwarded to the pipeline.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no source image was provided.
    """
    if image is None:
        # Show a readable message in the UI rather than failing with an
        # AttributeError on ``None.convert``.
        raise gr.Error("Please upload a source image.")

    # The pipeline is always fed a 256x256 RGB image, whatever the mode or
    # size of the upload.
    image = image.convert("RGB").resize((256, 256))
    return pipe(image=image, prompt=prompt).images[0]

# Input widgets, listed in the positional order of ``predict``'s parameters.
source_image_input = gr.Image(type="pil", label="Source Image")
instruction_input = gr.Textbox(
    label="Instruction Prompt",
    placeholder="e.g., stack the blocks",
)

# Web UI wiring: source image + instruction in, one predicted image out.
demo = gr.Interface(
    fn=predict,
    inputs=[source_image_input, instruction_input],
    outputs=gr.Image(label="Predicted Image"),
    title="🧠 Action2Vision",
    description="A fine-tuned InstructPix2Pix model for robotic action frame prediction.",
)

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()