Update README.md
Browse files
README.md
CHANGED
|
@@ -3,7 +3,7 @@ license: mit
|
|
| 3 |
tags:
|
| 4 |
- image-to-image
|
| 5 |
---
|
| 6 |
-
#
|
| 7 |
GitHub: https://github.com/yutengzhang03/Action2Vision
|
| 8 |
<img src='img/show-example.png'/>
|
| 9 |
|
|
@@ -22,18 +22,24 @@ import PIL
|
|
| 22 |
import requests
|
| 23 |
import torch
|
| 24 |
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
|
|
|
|
| 25 |
model_id = "yutengz/Action2Vision"
|
| 26 |
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, safety_checker=None)
|
| 27 |
pipe.to("cuda")
|
| 28 |
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
def download_image(url):
|
| 31 |
-
|
| 32 |
-
image = PIL.
|
| 33 |
-
image = image.convert("RGB")
|
| 34 |
return image
|
|
|
|
|
|
|
| 35 |
image = download_image(url)
|
| 36 |
prompt = "There is a hammer and a block in the middle of the table. If the block is closer to the left robotic arm, it uses the left arm to pick up the hammer and strike the block; otherwise, it does the opposite."
|
| 37 |
-
images = pipe(prompt, image=image
|
| 38 |
images[0]
|
| 39 |
```
|
|
|
|
| 3 |
tags:
|
| 4 |
- image-to-image
|
| 5 |
---
|
| 6 |
+
# Action2Vision: InstructPix2Pix Fine-tuning for Robotic Action Frame Prediction
|
| 7 |
GitHub: https://github.com/yutengzhang03/Action2Vision
|
| 8 |
<img src='img/show-example.png'/>
|
| 9 |
|
|
|
|
| 22 |
import requests
|
| 23 |
import torch
|
| 24 |
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
|
| 25 |
+
|
| 26 |
model_id = "yutengz/Action2Vision"
|
| 27 |
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, safety_checker=None)
|
| 28 |
pipe.to("cuda")
|
| 29 |
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
|
| 30 |
+
|
| 31 |
+
from torchvision import transforms
|
| 32 |
+
to_tensor = transforms.ToTensor()
|
| 33 |
+
resize = transforms.Resize((256, 256))
|
| 34 |
+
|
| 35 |
def download_image(url):
|
| 36 |
+
def download_image(url):
|
| 37 |
+
image = PIL.Image.open(requests.get(url, stream=True).raw).convert("RGB").resize((256, 256))
|
|
|
|
| 38 |
return image
|
| 39 |
+
|
| 40 |
+
url = "https://raw.githubusercontent.com/yutengzhang03/Action2Vision/main/img/source.png"
|
| 41 |
image = download_image(url)
|
| 42 |
prompt = "There is a hammer and a block in the middle of the table. If the block is closer to the left robotic arm, it uses the left arm to pick up the hammer and strike the block; otherwise, it does the opposite."
|
| 43 |
+
images = pipe(prompt, image=image).images
|
| 44 |
images[0]
|
| 45 |
```
|