yutengz committed on
Commit
02c8b0b
·
verified ·
1 Parent(s): 4d14930

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -6
README.md CHANGED
@@ -3,7 +3,7 @@ license: mit
3
  tags:
4
  - image-to-image
5
  ---
6
- # ip2p-RoboPredict: InstructPix2Pix Fine-tuning for Robotic Action Frame Prediction
7
  GitHub: https://github.com/yutengzhang03/Action2Vision
8
  <img src='img/show-example.png'/>
9
 
@@ -22,18 +22,24 @@ import PIL
22
  import requests
23
  import torch
24
  from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
 
25
  model_id = "yutengz/Action2Vision"
26
  pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, safety_checker=None)
27
  pipe.to("cuda")
28
  pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
29
- url = "https://github.com/yutengzhang03/Action2Vision/blob/main/img/source.png"
 
 
 
 
30
  def download_image(url):
31
- image = PIL.Image.open(requests.get(url, stream=True).raw)
32
- image = PIL.ImageOps.exif_transpose(image)
33
- image = image.convert("RGB")
34
  return image
 
 
35
  image = download_image(url)
36
  prompt = "There is a hammer and a block in the middle of the table. If the block is closer to the left robotic arm, it uses the left arm to pick up the hammer and strike the block; otherwise, it does the opposite."
37
- images = pipe(prompt, image=image, num_inference_steps=10, image_guidance_scale=1).images
38
  images[0]
39
  ```
 
3
  tags:
4
  - image-to-image
5
  ---
6
+ # Action2Vision: InstructPix2Pix Fine-tuning for Robotic Action Frame Prediction
7
  GitHub: https://github.com/yutengzhang03/Action2Vision
8
  <img src='img/show-example.png'/>
9
 
 
22
  import requests
23
  import torch
24
  from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
25
+
26
  model_id = "yutengz/Action2Vision"
27
  pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, safety_checker=None)
28
  pipe.to("cuda")
29
  pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
30
+
31
+
32
+ to_tensor = transforms.ToTensor()
33
+ resize = transforms.Resize((256, 256))
34
+
35
  def download_image(url):
36
+ def download_image(url):
37
+ image = PIL.Image.open(requests.get(url, stream=True).raw).convert("RGB").resize((256, 256))
 
38
  return image
39
+
40
+ url = "https://github.com/yutengzhang03/Action2Vision/blob/main/img/source.png"
41
  image = download_image(url)
42
  prompt = "There is a hammer and a block in the middle of the table. If the block is closer to the left robotic arm, it uses the left arm to pick up the hammer and strike the block; otherwise, it does the opposite."
43
+ images = pipe(prompt, image=image).images
44
  images[0]
45
  ```