FALCON-VLA
/

FALCON-series

vision-language-action

spatial-understanding

generalist-robot-policies

Model card Files Files and versions

flameeee committed 26 days ago

Commit

d3a5c46

·

verified ·

1 Parent(s): dbee35c

Update README.md

Files changed (1) hide show

README.md +3 -5

README.md CHANGED Viewed

@@ -121,12 +121,10 @@ FALCON can be used to predict action based on the vision and language input. FAL
 ```python
 import torch
-import json, functools
 from PIL import Image
-from copy import deepcopy
 from falcon.train.base_trainer import BaseTrainer
-from falcon.data.data_utils import preprocess_image
-from falcon.data.data_utils import get_text_function
 from falcon.model.policy_head.esm_utils.vggt.utils.load_fn import load_and_preprocess_images_square_new
 configs = json.load(open('configs/falcon-esm-fc-calvin-abc.json', 'r'))
@@ -147,7 +145,7 @@ text_tensor, attention_mask = text_fn([prompt])
 for step in range(MAX_STEPS):
     image: Image.Image = get_from_side_camera(...)
     # get inputs for esm
-    image_vggt = deepcopy(image)
     image = image_fn([image]).unsqueeze(0)
     esm_target_size = 224

 ```python
 import torch
+import json, functools, copy
 from PIL import Image
 from falcon.train.base_trainer import BaseTrainer
+from falcon.data.data_utils import preprocess_image, get_text_function
 from falcon.model.policy_head.esm_utils.vggt.utils.load_fn import load_and_preprocess_images_square_new
 configs = json.load(open('configs/falcon-esm-fc-calvin-abc.json', 'r'))
 for step in range(MAX_STEPS):
     image: Image.Image = get_from_side_camera(...)
     # get inputs for esm
+    image_vggt = copy.deepcopy(image)
     image = image_fn([image]).unsqueeze(0)
     esm_target_size = 224