Update README.md
Browse files
README.md
CHANGED
|
@@ -108,11 +108,8 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
|
|
| 108 |
```
|
| 109 |
|
| 110 |
## Evaluation
|
|
|
|
| 111 |
|
| 112 |
-
<details>
|
| 113 |
-
<summary>
|
| 114 |
-
SimplerEnv evaluation on Google Robot tasks.
|
| 115 |
-
</summary>
|
| 116 |
<table border="1" class="dataframe">
|
| 117 |
<thead>
|
| 118 |
<tr style="text-align: center;">
|
|
@@ -278,137 +275,131 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
|
|
| 278 |
</tbody>
|
| 279 |
</table>
|
| 280 |
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
<
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
<
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
</
|
| 298 |
-
<
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
<
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
<
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
<
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
<
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
<
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
<
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
<
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
<
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
</tbody>
|
| 407 |
-
</table>
|
| 408 |
-
</details>
|
| 409 |
-
|
| 410 |
-
<details>
|
| 411 |
-
<summary>LIBERO Simulation Benchmark Results.</summary>
|
| 412 |
<table border="1" class="dataframe">
|
| 413 |
<thead>
|
| 414 |
<tr style="text-align: center;">
|
|
@@ -501,22 +492,20 @@ bash scripts/spatialvla_4b_finetune/finetune_lora.sh
|
|
| 501 |
</tbody>
|
| 502 |
</table>
|
| 503 |
|
| 504 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
|
| 506 |
-
|
| 507 |
-
<summary>Zero-shot Robot Control Evaluation on WidowX Robot.</summary>
|
| 508 |
-
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/SUPyXwcdfnWranO04tulL.png" alt="perform">
|
| 509 |
-
</details>
|
| 510 |
|
| 511 |
-
<details>
|
| 512 |
-
<summary>Spatial Understanding Capability Evaluation..</summary>
|
| 513 |
-
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/g-EfM-6M7iM9IYryUTwLA.png" alt="perform">
|
| 514 |
-
</details>
|
| 515 |
|
| 516 |
-
<details>
|
| 517 |
-
<summary>Adapting to New Robot Setups on Franka Robot.</summary>
|
| 518 |
-
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/4Z_vjQvsDGUcHCwmBCtRa.png" alt="perform">
|
| 519 |
-
</details>
|
| 520 |
|
| 521 |
|
| 522 |
## Citation
|
|
|
|
| 108 |
```
|
| 109 |
|
| 110 |
## Evaluation
|
| 111 |
+
- SimplerEnv evaluation on Google Robot tasks.
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
<table border="1" class="dataframe">
|
| 114 |
<thead>
|
| 115 |
<tr style="text-align: center;">
|
|
|
|
| 275 |
</tbody>
|
| 276 |
</table>
|
| 277 |
|
| 278 |
+
- SimplerEnv evaluation on WidowX Robot tasks.
|
| 279 |
+
|
| 280 |
+
<table border="1" class="dataframe">
|
| 281 |
+
<thead>
|
| 282 |
+
<tr style="text-align: center;">
|
| 283 |
+
<th rowspan="2">Model</th>
|
| 284 |
+
<th colspan="2">Put Spoon on Towel</th>
|
| 285 |
+
<th colspan="2">Put Carrot on Plate</th>
|
| 286 |
+
<th colspan="2">Stack Green Block on Yellow Block</th>
|
| 287 |
+
<th colspan="2">Put Eggplant in Yellow Basket</th>
|
| 288 |
+
<th rowspan="2">#Overall Average</th>
|
| 289 |
+
</tr>
|
| 290 |
+
<tr style="text-align: center;">
|
| 291 |
+
<th>Grasp Spoon</th>
|
| 292 |
+
<th>Success</th>
|
| 293 |
+
<th>Grasp Carrot</th>
|
| 294 |
+
<th>Success</th>
|
| 295 |
+
<th>Grasp Green Block</th>
|
| 296 |
+
<th>Success</th>
|
| 297 |
+
<th>Grasp Eggplant</th>
|
| 298 |
+
<th>Success</th>
|
| 299 |
+
</tr>
|
| 300 |
+
</thead>
|
| 301 |
+
<tbody>
|
| 302 |
+
<tr>
|
| 303 |
+
<td>RT-1-X</td>
|
| 304 |
+
<td>16.7%</td>
|
| 305 |
+
<td>0.0%</td>
|
| 306 |
+
<td>20.8%</td>
|
| 307 |
+
<td>4.2%</td>
|
| 308 |
+
<td>8.3%</td>
|
| 309 |
+
<td>0.0%</td>
|
| 310 |
+
<td>0.0%</td>
|
| 311 |
+
<td>0.0%</td>
|
| 312 |
+
<td>1.1%</td>
|
| 313 |
+
</tr>
|
| 314 |
+
<tr>
|
| 315 |
+
<td>Octo-Base</td>
|
| 316 |
+
<td>34.7%</td>
|
| 317 |
+
<td>12.5%</td>
|
| 318 |
+
<td>52.8%</td>
|
| 319 |
+
<td>8.3%</td>
|
| 320 |
+
<td>31.9%</td>
|
| 321 |
+
<td>0.0%</td>
|
| 322 |
+
<td>66.7%</td>
|
| 323 |
+
<td>43.1%</td>
|
| 324 |
+
<td>16.0%</td>
|
| 325 |
+
</tr>
|
| 326 |
+
<tr>
|
| 327 |
+
<td>Octo-Small</td>
|
| 328 |
+
<td>77.8%</td>
|
| 329 |
+
<td>47.2%</td>
|
| 330 |
+
<td>27.8%</td>
|
| 331 |
+
<td>9.7%</td>
|
| 332 |
+
<td>40.3%</td>
|
| 333 |
+
<td>4.2%</td>
|
| 334 |
+
<td>87.5%</td>
|
| 335 |
+
<td>56.9%</td>
|
| 336 |
+
<td>30.0%</td>
|
| 337 |
+
</tr>
|
| 338 |
+
<tr>
|
| 339 |
+
<td>OpenVLA</td>
|
| 340 |
+
<td>4.1%</td>
|
| 341 |
+
<td>0.0%</td>
|
| 342 |
+
<td>33.3%</td>
|
| 343 |
+
<td>0.0%</td>
|
| 344 |
+
<td>12.5%</td>
|
| 345 |
+
<td>0.0%</td>
|
| 346 |
+
<td>8.3%</td>
|
| 347 |
+
<td>4.1%</td>
|
| 348 |
+
<td>1.0%</td>
|
| 349 |
+
</tr>
|
| 350 |
+
<tr>
|
| 351 |
+
<td>RoboVLM (zero-shot)</td>
|
| 352 |
+
<td>37.5%</td>
|
| 353 |
+
<td>20.8%</td>
|
| 354 |
+
<td>33.3%</td>
|
| 355 |
+
<td>25.0%</td>
|
| 356 |
+
<td>8.3%</td>
|
| 357 |
+
<td>8.3%</td>
|
| 358 |
+
<td>0.0%</td>
|
| 359 |
+
<td>0.0%</td>
|
| 360 |
+
<td>13.5%</td>
|
| 361 |
+
</tr>
|
| 362 |
+
<tr>
|
| 363 |
+
<td>RoboVLM (fine-tuning)</td>
|
| 364 |
+
<td>54.2%</td>
|
| 365 |
+
<td>29.2%</td>
|
| 366 |
+
<td>25.0%</td>
|
| 367 |
+
<td>25.0%</td>
|
| 368 |
+
<td>45.8%</td>
|
| 369 |
+
<td>12.5%</td>
|
| 370 |
+
<td>58.3%</td>
|
| 371 |
+
<td>58.3%</td>
|
| 372 |
+
<td>31.3%</td>
|
| 373 |
+
</tr>
|
| 374 |
+
<tr>
|
| 375 |
+
<td>SpatialVLA (zero-shot)</td>
|
| 376 |
+
<td><b>25.0%</b></td>
|
| 377 |
+
<td><b>20.8%</b></td>
|
| 378 |
+
<td><b>41.7%</b></td>
|
| 379 |
+
<td>20.8%</td>
|
| 380 |
+
<td><b>58.3%</b></td>
|
| 381 |
+
<td>25.0%</td>
|
| 382 |
+
<td><b>79.2%</b></td>
|
| 383 |
+
<td>70.8%</td>
|
| 384 |
+
<td><b>34.4%</b></td>
|
| 385 |
+
</tr>
|
| 386 |
+
<tr>
|
| 387 |
+
<td>SpatialVLA (fine-tuning)</td>
|
| 388 |
+
<td><b>20.8%</b></td>
|
| 389 |
+
<td>16.7%</td>
|
| 390 |
+
<td>29.2%</td>
|
| 391 |
+
<td>25.0%</td>
|
| 392 |
+
<td><b>62.5%</b></td>
|
| 393 |
+
<td>29.2%</td>
|
| 394 |
+
<td><b>100.0%</b></td>
|
| 395 |
+
<td><b>100.0%</b></td>
|
| 396 |
+
<td><b>42.7%</b></td>
|
| 397 |
+
</tr>
|
| 398 |
+
</tbody>
|
| 399 |
+
</table>
|
| 400 |
+
|
| 401 |
+
- LIBERO Simulation Benchmark Results.
|
| 402 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
<table border="1" class="dataframe">
|
| 404 |
<thead>
|
| 405 |
<tr style="text-align: center;">
|
|
|
|
| 492 |
</tbody>
|
| 493 |
</table>
|
| 494 |
|
| 495 |
+
- Zero-shot Robot Control Evaluation on WidowX Robot.
|
| 496 |
+
|
| 497 |
+
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/SUPyXwcdfnWranO04tulL.png" alt="Zero-shot robot control evaluation results on WidowX Robot">
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
- Spatial Understanding Capability Evaluation.
|
| 501 |
+
|
| 502 |
+
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/g-EfM-6M7iM9IYryUTwLA.png" alt="Spatial understanding capability evaluation results">
|
| 503 |
+
|
| 504 |
|
| 505 |
+
- Adapting to New Robot Setups on Franka Robot.
|
|
|
|
|
|
|
|
|
|
| 506 |
|
| 507 |
+
<img src="https://cdn-uploads.huggingface.co/production/uploads/6535045a910b844786a6642f/4Z_vjQvsDGUcHCwmBCtRa.png" alt="Evaluation results for adapting to new robot setups on Franka Robot">
|
|
|
|
|
|
|
|
|
|
| 508 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
|
| 510 |
|
| 511 |
## Citation
|