tfrere HF Staff commited on
Commit
f45f4a9
·
1 Parent(s): d37700c

update code handling in latex to mdx

Browse files
app/.astro/astro/content.d.ts CHANGED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ declare module 'astro:content' {
2
+ interface Render {
3
+ '.mdx': Promise<{
4
+ Content: import('astro').MarkdownInstance<{}>['Content'];
5
+ headings: import('astro').MarkdownHeading[];
6
+ remarkPluginFrontmatter: Record<string, any>;
7
+ components: import('astro').MDXInstance<{}>['components'];
8
+ }>;
9
+ }
10
+ }
11
+
12
+ declare module 'astro:content' {
13
+ interface RenderResult {
14
+ Content: import('astro/runtime/server/index.js').AstroComponentFactory;
15
+ headings: import('astro').MarkdownHeading[];
16
+ remarkPluginFrontmatter: Record<string, any>;
17
+ }
18
+ interface Render {
19
+ '.md': Promise<RenderResult>;
20
+ }
21
+
22
+ export interface RenderedContent {
23
+ html: string;
24
+ metadata?: {
25
+ imagePaths: Array<string>;
26
+ [key: string]: unknown;
27
+ };
28
+ }
29
+ }
30
+
31
+ declare module 'astro:content' {
32
+ type Flatten<T> = T extends { [K: string]: infer U } ? U : never;
33
+
34
+ export type CollectionKey = keyof AnyEntryMap;
35
+ export type CollectionEntry<C extends CollectionKey> = Flatten<AnyEntryMap[C]>;
36
+
37
+ export type ContentCollectionKey = keyof ContentEntryMap;
38
+ export type DataCollectionKey = keyof DataEntryMap;
39
+
40
+ type AllValuesOf<T> = T extends any ? T[keyof T] : never;
41
+ type ValidContentEntrySlug<C extends keyof ContentEntryMap> = AllValuesOf<
42
+ ContentEntryMap[C]
43
+ >['slug'];
44
+
45
+ /** @deprecated Use `getEntry` instead. */
46
+ export function getEntryBySlug<
47
+ C extends keyof ContentEntryMap,
48
+ E extends ValidContentEntrySlug<C> | (string & {}),
49
+ >(
50
+ collection: C,
51
+ // Note that this has to accept a regular string too, for SSR
52
+ entrySlug: E,
53
+ ): E extends ValidContentEntrySlug<C>
54
+ ? Promise<CollectionEntry<C>>
55
+ : Promise<CollectionEntry<C> | undefined>;
56
+
57
+ /** @deprecated Use `getEntry` instead. */
58
+ export function getDataEntryById<C extends keyof DataEntryMap, E extends keyof DataEntryMap[C]>(
59
+ collection: C,
60
+ entryId: E,
61
+ ): Promise<CollectionEntry<C>>;
62
+
63
+ export function getCollection<C extends keyof AnyEntryMap, E extends CollectionEntry<C>>(
64
+ collection: C,
65
+ filter?: (entry: CollectionEntry<C>) => entry is E,
66
+ ): Promise<E[]>;
67
+ export function getCollection<C extends keyof AnyEntryMap>(
68
+ collection: C,
69
+ filter?: (entry: CollectionEntry<C>) => unknown,
70
+ ): Promise<CollectionEntry<C>[]>;
71
+
72
+ export function getEntry<
73
+ C extends keyof ContentEntryMap,
74
+ E extends ValidContentEntrySlug<C> | (string & {}),
75
+ >(entry: {
76
+ collection: C;
77
+ slug: E;
78
+ }): E extends ValidContentEntrySlug<C>
79
+ ? Promise<CollectionEntry<C>>
80
+ : Promise<CollectionEntry<C> | undefined>;
81
+ export function getEntry<
82
+ C extends keyof DataEntryMap,
83
+ E extends keyof DataEntryMap[C] | (string & {}),
84
+ >(entry: {
85
+ collection: C;
86
+ id: E;
87
+ }): E extends keyof DataEntryMap[C]
88
+ ? Promise<DataEntryMap[C][E]>
89
+ : Promise<CollectionEntry<C> | undefined>;
90
+ export function getEntry<
91
+ C extends keyof ContentEntryMap,
92
+ E extends ValidContentEntrySlug<C> | (string & {}),
93
+ >(
94
+ collection: C,
95
+ slug: E,
96
+ ): E extends ValidContentEntrySlug<C>
97
+ ? Promise<CollectionEntry<C>>
98
+ : Promise<CollectionEntry<C> | undefined>;
99
+ export function getEntry<
100
+ C extends keyof DataEntryMap,
101
+ E extends keyof DataEntryMap[C] | (string & {}),
102
+ >(
103
+ collection: C,
104
+ id: E,
105
+ ): E extends keyof DataEntryMap[C]
106
+ ? Promise<DataEntryMap[C][E]>
107
+ : Promise<CollectionEntry<C> | undefined>;
108
+
109
+ /** Resolve an array of entry references from the same collection */
110
+ export function getEntries<C extends keyof ContentEntryMap>(
111
+ entries: {
112
+ collection: C;
113
+ slug: ValidContentEntrySlug<C>;
114
+ }[],
115
+ ): Promise<CollectionEntry<C>[]>;
116
+ export function getEntries<C extends keyof DataEntryMap>(
117
+ entries: {
118
+ collection: C;
119
+ id: keyof DataEntryMap[C];
120
+ }[],
121
+ ): Promise<CollectionEntry<C>[]>;
122
+
123
+ export function render<C extends keyof AnyEntryMap>(
124
+ entry: AnyEntryMap[C][string],
125
+ ): Promise<RenderResult>;
126
+
127
+ export function reference<C extends keyof AnyEntryMap>(
128
+ collection: C,
129
+ ): import('astro/zod').ZodEffects<
130
+ import('astro/zod').ZodString,
131
+ C extends keyof ContentEntryMap
132
+ ? {
133
+ collection: C;
134
+ slug: ValidContentEntrySlug<C>;
135
+ }
136
+ : {
137
+ collection: C;
138
+ id: keyof DataEntryMap[C];
139
+ }
140
+ >;
141
+ // Allow generic `string` to avoid excessive type errors in the config
142
+ // if `dev` is not running to update as you edit.
143
+ // Invalid collection names will be caught at build time.
144
+ export function reference<C extends string>(
145
+ collection: C,
146
+ ): import('astro/zod').ZodEffects<import('astro/zod').ZodString, never>;
147
+
148
+ type ReturnTypeOrOriginal<T> = T extends (...args: any[]) => infer R ? R : T;
149
+ type InferEntrySchema<C extends keyof AnyEntryMap> = import('astro/zod').infer<
150
+ ReturnTypeOrOriginal<Required<ContentConfig['collections'][C]>['schema']>
151
+ >;
152
+
153
+ type ContentEntryMap = {
154
+ "chapters": {
155
+ "00_abstract.mdx": {
156
+ id: "00_abstract.mdx";
157
+ slug: "00_abstract";
158
+ body: string;
159
+ collection: "chapters";
160
+ data: any
161
+ } & { render(): Render[".mdx"] };
162
+ "01_introduction.mdx": {
163
+ id: "01_introduction.mdx";
164
+ slug: "01_introduction";
165
+ body: string;
166
+ collection: "chapters";
167
+ data: any
168
+ } & { render(): Render[".mdx"] };
169
+ "02_classic_robotics.mdx": {
170
+ id: "02_classic_robotics.mdx";
171
+ slug: "02_classic_robotics";
172
+ body: string;
173
+ collection: "chapters";
174
+ data: any
175
+ } & { render(): Render[".mdx"] };
176
+ "03_reinforcement_learning.mdx": {
177
+ id: "03_reinforcement_learning.mdx";
178
+ slug: "03_reinforcement_learning";
179
+ body: string;
180
+ collection: "chapters";
181
+ data: any
182
+ } & { render(): Render[".mdx"] };
183
+ "04_imitation_learning.mdx": {
184
+ id: "04_imitation_learning.mdx";
185
+ slug: "04_imitation_learning";
186
+ body: string;
187
+ collection: "chapters";
188
+ data: any
189
+ } & { render(): Render[".mdx"] };
190
+ "06_next_directions.mdx": {
191
+ id: "06_next_directions.mdx";
192
+ slug: "06_next_directions";
193
+ body: string;
194
+ collection: "chapters";
195
+ data: any
196
+ } & { render(): Render[".mdx"] };
197
+ "07_conclusions.mdx": {
198
+ id: "07_conclusions.mdx";
199
+ slug: "07_conclusions";
200
+ body: string;
201
+ collection: "chapters";
202
+ data: any
203
+ } & { render(): Render[".mdx"] };
204
+ "A_foreword.mdx": {
205
+ id: "A_foreword.mdx";
206
+ slug: "a_foreword";
207
+ body: string;
208
+ collection: "chapters";
209
+ data: any
210
+ } & { render(): Render[".mdx"] };
211
+ };
212
+ "embeds2": {
213
+ "vibe-code-d3-embeds-directives.md": {
214
+ id: "vibe-code-d3-embeds-directives.md";
215
+ slug: "vibe-code-d3-embeds-directives";
216
+ body: string;
217
+ collection: "embeds2";
218
+ data: any
219
+ } & { render(): Render[".md"] };
220
+ };
221
+ "test": {
222
+ "converted-sample.mdx": {
223
+ id: "converted-sample.mdx";
224
+ slug: "converted-sample";
225
+ body: string;
226
+ collection: "test";
227
+ data: any
228
+ } & { render(): Render[".mdx"] };
229
+ };
230
+
231
+ };
232
+
233
+ type DataEntryMap = {
234
+ "assets": {
235
+ "data/somedata": {
236
+ id: "data/somedata";
237
+ collection: "assets";
238
+ data: any
239
+ };
240
+ };
241
+
242
+ };
243
+
244
+ type AnyEntryMap = ContentEntryMap & DataEntryMap;
245
+
246
+ export type ContentConfig = never;
247
+ }
app/.astro/settings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed6d28be38b13c36af0d93f09ca03071e80381d49463aa549a5ee625ef9a8b56
3
  size 58
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1865b6aacedcdcad152d991001f2a6e0b3c3874242f788c312d21d727a1ffbef
3
  size 58
app/package-lock.json CHANGED
Binary files a/app/package-lock.json and b/app/package-lock.json differ
 
app/scripts/latex-to-mdx/metadata-extractor.mjs CHANGED
@@ -15,6 +15,7 @@ export function extractLatexMetadata(latexContent) {
15
  const titleMatch = latexContent.match(/\\title\s*\{\s*([^}]+)\s*\}/s);
16
  if (titleMatch) {
17
  metadata.title = titleMatch[1]
 
18
  .replace(/\n/g, ' ')
19
  .trim();
20
  }
@@ -39,6 +40,8 @@ export function extractLatexMetadata(latexContent) {
39
  let authorName = fullAuthorInfo
40
  .replace(/\\ensps/g, '') // Remove École macro
41
  .replace(/\\hf/g, '') // Remove Hugging Face macro
 
 
42
  .replace(/\s+/g, ' ') // Normalize whitespace
43
  .trim();
44
 
 
15
  const titleMatch = latexContent.match(/\\title\s*\{\s*([^}]+)\s*\}/s);
16
  if (titleMatch) {
17
  metadata.title = titleMatch[1]
18
+ .replace(/\\[a-zA-Z]+/g, '') // Remove LaTeX commands
19
  .replace(/\n/g, ' ')
20
  .trim();
21
  }
 
40
  let authorName = fullAuthorInfo
41
  .replace(/\\ensps/g, '') // Remove École macro
42
  .replace(/\\hf/g, '') // Remove Hugging Face macro
43
+ .replace(/\\oxford/g, '') // Remove Oxford macro
44
+ .replace(/\\[a-zA-Z]+/g, '') // Remove any other LaTeX commands
45
  .replace(/\s+/g, ' ') // Normalize whitespace
46
  .trim();
47
 
app/scripts/latex-to-mdx/output/assets/image/figures/ch3/ch3-hil-serl-architecture.png ADDED

Git LFS Details

  • SHA256: c11857fb0113a346d95cd04164362be1c8ea4fca1b58b3965c4b42e3af377584
  • Pointer size: 132 Bytes
  • Size of remote file: 1.21 MB
app/scripts/latex-to-mdx/output/assets/image/figures/ch4/ch4-diffusion-policy.png CHANGED

Git LFS Details

  • SHA256: c3cb644c79fd016e77c78bd7fcf185908b18fb127f656003eb577349cfb6da40
  • Pointer size: 132 Bytes
  • Size of remote file: 2.81 MB

Git LFS Details

  • SHA256: e66afa347ccd952aeb4f4e071b1bb8b0fc37001771a3950e86a343f7c86d1da1
  • Pointer size: 132 Bytes
  • Size of remote file: 2.79 MB
app/scripts/latex-to-mdx/output/assets/image/figures/ch4/ch4-diffusion-robot-actions.png CHANGED

Git LFS Details

  • SHA256: a59b816b60a53784127e3dcf0aad612ba14474bde57e1c2b73b670665d1b70ec
  • Pointer size: 132 Bytes
  • Size of remote file: 8.93 MB

Git LFS Details

  • SHA256: f5e3b7f576bcd0c15dbfbbca2861f22ecb10ae5d55e4c140930e5d3546c5582f
  • Pointer size: 132 Bytes
  • Size of remote file: 8.92 MB
app/scripts/latex-to-mdx/output/assets/image/figures/ch4/ch4-normalizing-flows.png CHANGED

Git LFS Details

  • SHA256: 51f73d09b35b8ccd5685c6b26f7615f8d6ab3df7d045b2502e9232bfe33beace
  • Pointer size: 131 Bytes
  • Size of remote file: 278 kB

Git LFS Details

  • SHA256: c8cd7d408e3c9e5da34ac202714089c049322a66a35bd20a168b3452fcb88046
  • Pointer size: 132 Bytes
  • Size of remote file: 4.73 MB
app/scripts/latex-to-mdx/output/assets/image/figures/ch5/ch5-smolvla.png CHANGED

Git LFS Details

  • SHA256: 49575d51c64eb320c588673fb9b33d1d0a3de7f6af7165a18c35ffb40af93e7a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB

Git LFS Details

  • SHA256: e36e8efceac1ef7d3196cf1d9f27aaab5102418d8448df94847c677abff6c929
  • Pointer size: 132 Bytes
  • Size of remote file: 1.63 MB
app/scripts/latex-to-mdx/output/main.bib CHANGED
@@ -352,17 +352,6 @@
352
 
353
  }
354
 
355
- @misc{cadene2024lerobot,
356
- title = {{{LeRobot}}: {{State-of-the-art}} Machine Learning for Real-World Robotics in Pytorch},
357
- author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Palma, Steven and Kooijmans, Pepijn and Aractingi, Michel and Shukor, Mustafa and Aubakirova, Dana and Russi, Martino and Capuano, Francesco and Pascal, Caroline and Choghari, Jade and Moss, Jess and Wolf, Thomas},
358
- year = {2024}
359
- }
360
-
361
- @misc{cadeneLeRobotStateoftheartMachine,
362
- title = {{{LeRobot}}: {{State-of-the-art Machine Learning}} for {{Real-World Robotics}} in {{Pytorch}}},
363
- author = {Cadene, Remi}
364
- }
365
-
366
  @misc{cadeneLeRobotStateoftheartMachine2024,
367
  title = {{{LeRobot}}: {{State-of-the-art Machine Learning}} for {{Real-World Robotics}} in {{Pytorch}}},
368
  author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Galloudec, Quentin and Zouitine, Adil and Palma, Steven and Kooijmans, Pepijn and Aractingi, Michel and Shukor, Mustafa and Aubakirova, Dana and Russi, Martino and Capuano, Francesco and Pascal, Caroline and Chogari, Jade and Moss, Jess and Wolf, Thomas},
@@ -386,15 +375,6 @@
386
 
387
  }
388
 
389
- @inproceedings{chebotar2019closing,
390
- title = {Closing the Sim-to-Real Loop: {{Adapting}} Simulation Randomization with Real World Experience},
391
- booktitle = {2019 International Conference on Robotics and Automation ({{ICRA}})},
392
- author = {Chebotar, Yevgen and Handa, Ankur and Makoviychuk, Viktor and Macklin, Miles and Issac, Jan and Ratliff, Nathan and Fox, Dieter},
393
- year = {2019},
394
- pages = {8973--8979},
395
- publisher = {IEEE}
396
- }
397
-
398
  @inproceedings{chebotarClosingSimtorealLoop2019,
399
  title = {Closing the Sim-to-Real Loop: {{Adapting}} Simulation Randomization with Real World Experience},
400
  shorttitle = {Closing the Sim-to-Real Loop},
@@ -442,24 +422,6 @@
442
 
443
  }
444
 
445
- @misc{collaborationOpenXEmbodimentRobotic2025,
446
- title = {Open {{X-Embodiment}}: {{Robotic Learning Datasets}} and {{RT-X Models}}},
447
- shorttitle = {Open {{X-Embodiment}}},
448
- author = {Collaboration, Open X.-Embodiment and O'Neill, Abby and Rehman, Abdul and Gupta, Abhinav and Maddukuri, Abhiram and Gupta, Abhishek and Padalkar, Abhishek and Lee, Abraham and Pooley, Acorn and Gupta, Agrim and Mandlekar, Ajay and Jain, Ajinkya and Tung, Albert and Bewley, Alex and Herzog, Alex and Irpan, Alex and Khazatsky, Alexander and Rai, Anant and Gupta, Anchit and Wang, Andrew and Kolobov, Andrey and Singh, Anikait and Garg, Animesh and Kembhavi, Aniruddha and Xie, Annie and Brohan, Anthony and Raffin, Antonin and Sharma, Archit and Yavary, Arefeh and Jain, Arhan and Balakrishna, Ashwin and Wahid, Ayzaan and {Burgess-Limerick}, Ben and Kim, Beomjoon and Sch{\"o}lkopf, Bernhard and Wulfe, Blake and Ichter, Brian and Lu, Cewu and Xu, Charles and Le, Charlotte and Finn, Chelsea and Wang, Chen and Xu, Chenfeng and Chi, Cheng and Huang, Chenguang and Chan, Christine and Agia, Christopher and Pan, Chuer and Fu, Chuyuan and Devin, Coline and Xu, Danfei and Morton, Daniel and Driess, Danny and Chen, Daphne and Pathak, Deepak and Shah, Dhruv and B{\"u}chler, Dieter and Jayaraman, Dinesh and Kalashnikov, Dmitry and Sadigh, Dorsa and Johns, Edward and Foster, Ethan and Liu, Fangchen and Ceola, Federico and Xia, Fei and Zhao, Feiyu and Frujeri, Felipe Vieira and Stulp, Freek and Zhou, Gaoyue and Sukhatme, Gaurav S. and Salhotra, Gautam and Yan, Ge and Feng, Gilbert and Schiavi, Giulio and Berseth, Glen and Kahn, Gregory and Yang, Guangwen and Wang, Guanzhi and Su, Hao and Fang, Hao-Shu and Shi, Haochen and Bao, Henghui and Amor, Heni Ben and Christensen, Henrik I. 
and Furuta, Hiroki and Bharadhwaj, Homanga and Walke, Homer and Fang, Hongjie and Ha, Huy and Mordatch, Igor and Radosavovic, Ilija and Leal, Isabel and Liang, Jacky and {Abou-Chakra}, Jad and Kim, Jaehyung and Drake, Jaimyn and Peters, Jan and Schneider, Jan and Hsu, Jasmine and Vakil, Jay and Bohg, Jeannette and Bingham, Jeffrey and Wu, Jeffrey and Gao, Jensen and Hu, Jiaheng and Wu, Jiajun and Wu, Jialin and Sun, Jiankai and Luo, Jianlan and Gu, Jiayuan and Tan, Jie and Oh, Jihoon and Wu, Jimmy and Lu, Jingpei and Yang, Jingyun and Malik, Jitendra and Silv{\'e}rio, Jo{\~a}o and Hejna, Joey and Booher, Jonathan and Tompson, Jonathan and Yang, Jonathan and Salvador, Jordi and Lim, Joseph J. and Han, Junhyek and Wang, Kaiyuan and Rao, Kanishka and Pertsch, Karl and Hausman, Karol and Go, Keegan and Gopalakrishnan, Keerthana and Goldberg, Ken and Byrne, Kendra and Oslund, Kenneth and Kawaharazuka, Kento and Black, Kevin and Lin, Kevin and Zhang, Kevin and Ehsani, Kiana and Lekkala, Kiran and Ellis, Kirsty and Rana, Krishan and Srinivasan, Krishnan and Fang, Kuan and Singh, Kunal Pratap and Zeng, Kuo-Hao and Hatch, Kyle and Hsu, Kyle and Itti, Laurent and Chen, Lawrence Yunliang and Pinto, Lerrel and {Fei-Fei}, Li and Tan, Liam and Fan, Linxi "Jim" and Ott, Lionel and Lee, Lisa and Weihs, Luca and Chen, Magnum and Lepert, Marion and Memmel, Marius and Tomizuka, Masayoshi and Itkina, Masha and Castro, Mateo Guaman and Spero, Max and Du, Maximilian and Ahn, Michael and Yip, Michael C. and Zhang, Mingtong and Ding, Mingyu and Heo, Minho and Srirama, Mohan Kumar and Sharma, Mohit and Kim, Moo Jin and Irshad, Muhammad Zubair and Kanazawa, Naoaki and Hansen, Nicklas and Heess, Nicolas and Joshi, Nikhil J. and Suenderhauf, Niko and Liu, Ning and Palo, Norman Di and Shafiullah, Nur Muhammad Mahi and Mees, Oier and Kroemer, Oliver and Bastani, Osbert and Sanketi, Pannag R. 
and Miller, Patrick "Tree" and Yin, Patrick and Wohlhart, Paul and Xu, Peng and Fagan, Peter David and Mitrano, Peter and Sermanet, Pierre and Abbeel, Pieter and Sundaresan, Priya and Chen, Qiuyu and Vuong, Quan and Rafailov, Rafael and Tian, Ran and Doshi, Ria and {Mart{\'i}n-Mart{\'i}n}, Roberto and Baijal, Rohan and Scalise, Rosario and Hendrix, Rose and Lin, Roy and Qian, Runjia and Zhang, Ruohan and Mendonca, Russell and Shah, Rutav and Hoque, Ryan and Julian, Ryan and Bustamante, Samuel and Kirmani, Sean and Levine, Sergey and Lin, Shan and Moore, Sherry and Bahl, Shikhar and Dass, Shivin and Sonawani, Shubham and Tulsiani, Shubham and Song, Shuran and Xu, Sichun and Haldar, Siddhant and Karamcheti, Siddharth and Adebola, Simeon and Guist, Simon and Nasiriany, Soroush and Schaal, Stefan and Welker, Stefan and Tian, Stephen and Ramamoorthy, Subramanian and Dasari, Sudeep and Belkhale, Suneel and Park, Sungjae and Nair, Suraj and Mirchandani, Suvir and Osa, Takayuki and Gupta, Tanmay and Harada, Tatsuya and Matsushima, Tatsuya and Xiao, Ted and Kollar, Thomas and Yu, Tianhe and Ding, Tianli and Davchev, Todor and Zhao, Tony Z. and Armstrong, Travis and Darrell, Trevor and Chung, Trinity and Jain, Vidhi and Kumar, Vikash and Vanhoucke, Vincent and Guizilini, Vitor and Zhan, Wei and Zhou, Wenxuan and Burgard, Wolfram and Chen, Xi and Chen, Xiangyu and Wang, Xiaolong and Zhu, Xinghao and Geng, Xinyang and Liu, Xiyuan and Liangwei, Xu and Li, Xuanlin and Pang, Yansong and Lu, Yao and Ma, Yecheng Jason and Kim, Yejin and Chebotar, Yevgen and Zhou, Yifan and Zhu, Yifeng and Wu, Yilin and Xu, Ying and Wang, Yixuan and Bisk, Yonatan and Dou, Yongqiang and Cho, Yoonyoung and Lee, Youngwoon and Cui, Yuchen and Cao, Yue and Wu, Yueh-Hua and Tang, Yujin and Zhu, Yuke and Zhang, Yunchu and Jiang, Yunfan and Li, Yunshuang and Li, Yunzhu and Iwasawa, Yusuke and Matsuo, Yutaka and Ma, Zehan and Xu, Zhuo and Cui, Zichen Jeff and Zhang, Zichen and Fu, Zipeng and Lin, Zipeng},
449
- year = {2025},
450
- month = may,
451
- number = {arXiv:2310.08864},
452
- eprint = {2310.08864},
453
- primaryclass = {cs},
454
- publisher = {arXiv},
455
- doi = {10.48550/arXiv.2310.08864},
456
- urldate = {2025-09-08},
457
- abstract = {Large, high-capacity models trained on diverse datasets have shown remarkable successes on efficiently tackling downstream applications. In domains from NLP to Computer Vision, this has led to a consolidation of pretrained models, with general pretrained backbones serving as a starting point for many applications. Can such a consolidation happen in robotics? Conventionally, robotic learning methods train a separate model for every application, every robot, and even every environment. Can we instead train generalist X-robot policy that can be adapted efficiently to new robots, tasks, and environments? In this paper, we provide datasets in standardized data formats and models to make it possible to explore this possibility in the context of robotic manipulation, alongside experimental results that provide an example of effective X-robot policies. We assemble a dataset from 22 different robots collected through a collaboration between 21 institutions, demonstrating 527 skills (160266 tasks). We show that a high-capacity model trained on this data, which we call RT-X, exhibits positive transfer and improves the capabilities of multiple robots by leveraging experience from other platforms. More details can be found on the project website https://robotics-transformer-x.github.io.},
458
- archiveprefix = {arXiv},
459
- keywords = {Computer Science - Robotics}
460
-
461
- }
462
-
463
  @book{connellRobotLearning1993,
464
  title = {Robot {{Learning}}},
465
  editor = {Connell, Jonathan H. and Mahadevan, Sridhar},
@@ -663,40 +625,6 @@
663
 
664
  }
665
 
666
- @misc{haarnojaReinforcementLearningDeep2017,
667
- title = {Reinforcement {{Learning}} with {{Deep Energy-Based Policies}}},
668
- author = {Haarnoja, Tuomas and Tang, Haoran and Abbeel, Pieter and Levine, Sergey},
669
- year = {2017},
670
- month = jul,
671
- number = {arXiv:1702.08165},
672
- eprint = {1702.08165},
673
- primaryclass = {cs},
674
- publisher = {arXiv},
675
- doi = {10.48550/arXiv.1702.08165},
676
- urldate = {2025-08-31},
677
- abstract = {We propose a method for learning expressive energy-based policies for continuous states and actions, which has been feasible only in tabular domains before. We apply our method to learning maximum entropy policies, resulting into a new algorithm, called soft Q-learning, that expresses the optimal policy via a Boltzmann distribution. We use the recently proposed amortized Stein variational gradient descent to learn a stochastic sampling network that approximates samples from this distribution. The benefits of the proposed algorithm include improved exploration and compositionality that allows transferring skills between tasks, which we confirm in simulated experiments with swimming and walking robots. We also draw a connection to actor-critic methods, which can be viewed performing approximate inference on the corresponding energy-based model.},
678
- archiveprefix = {arXiv},
679
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
680
-
681
- }
682
-
683
- @misc{haarnojaReinforcementLearningDeep2017a,
684
- title = {Reinforcement {{Learning}} with {{Deep Energy-Based Policies}}},
685
- author = {Haarnoja, Tuomas and Tang, Haoran and Abbeel, Pieter and Levine, Sergey},
686
- year = {2017},
687
- month = jul,
688
- number = {arXiv:1702.08165},
689
- eprint = {1702.08165},
690
- primaryclass = {cs},
691
- publisher = {arXiv},
692
- doi = {10.48550/arXiv.1702.08165},
693
- urldate = {2025-08-31},
694
- abstract = {We propose a method for learning expressive energy-based policies for continuous states and actions, which has been feasible only in tabular domains before. We apply our method to learning maximum entropy policies, resulting into a new algorithm, called soft Q-learning, that expresses the optimal policy via a Boltzmann distribution. We use the recently proposed amortized Stein variational gradient descent to learn a stochastic sampling network that approximates samples from this distribution. The benefits of the proposed algorithm include improved exploration and compositionality that allows transferring skills between tasks, which we confirm in simulated experiments with swimming and walking robots. We also draw a connection to actor-critic methods, which can be viewed performing approximate inference on the corresponding energy-based model.},
695
- archiveprefix = {arXiv},
696
- keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning}
697
-
698
- }
699
-
700
  @inproceedings{haarnojaReinforcementLearningDeep2017b,
701
  title = {Reinforcement {{Learning}} with {{Deep Energy-Based Policies}}},
702
  booktitle = {Proceedings of the 34th {{International Conference}} on {{Machine Learning}}},
@@ -788,22 +716,6 @@
788
 
789
  }
790
 
791
- @article{hwangboLearningAgileDynamic2019,
792
- title = {Learning Agile and Dynamic Motor Skills for Legged Robots},
793
- author = {Hwangbo, Jemin and Lee, Joonho and Dosovitskiy, Alexey and Bellicoso, Dario and Tsounis, Vassilios and Koltun, Vladlen and Hutter, Marco},
794
- year = {2019},
795
- month = jan,
796
- journal = {Science Robotics},
797
- volume = {4},
798
- number = {26},
799
- pages = {eaau5872},
800
- publisher = {American Association for the Advancement of Science},
801
- doi = {10.1126/scirobotics.aau5872},
802
- urldate = {2025-08-27},
803
- abstract = {Legged robots pose one of the greatest challenges in robotics. Dynamic and agile maneuvers of animals cannot be imitated by existing methods that are crafted by humans. A compelling alternative is reinforcement learning, which requires minimal craftsmanship and promotes the natural evolution of a control policy. However, so far, reinforcement learning research for legged robots is mainly limited to simulation, and only few and comparably simple examples have been deployed on real systems. The primary reason is that training with real robots, particularly with dynamically balancing systems, is complicated and expensive. In the present work, we introduce a method for training a neural network policy in simulation and transferring it to a state-of-the-art legged system, thereby leveraging fast, automated, and cost-effective data generation schemes. The approach is applied to the ANYmal robot, a sophisticated medium-dog--sized quadrupedal system. Using policies trained in simulation, the quadrupedal machine achieves locomotion skills that go beyond what had been achieved with prior methods: ANYmal is capable of precisely and energy-efficiently following high-level body velocity commands, running faster than before, and recovering from falling even in complex configurations.}
804
-
805
- }
806
-
807
  @inproceedings{ImageNet_VSS09,
808
  title = {Construction and Analysis of a Large Scale Image Ontology},
809
  author = {Deng, J. and Li, K. and Do, M. and Su, H. and {Fei-Fei}, L.},
@@ -818,6 +730,24 @@
818
  year = {2023}
819
  }
820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821
  @misc{jangBCZZeroShotTask2022,
822
  title = {{{BC-Z}}: {{Zero-Shot Task Generalization}} with {{Robotic Imitation Learning}}},
823
  shorttitle = {{{BC-Z}}},
@@ -929,14 +859,6 @@
929
 
930
  }
931
 
932
- @article{khatibRealTimeObstancleAvoidance1986,
933
- title = {Real-{{Time Obstancle Avoidance}} for {{Manipulators}} and {{Mobile Robots}}},
934
- author = {Khatib, Oussama},
935
- year = {1986},
936
- journal = {The International Journal of Robotics Research},
937
- volume = {5}
938
- }
939
-
940
  @misc{khazatskyDROIDLargeScaleInTheWild2025,
941
  title = {{{DROID}}: {{A Large-Scale In-The-Wild Robot Manipulation Dataset}}},
942
  shorttitle = {{{DROID}}},
@@ -973,21 +895,14 @@
973
 
974
  }
975
 
976
- @misc{kingmaAutoEncodingVariationalBayes2022,
977
- title = {Auto-{{Encoding Variational Bayes}}},
978
- author = {Kingma, Diederik P. and Welling, Max},
979
- year = {2022},
980
- month = dec,
981
- number = {arXiv:1312.6114},
982
  eprint = {1312.6114},
983
- primaryclass = {stat},
984
- publisher = {arXiv},
985
- doi = {10.48550/arXiv.1312.6114},
986
- urldate = {2025-09-02},
987
  abstract = {How can we perform efficient inference and learning in directed probabilistic models, in the presence of continuous latent variables with intractable posterior distributions, and large datasets? We introduce a stochastic variational inference and learning algorithm that scales to large datasets and, under some mild differentiability conditions, even works in the intractable case. Our contributions are two-fold. First, we show that a reparameterization of the variational lower bound yields a lower bound estimator that can be straightforwardly optimized using standard stochastic gradient methods. Second, we show that for i.i.d. datasets with continuous latent variables per datapoint, posterior inference can be made especially efficient by fitting an approximate inference model (also called a recognition model) to the intractable posterior using the proposed lower bound estimator. Theoretical advantages are reflected in experimental results.},
988
- archiveprefix = {arXiv},
989
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
990
-
991
  }
992
 
993
  @misc{knightStandardOpenSO100,
@@ -1120,23 +1035,6 @@
1120
 
1121
  }
1122
 
1123
- @misc{lillicrapContinuousControlDeep2019,
1124
- title = {Continuous Control with Deep Reinforcement Learning},
1125
- author = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
1126
- year = {2019},
1127
- month = jul,
1128
- number = {arXiv:1509.02971},
1129
- eprint = {1509.02971},
1130
- primaryclass = {cs},
1131
- publisher = {arXiv},
1132
- doi = {10.48550/arXiv.1509.02971},
1133
- urldate = {2025-08-31},
1134
- abstract = {We adapt the ideas underlying the success of Deep Q-Learning to the continuous action domain. We present an actor-critic, model-free algorithm based on the deterministic policy gradient that can operate over continuous action spaces. Using the same learning algorithm, network architecture and hyper-parameters, our algorithm robustly solves more than 20 simulated physics tasks, including classic problems such as cartpole swing-up, dexterous manipulation, legged locomotion and car driving. Our algorithm is able to find policies whose performance is competitive with those found by a planning algorithm with full access to the dynamics of the domain and its derivatives. We further demonstrate that for many of the tasks the algorithm can learn policies end-to-end: directly from raw pixel inputs.},
1135
- archiveprefix = {arXiv},
1136
- keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
1137
-
1138
- }
1139
-
1140
  @misc{lillicrapContinuousControlDeep2019a,
1141
  title = {Continuous Control with Deep Reinforcement Learning},
1142
  author = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
@@ -1257,6 +1155,25 @@
1257
 
1258
  }
1259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1260
  @book{lynchModernRoboticsMechanics2017,
1261
  title = {Modern {{Robotics}}: {{Mechanics}}, {{Planning}}, and {{Control}}},
1262
  shorttitle = {Modern {{Robotics}}},
@@ -1431,6 +1348,24 @@
1431
  year = {2023}
1432
  }
1433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1434
  @misc{openaiGPT4TechnicalReport2024,
1435
  title = {{{GPT-4 Technical Report}}},
1436
  author = {OpenAI and Achiam, Josh and Adler, Steven and Agarwal, Sandhini and Ahmad, Lama and Akkaya, Ilge and Aleman, Florencia Leoni and Almeida, Diogo and Altenschmidt, Janko and Altman, Sam and Anadkat, Shyamal and Avila, Red and Babuschkin, Igor and Balaji, Suchir and Balcom, Valerie and Baltescu, Paul and Bao, Haiming and Bavarian, Mohammad and Belgum, Jeff and Bello, Irwan and Berdine, Jake and {Bernadett-Shapiro}, Gabriel and Berner, Christopher and Bogdonoff, Lenny and Boiko, Oleg and Boyd, Madelaine and Brakman, Anna-Luisa and Brockman, Greg and Brooks, Tim and Brundage, Miles and Button, Kevin and Cai, Trevor and Campbell, Rosie and Cann, Andrew and Carey, Brittany and Carlson, Chelsea and Carmichael, Rory and Chan, Brooke and Chang, Che and Chantzis, Fotis and Chen, Derek and Chen, Sully and Chen, Ruby and Chen, Jason and Chen, Mark and Chess, Ben and Cho, Chester and Chu, Casey and Chung, Hyung Won and Cummings, Dave and Currier, Jeremiah and Dai, Yunxing and Decareaux, Cory and Degry, Thomas and Deutsch, Noah and Deville, Damien and Dhar, Arka and Dohan, David and Dowling, Steve and Dunning, Sheila and Ecoffet, Adrien and Eleti, Atty and Eloundou, Tyna and Farhi, David and Fedus, Liam and Felix, Niko and Fishman, Sim{\'o}n Posada and Forte, Juston and Fulford, Isabella and Gao, Leo and Georges, Elie and Gibson, Christian and Goel, Vik and Gogineni, Tarun and Goh, Gabriel and {Gontijo-Lopes}, Rapha and Gordon, Jonathan and Grafstein, Morgan and Gray, Scott and Greene, Ryan and Gross, Joshua and Gu, Shixiang Shane and Guo, Yufei and Hallacy, Chris and Han, Jesse and Harris, Jeff and He, Yuchen and Heaton, Mike and Heidecke, Johannes and Hesse, Chris and Hickey, Alan and Hickey, Wade and Hoeschele, Peter and Houghton, Brandon and Hsu, Kenny and Hu, Shengli and Hu, Xin and Huizinga, Joost and Jain, Shantanu and Jain, Shawn and Jang, Joanne and Jiang, Angela and Jiang, Roger and Jin, Haozhun and Jin, Denny and Jomoto, Shino and Jonn, Billie and Jun, 
Heewoo and Kaftan, Tomer and Kaiser, {\L}ukasz and Kamali, Ali and Kanitscheider, Ingmar and Keskar, Nitish Shirish and Khan, Tabarak and Kilpatrick, Logan and Kim, Jong Wook and Kim, Christina and Kim, Yongjik and Kirchner, Jan Hendrik and Kiros, Jamie and Knight, Matt and Kokotajlo, Daniel and Kondraciuk, {\L}ukasz and Kondrich, Andrew and Konstantinidis, Aris and Kosic, Kyle and Krueger, Gretchen and Kuo, Vishal and Lampe, Michael and Lan, Ikai and Lee, Teddy and Leike, Jan and Leung, Jade and Levy, Daniel and Li, Chak Ming and Lim, Rachel and Lin, Molly and Lin, Stephanie and Litwin, Mateusz and Lopez, Theresa and Lowe, Ryan and Lue, Patricia and Makanju, Anna and Malfacini, Kim and Manning, Sam and Markov, Todor and Markovski, Yaniv and Martin, Bianca and Mayer, Katie and Mayne, Andrew and McGrew, Bob and McKinney, Scott Mayer and McLeavey, Christine and McMillan, Paul and McNeil, Jake and Medina, David and Mehta, Aalok and Menick, Jacob and Metz, Luke and Mishchenko, Andrey and Mishkin, Pamela and Monaco, Vinnie and Morikawa, Evan and Mossing, Daniel and Mu, Tong and Murati, Mira and Murk, Oleg and M{\'e}ly, David and Nair, Ashvin and Nakano, Reiichiro and Nayak, Rajeev and Neelakantan, Arvind and Ngo, Richard and Noh, Hyeonwoo and Ouyang, Long and O'Keefe, Cullen and Pachocki, Jakub and Paino, Alex and Palermo, Joe and Pantuliano, Ashley and Parascandolo, Giambattista and Parish, Joel and Parparita, Emy and Passos, Alex and Pavlov, Mikhail and Peng, Andrew and Perelman, Adam and Peres, Filipe de Avila Belbute and Petrov, Michael and Pinto, Henrique Ponde de Oliveira and Michael and Pokorny and Pokrass, Michelle and Pong, Vitchyr H. 
and Powell, Tolly and Power, Alethea and Power, Boris and Proehl, Elizabeth and Puri, Raul and Radford, Alec and Rae, Jack and Ramesh, Aditya and Raymond, Cameron and Real, Francis and Rimbach, Kendra and Ross, Carl and Rotsted, Bob and Roussez, Henri and Ryder, Nick and Saltarelli, Mario and Sanders, Ted and Santurkar, Shibani and Sastry, Girish and Schmidt, Heather and Schnurr, David and Schulman, John and Selsam, Daniel and Sheppard, Kyla and Sherbakov, Toki and Shieh, Jessica and Shoker, Sarah and Shyam, Pranav and Sidor, Szymon and Sigler, Eric and Simens, Maddie and Sitkin, Jordan and Slama, Katarina and Sohl, Ian and Sokolowsky, Benjamin and Song, Yang and Staudacher, Natalie and Such, Felipe Petroski and Summers, Natalie and Sutskever, Ilya and Tang, Jie and Tezak, Nikolas and Thompson, Madeleine B. and Tillet, Phil and Tootoonchian, Amin and Tseng, Elizabeth and Tuggle, Preston and Turley, Nick and Tworek, Jerry and Uribe, Juan Felipe Cer{\'o}n and Vallone, Andrea and Vijayvergiya, Arun and Voss, Chelsea and Wainwright, Carroll and Wang, Justin Jay and Wang, Alvin and Wang, Ben and Ward, Jonathan and Wei, Jason and Weinmann, C. J. and Welihinda, Akila and Welinder, Peter and Weng, Jiayi and Weng, Lilian and Wiethoff, Matt and Willner, Dave and Winter, Clemens and Wolrich, Samuel and Wong, Hannah and Workman, Lauren and Wu, Sherwin and Wu, Jeff and Wu, Michael and Xiao, Kai and Xu, Tao and Yoo, Sarah and Yu, Kevin and Yuan, Qiming and Zaremba, Wojciech and Zellers, Rowan and Zhang, Chong and Zhang, Marvin and Zhao, Shengjia and Zheng, Tianhao and Zhuang, Juntang and Zhuk, William and Zoph, Barret},
@@ -1448,15 +1383,6 @@
1448
 
1449
  }
1450
 
1451
- @misc{OpenXEmbodimentRobotic,
1452
- title = {Open {{X-Embodiment}}: {{Robotic Learning Datasets}} and {{RT-X Models}}},
1453
- shorttitle = {Open {{X-Embodiment}}},
1454
- urldate = {2025-08-27},
1455
- abstract = {Project page for Open X-Embodiment: Robotic Learning Datasets and RT-X Models.},
1456
- howpublished = {https://robotics-transformer-x.github.io/}
1457
-
1458
- }
1459
-
1460
  @misc{oquabDINOv2LearningRobust2024,
1461
  title = {{{DINOv2}}: {{Learning Robust Visual Features}} without {{Supervision}}},
1462
  shorttitle = {{{DINOv2}}},
@@ -1554,19 +1480,6 @@
1554
 
1555
  }
1556
 
1557
- @inproceedings{pomerleauALVINNAutonomousLand1988a,
1558
- title = {{{ALVINN}}: {{An Autonomous Land Vehicle}} in a {{Neural Network}}},
1559
- shorttitle = {{{ALVINN}}},
1560
- booktitle = {Advances in {{Neural Information Processing Systems}}},
1561
- author = {Pomerleau, Dean A.},
1562
- year = {1988},
1563
- volume = {1},
1564
- publisher = {Morgan-Kaufmann},
1565
- urldate = {2025-09-01},
1566
- abstract = {ALVINN (Autonomous Land Vehicle In a Neural Network) is a 3-layer back-propagation network designed for the task of road following. Cur(cid:173) rently ALVINN takes images from a camera and a laser range finder as input and produces as output the direction the vehicle should travel in order to follow the road. Training has been conducted using simulated road images. Successful tests on the Carnegie Mellon autonomous navigation test vehicle indicate that the network can effectively follow real roads under certain field conditions. The representation developed to perfOIm the task differs dra(cid:173) matically when the networlc is trained under various conditions, suggesting the possibility of a novel adaptive autonomous navigation system capable of tailoring its processing to the conditions at hand.}
1567
-
1568
- }
1569
-
1570
  @book{prince2023understanding,
1571
  title = {Understanding Deep Learning},
1572
  author = {Prince, Simon J.D.},
@@ -1728,7 +1641,7 @@
1728
  edition = {1},
1729
  publisher = {Cambridge University Press},
1730
  doi = {10.1017/CBO9781107298019},
1731
- urldate = {2025-09-01},
1732
  abstract = {Machine learning is one of the fastest growing areas of computer science, with far-reaching applications. The aim of this textbook is to introduce machine learning, and the algorithmic paradigms it offers, in a principled way. The book provides a theoretical account of the fundamentals underlying machine learning and the mathematical derivations that transform these principles into practical algorithms. Following a presentation of the basics, the book covers a wide array of central topics unaddressed by previous textbooks. These include a discussion of the computational complexity of learning and the concepts of convexity and stability; important algorithmic paradigms including stochastic gradient descent, neural networks, and structured output learning; and emerging theoretical concepts such as the PAC-Bayes approach and compression-based bounds. Designed for advanced undergraduates or beginning graduates, the text makes the fundamentals and algorithms of machine learning accessible to students and non-expert readers in statistics, computer science, mathematics and engineering.},
1733
  copyright = {https://www.cambridge.org/core/terms},
1734
  isbn = {978-1-107-05713-5 978-1-107-29801-9},
@@ -1804,61 +1717,6 @@
1804
 
1805
  }
1806
 
1807
- @misc{SignYourAccount,
1808
- title = {Sign in to Your Account},
1809
- urldate = {2025-09-02},
1810
- howpublished = {https://login.microsoftonline.com/cc95de1b-97f5-4f93-b4ba-fe68b852cf91/login}
1811
-
1812
- }
1813
-
1814
- @article{silverDeterministicPolicyGradient,
1815
- title = {Deterministic {{Policy Gradient Algorithms}}},
1816
- author = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
1817
- abstract = {In this paper we consider deterministic policy gradient algorithms for reinforcement learning with continuous actions. The deterministic policy gradient has a particularly appealing form: it is the expected gradient of the action-value function. This simple form means that the deterministic policy gradient can be estimated much more efficiently than the usual stochastic policy gradient. To ensure adequate exploration, we introduce an off-policy actor-critic algorithm that learns a deterministic target policy from an exploratory behaviour policy. We demonstrate that deterministic policy gradient algorithms can significantly outperform their stochastic counterparts in high-dimensional action spaces.},
1818
- langid = {english}
1819
-
1820
- }
1821
-
1822
- @inproceedings{silverDeterministicPolicyGradient2014,
1823
- title = {Deterministic {{Policy Gradient Algorithms}}},
1824
- booktitle = {Proceedings of the 31st {{International Conference}} on {{Machine Learning}}},
1825
- author = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
1826
- year = {2014},
1827
- month = jan,
1828
- pages = {387--395},
1829
- publisher = {PMLR},
1830
- issn = {1938-7228},
1831
- urldate = {2025-08-31},
1832
- abstract = {In this paper we consider deterministic policy gradient algorithms for reinforcement learning with continuous actions. The deterministic policy gradient has a particularly appealing form: it is the expected gradient of the action-value function. This simple form means that the deterministic policy gradient can be estimated much more efficiently than the usual stochastic policy gradient. To ensure adequate exploration, we introduce an off-policy actor-critic algorithm that learns a deterministic target policy from an exploratory behaviour policy. Deterministic policy gradient algorithms outperformed their stochastic counterparts in several benchmark problems, particularly in high-dimensional action spaces.},
1833
- langid = {english}
1834
-
1835
- }
1836
-
1837
- @article{silverDeterministicPolicyGradienta,
1838
- title = {Deterministic {{Policy Gradient Algorithms}}},
1839
- author = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
1840
- abstract = {In this paper we consider deterministic policy gradient algorithms for reinforcement learning with continuous actions. The deterministic policy gradient has a particularly appealing form: it is the expected gradient of the action-value function. This simple form means that the deterministic policy gradient can be estimated much more efficiently than the usual stochastic policy gradient. To ensure adequate exploration, we introduce an off-policy actor-critic algorithm that learns a deterministic target policy from an exploratory behaviour policy. We demonstrate that deterministic policy gradient algorithms can significantly outperform their stochastic counterparts in high-dimensional action spaces.},
1841
- langid = {english}
1842
-
1843
- }
1844
-
1845
- @misc{sohl-dicksteinDeepUnsupervisedLearning2015,
1846
- title = {Deep {{Unsupervised Learning}} Using {{Nonequilibrium Thermodynamics}}},
1847
- author = {{Sohl-Dickstein}, Jascha and Weiss, Eric A. and Maheswaranathan, Niru and Ganguli, Surya},
1848
- year = {2015},
1849
- month = nov,
1850
- number = {arXiv:1503.03585},
1851
- eprint = {1503.03585},
1852
- primaryclass = {cs},
1853
- publisher = {arXiv},
1854
- doi = {10.48550/arXiv.1503.03585},
1855
- urldate = {2025-09-04},
1856
- abstract = {A central problem in machine learning involves modeling complex data-sets using highly flexible families of probability distributions in which learning, sampling, inference, and evaluation are still analytically or computationally tractable. Here, we develop an approach that simultaneously achieves both flexibility and tractability. The essential idea, inspired by non-equilibrium statistical physics, is to systematically and slowly destroy structure in a data distribution through an iterative forward diffusion process. We then learn a reverse diffusion process that restores structure in data, yielding a highly flexible and tractable generative model of the data. This approach allows us to rapidly learn, sample from, and evaluate probabilities in deep generative models with thousands of layers or time steps, as well as to compute conditional and posterior probabilities under the learned model. We additionally release an open source reference implementation of the algorithm.},
1857
- archiveprefix = {arXiv},
1858
- keywords = {Computer Science - Machine Learning,Condensed Matter - Disordered Systems and Neural Networks,Quantitative Biology - Neurons and Cognition,Statistics - Machine Learning}
1859
-
1860
- }
1861
-
1862
  @inproceedings{sohnLearningStructuredOutput2015,
1863
  title = {Learning {{Structured Output Representation}} Using {{Deep Conditional Generative Models}}},
1864
  booktitle = {Advances in {{Neural Information Processing Systems}}},
@@ -1894,13 +1752,6 @@
1894
  year = {2018}
1895
  }
1896
 
1897
- @misc{SuttonBartoBook,
1898
- title = {Sutton \& {{Barto Book}}: {{Reinforcement Learning}}: {{An Introduction}}},
1899
- urldate = {2025-08-28},
1900
- howpublished = {http://incompleteideas.net/book/the-book-2nd.html}
1901
-
1902
- }
1903
-
1904
  @inproceedings{suttonPolicyGradientMethods1999,
1905
  title = {Policy {{Gradient Methods}} for {{Reinforcement Learning}} with {{Function Approximation}}},
1906
  booktitle = {Advances in {{Neural Information Processing Systems}}},
@@ -1959,24 +1810,6 @@
1959
 
1960
  }
1961
 
1962
- @misc{tangDeepReinforcementLearning2024,
1963
- title = {Deep {{Reinforcement Learning}} for {{Robotics}}: {{A Survey}} of {{Real-World Successes}}},
1964
- shorttitle = {Deep {{Reinforcement Learning}} for {{Robotics}}},
1965
- author = {Tang, Chen and Abbatematteo, Ben and Hu, Jiaheng and Chandra, Rohan and {Mart{\'i}n-Mart{\'i}n}, Roberto and Stone, Peter},
1966
- year = {2024},
1967
- month = sep,
1968
- number = {arXiv:2408.03539},
1969
- eprint = {2408.03539},
1970
- primaryclass = {cs},
1971
- publisher = {arXiv},
1972
- doi = {10.48550/arXiv.2408.03539},
1973
- urldate = {2025-08-29},
1974
- abstract = {Reinforcement learning (RL), particularly its combination with deep neural networks referred to as deep RL (DRL), has shown tremendous promise across a wide range of applications, suggesting its potential for enabling the development of sophisticated robotic behaviors. Robotics problems, however, pose fundamental difficulties for the application of RL, stemming from the complexity and cost of interacting with the physical world. This article provides a modern survey of DRL for robotics, with a particular focus on evaluating the real-world successes achieved with DRL in realizing several key robotic competencies. Our analysis aims to identify the key factors underlying those exciting successes, reveal underexplored areas, and provide an overall characterization of the status of DRL in robotics. We highlight several important avenues for future work, emphasizing the need for stable and sample-efficient real-world RL paradigms, holistic approaches for discovering and integrating various competencies to tackle complex long-horizon, open-world tasks, and principled development and evaluation procedures. This survey is designed to offer insights for both RL practitioners and roboticists toward harnessing RL's power to create generally capable real-world robotic systems.},
1975
- archiveprefix = {arXiv},
1976
- keywords = {Computer Science - Machine Learning,Computer Science - Robotics}
1977
-
1978
- }
1979
-
1980
  @article{tangDeepReinforcementLearning2025,
1981
  title = {Deep {{Reinforcement Learning}} for {{Robotics}}: {{A Survey}} of {{Real-World Successes}}},
1982
  shorttitle = {Deep {{Reinforcement Learning}} for {{Robotics}}},
@@ -2219,29 +2052,9 @@
2219
 
2220
  }
2221
 
2222
- @misc{zhongPracticalBlockwiseNeural2018,
2223
- title = {Practical {{Block-wise Neural Network Architecture Generation}}},
2224
- author = {Zhong, Zhao and Yan, Junjie and Wu, Wei and Shao, Jing and Liu, Cheng-Lin},
2225
- year = {2018},
2226
- month = may,
2227
- number = {arXiv:1708.05552},
2228
- eprint = {1708.05552},
2229
- primaryclass = {cs},
2230
- publisher = {arXiv},
2231
- urldate = {2023-05-05},
2232
- abstract = {Convolutional neural networks have gained a remarkable success in computer vision. However, most usable network architectures are hand-crafted and usually require expertise and elaborate design. In this paper, we provide a block-wise network generation pipeline called BlockQNN which automatically builds high-performance networks using the Q-Learning paradigm with epsilon-greedy exploration strategy. The optimal network block is constructed by the learning agent which is trained sequentially to choose component layers. We stack the block to construct the whole auto-generated network. To accelerate the generation process, we also propose a distributed asynchronous framework and an early stop strategy. The block-wise generation brings unique advantages: (1) it performs competitive results in comparison to the hand-crafted state-of-the-art networks on image classification, additionally, the best network generated by BlockQNN achieves 3.54\% top-1 error rate on CIFAR-10 which beats all existing auto-generate networks. (2) in the meanwhile, it offers tremendous reduction of the search space in designing networks which only spends 3 days with 32 GPUs, and (3) moreover, it has strong generalizability that the network built on CIFAR also performs well on a larger-scale ImageNet dataset.},
2233
- archiveprefix = {arXiv},
2234
- keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}
2235
-
2236
- }
2237
-
2238
  @inproceedings{zhu2024minigpt,
2239
  title = {{{MiniGPT-4}}: {{Enhancing}} Vision-Language Understanding with Advanced Large Language Models},
2240
  booktitle = {The Twelfth International Conference on Learning Representations},
2241
  author = {Zhu, Deyao and Chen, Jun and Shen, Xiaoqian and Li, Xiang and Elhoseiny, Mohamed},
2242
  year = {2024}
2243
  }
2244
-
2245
- @misc{zotero-item-169,
2246
- type = {Misc}
2247
- }
 
352
 
353
  }
354
 
 
 
 
 
 
 
 
 
 
 
 
355
  @misc{cadeneLeRobotStateoftheartMachine2024,
356
  title = {{{LeRobot}}: {{State-of-the-art Machine Learning}} for {{Real-World Robotics}} in {{Pytorch}}},
357
  author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Galloudec, Quentin and Zouitine, Adil and Palma, Steven and Kooijmans, Pepijn and Aractingi, Michel and Shukor, Mustafa and Aubakirova, Dana and Russi, Martino and Capuano, Francesco and Pascal, Caroline and Chogari, Jade and Moss, Jess and Wolf, Thomas},
 
375
 
376
  }
377
 
 
 
 
 
 
 
 
 
 
378
  @inproceedings{chebotarClosingSimtorealLoop2019,
379
  title = {Closing the Sim-to-Real Loop: {{Adapting}} Simulation Randomization with Real World Experience},
380
  shorttitle = {Closing the Sim-to-Real Loop},
 
422
 
423
  }
424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  @book{connellRobotLearning1993,
426
  title = {Robot {{Learning}}},
427
  editor = {Connell, Jonathan H. and Mahadevan, Sridhar},
 
625
 
626
  }
627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
  @inproceedings{haarnojaReinforcementLearningDeep2017b,
629
  title = {Reinforcement {{Learning}} with {{Deep Energy-Based Policies}}},
630
  booktitle = {Proceedings of the 34th {{International Conference}} on {{Machine Learning}}},
 
716
 
717
  }
718
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  @inproceedings{ImageNet_VSS09,
720
  title = {Construction and Analysis of a Large Scale Image Ontology},
721
  author = {Deng, J. and Li, K. and Do, M. and Su, H. and {Fei-Fei}, L.},
 
730
  year = {2023}
731
  }
732
 
733
+ @misc{intelligence$p_05$VisionLanguageActionModel2025,
734
+ title = {\${$\pi\_$}\{0.5\}\$: A {{Vision-Language-Action Model}} with {{Open-World Generalization}}},
735
+ shorttitle = {\${$\pi\_$}\{0.5\}\$},
736
+ author = {Intelligence, Physical and Black, Kevin and Brown, Noah and Darpinian, James and Dhabalia, Karan and Driess, Danny and Esmail, Adnan and Equi, Michael and Finn, Chelsea and Fusai, Niccolo and Galliker, Manuel Y. and Ghosh, Dibya and Groom, Lachy and Hausman, Karol and Ichter, Brian and Jakubczak, Szymon and Jones, Tim and Ke, Liyiming and LeBlanc, Devin and Levine, Sergey and {Li-Bell}, Adrian and Mothukuri, Mohith and Nair, Suraj and Pertsch, Karl and Ren, Allen Z. and Shi, Lucy Xiaoyang and Smith, Laura and Springenberg, Jost Tobias and Stachowicz, Kyle and Tanner, James and Vuong, Quan and Walke, Homer and Walling, Anna and Wang, Haohuan and Yu, Lili and Zhilinsky, Ury},
737
+ year = {2025},
738
+ month = apr,
739
+ number = {arXiv:2504.16054},
740
+ eprint = {2504.16054},
741
+ primaryclass = {cs},
742
+ publisher = {arXiv},
743
+ doi = {10.48550/arXiv.2504.16054},
744
+ urldate = {2025-09-12},
745
+ abstract = {In order for robots to be useful, they must perform practically relevant tasks in the real world, outside of the lab. While vision-language-action (VLA) models have demonstrated impressive results for end-to-end robot control, it remains an open question how far such models can generalize in the wild. We describe \${\textbackslash}pi\_\{0.5\}\$, a new model based on \${\textbackslash}pi\_\{0\}\$ that uses co-training on heterogeneous tasks to enable broad generalization. \${\textbackslash}pi\_\{0.5\}\${\textbackslash} uses data from multiple robots, high-level semantic prediction, web data, and other sources to enable broadly generalizable real-world robotic manipulation. Our system uses a combination of co-training and hybrid multi-modal examples that combine image observations, language commands, object detections, semantic subtask prediction, and low-level actions. Our experiments show that this kind of knowledge transfer is essential for effective generalization, and we demonstrate for the first time that an end-to-end learning-enabled robotic system can perform long-horizon and dexterous manipulation skills, such as cleaning a kitchen or bedroom, in entirely new homes.},
746
+ archiveprefix = {arXiv},
747
+ keywords = {Computer Science - Machine Learning,Computer Science - Robotics}
748
+
749
+ }
750
+
751
  @misc{jangBCZZeroShotTask2022,
752
  title = {{{BC-Z}}: {{Zero-Shot Task Generalization}} with {{Robotic Imitation Learning}}},
753
  shorttitle = {{{BC-Z}}},
 
859
 
860
  }
861
 
 
 
 
 
 
 
 
 
862
  @misc{khazatskyDROIDLargeScaleInTheWild2025,
863
  title = {{{DROID}}: {{A Large-Scale In-The-Wild Robot Manipulation Dataset}}},
864
  shorttitle = {{{DROID}}},
 
895
 
896
  }
897
 
898
+ @article{kingma2013auto,
899
+ title = {Auto-Encoding Variational Bayes},
900
+ author = {Kingma, Diederik P and Welling, Max},
901
+ year = {2013},
902
+ journal = {arXiv preprint arXiv:1312.6114},
 
903
  eprint = {1312.6114},
 
 
 
 
904
  abstract = {How can we perform efficient inference and learning in directed probabilistic models, in the presence of continuous latent variables with intractable posterior distributions, and large datasets? We introduce a stochastic variational inference and learning algorithm that scales to large datasets and, under some mild differentiability conditions, even works in the intractable case. Our contributions are two-fold. First, we show that a reparameterization of the variational lower bound yields a lower bound estimator that can be straightforwardly optimized using standard stochastic gradient methods. Second, we show that for i.i.d. datasets with continuous latent variables per datapoint, posterior inference can be made especially efficient by fitting an approximate inference model (also called a recognition model) to the intractable posterior using the proposed lower bound estimator. Theoretical advantages are reflected in experimental results.},
905
+ archiveprefix = {arXiv}
 
 
906
  }
907
 
908
  @misc{knightStandardOpenSO100,
 
1035
 
1036
  }
1037
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1038
  @misc{lillicrapContinuousControlDeep2019a,
1039
  title = {Continuous Control with Deep Reinforcement Learning},
1040
  author = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
 
1155
 
1156
  }
1157
 
1158
+ @misc{luoUnderstandingDiffusionModels2022,
1159
+ title = {Understanding {{Diffusion Models}}: {{A Unified Perspective}}},
1160
+ shorttitle = {Understanding {{Diffusion Models}}},
1161
+ author = {Luo, Calvin},
1162
+ year = {2022},
1163
+ month = aug,
1164
+ number = {arXiv:2208.11970},
1165
+ eprint = {2208.11970},
1166
+ primaryclass = {cs},
1167
+ publisher = {arXiv},
1168
+ doi = {10.48550/arXiv.2208.11970},
1169
+ urldate = {2025-09-28},
1170
+ abstract = {Diffusion models have shown incredible capabilities as generative models; indeed, they power the current state-of-the-art models on text-conditioned image generation such as Imagen and DALL-E 2. In this work we review, demystify, and unify the understanding of diffusion models across both variational and score-based perspectives. We first derive Variational Diffusion Models (VDM) as a special case of a Markovian Hierarchical Variational Autoencoder, where three key assumptions enable tractable computation and scalable optimization of the ELBO. We then prove that optimizing a VDM boils down to learning a neural network to predict one of three potential objectives: the original source input from any arbitrary noisification of it, the original source noise from any arbitrarily noisified input, or the score function of a noisified input at any arbitrary noise level. We then dive deeper into what it means to learn the score function, and connect the variational perspective of a diffusion model explicitly with the Score-based Generative Modeling perspective through Tweedie's Formula. Lastly, we cover how to learn a conditional distribution using diffusion models via guidance.},
1171
+ archiveprefix = {arXiv},
1172
+ langid = {english},
1173
+ keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}
1174
+
1175
+ }
1176
+
1177
  @book{lynchModernRoboticsMechanics2017,
1178
  title = {Modern {{Robotics}}: {{Mechanics}}, {{Planning}}, and {{Control}}},
1179
  shorttitle = {Modern {{Robotics}}},
 
1348
  year = {2023}
1349
  }
1350
 
1351
+ @misc{oneillOpenXEmbodimentRobotic2025,
1352
+ title = {Open {{X-Embodiment}}: {{Robotic Learning Datasets}} and {{RT-X Models}}},
1353
+ shorttitle = {Open {{X-Embodiment}}},
1354
+ author = {O'Neill, Abby and Rehman, Abdul and Gupta, Abhinav and Maddukuri, Abhiram and Gupta, Abhishek and Padalkar, Abhishek and Lee, Abraham and Pooley, Acorn and Gupta, Agrim and Mandlekar, Ajay and Jain, Ajinkya and Tung, Albert and Bewley, Alex and Herzog, Alex and Irpan, Alex and Khazatsky, Alexander and Rai, Anant and Gupta, Anchit and Wang, Andrew and Kolobov, Andrey and Singh, Anikait and Garg, Animesh and Kembhavi, Aniruddha and Xie, Annie and Brohan, Anthony and Raffin, Antonin and Sharma, Archit and Yavary, Arefeh and Jain, Arhan and Balakrishna, Ashwin and Wahid, Ayzaan and {Burgess-Limerick}, Ben and Kim, Beomjoon and Sch{\"o}lkopf, Bernhard and Wulfe, Blake and Ichter, Brian and Lu, Cewu and Xu, Charles and Le, Charlotte and Finn, Chelsea and Wang, Chen and Xu, Chenfeng and Chi, Cheng and Huang, Chenguang and Chan, Christine and Agia, Christopher and Pan, Chuer and Fu, Chuyuan and Devin, Coline and Xu, Danfei and Morton, Daniel and Driess, Danny and Chen, Daphne and Pathak, Deepak and Shah, Dhruv and B{\"u}chler, Dieter and Jayaraman, Dinesh and Kalashnikov, Dmitry and Sadigh, Dorsa and Johns, Edward and Foster, Ethan and Liu, Fangchen and Ceola, Federico and Xia, Fei and Zhao, Feiyu and Frujeri, Felipe Vieira and Stulp, Freek and Zhou, Gaoyue and Sukhatme, Gaurav S. and Salhotra, Gautam and Yan, Ge and Feng, Gilbert and Schiavi, Giulio and Berseth, Glen and Kahn, Gregory and Yang, Guangwen and Wang, Guanzhi and Su, Hao and Fang, Hao-Shu and Shi, Haochen and Bao, Henghui and Amor, Heni Ben and Christensen, Henrik I. 
and Furuta, Hiroki and Bharadhwaj, Homanga and Walke, Homer and Fang, Hongjie and Ha, Huy and Mordatch, Igor and Radosavovic, Ilija and Leal, Isabel and Liang, Jacky and {Abou-Chakra}, Jad and Kim, Jaehyung and Drake, Jaimyn and Peters, Jan and Schneider, Jan and Hsu, Jasmine and Vakil, Jay and Bohg, Jeannette and Bingham, Jeffrey and Wu, Jeffrey and Gao, Jensen and Hu, Jiaheng and Wu, Jiajun and Wu, Jialin and Sun, Jiankai and Luo, Jianlan and Gu, Jiayuan and Tan, Jie and Oh, Jihoon and Wu, Jimmy and Lu, Jingpei and Yang, Jingyun and Malik, Jitendra and Silv{\'e}rio, Jo{\~a}o and Hejna, Joey and Booher, Jonathan and Tompson, Jonathan and Yang, Jonathan and Salvador, Jordi and Lim, Joseph J. and Han, Junhyek and Wang, Kaiyuan and Rao, Kanishka and Pertsch, Karl and Hausman, Karol and Go, Keegan and Gopalakrishnan, Keerthana and Goldberg, Ken and Byrne, Kendra and Oslund, Kenneth and Kawaharazuka, Kento and Black, Kevin and Lin, Kevin and Zhang, Kevin and Ehsani, Kiana and Lekkala, Kiran and Ellis, Kirsty and Rana, Krishan and Srinivasan, Krishnan and Fang, Kuan and Singh, Kunal Pratap and Zeng, Kuo-Hao and Hatch, Kyle and Hsu, Kyle and Itti, Laurent and Chen, Lawrence Yunliang and Pinto, Lerrel and {Fei-Fei}, Li and Tan, Liam and Fan, Linxi "Jim" and Ott, Lionel and Lee, Lisa and Weihs, Luca and Chen, Magnum and Lepert, Marion and Memmel, Marius and Tomizuka, Masayoshi and Itkina, Masha and Castro, Mateo Guaman and Spero, Max and Du, Maximilian and Ahn, Michael and Yip, Michael C. and Zhang, Mingtong and Ding, Mingyu and Heo, Minho and Srirama, Mohan Kumar and Sharma, Mohit and Kim, Moo Jin and Irshad, Muhammad Zubair and Kanazawa, Naoaki and Hansen, Nicklas and Heess, Nicolas and Joshi, Nikhil J. and Suenderhauf, Niko and Liu, Ning and Palo, Norman Di and Shafiullah, Nur Muhammad Mahi and Mees, Oier and Kroemer, Oliver and Bastani, Osbert and Sanketi, Pannag R. 
and Miller, Patrick "Tree" and Yin, Patrick and Wohlhart, Paul and Xu, Peng and Fagan, Peter David and Mitrano, Peter and Sermanet, Pierre and Abbeel, Pieter and Sundaresan, Priya and Chen, Qiuyu and Vuong, Quan and Rafailov, Rafael and Tian, Ran and Doshi, Ria and {Mart{\'i}n-Mart{\'i}n}, Roberto and Baijal, Rohan and Scalise, Rosario and Hendrix, Rose and Lin, Roy and Qian, Runjia and Zhang, Ruohan and Mendonca, Russell and Shah, Rutav and Hoque, Ryan and Julian, Ryan and Bustamante, Samuel and Kirmani, Sean and Levine, Sergey and Lin, Shan and Moore, Sherry and Bahl, Shikhar and Dass, Shivin and Sonawani, Shubham and Tulsiani, Shubham and Song, Shuran and Xu, Sichun and Haldar, Siddhant and Karamcheti, Siddharth and Adebola, Simeon and Guist, Simon and Nasiriany, Soroush and Schaal, Stefan and Welker, Stefan and Tian, Stephen and Ramamoorthy, Subramanian and Dasari, Sudeep and Belkhale, Suneel and Park, Sungjae and Nair, Suraj and Mirchandani, Suvir and Osa, Takayuki and Gupta, Tanmay and Harada, Tatsuya and Matsushima, Tatsuya and Xiao, Ted and Kollar, Thomas and Yu, Tianhe and Ding, Tianli and Davchev, Todor and Zhao, Tony Z. and Armstrong, Travis and Darrell, Trevor and Chung, Trinity and Jain, Vidhi and Kumar, Vikash and Vanhoucke, Vincent and Guizilini, Vitor and Zhan, Wei and Zhou, Wenxuan and Burgard, Wolfram and Chen, Xi and Chen, Xiangyu and Wang, Xiaolong and Zhu, Xinghao and Geng, Xinyang and Liu, Xiyuan and Liangwei, Xu and Li, Xuanlin and Pang, Yansong and Lu, Yao and Ma, Yecheng Jason and Kim, Yejin and Chebotar, Yevgen and Zhou, Yifan and Zhu, Yifeng and Wu, Yilin and Xu, Ying and Wang, Yixuan and Bisk, Yonatan and Dou, Yongqiang and Cho, Yoonyoung and Lee, Youngwoon and Cui, Yuchen and Cao, Yue and Wu, Yueh-Hua and Tang, Yujin and Zhu, Yuke and Zhang, Yunchu and Jiang, Yunfan and Li, Yunshuang and Li, Yunzhu and Iwasawa, Yusuke and Matsuo, Yutaka and Ma, Zehan and Xu, Zhuo and Cui, Zichen Jeff and Zhang, Zichen and Fu, Zipeng and Lin, Zipeng},
1355
+ year = {2025},
1356
+ month = may,
1357
+ number = {arXiv:2310.08864},
1358
+ eprint = {2310.08864},
1359
+ primaryclass = {cs},
1360
+ publisher = {arXiv},
1361
+ doi = {10.48550/arXiv.2310.08864},
1362
+ urldate = {2025-09-08},
1363
+ abstract = {Large, high-capacity models trained on diverse datasets have shown remarkable successes on efficiently tackling downstream applications. In domains from NLP to Computer Vision, this has led to a consolidation of pretrained models, with general pretrained backbones serving as a starting point for many applications. Can such a consolidation happen in robotics? Conventionally, robotic learning methods train a separate model for every application, every robot, and even every environment. Can we instead train generalist X-robot policy that can be adapted efficiently to new robots, tasks, and environments? In this paper, we provide datasets in standardized data formats and models to make it possible to explore this possibility in the context of robotic manipulation, alongside experimental results that provide an example of effective X-robot policies. We assemble a dataset from 22 different robots collected through a collaboration between 21 institutions, demonstrating 527 skills (160266 tasks). We show that a high-capacity model trained on this data, which we call RT-X, exhibits positive transfer and improves the capabilities of multiple robots by leveraging experience from other platforms. More details can be found on the project website https://robotics-transformer-x.github.io.},
1364
+ archiveprefix = {arXiv},
1365
+ keywords = {Computer Science - Robotics}
1366
+
1367
+ }
1368
+
1369
  @misc{openaiGPT4TechnicalReport2024,
1370
  title = {{{GPT-4 Technical Report}}},
1371
  author = {OpenAI and Achiam, Josh and Adler, Steven and Agarwal, Sandhini and Ahmad, Lama and Akkaya, Ilge and Aleman, Florencia Leoni and Almeida, Diogo and Altenschmidt, Janko and Altman, Sam and Anadkat, Shyamal and Avila, Red and Babuschkin, Igor and Balaji, Suchir and Balcom, Valerie and Baltescu, Paul and Bao, Haiming and Bavarian, Mohammad and Belgum, Jeff and Bello, Irwan and Berdine, Jake and {Bernadett-Shapiro}, Gabriel and Berner, Christopher and Bogdonoff, Lenny and Boiko, Oleg and Boyd, Madelaine and Brakman, Anna-Luisa and Brockman, Greg and Brooks, Tim and Brundage, Miles and Button, Kevin and Cai, Trevor and Campbell, Rosie and Cann, Andrew and Carey, Brittany and Carlson, Chelsea and Carmichael, Rory and Chan, Brooke and Chang, Che and Chantzis, Fotis and Chen, Derek and Chen, Sully and Chen, Ruby and Chen, Jason and Chen, Mark and Chess, Ben and Cho, Chester and Chu, Casey and Chung, Hyung Won and Cummings, Dave and Currier, Jeremiah and Dai, Yunxing and Decareaux, Cory and Degry, Thomas and Deutsch, Noah and Deville, Damien and Dhar, Arka and Dohan, David and Dowling, Steve and Dunning, Sheila and Ecoffet, Adrien and Eleti, Atty and Eloundou, Tyna and Farhi, David and Fedus, Liam and Felix, Niko and Fishman, Sim{\'o}n Posada and Forte, Juston and Fulford, Isabella and Gao, Leo and Georges, Elie and Gibson, Christian and Goel, Vik and Gogineni, Tarun and Goh, Gabriel and {Gontijo-Lopes}, Rapha and Gordon, Jonathan and Grafstein, Morgan and Gray, Scott and Greene, Ryan and Gross, Joshua and Gu, Shixiang Shane and Guo, Yufei and Hallacy, Chris and Han, Jesse and Harris, Jeff and He, Yuchen and Heaton, Mike and Heidecke, Johannes and Hesse, Chris and Hickey, Alan and Hickey, Wade and Hoeschele, Peter and Houghton, Brandon and Hsu, Kenny and Hu, Shengli and Hu, Xin and Huizinga, Joost and Jain, Shantanu and Jain, Shawn and Jang, Joanne and Jiang, Angela and Jiang, Roger and Jin, Haozhun and Jin, Denny and Jomoto, Shino and Jonn, Billie and Jun, 
Heewoo and Kaftan, Tomer and Kaiser, {\L}ukasz and Kamali, Ali and Kanitscheider, Ingmar and Keskar, Nitish Shirish and Khan, Tabarak and Kilpatrick, Logan and Kim, Jong Wook and Kim, Christina and Kim, Yongjik and Kirchner, Jan Hendrik and Kiros, Jamie and Knight, Matt and Kokotajlo, Daniel and Kondraciuk, {\L}ukasz and Kondrich, Andrew and Konstantinidis, Aris and Kosic, Kyle and Krueger, Gretchen and Kuo, Vishal and Lampe, Michael and Lan, Ikai and Lee, Teddy and Leike, Jan and Leung, Jade and Levy, Daniel and Li, Chak Ming and Lim, Rachel and Lin, Molly and Lin, Stephanie and Litwin, Mateusz and Lopez, Theresa and Lowe, Ryan and Lue, Patricia and Makanju, Anna and Malfacini, Kim and Manning, Sam and Markov, Todor and Markovski, Yaniv and Martin, Bianca and Mayer, Katie and Mayne, Andrew and McGrew, Bob and McKinney, Scott Mayer and McLeavey, Christine and McMillan, Paul and McNeil, Jake and Medina, David and Mehta, Aalok and Menick, Jacob and Metz, Luke and Mishchenko, Andrey and Mishkin, Pamela and Monaco, Vinnie and Morikawa, Evan and Mossing, Daniel and Mu, Tong and Murati, Mira and Murk, Oleg and M{\'e}ly, David and Nair, Ashvin and Nakano, Reiichiro and Nayak, Rajeev and Neelakantan, Arvind and Ngo, Richard and Noh, Hyeonwoo and Ouyang, Long and O'Keefe, Cullen and Pachocki, Jakub and Paino, Alex and Palermo, Joe and Pantuliano, Ashley and Parascandolo, Giambattista and Parish, Joel and Parparita, Emy and Passos, Alex and Pavlov, Mikhail and Peng, Andrew and Perelman, Adam and Peres, Filipe de Avila Belbute and Petrov, Michael and Pinto, Henrique Ponde de Oliveira and Michael and Pokorny and Pokrass, Michelle and Pong, Vitchyr H. 
and Powell, Tolly and Power, Alethea and Power, Boris and Proehl, Elizabeth and Puri, Raul and Radford, Alec and Rae, Jack and Ramesh, Aditya and Raymond, Cameron and Real, Francis and Rimbach, Kendra and Ross, Carl and Rotsted, Bob and Roussez, Henri and Ryder, Nick and Saltarelli, Mario and Sanders, Ted and Santurkar, Shibani and Sastry, Girish and Schmidt, Heather and Schnurr, David and Schulman, John and Selsam, Daniel and Sheppard, Kyla and Sherbakov, Toki and Shieh, Jessica and Shoker, Sarah and Shyam, Pranav and Sidor, Szymon and Sigler, Eric and Simens, Maddie and Sitkin, Jordan and Slama, Katarina and Sohl, Ian and Sokolowsky, Benjamin and Song, Yang and Staudacher, Natalie and Such, Felipe Petroski and Summers, Natalie and Sutskever, Ilya and Tang, Jie and Tezak, Nikolas and Thompson, Madeleine B. and Tillet, Phil and Tootoonchian, Amin and Tseng, Elizabeth and Tuggle, Preston and Turley, Nick and Tworek, Jerry and Uribe, Juan Felipe Cer{\'o}n and Vallone, Andrea and Vijayvergiya, Arun and Voss, Chelsea and Wainwright, Carroll and Wang, Justin Jay and Wang, Alvin and Wang, Ben and Ward, Jonathan and Wei, Jason and Weinmann, C. J. and Welihinda, Akila and Welinder, Peter and Weng, Jiayi and Weng, Lilian and Wiethoff, Matt and Willner, Dave and Winter, Clemens and Wolrich, Samuel and Wong, Hannah and Workman, Lauren and Wu, Sherwin and Wu, Jeff and Wu, Michael and Xiao, Kai and Xu, Tao and Yoo, Sarah and Yu, Kevin and Yuan, Qiming and Zaremba, Wojciech and Zellers, Rowan and Zhang, Chong and Zhang, Marvin and Zhao, Shengjia and Zheng, Tianhao and Zhuang, Juntang and Zhuk, William and Zoph, Barret},
 
1383
 
1384
  }
1385
 
 
 
 
 
 
 
 
 
 
1386
  @misc{oquabDINOv2LearningRobust2024,
1387
  title = {{{DINOv2}}: {{Learning Robust Visual Features}} without {{Supervision}}},
1388
  shorttitle = {{{DINOv2}}},
 
1480
 
1481
  }
1482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1483
  @book{prince2023understanding,
1484
  title = {Understanding Deep Learning},
1485
  author = {Prince, Simon J.D.},
 
1641
  edition = {1},
1642
  publisher = {Cambridge University Press},
1643
  doi = {10.1017/CBO9781107298019},
1644
+ urldate = {2025-10-10},
1645
  abstract = {Machine learning is one of the fastest growing areas of computer science, with far-reaching applications. The aim of this textbook is to introduce machine learning, and the algorithmic paradigms it offers, in a principled way. The book provides a theoretical account of the fundamentals underlying machine learning and the mathematical derivations that transform these principles into practical algorithms. Following a presentation of the basics, the book covers a wide array of central topics unaddressed by previous textbooks. These include a discussion of the computational complexity of learning and the concepts of convexity and stability; important algorithmic paradigms including stochastic gradient descent, neural networks, and structured output learning; and emerging theoretical concepts such as the PAC-Bayes approach and compression-based bounds. Designed for advanced undergraduates or beginning graduates, the text makes the fundamentals and algorithms of machine learning accessible to students and non-expert readers in statistics, computer science, mathematics and engineering.},
1646
  copyright = {https://www.cambridge.org/core/terms},
1647
  isbn = {978-1-107-05713-5 978-1-107-29801-9},
 
1717
 
1718
  }
1719
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1720
  @inproceedings{sohnLearningStructuredOutput2015,
1721
  title = {Learning {{Structured Output Representation}} Using {{Deep Conditional Generative Models}}},
1722
  booktitle = {Advances in {{Neural Information Processing Systems}}},
 
1752
  year = {2018}
1753
  }
1754
 
 
 
 
 
 
 
 
1755
  @inproceedings{suttonPolicyGradientMethods1999,
1756
  title = {Policy {{Gradient Methods}} for {{Reinforcement Learning}} with {{Function Approximation}}},
1757
  booktitle = {Advances in {{Neural Information Processing Systems}}},
 
1810
 
1811
  }
1812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1813
  @article{tangDeepReinforcementLearning2025,
1814
  title = {Deep {{Reinforcement Learning}} for {{Robotics}}: {{A Survey}} of {{Real-World Successes}}},
1815
  shorttitle = {Deep {{Reinforcement Learning}} for {{Robotics}}},
 
2052
 
2053
  }
2054
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2055
  @inproceedings{zhu2024minigpt,
2056
  title = {{{MiniGPT-4}}: {{Enhancing}} Vision-Language Understanding with Advanced Large Language Models},
2057
  booktitle = {The Twelfth International Conference on Learning Representations},
2058
  author = {Zhu, Deyao and Chen, Jun and Shen, Xiaoqian and Li, Xiang and Elhoseiny, Mohamed},
2059
  year = {2024}
2060
  }
 
 
 
 
app/scripts/latex-to-mdx/output/main.md CHANGED
The diff for this file is too large to render. See raw diff
 
app/scripts/latex-to-mdx/output/main.mdx CHANGED
The diff for this file is too large to render. See raw diff
 
app/scripts/latex-to-mdx/post-processor.mjs CHANGED
@@ -300,6 +300,30 @@ function fixLinkTextContent(content) {
300
  return cleanedContent;
301
  }
302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  /**
304
  * Convert align anchor markers to proper HTML spans outside math blocks
305
  * @param {string} content - Markdown content
@@ -345,6 +369,7 @@ export function postProcessMarkdown(content, inputDir = null) {
345
  processedContent = fixMultilineMath(processedContent);
346
  processedContent = fixAllAttributes(processedContent);
347
  processedContent = fixLinkTextContent(processedContent);
 
348
 
349
  // Inject code snippets if input directory is provided
350
  if (inputDir) {
 
300
  return cleanedContent;
301
  }
302
 
303
/**
 * Fix autolink URLs with angle brackets for MDX compatibility.
 *
 * MDX treats a bare `<` as the start of JSX, so CommonMark autolinks such as
 * `<https://example.com>` fail to parse. This rewrites `<http://...>` and
 * `<https://...>` into explicit markdown links: `[url](url)`.
 *
 * Parentheses inside the URL are percent-encoded in the link *destination*
 * only (the visible link text keeps the raw URL), because a literal `)` in
 * the destination would close the markdown link early — e.g. Wikipedia URLs
 * like `/wiki/Foo_(bar)` would otherwise produce a broken link.
 *
 * @param {string} content - Markdown content
 * @returns {string} - Cleaned content
 */
function fixAutolinkUrls(content) {
  console.log('  🔗 Fixing autolink URLs with angle brackets...');

  let fixedCount = 0;

  // Convert <http://...> or <https://...> to [url](url).
  // `[^>]+` cannot match `>` itself, so the capture is exactly the URL body.
  const cleanedContent = content.replace(/<(https?:\/\/[^>]+)>/g, (match, url) => {
    fixedCount++;
    // Keep the destination balanced: encode ( and ) so markdown link parsing
    // does not terminate at the first `)` inside the URL.
    const safeDestination = url.replace(/\(/g, '%28').replace(/\)/g, '%29');
    return `[${url}](${safeDestination})`;
  });

  if (fixedCount > 0) {
    console.log(`  ✅ Fixed ${fixedCount} autolink URL(s)`);
  }

  return cleanedContent;
}
326
+
327
  /**
328
  * Convert align anchor markers to proper HTML spans outside math blocks
329
  * @param {string} content - Markdown content
 
369
  processedContent = fixMultilineMath(processedContent);
370
  processedContent = fixAllAttributes(processedContent);
371
  processedContent = fixLinkTextContent(processedContent);
372
+ processedContent = fixAutolinkUrls(processedContent);
373
 
374
  // Inject code snippets if input directory is provided
375
  if (inputDir) {
app/src/content/article.mdx CHANGED
The diff for this file is too large to render. See raw diff
 
app/src/content/assets/image/figures/ch3/ch3-hil-serl-architecture.png ADDED

Git LFS Details

  • SHA256: c11857fb0113a346d95cd04164362be1c8ea4fca1b58b3965c4b42e3af377584
  • Pointer size: 132 Bytes
  • Size of remote file: 1.21 MB