mishig HF Staff commited on
Commit
83d531b
·
unverified ·
2 Parent(s): 2092eeaeddbda3

Merge pull request #38 from huggingface/feat/add-tests

Browse files

Add unit tests for all dataset version parsers + test CI workflow

.github/workflows/test.yml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: oven-sh/setup-bun@v1
16
+ with:
17
+ bun-version: latest
18
+
19
+ - name: Install dependencies
20
+ run: bun install
21
+
22
+ - name: Run tests
23
+ run: bun test
bun.lock CHANGED
@@ -7,7 +7,6 @@
7
  "dependencies": {
8
  "@react-three/drei": "^10.7.7",
9
  "@react-three/fiber": "^9.5.0",
10
- "@types/three": "^0.182.0",
11
  "hyparquet": "^1.12.1",
12
  "next": "15.3.6",
13
  "react": "^19.0.0",
@@ -20,9 +19,11 @@
20
  "devDependencies": {
21
  "@eslint/eslintrc": "^3",
22
  "@tailwindcss/postcss": "^4",
 
23
  "@types/node": "^20",
24
  "@types/react": "^19",
25
  "@types/react-dom": "^19",
 
26
  "eslint": "^9",
27
  "eslint-config-next": "15.3.1",
28
  "prettier": "^3.5.3",
@@ -210,6 +211,8 @@
210
 
211
  "@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="],
212
 
 
 
213
  "@types/d3-array": ["@types/d3-array@3.2.2", "", {}, "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw=="],
214
 
215
  "@types/d3-color": ["@types/d3-color@3.1.3", "", {}, "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A=="],
@@ -366,6 +369,8 @@
366
 
367
  "buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="],
368
 
 
 
369
  "busboy": ["busboy@1.6.0", "", { "dependencies": { "streamsearch": "^1.1.0" } }, "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA=="],
370
 
371
  "call-bind": ["call-bind@1.0.8", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.0", "es-define-property": "^1.0.0", "get-intrinsic": "^1.2.4", "set-function-length": "^1.2.2" } }, "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww=="],
 
7
  "dependencies": {
8
  "@react-three/drei": "^10.7.7",
9
  "@react-three/fiber": "^9.5.0",
 
10
  "hyparquet": "^1.12.1",
11
  "next": "15.3.6",
12
  "react": "^19.0.0",
 
19
  "devDependencies": {
20
  "@eslint/eslintrc": "^3",
21
  "@tailwindcss/postcss": "^4",
22
+ "@types/bun": "^1.3.10",
23
  "@types/node": "^20",
24
  "@types/react": "^19",
25
  "@types/react-dom": "^19",
26
+ "@types/three": "^0.182.0",
27
  "eslint": "^9",
28
  "eslint-config-next": "15.3.1",
29
  "prettier": "^3.5.3",
 
211
 
212
  "@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="],
213
 
214
+ "@types/bun": ["@types/bun@1.3.10", "", { "dependencies": { "bun-types": "1.3.10" } }, "sha512-0+rlrUrOrTSskibryHbvQkDOWRJwJZqZlxrUs1u4oOoTln8+WIXBPmAuCF35SWB2z4Zl3E84Nl/D0P7803nigQ=="],
215
+
216
  "@types/d3-array": ["@types/d3-array@3.2.2", "", {}, "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw=="],
217
 
218
  "@types/d3-color": ["@types/d3-color@3.1.3", "", {}, "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A=="],
 
369
 
370
  "buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="],
371
 
372
+ "bun-types": ["bun-types@1.3.10", "", { "dependencies": { "@types/node": "*" } }, "sha512-tcpfCCl6XWo6nCVnpcVrxQ+9AYN1iqMIzgrSKYMB/fjLtV2eyAVEg7AxQJuCq/26R6HpKWykQXuSOq/21RYcbg=="],
373
+
374
  "busboy": ["busboy@1.6.0", "", { "dependencies": { "streamsearch": "^1.1.0" } }, "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA=="],
375
 
376
  "call-bind": ["call-bind@1.0.8", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.0", "es-define-property": "^1.0.0", "get-intrinsic": "^1.2.4", "set-function-length": "^1.2.2" } }, "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww=="],
package.json CHANGED
@@ -9,8 +9,9 @@
9
  "lint": "next lint",
10
  "format": "prettier --write .",
11
  "format:check": "prettier --check .",
12
- "type-check": "tsc --noEmit",
13
  "type-check:watch": "tsc --noEmit --watch",
 
14
  "validate": "bun run type-check && bun run lint && bun run format:check"
15
  },
16
  "dependencies": {
@@ -27,11 +28,12 @@
27
  },
28
  "devDependencies": {
29
  "@eslint/eslintrc": "^3",
30
- "@types/three": "^0.182.0",
31
  "@tailwindcss/postcss": "^4",
 
32
  "@types/node": "^20",
33
  "@types/react": "^19",
34
  "@types/react-dom": "^19",
 
35
  "eslint": "^9",
36
  "eslint-config-next": "15.3.1",
37
  "prettier": "^3.5.3",
 
9
  "lint": "next lint",
10
  "format": "prettier --write .",
11
  "format:check": "prettier --check .",
12
+ "type-check": "tsc --noEmit && tsc -p tsconfig.test.json --noEmit",
13
  "type-check:watch": "tsc --noEmit --watch",
14
+ "test": "bun test",
15
  "validate": "bun run type-check && bun run lint && bun run format:check"
16
  },
17
  "dependencies": {
 
28
  },
29
  "devDependencies": {
30
  "@eslint/eslintrc": "^3",
 
31
  "@tailwindcss/postcss": "^4",
32
+ "@types/bun": "^1.3.10",
33
  "@types/node": "^20",
34
  "@types/react": "^19",
35
  "@types/react-dom": "^19",
36
+ "@types/three": "^0.182.0",
37
  "eslint": "^9",
38
  "eslint-config-next": "15.3.1",
39
  "prettier": "^3.5.3",
src/app/[org]/[dataset]/[episode]/__tests__/fetch-data.test.ts ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import { computeColumnMinMax } from "@/app/[org]/[dataset]/[episode]/fetch-data";
3
+ import type { ChartRow } from "@/app/[org]/[dataset]/[episode]/fetch-data";
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // computeColumnMinMax
7
+ // Used by the stats panel to display per-column min/max for any dataset version.
8
+ // ---------------------------------------------------------------------------
9
+
10
+ describe("computeColumnMinMax — flat numeric values (v2.x / v3.0 style)", () => {
11
+ test("returns empty array for empty chart data groups", () => {
12
+ expect(computeColumnMinMax([])).toEqual([]);
13
+ });
14
+
15
+ test("returns empty array for groups with only timestamp columns", () => {
16
+ const groups: ChartRow[][] = [[{ timestamp: 0 }, { timestamp: 1 }]];
17
+ expect(computeColumnMinMax(groups)).toEqual([]);
18
+ });
19
+
20
+ test("computes min/max for a single flat series", () => {
21
+ const groups: ChartRow[][] = [
22
+ [
23
+ { timestamp: 0, "progress | sparse": 0.1 },
24
+ { timestamp: 0.5, "progress | sparse": 0.5 },
25
+ { timestamp: 1.0, "progress | sparse": 0.9 },
26
+ ],
27
+ ];
28
+ const result = computeColumnMinMax(groups);
29
+ expect(result).toHaveLength(1);
30
+ expect(result[0].column).toBe("progress | sparse");
31
+ expect(result[0].min).toBe(0.1);
32
+ expect(result[0].max).toBe(0.9);
33
+ });
34
+
35
+ test("rounds to 3 decimal places", () => {
36
+ const groups: ChartRow[][] = [
37
+ [
38
+ { timestamp: 0, col: 1.23456789 },
39
+ { timestamp: 1, col: 2.0 },
40
+ ],
41
+ ];
42
+ const result = computeColumnMinMax(groups);
43
+ expect(result[0].min).toBe(1.235); // rounded
44
+ expect(result[0].max).toBe(2.0);
45
+ });
46
+
47
+ test("ignores non-finite values (Infinity, NaN)", () => {
48
+ const groups: ChartRow[][] = [
49
+ [
50
+ { timestamp: 0, col: Infinity },
51
+ { timestamp: 0.5, col: 3.0 },
52
+ { timestamp: 1, col: NaN },
53
+ ],
54
+ ];
55
+ const result = computeColumnMinMax(groups);
56
+ expect(result[0].min).toBe(3.0);
57
+ expect(result[0].max).toBe(3.0);
58
+ });
59
+ });
60
+
61
+ describe("computeColumnMinMax — nested group values (grouped suffix format)", () => {
62
+ test("computes min/max for nested observation.state group (v2.x 6-DoF robot)", () => {
63
+ const groups: ChartRow[][] = [
64
+ [
65
+ {
66
+ timestamp: 0,
67
+ "observation.state": { "0": -0.5, "1": 0.2, "2": 1.5 },
68
+ },
69
+ {
70
+ timestamp: 0.1,
71
+ "observation.state": { "0": -0.3, "1": 0.8, "2": 0.7 },
72
+ },
73
+ ],
74
+ ];
75
+ const result = computeColumnMinMax(groups);
76
+ const colMap = Object.fromEntries(result.map((r) => [r.column, r]));
77
+
78
+ // observation.state | 0: min=-0.5, max=-0.3
79
+ expect(colMap["observation.state | 0"].min).toBe(-0.5);
80
+ expect(colMap["observation.state | 0"].max).toBe(-0.3);
81
+
82
+ // observation.state | 1: min=0.2, max=0.8
83
+ expect(colMap["observation.state | 1"].min).toBe(0.2);
84
+ expect(colMap["observation.state | 1"].max).toBe(0.8);
85
+
86
+ // observation.state | 2: min=0.7, max=1.5
87
+ expect(colMap["observation.state | 2"].min).toBe(0.7);
88
+ expect(colMap["observation.state | 2"].max).toBe(1.5);
89
+ });
90
+
91
+ test("handles multiple groups (action + state) across multiple chart data groups", () => {
92
+ const groups: ChartRow[][] = [
93
+ [
94
+ {
95
+ timestamp: 0,
96
+ "observation.state": { "0": 0.1, "1": 0.2 },
97
+ },
98
+ ],
99
+ [
100
+ {
101
+ timestamp: 0,
102
+ action: { "0": -1.0, "1": 1.0 },
103
+ },
104
+ ],
105
+ ];
106
+ const result = computeColumnMinMax(groups);
107
+ const colMap = Object.fromEntries(result.map((r) => [r.column, r]));
108
+
109
+ expect(colMap["observation.state | 0"]).toBeDefined();
110
+ expect(colMap["action | 0"].min).toBe(-1.0);
111
+ expect(colMap["action | 0"].max).toBe(-1.0);
112
+ expect(colMap["action | 1"].min).toBe(1.0);
113
+ });
114
+ });
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // Version-specific path construction integration tests
118
+ //
119
+ // These tests verify that the path templates for v2.0, v2.1, and v3.0 produce
120
+ // correct URLs when combined with buildVersionedUrl and formatStringWithVars.
121
+ // ---------------------------------------------------------------------------
122
+
123
+ import { buildVersionedUrl } from "@/utils/versionUtils";
124
+ import { formatStringWithVars } from "@/utils/parquetUtils";
125
+ import {
126
+ buildV3DataPath,
127
+ buildV3VideoPath,
128
+ buildV3EpisodesMetadataPath,
129
+ } from "@/utils/stringFormatting";
130
+ import { PADDING } from "@/utils/constants";
131
+
132
+ const DATASET_BASE = "https://huggingface.co/datasets";
133
+
134
+ function makeChunkAndIndex(episodeId: number, chunkSize: number) {
135
+ const episode_chunk = Math.floor(episodeId / chunkSize)
136
+ .toString()
137
+ .padStart(PADDING.CHUNK_INDEX, "0");
138
+ const episode_index = episodeId
139
+ .toString()
140
+ .padStart(PADDING.EPISODE_INDEX, "0");
141
+ return { episode_chunk, episode_index };
142
+ }
143
+
144
+ describe("v2.0 path construction (rabhishek100/so100_train_dataset style)", () => {
145
+ const repoId = "rabhishek100/so100_train_dataset";
146
+ const version = "v2.0";
147
+ const dataPath =
148
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
149
+ const videoPath =
150
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4";
151
+
152
+ test("episode 0 in chunk 0", () => {
153
+ const { episode_chunk, episode_index } = makeChunkAndIndex(0, 1000);
154
+ const path = formatStringWithVars(dataPath, {
155
+ episode_chunk,
156
+ episode_index,
157
+ });
158
+ const url = buildVersionedUrl(repoId, version, path);
159
+ expect(url).toBe(
160
+ `${DATASET_BASE}/${repoId}/resolve/main/data/000/episode_000000.parquet`,
161
+ );
162
+ });
163
+
164
+ test("episode 42 in chunk 0", () => {
165
+ const { episode_chunk, episode_index } = makeChunkAndIndex(42, 1000);
166
+ const path = formatStringWithVars(dataPath, {
167
+ episode_chunk,
168
+ episode_index,
169
+ });
170
+ expect(
171
+ formatStringWithVars(dataPath, { episode_chunk, episode_index }),
172
+ ).toBe("data/000/episode_000042.parquet");
173
+ const url = buildVersionedUrl(repoId, version, path);
174
+ expect(url).toContain("/data/000/episode_000042.parquet");
175
+ });
176
+
177
+ test("episode 1000 in chunk 1 (chunk boundary)", () => {
178
+ const { episode_chunk, episode_index } = makeChunkAndIndex(1000, 1000);
179
+ const path = formatStringWithVars(dataPath, {
180
+ episode_chunk,
181
+ episode_index,
182
+ });
183
+ expect(path).toBe("data/001/episode_001000.parquet");
184
+ });
185
+
186
+ test("v2.0 video URL for top camera", () => {
187
+ const { episode_chunk, episode_index } = makeChunkAndIndex(7, 1000);
188
+ const path = formatStringWithVars(videoPath, {
189
+ video_key: "observation.images.top",
190
+ episode_chunk,
191
+ episode_index,
192
+ });
193
+ const url = buildVersionedUrl(repoId, version, path);
194
+ expect(url).toBe(
195
+ `${DATASET_BASE}/${repoId}/resolve/main/videos/observation.images.top/chunk-000/episode_000007.mp4`,
196
+ );
197
+ });
198
+ });
199
+
200
+ describe("v2.1 path construction (youliangtan/so101-table-cleanup style)", () => {
201
+ // v2.1 uses the same path templates as v2.0
202
+ const dataPath =
203
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
204
+
205
+ test("episode 0 resolves correctly", () => {
206
+ const { episode_chunk, episode_index } = makeChunkAndIndex(0, 1000);
207
+ const path = formatStringWithVars(dataPath, {
208
+ episode_chunk,
209
+ episode_index,
210
+ });
211
+ expect(path).toBe("data/000/episode_000000.parquet");
212
+ });
213
+
214
+ test("episode in second chunk (chunk_size=1000, episode 1500)", () => {
215
+ const { episode_chunk, episode_index } = makeChunkAndIndex(1500, 1000);
216
+ const path = formatStringWithVars(dataPath, {
217
+ episode_chunk,
218
+ episode_index,
219
+ });
220
+ expect(path).toBe("data/001/episode_001500.parquet");
221
+ });
222
+
223
+ test("v2.1 URL is the same format as v2.0 (backward compatible)", () => {
224
+ const { episode_chunk, episode_index } = makeChunkAndIndex(5, 1000);
225
+ const v20path = formatStringWithVars(dataPath, {
226
+ episode_chunk,
227
+ episode_index,
228
+ });
229
+ const v21path = formatStringWithVars(dataPath, {
230
+ episode_chunk,
231
+ episode_index,
232
+ });
233
+ expect(v20path).toBe(v21path);
234
+ });
235
+ });
236
+
237
+ describe("v3.0 path construction (lerobot-data-collection/level12_rac_2_2026-02-07 style)", () => {
238
+ const repoId = "lerobot-data-collection/level12_rac_2_2026-02-07";
239
+ const version = "v3.0";
240
+
241
+ test("episode metadata path for first file", () => {
242
+ const path = buildV3EpisodesMetadataPath(0, 0);
243
+ const url = buildVersionedUrl(repoId, version, path);
244
+ expect(url).toBe(
245
+ `${DATASET_BASE}/${repoId}/resolve/main/meta/episodes/chunk-000/file-000.parquet`,
246
+ );
247
+ });
248
+
249
+ test("data path from episode metadata (chunk 0, file 2)", () => {
250
+ const path = buildV3DataPath(0, 2);
251
+ const url = buildVersionedUrl(repoId, version, path);
252
+ expect(url).toBe(
253
+ `${DATASET_BASE}/${repoId}/resolve/main/data/chunk-000/file-002.parquet`,
254
+ );
255
+ });
256
+
257
+ test("video path for top camera (chunk 0, file 0)", () => {
258
+ const path = buildV3VideoPath("observation.images.top", 0, 0);
259
+ const url = buildVersionedUrl(repoId, version, path);
260
+ expect(url).toBe(
261
+ `${DATASET_BASE}/${repoId}/resolve/main/videos/observation.images.top/chunk-000/file-000.mp4`,
262
+ );
263
+ });
264
+
265
+ test("video path for wrist camera with non-zero file index (per-camera metadata)", () => {
266
+ // v3.0 supports per-camera video segmentation — each camera can have different file indices
267
+ const path = buildV3VideoPath("observation.images.wrist", 0, 3);
268
+ expect(path).toBe("videos/observation.images.wrist/chunk-000/file-003.mp4");
269
+ });
270
+
271
+ test("data path for large dataset spanning multiple chunks", () => {
272
+ // Episode in chunk 1, file 5 based on episode metadata
273
+ const path = buildV3DataPath(1, 5);
274
+ expect(path).toBe("data/chunk-001/file-005.parquet");
275
+ });
276
+ });
277
+
278
+ // ---------------------------------------------------------------------------
279
+ // v3.0 episode metadata row parsing (parseEpisodeRowSimple-equivalent logic)
280
+ // Tests that the BigInt conversion and field extraction work correctly with
281
+ // realistic parquet row shapes from v3.0 datasets.
282
+ // ---------------------------------------------------------------------------
283
+
284
+ import { bigIntToNumber } from "@/utils/typeGuards";
285
+
286
+ describe("v3.0 episode metadata row parsing helpers", () => {
287
+ const toBigIntSafe = (value: unknown): number => {
288
+ if (typeof value === "bigint") return Number(value);
289
+ if (typeof value === "number") return value;
290
+ if (typeof value === "string") return parseInt(value) || 0;
291
+ return 0;
292
+ };
293
+
294
+ const toNumSafe = (value: unknown): number => {
295
+ if (typeof value === "number") return value;
296
+ if (typeof value === "bigint") return Number(value);
297
+ if (typeof value === "string") return parseFloat(value) || 0;
298
+ return 0;
299
+ };
300
+
301
+ test("parses named-key row (v3.0 primary format)", () => {
302
+ // Simulates a row from meta/episodes/chunk-000/file-000.parquet
303
+ const row: Record<string, unknown> = {
304
+ episode_index: 0n,
305
+ "data/chunk_index": 0n,
306
+ "data/file_index": 2n,
307
+ dataset_from_index: 0n,
308
+ dataset_to_index: 200n,
309
+ length: 200n,
310
+ "videos/observation.images.top/chunk_index": 0n,
311
+ "videos/observation.images.top/file_index": 0n,
312
+ "videos/observation.images.top/from_timestamp": 0.0,
313
+ "videos/observation.images.top/to_timestamp": 4.0,
314
+ };
315
+
316
+ expect(toBigIntSafe(row["episode_index"])).toBe(0);
317
+ expect(toBigIntSafe(row["data/file_index"])).toBe(2);
318
+ expect(toBigIntSafe(row["dataset_from_index"])).toBe(0);
319
+ expect(toBigIntSafe(row["dataset_to_index"])).toBe(200);
320
+ expect(toBigIntSafe(row["length"])).toBe(200);
321
+ expect(toNumSafe(row["videos/observation.images.top/from_timestamp"])).toBe(
322
+ 0.0,
323
+ );
324
+ expect(toNumSafe(row["videos/observation.images.top/to_timestamp"])).toBe(
325
+ 4.0,
326
+ );
327
+ });
328
+
329
+ test("parses numeric-key row (fallback format)", () => {
330
+ // Fallback when column names are not available (older v3 datasets)
331
+ const row: Record<string, unknown> = {
332
+ "0": 5, // episode_index
333
+ "1": 0, // data_chunk_index
334
+ "2": 3, // data_file_index
335
+ "3": 600, // dataset_from_index
336
+ "4": 800, // dataset_to_index
337
+ "5": 0, // video_chunk_index
338
+ "6": 3, // video_file_index
339
+ "7": 12.0, // video_from_timestamp
340
+ "8": 16.0, // video_to_timestamp
341
+ "9": 200, // length
342
+ };
343
+
344
+ const toNum = (v: unknown, fallback = 0): number =>
345
+ typeof v === "number" ? v : typeof v === "bigint" ? Number(v) : fallback;
346
+
347
+ expect(toNum(row["0"])).toBe(5); // episode_index
348
+ expect(toNum(row["2"])).toBe(3); // data_file_index
349
+ expect(toNum(row["3"])).toBe(600); // dataset_from_index
350
+ expect(toNum(row["4"])).toBe(800); // dataset_to_index
351
+ expect(toNum(row["8"], 30)).toBe(16.0); // video_to_timestamp
352
+ });
353
+
354
+ test("bigIntToNumber converts all BigInt parquet columns correctly", () => {
355
+ // v3.0 integer columns come out of hyparquet as BigInt
356
+ expect(bigIntToNumber(0n, 0)).toBe(0);
357
+ expect(bigIntToNumber(200n, 0)).toBe(200);
358
+ expect(bigIntToNumber(1234567n, 0)).toBe(1234567);
359
+ // Float columns remain as regular numbers
360
+ expect(bigIntToNumber(4.0, 0)).toBe(4.0);
361
+ });
362
+
363
+ test("video segmentation timestamps are correctly derived for multiple episodes", () => {
364
+ // Each episode has its own video segment; timestamps accumulate per episode
365
+ const episodes = [
366
+ { from_timestamp: 0.0, to_timestamp: 4.0, length: 200 },
367
+ { from_timestamp: 4.0, to_timestamp: 8.2, length: 210 },
368
+ { from_timestamp: 8.2, to_timestamp: 12.0, length: 190 },
369
+ ];
370
+
371
+ episodes.forEach((ep) => {
372
+ const duration = ep.to_timestamp - ep.from_timestamp;
373
+ expect(duration).toBeGreaterThan(0);
374
+ expect(ep.from_timestamp).toBeLessThan(ep.to_timestamp);
375
+ });
376
+
377
+ // Segments are contiguous (each episode starts where the previous ends)
378
+ for (let i = 1; i < episodes.length; i++) {
379
+ expect(episodes[i].from_timestamp).toBeCloseTo(
380
+ episodes[i - 1].to_timestamp,
381
+ 5,
382
+ );
383
+ }
384
+ });
385
+ });
src/utils/__tests__/dataProcessing.test.ts ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ groupRowBySuffix,
4
+ buildSuffixGroupsMap,
5
+ computeGroupStats,
6
+ groupByScale,
7
+ flattenScaleGroups,
8
+ processChartDataGroups,
9
+ } from "@/utils/dataProcessing";
10
+ import { CHART_CONFIG } from "@/utils/constants";
11
+
12
+ const DELIM = CHART_CONFIG.SERIES_NAME_DELIMITER; // " | "
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // groupRowBySuffix
16
+ // ---------------------------------------------------------------------------
17
+ describe("groupRowBySuffix", () => {
18
+ test("passes through timestamp unchanged", () => {
19
+ const result = groupRowBySuffix({ timestamp: 1.5 });
20
+ expect(result.timestamp).toBe(1.5);
21
+ });
22
+
23
+ test("keeps single-prefix suffix keys as flat entries with full original name", () => {
24
+ // `action | 0`, `action | 1`, `action | 2` each have a UNIQUE prefix per suffix,
25
+ // so they stay flat (no nesting). Nesting only occurs when multiple prefixes
26
+ // share the same numeric suffix (e.g. state | 0 AND action | 0).
27
+ const row = {
28
+ [`action${DELIM}0`]: 0.1,
29
+ [`action${DELIM}1`]: 0.2,
30
+ [`action${DELIM}2`]: 0.3,
31
+ timestamp: 0,
32
+ };
33
+ const result = groupRowBySuffix(row);
34
+ expect(result[`action${DELIM}0`]).toBe(0.1);
35
+ expect(result[`action${DELIM}1`]).toBe(0.2);
36
+ expect(result[`action${DELIM}2`]).toBe(0.3);
37
+ });
38
+
39
+ test("keeps keys without delimiter at top level", () => {
40
+ const row = { progress: 0.75, timestamp: 2.0 };
41
+ const result = groupRowBySuffix(row);
42
+ expect(result["progress"]).toBe(0.75);
43
+ });
44
+
45
+ test("preserves single-member suffix as full original key", () => {
46
+ // A key like "observation.state | 0" that is alone in its suffix group
47
+ // should remain at the top level with its full original name
48
+ const row = { [`solo_col${DELIM}joint`]: 1.0 };
49
+ const result = groupRowBySuffix(row);
50
+ expect(result[`solo_col${DELIM}joint`]).toBe(1.0);
51
+ });
52
+
53
+ test("groups by suffix when multiple prefixes share the same suffix (v2.x state+action)", () => {
54
+ // `observation.state | 0` and `action | 0` both have suffix "0",
55
+ // so they are grouped under the key "0" as a nested object { "observation.state": ..., "action": ... }.
56
+ const row = {
57
+ [`observation.state${DELIM}0`]: 0.1,
58
+ [`observation.state${DELIM}1`]: 0.2,
59
+ [`action${DELIM}0`]: 0.5,
60
+ [`action${DELIM}1`]: 0.6,
61
+ timestamp: 0.5,
62
+ };
63
+ const result = groupRowBySuffix(row);
64
+ // Both suffix "0" groups: observation.state and action
65
+ const group0 = result["0"] as Record<string, number>;
66
+ const group1 = result["1"] as Record<string, number>;
67
+ expect(group0["observation.state"]).toBe(0.1);
68
+ expect(group0["action"]).toBe(0.5);
69
+ expect(group1["observation.state"]).toBe(0.2);
70
+ expect(group1["action"]).toBe(0.6);
71
+ });
72
+ });
73
+
74
+ // ---------------------------------------------------------------------------
75
+ // buildSuffixGroupsMap
76
+ // ---------------------------------------------------------------------------
77
+ describe("buildSuffixGroupsMap", () => {
78
+ test("groups keys by their suffix", () => {
79
+ const keys = [
80
+ `action${DELIM}0`,
81
+ `action${DELIM}1`,
82
+ `observation.state${DELIM}0`,
83
+ ];
84
+ const map = buildSuffixGroupsMap(keys);
85
+ expect(map["action"]).toBeUndefined(); // suffix is "0" and "1"
86
+ expect(map["0"]).toContain(`action${DELIM}0`);
87
+ expect(map["0"]).toContain(`observation.state${DELIM}0`);
88
+ expect(map["1"]).toContain(`action${DELIM}1`);
89
+ });
90
+
91
+ test("keys without delimiter fall back to the key itself", () => {
92
+ const map = buildSuffixGroupsMap(["progress"]);
93
+ expect(map["progress"]).toEqual(["progress"]);
94
+ });
95
+
96
+ test("returns empty object for empty input", () => {
97
+ expect(buildSuffixGroupsMap([])).toEqual({});
98
+ });
99
+ });
100
+
101
+ // ---------------------------------------------------------------------------
102
+ // computeGroupStats
103
+ // ---------------------------------------------------------------------------
104
+ describe("computeGroupStats", () => {
105
+ test("computes correct min and max across all rows for each group", () => {
106
+ const chartData = [
107
+ { "action | 0": 1.0, "action | 1": -2.0 },
108
+ { "action | 0": 3.0, "action | 1": 0.5 },
109
+ ];
110
+ const groups = [["action | 0", "action | 1"]];
111
+ const stats = computeGroupStats(chartData, groups);
112
+ expect(stats["action | 0"].min).toBe(-2.0);
113
+ expect(stats["action | 0"].max).toBe(3.0);
114
+ });
115
+
116
+ test("ignores NaN values", () => {
117
+ const chartData = [{ col: NaN }, { col: 5 }, { col: 2 }];
118
+ const stats = computeGroupStats(chartData, [["col"]]);
119
+ expect(stats["col"].min).toBe(2);
120
+ expect(stats["col"].max).toBe(5);
121
+ });
122
+
123
+ test("returns Infinity/-Infinity for all-NaN group (group skipped in groupByScale)", () => {
124
+ const chartData = [{ col: NaN }];
125
+ const stats = computeGroupStats(chartData, [["col"]]);
126
+ expect(stats["col"].min).toBe(Infinity);
127
+ expect(stats["col"].max).toBe(-Infinity);
128
+ });
129
+ });
130
+
131
+ // ---------------------------------------------------------------------------
132
+ // groupByScale
133
+ // ---------------------------------------------------------------------------
134
+ describe("groupByScale", () => {
135
+ test("groups series with similar scale together", () => {
136
+ // Two series both in range ~[0, 1] — should be grouped
137
+ const suffixGroups = [["a"], ["b"]];
138
+ const stats = {
139
+ a: { min: 0.1, max: 1.0 },
140
+ b: { min: 0.2, max: 0.9 },
141
+ };
142
+ const result = groupByScale(suffixGroups, stats);
143
+ const groups = Object.values(result);
144
+ // Both a and b have similar log-scale range, expect them merged
145
+ expect(groups.some((g) => g.length === 2)).toBe(true);
146
+ });
147
+
148
+ test("keeps series with vastly different scales separate", () => {
149
+ // One series in [0,1], another in [0, 1000]
150
+ const suffixGroups = [["small"], ["large"]];
151
+ const stats = {
152
+ small: { min: 0.001, max: 1.0 },
153
+ large: { min: 100, max: 1000 },
154
+ };
155
+ const result = groupByScale(suffixGroups, stats);
156
+ // Each should be in its own group
157
+ expect(Object.keys(result).length).toBe(2);
158
+ });
159
+
160
+ test("skips groups with non-finite stats", () => {
161
+ const suffixGroups = [["bad"]];
162
+ const stats = { bad: { min: Infinity, max: -Infinity } };
163
+ const result = groupByScale(suffixGroups, stats);
164
+ expect(Object.keys(result).length).toBe(0);
165
+ });
166
+ });
167
+
168
+ // ---------------------------------------------------------------------------
169
+ // flattenScaleGroups
170
+ // ---------------------------------------------------------------------------
171
+ describe("flattenScaleGroups", () => {
172
+ test("returns each scale group as a flat array of keys", () => {
173
+ const scaleGroups = { a: [["a", "b"], ["c"]] };
174
+ const result = flattenScaleGroups(scaleGroups);
175
+ expect(result).toEqual([["a", "b", "c"]]);
176
+ });
177
+
178
+ test("splits large groups exceeding MAX_SERIES_PER_GROUP", () => {
179
+ const MAX = CHART_CONFIG.MAX_SERIES_PER_GROUP; // 6
180
+ const bigGroup = Array.from({ length: MAX + 2 }, (_, i) => [`key_${i}`]);
181
+ const scaleGroups = { key_0: bigGroup };
182
+ const result = flattenScaleGroups(scaleGroups);
183
+ // Should be split into 2 sub-groups
184
+ expect(result.length).toBe(2);
185
+ expect(result[0].length).toBe(MAX);
186
+ expect(result[1].length).toBe(2);
187
+ });
188
+
189
+ test("groups with more sub-arrays come first (sorted by length desc)", () => {
190
+ const scaleGroups = {
191
+ a: [["a"]], // 1 sub-group
192
+ b: [["b"], ["c"]], // 2 sub-groups
193
+ };
194
+ const result = flattenScaleGroups(scaleGroups);
195
+ // b (2 sub-groups) should come before a (1 sub-group)
196
+ expect(result[0]).toContain("b");
197
+ });
198
+ });
199
+
200
+ // ---------------------------------------------------------------------------
201
+ // processChartDataGroups — end-to-end pipeline
202
+ // ---------------------------------------------------------------------------
203
+ describe("processChartDataGroups", () => {
204
+ test("returns an empty array for empty chart data", () => {
205
+ const result = processChartDataGroups(["timestamp"], []);
206
+ expect(result).toEqual([]);
207
+ });
208
+
209
+ test("groups v2.x style action+state series correctly", () => {
210
+ const seriesNames = [
211
+ "timestamp",
212
+ `observation.state${DELIM}0`,
213
+ `observation.state${DELIM}1`,
214
+ `action${DELIM}0`,
215
+ `action${DELIM}1`,
216
+ ];
217
+ const chartData = [
218
+ {
219
+ timestamp: 0,
220
+ [`observation.state${DELIM}0`]: 0.1,
221
+ [`observation.state${DELIM}1`]: 0.2,
222
+ [`action${DELIM}0`]: 0.5,
223
+ [`action${DELIM}1`]: 0.6,
224
+ },
225
+ {
226
+ timestamp: 0.1,
227
+ [`observation.state${DELIM}0`]: 0.15,
228
+ [`observation.state${DELIM}1`]: 0.25,
229
+ [`action${DELIM}0`]: 0.55,
230
+ [`action${DELIM}1`]: 0.65,
231
+ },
232
+ ];
233
+ const result = processChartDataGroups(seriesNames, chartData);
234
+ // All four series share similar scale, so likely merged into 1-2 groups
235
+ expect(result.length).toBeGreaterThanOrEqual(1);
236
+ // Each element is an array of keys
237
+ const allKeys = result.flat();
238
+ expect(allKeys).toContain(`observation.state${DELIM}0`);
239
+ expect(allKeys).toContain(`action${DELIM}0`);
240
+ });
241
+
242
+ test("handles single series without delimiter", () => {
243
+ const seriesNames = ["timestamp", "progress"];
244
+ const chartData = [
245
+ { timestamp: 0, progress: 0.0 },
246
+ { timestamp: 1, progress: 0.5 },
247
+ { timestamp: 2, progress: 1.0 },
248
+ ];
249
+ const result = processChartDataGroups(seriesNames, chartData);
250
+ expect(result.length).toBe(1);
251
+ expect(result[0]).toContain("progress");
252
+ });
253
+ });
src/utils/__tests__/parquetUtils.test.ts ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ formatStringWithVars,
4
+ arrayToCSV,
5
+ getRows,
6
+ } from "@/utils/parquetUtils";
7
+ import { PADDING } from "@/utils/constants";
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // formatStringWithVars — used to build v2.x data / video paths at runtime
11
+ // ---------------------------------------------------------------------------
12
+ describe("formatStringWithVars", () => {
13
+ // v2.0 dataset path templates (real format from rabhishek100/so100_train_dataset)
14
+ test("v2.0 data_path template with pre-padded vars", () => {
15
+ const template =
16
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
17
+ const episodeId = 42;
18
+ const chunkSize = 1000;
19
+ const episode_chunk = Math.floor(episodeId / chunkSize)
20
+ .toString()
21
+ .padStart(PADDING.CHUNK_INDEX, "0");
22
+ const episode_index = episodeId
23
+ .toString()
24
+ .padStart(PADDING.EPISODE_INDEX, "0");
25
+ expect(
26
+ formatStringWithVars(template, { episode_chunk, episode_index }),
27
+ ).toBe("data/000/episode_000042.parquet");
28
+ });
29
+
30
+ // v2.1 dataset path templates (same format as v2.0)
31
+ test("v2.1 data_path template — identical format to v2.0", () => {
32
+ const template =
33
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
34
+ const episode_chunk = (1).toString().padStart(PADDING.CHUNK_INDEX, "0");
35
+ const episode_index = (1500)
36
+ .toString()
37
+ .padStart(PADDING.EPISODE_INDEX, "0");
38
+ expect(
39
+ formatStringWithVars(template, { episode_chunk, episode_index }),
40
+ ).toBe("data/001/episode_001500.parquet");
41
+ });
42
+
43
+ // v2.x video_path template
44
+ test("v2.x video_path template with video_key, chunk, episode", () => {
45
+ const template =
46
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4";
47
+ const episode_chunk = (0).toString().padStart(PADDING.CHUNK_INDEX, "0");
48
+ const episode_index = (7).toString().padStart(PADDING.EPISODE_INDEX, "0");
49
+ expect(
50
+ formatStringWithVars(template, {
51
+ video_key: "observation.images.top",
52
+ episode_chunk,
53
+ episode_index,
54
+ }),
55
+ ).toBe("videos/observation.images.top/chunk-000/episode_000007.mp4");
56
+ });
57
+
58
+ test("leaves unmatched placeholders as 'undefined'", () => {
59
+ // When a variable is missing the replacement returns "undefined" (String(undefined))
60
+ const result = formatStringWithVars("data/{missing_key}.parquet", {});
61
+ expect(result).toBe("data/undefined.parquet");
62
+ });
63
+
64
+ test("handles template without format specifier", () => {
65
+ expect(formatStringWithVars("{a}/{b}", { a: "foo", b: "bar" })).toBe(
66
+ "foo/bar",
67
+ );
68
+ });
69
+
70
+ test("strips :Nd format specifier, uses pre-padded string value", () => {
71
+ // The function does NOT zero-pad; the caller is responsible for padding
72
+ expect(formatStringWithVars("{x:06d}", { x: "000042" })).toBe("000042");
73
+ });
74
+ });
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // arrayToCSV
78
+ // ---------------------------------------------------------------------------
79
+ describe("arrayToCSV", () => {
80
+ test("converts 2D array to CSV string", () => {
81
+ const data = [
82
+ [1, 2, 3],
83
+ [4, 5, 6],
84
+ ];
85
+ expect(arrayToCSV(data)).toBe("1,2,3\n4,5,6");
86
+ });
87
+
88
+ test("handles single row", () => {
89
+ expect(arrayToCSV([[10, 20]])).toBe("10,20");
90
+ });
91
+
92
+ test("handles string values", () => {
93
+ expect(
94
+ arrayToCSV([
95
+ ["a", "b"],
96
+ ["c", "d"],
97
+ ]),
98
+ ).toBe("a,b\nc,d");
99
+ });
100
+
101
+ test("handles empty array", () => {
102
+ expect(arrayToCSV([])).toBe("");
103
+ });
104
+ });
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // getRows — used to build the data table view from flat parquet column data
108
+ // ---------------------------------------------------------------------------
109
+ describe("getRows", () => {
110
+ test("returns empty array when currentFrameData is empty", () => {
111
+ const cols = [{ key: "state", value: ["s0", "s1"] }];
112
+ expect(getRows([], cols)).toEqual([]);
113
+ });
114
+
115
+ test("constructs rows from flat data with equal-length columns", () => {
116
+ // state: [0.1, 0.2], action: [0.5, 0.6] — flat layout: [s0, s1, a0, a1]
117
+ const cols = [
118
+ { key: "observation.state", value: ["s0", "s1"] },
119
+ { key: "action", value: ["a0", "a1"] },
120
+ ];
121
+ const flat = [0.1, 0.2, 0.5, 0.6];
122
+ const rows = getRows(flat, cols);
123
+ expect(rows.length).toBe(2);
124
+ expect(rows[0]).toEqual([0.1, 0.5]);
125
+ expect(rows[1]).toEqual([0.2, 0.6]);
126
+ });
127
+
128
+ test("null-pads shorter columns (action has fewer dims than state)", () => {
129
+ // state: 3 dims, action: 2 dims — row 2 should have null for action
130
+ const cols = [
131
+ { key: "state", value: ["s0", "s1", "s2"] },
132
+ { key: "action", value: ["a0", "a1"] },
133
+ ];
134
+ const flat = [0.1, 0.2, 0.3, 0.5, 0.6]; // s0,s1,s2,a0,a1
135
+ const rows = getRows(flat, cols);
136
+ expect(rows.length).toBe(3);
137
+ expect(rows[2][1]).toEqual({ isNull: true });
138
+ });
139
+
140
+ test("handles single-column data (v2.x progress series)", () => {
141
+ const cols = [{ key: "progress", value: ["p0"] }];
142
+ const flat = [0.75];
143
+ const rows = getRows(flat, cols);
144
+ expect(rows.length).toBe(1);
145
+ expect(rows[0]).toEqual([0.75]);
146
+ });
147
+ });
src/utils/__tests__/stringFormatting.test.ts ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ padNumber,
4
+ formatEpisodeChunk,
5
+ formatEpisodeIndex,
6
+ formatFileIndex,
7
+ formatChunkIndex,
8
+ buildV3VideoPath,
9
+ buildV3DataPath,
10
+ buildV3EpisodesMetadataPath,
11
+ } from "@/utils/stringFormatting";
12
+
13
// These utilities are the foundation of v3.0 path construction.
// v2.x uses formatStringWithVars + manual padStart instead.

describe("padNumber", () => {
  test("pads single digit to 3", () => {
    expect(padNumber(1, 3)).toBe("001");
  });
  test("pads zero to 6", () => {
    expect(padNumber(0, 6)).toBe("000000");
  });
  // padStart-style semantics: a value wider than the target length is
  // returned unchanged, never truncated.
  test("does not truncate numbers longer than length", () => {
    expect(padNumber(1234, 3)).toBe("1234");
  });
  test("pads to exact length when already equal", () => {
    expect(padNumber(42, 2)).toBe("42");
  });
});

describe("formatEpisodeChunk — 3-digit padding (v2.x chunk_index, v3 chunk_index)", () => {
  test("chunk 0 → '000'", () => {
    expect(formatEpisodeChunk(0)).toBe("000");
  });
  test("chunk 1 → '001'", () => {
    expect(formatEpisodeChunk(1)).toBe("001");
  });
  test("chunk 42 → '042'", () => {
    expect(formatEpisodeChunk(42)).toBe("042");
  });
  // 999 is the widest value that still fits three digits without padding.
  test("chunk 999 → '999'", () => {
    expect(formatEpisodeChunk(999)).toBe("999");
  });
});

describe("formatEpisodeIndex — 6-digit padding (v2.x episode_index)", () => {
  test("index 0 → '000000'", () => {
    expect(formatEpisodeIndex(0)).toBe("000000");
  });
  test("index 42 → '000042'", () => {
    expect(formatEpisodeIndex(42)).toBe("000042");
  });
  test("index 999999 → '999999'", () => {
    expect(formatEpisodeIndex(999999)).toBe("999999");
  });
});

describe("formatFileIndex — 3-digit padding (v3.0 file_index)", () => {
  test("file 0 → '000'", () => {
    expect(formatFileIndex(0)).toBe("000");
  });
  test("file 5 → '005'", () => {
    expect(formatFileIndex(5)).toBe("005");
  });
  test("file 100 → '100'", () => {
    expect(formatFileIndex(100)).toBe("100");
  });
});

describe("formatChunkIndex — 3-digit padding (v3.0 chunk_index)", () => {
  test("chunk 0 → '000'", () => {
    expect(formatChunkIndex(0)).toBe("000");
  });
  test("chunk 12 → '012'", () => {
    expect(formatChunkIndex(12)).toBe("012");
  });
});
78
+
79
// v3.0 specific path builders
describe("buildV3VideoPath", () => {
  test("single camera, chunk 0, file 0", () => {
    expect(buildV3VideoPath("observation.image", 0, 0)).toBe(
      "videos/observation.image/chunk-000/file-000.mp4",
    );
  });

  // Camera keys may be dot-nested; they are embedded in the path verbatim.
  test("nested camera key, non-zero chunk and file", () => {
    expect(buildV3VideoPath("observation.images.wrist", 2, 5)).toBe(
      "videos/observation.images.wrist/chunk-002/file-005.mp4",
    );
  });

  test("two-camera SO101 dataset style", () => {
    expect(buildV3VideoPath("observation.images.top", 0, 1)).toBe(
      "videos/observation.images.top/chunk-000/file-001.mp4",
    );
  });
});

describe("buildV3DataPath", () => {
  test("chunk 0, file 0", () => {
    expect(buildV3DataPath(0, 0)).toBe("data/chunk-000/file-000.parquet");
  });
  test("chunk 1, file 3", () => {
    expect(buildV3DataPath(1, 3)).toBe("data/chunk-001/file-003.parquet");
  });
  test("large indices", () => {
    expect(buildV3DataPath(10, 99)).toBe("data/chunk-010/file-099.parquet");
  });
});

describe("buildV3EpisodesMetadataPath", () => {
  test("chunk 0, file 0 (default for most datasets)", () => {
    expect(buildV3EpisodesMetadataPath(0, 0)).toBe(
      "meta/episodes/chunk-000/file-000.parquet",
    );
  });
  test("chunk 0, file 2 (multiple metadata files)", () => {
    expect(buildV3EpisodesMetadataPath(0, 2)).toBe(
      "meta/episodes/chunk-000/file-002.parquet",
    );
  });
});
src/utils/__tests__/typeGuards.test.ts ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ isBigInt,
4
+ bigIntToNumber,
5
+ isNumeric,
6
+ isValidTaskIndex,
7
+ toString,
8
+ isNonEmptyString,
9
+ isObject,
10
+ hasPropertyOfType,
11
+ } from "@/utils/typeGuards";
12
+
13
+ describe("isBigInt", () => {
14
+ test("returns true for BigInt", () => {
15
+ expect(isBigInt(42n)).toBe(true);
16
+ });
17
+ test("returns false for number", () => {
18
+ expect(isBigInt(42)).toBe(false);
19
+ });
20
+ test("returns false for string", () => {
21
+ expect(isBigInt("42")).toBe(false);
22
+ });
23
+ test("returns false for null", () => {
24
+ expect(isBigInt(null)).toBe(false);
25
+ });
26
+ });
27
+
28
+ describe("bigIntToNumber", () => {
29
+ test("converts BigInt to number", () => {
30
+ expect(bigIntToNumber(42n)).toBe(42);
31
+ });
32
+ test("passes through a regular number unchanged", () => {
33
+ expect(bigIntToNumber(3.14)).toBe(3.14);
34
+ });
35
+ test("returns default fallback (0) for non-numeric value", () => {
36
+ expect(bigIntToNumber("hello")).toBe(0);
37
+ });
38
+ test("returns custom fallback for non-numeric value", () => {
39
+ expect(bigIntToNumber(null, -1)).toBe(-1);
40
+ });
41
+ test("converts 0n correctly", () => {
42
+ expect(bigIntToNumber(0n)).toBe(0);
43
+ });
44
+ // Parquet files from v3.0 datasets return BigInt for integer columns
45
+ test("handles large BigInt values from parquet (e.g., frame counts)", () => {
46
+ expect(bigIntToNumber(1000000n)).toBe(1000000);
47
+ });
48
+ });
49
+
50
+ describe("isNumeric", () => {
51
+ test("returns true for number", () => {
52
+ expect(isNumeric(1.5)).toBe(true);
53
+ });
54
+ test("returns true for BigInt (as seen in parquet columns)", () => {
55
+ expect(isNumeric(100n)).toBe(true);
56
+ });
57
+ test("returns false for string", () => {
58
+ expect(isNumeric("5")).toBe(false);
59
+ });
60
+ test("returns false for null", () => {
61
+ expect(isNumeric(null)).toBe(false);
62
+ });
63
+ test("returns false for boolean", () => {
64
+ expect(isNumeric(true)).toBe(false);
65
+ });
66
+ });
67
+
68
+ describe("isValidTaskIndex", () => {
69
+ test("returns true for 0", () => {
70
+ expect(isValidTaskIndex(0)).toBe(true);
71
+ });
72
+ test("returns true for positive integer", () => {
73
+ expect(isValidTaskIndex(5)).toBe(true);
74
+ });
75
+ test("returns true for BigInt 0n (v3 parquet style)", () => {
76
+ expect(isValidTaskIndex(0n)).toBe(true);
77
+ });
78
+ test("returns false for negative number", () => {
79
+ expect(isValidTaskIndex(-1)).toBe(false);
80
+ });
81
+ test("returns false for float", () => {
82
+ expect(isValidTaskIndex(1.5)).toBe(false);
83
+ });
84
+ test("returns false for null", () => {
85
+ expect(isValidTaskIndex(null)).toBe(false);
86
+ });
87
+ test("returns false for undefined", () => {
88
+ expect(isValidTaskIndex(undefined)).toBe(false);
89
+ });
90
+ });
91
+
92
+ describe("toString", () => {
93
+ test("returns string as-is", () => {
94
+ expect(toString("hello")).toBe("hello");
95
+ });
96
+ test("returns empty string for null", () => {
97
+ expect(toString(null)).toBe("");
98
+ });
99
+ test("returns empty string for undefined", () => {
100
+ expect(toString(undefined)).toBe("");
101
+ });
102
+ test("converts number to string", () => {
103
+ expect(toString(42)).toBe("42");
104
+ });
105
+ test("converts BigInt to string", () => {
106
+ expect(toString(7n)).toBe("7");
107
+ });
108
+ });
109
+
110
+ describe("isNonEmptyString", () => {
111
+ test("returns true for non-empty string", () => {
112
+ expect(isNonEmptyString("hello")).toBe(true);
113
+ });
114
+ test("returns false for empty string", () => {
115
+ expect(isNonEmptyString("")).toBe(false);
116
+ });
117
+ test("returns false for number", () => {
118
+ expect(isNonEmptyString(5)).toBe(false);
119
+ });
120
+ test("returns false for null", () => {
121
+ expect(isNonEmptyString(null)).toBe(false);
122
+ });
123
+ });
124
+
125
+ describe("isObject", () => {
126
+ test("returns true for plain object", () => {
127
+ expect(isObject({ a: 1 })).toBe(true);
128
+ });
129
+ test("returns false for null (typeof null === 'object' trap)", () => {
130
+ expect(isObject(null)).toBe(false);
131
+ });
132
+ test("returns false for array", () => {
133
+ expect(isObject([1, 2])).toBe(false);
134
+ });
135
+ test("returns false for string", () => {
136
+ expect(isObject("hello")).toBe(false);
137
+ });
138
+ test("returns true for empty object", () => {
139
+ expect(isObject({})).toBe(true);
140
+ });
141
+ });
142
+
143
+ describe("hasPropertyOfType", () => {
144
+ test("returns true when property exists and passes type guard", () => {
145
+ expect(
146
+ hasPropertyOfType(
147
+ { x: 42 },
148
+ "x",
149
+ (v): v is number => typeof v === "number",
150
+ ),
151
+ ).toBe(true);
152
+ });
153
+ test("returns false when property exists but fails type guard", () => {
154
+ expect(
155
+ hasPropertyOfType(
156
+ { x: "hello" },
157
+ "x",
158
+ (v): v is number => typeof v === "number",
159
+ ),
160
+ ).toBe(false);
161
+ });
162
+ test("returns false when property does not exist", () => {
163
+ expect(
164
+ hasPropertyOfType(
165
+ { a: 1 },
166
+ "b",
167
+ (v): v is number => typeof v === "number",
168
+ ),
169
+ ).toBe(false);
170
+ });
171
+ test("returns false for non-object input", () => {
172
+ expect(
173
+ hasPropertyOfType(null, "x", (v): v is number => typeof v === "number"),
174
+ ).toBe(false);
175
+ });
176
+ });
src/utils/__tests__/versionUtils.test.ts ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test, mock, afterEach } from "bun:test";
2
+ import { buildVersionedUrl } from "@/utils/versionUtils";
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // buildVersionedUrl — pure function, no mocking needed
6
+ // ---------------------------------------------------------------------------
7
+ describe("buildVersionedUrl", () => {
8
+ test("builds URL for v2.0 dataset data path", () => {
9
+ const url = buildVersionedUrl(
10
+ "rabhishek100/so100_train_dataset",
11
+ "v2.0",
12
+ "data/000/episode_000000.parquet",
13
+ );
14
+ expect(url).toBe(
15
+ "https://huggingface.co/datasets/rabhishek100/so100_train_dataset/resolve/main/data/000/episode_000000.parquet",
16
+ );
17
+ });
18
+
19
+ test("builds URL for v2.1 dataset video path", () => {
20
+ const url = buildVersionedUrl(
21
+ "youliangtan/so101-table-cleanup",
22
+ "v2.1",
23
+ "videos/observation.images.top/chunk-000/episode_000007.mp4",
24
+ );
25
+ expect(url).toBe(
26
+ "https://huggingface.co/datasets/youliangtan/so101-table-cleanup/resolve/main/videos/observation.images.top/chunk-000/episode_000007.mp4",
27
+ );
28
+ });
29
+
30
+ test("builds URL for v3.0 episode metadata", () => {
31
+ const url = buildVersionedUrl(
32
+ "lerobot-data-collection/level12_rac_2_2026-02-07",
33
+ "v3.0",
34
+ "meta/episodes/chunk-000/file-000.parquet",
35
+ );
36
+ expect(url).toBe(
37
+ "https://huggingface.co/datasets/lerobot-data-collection/level12_rac_2_2026-02-07/resolve/main/meta/episodes/chunk-000/file-000.parquet",
38
+ );
39
+ });
40
+
41
+ test("builds URL for v3.0 data chunk", () => {
42
+ const url = buildVersionedUrl(
43
+ "lerobot-data-collection/level12_rac_2_2026-02-07",
44
+ "v3.0",
45
+ "data/chunk-001/file-003.parquet",
46
+ );
47
+ expect(url).toBe(
48
+ "https://huggingface.co/datasets/lerobot-data-collection/level12_rac_2_2026-02-07/resolve/main/data/chunk-001/file-003.parquet",
49
+ );
50
+ });
51
+
52
+ test("builds URL for meta/info.json", () => {
53
+ const url = buildVersionedUrl("myorg/mydataset", "v3.0", "meta/info.json");
54
+ expect(url).toBe(
55
+ "https://huggingface.co/datasets/myorg/mydataset/resolve/main/meta/info.json",
56
+ );
57
+ });
58
+ });
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // getDatasetVersionAndInfo — tested with mocked fetch
62
+ // ---------------------------------------------------------------------------
63
+ describe("getDatasetVersionAndInfo", () => {
64
+ const originalFetch = globalThis.fetch;
65
+
66
+ afterEach(() => {
67
+ globalThis.fetch = originalFetch;
68
+ });
69
+
70
+ test("accepts v2.0 codebase_version", async () => {
71
+ const infoV20 = {
72
+ codebase_version: "v2.0",
73
+ robot_type: "so100",
74
+ total_episodes: 50,
75
+ total_frames: 5000,
76
+ total_tasks: 1,
77
+ chunks_size: 1000,
78
+ data_files_size_in_mb: 10,
79
+ video_files_size_in_mb: 500,
80
+ fps: 30,
81
+ splits: { train: "0:50" },
82
+ data_path: "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet",
83
+ video_path:
84
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4",
85
+ features: {
86
+ "observation.images.top": {
87
+ dtype: "video",
88
+ shape: [480, 640, 3],
89
+ names: null,
90
+ },
91
+ "observation.state": {
92
+ dtype: "float32",
93
+ shape: [1, 6],
94
+ names: ["j0", "j1", "j2", "j3", "j4", "j5"],
95
+ },
96
+ action: {
97
+ dtype: "float32",
98
+ shape: [1, 6],
99
+ names: ["j0", "j1", "j2", "j3", "j4", "j5"],
100
+ },
101
+ },
102
+ };
103
+
104
+ globalThis.fetch = mock(() =>
105
+ Promise.resolve(new Response(JSON.stringify(infoV20), { status: 200 })),
106
+ ) as unknown as typeof fetch;
107
+
108
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
109
+ const result = await getDatasetVersionAndInfo(
110
+ "rabhishek100/so100_train_dataset",
111
+ );
112
+ expect(result.version).toBe("v2.0");
113
+ expect(result.info.total_episodes).toBe(50);
114
+ });
115
+
116
+ test("accepts v2.1 codebase_version", async () => {
117
+ const infoV21 = {
118
+ codebase_version: "v2.1",
119
+ robot_type: "so101",
120
+ total_episodes: 100,
121
+ total_frames: 10000,
122
+ total_tasks: 1,
123
+ chunks_size: 1000,
124
+ data_files_size_in_mb: 20,
125
+ video_files_size_in_mb: 1000,
126
+ fps: 30,
127
+ splits: { train: "0:100" },
128
+ data_path: "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet",
129
+ video_path:
130
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4",
131
+ features: {
132
+ "observation.images.top": {
133
+ dtype: "video",
134
+ shape: [480, 640, 3],
135
+ names: null,
136
+ },
137
+ "observation.state": { dtype: "float32", shape: [1, 6], names: null },
138
+ action: { dtype: "float32", shape: [1, 6], names: null },
139
+ },
140
+ };
141
+
142
+ globalThis.fetch = mock(() =>
143
+ Promise.resolve(new Response(JSON.stringify(infoV21), { status: 200 })),
144
+ ) as unknown as typeof fetch;
145
+
146
+ // Use fresh import to bypass cache — or just call with a different repoId
147
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
148
+ const result = await getDatasetVersionAndInfo(
149
+ "youliangtan/so101-table-cleanup",
150
+ );
151
+ expect(result.version).toBe("v2.1");
152
+ });
153
+
154
+ test("accepts v3.0 codebase_version", async () => {
155
+ const infoV30 = {
156
+ codebase_version: "v3.0",
157
+ robot_type: "openarm",
158
+ total_episodes: 200,
159
+ total_frames: 40000,
160
+ total_tasks: 1,
161
+ chunks_size: 100,
162
+ data_files_size_in_mb: 50,
163
+ video_files_size_in_mb: 2000,
164
+ fps: 50,
165
+ splits: { train: "0:200" },
166
+ data_path: null,
167
+ video_path: null,
168
+ features: {
169
+ "observation.images.top": {
170
+ dtype: "video",
171
+ shape: [480, 640, 3],
172
+ names: null,
173
+ },
174
+ "observation.state": { dtype: "float32", shape: [1, 14], names: null },
175
+ action: { dtype: "float32", shape: [1, 14], names: null },
176
+ },
177
+ };
178
+
179
+ globalThis.fetch = mock(() =>
180
+ Promise.resolve(new Response(JSON.stringify(infoV30), { status: 200 })),
181
+ ) as unknown as typeof fetch;
182
+
183
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
184
+ const result = await getDatasetVersionAndInfo(
185
+ "lerobot-data-collection/level12_rac_2_2026-02-07",
186
+ );
187
+ expect(result.version).toBe("v3.0");
188
+ expect(result.info.total_episodes).toBe(200);
189
+ });
190
+
191
+ test("throws for unsupported version", async () => {
192
+ const infoUnsupported = {
193
+ codebase_version: "v1.0",
194
+ features: { dummy: { dtype: "float32", shape: [1], names: null } },
195
+ };
196
+
197
+ globalThis.fetch = mock(() =>
198
+ Promise.resolve(
199
+ new Response(JSON.stringify(infoUnsupported), { status: 200 }),
200
+ ),
201
+ ) as unknown as typeof fetch;
202
+
203
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
204
+ await expect(getDatasetVersionAndInfo("old/dataset")).rejects.toThrow(
205
+ "not supported",
206
+ );
207
+ });
208
+
209
+ test("throws when info.json has no features field", async () => {
210
+ globalThis.fetch = mock(() =>
211
+ Promise.resolve(
212
+ new Response(JSON.stringify({ codebase_version: "v3.0" }), {
213
+ status: 200,
214
+ }),
215
+ ),
216
+ ) as unknown as typeof fetch;
217
+
218
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
219
+ await expect(getDatasetVersionAndInfo("broken/dataset")).rejects.toThrow();
220
+ });
221
+
222
+ test("throws when fetch fails (network error)", async () => {
223
+ globalThis.fetch = mock(() =>
224
+ Promise.resolve(new Response("Not Found", { status: 404 })),
225
+ ) as unknown as typeof fetch;
226
+
227
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
228
+ await expect(
229
+ getDatasetVersionAndInfo("nonexistent/repo"),
230
+ ).rejects.toThrow();
231
+ });
232
+ });
tsconfig.json CHANGED
@@ -23,5 +23,5 @@
23
  }
24
  },
25
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26
- "exclude": ["node_modules"]
27
  }
 
23
  }
24
  },
25
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26
+ "exclude": ["node_modules", "**/__tests__/**"]
27
  }
tsconfig.test.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "target": "ES2020",
5
+ "lib": ["ES2020", "dom"],
6
+ "types": ["bun-types"],
7
+ "incremental": false
8
+ },
9
+ "include": ["**/__tests__/**/*.ts", "src/**/*.ts"]
10
+ }