mishig HF Staff commited on
Commit
83d531b
·
unverified ·
2 Parent(s): 2092eeaeddbda3

Merge pull request #38 from huggingface/feat/add-tests

Browse files

Add unit tests for all dataset version parsers + test CI workflow

.github/workflows/test.yml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: oven-sh/setup-bun@v1
16
+ with:
17
+ bun-version: latest
18
+
19
+ - name: Install dependencies
20
+ run: bun install
21
+
22
+ - name: Run tests
23
+ run: bun test
bun.lock CHANGED
@@ -7,7 +7,6 @@
7
  "dependencies": {
8
  "@react-three/drei": "^10.7.7",
9
  "@react-three/fiber": "^9.5.0",
10
- "@types/three": "^0.182.0",
11
  "hyparquet": "^1.12.1",
12
  "next": "15.3.6",
13
  "react": "^19.0.0",
@@ -20,9 +19,11 @@
20
  "devDependencies": {
21
  "@eslint/eslintrc": "^3",
22
  "@tailwindcss/postcss": "^4",
 
23
  "@types/node": "^20",
24
  "@types/react": "^19",
25
  "@types/react-dom": "^19",
 
26
  "eslint": "^9",
27
  "eslint-config-next": "15.3.1",
28
  "prettier": "^3.5.3",
@@ -210,6 +211,8 @@
210
 
211
  "@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="],
212
 
 
 
213
  "@types/d3-array": ["@types/d3-array@3.2.2", "", {}, "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw=="],
214
 
215
  "@types/d3-color": ["@types/d3-color@3.1.3", "", {}, "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A=="],
@@ -366,6 +369,8 @@
366
 
367
  "buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="],
368
 
 
 
369
  "busboy": ["busboy@1.6.0", "", { "dependencies": { "streamsearch": "^1.1.0" } }, "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA=="],
370
 
371
  "call-bind": ["call-bind@1.0.8", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.0", "es-define-property": "^1.0.0", "get-intrinsic": "^1.2.4", "set-function-length": "^1.2.2" } }, "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww=="],
 
7
  "dependencies": {
8
  "@react-three/drei": "^10.7.7",
9
  "@react-three/fiber": "^9.5.0",
 
10
  "hyparquet": "^1.12.1",
11
  "next": "15.3.6",
12
  "react": "^19.0.0",
 
19
  "devDependencies": {
20
  "@eslint/eslintrc": "^3",
21
  "@tailwindcss/postcss": "^4",
22
+ "@types/bun": "^1.3.10",
23
  "@types/node": "^20",
24
  "@types/react": "^19",
25
  "@types/react-dom": "^19",
26
+ "@types/three": "^0.182.0",
27
  "eslint": "^9",
28
  "eslint-config-next": "15.3.1",
29
  "prettier": "^3.5.3",
 
211
 
212
  "@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="],
213
 
214
+ "@types/bun": ["@types/bun@1.3.10", "", { "dependencies": { "bun-types": "1.3.10" } }, "sha512-0+rlrUrOrTSskibryHbvQkDOWRJwJZqZlxrUs1u4oOoTln8+WIXBPmAuCF35SWB2z4Zl3E84Nl/D0P7803nigQ=="],
215
+
216
  "@types/d3-array": ["@types/d3-array@3.2.2", "", {}, "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw=="],
217
 
218
  "@types/d3-color": ["@types/d3-color@3.1.3", "", {}, "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A=="],
 
369
 
370
  "buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="],
371
 
372
+ "bun-types": ["bun-types@1.3.10", "", { "dependencies": { "@types/node": "*" } }, "sha512-tcpfCCl6XWo6nCVnpcVrxQ+9AYN1iqMIzgrSKYMB/fjLtV2eyAVEg7AxQJuCq/26R6HpKWykQXuSOq/21RYcbg=="],
373
+
374
  "busboy": ["busboy@1.6.0", "", { "dependencies": { "streamsearch": "^1.1.0" } }, "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA=="],
375
 
376
  "call-bind": ["call-bind@1.0.8", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.0", "es-define-property": "^1.0.0", "get-intrinsic": "^1.2.4", "set-function-length": "^1.2.2" } }, "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww=="],
package.json CHANGED
@@ -9,8 +9,9 @@
9
  "lint": "next lint",
10
  "format": "prettier --write .",
11
  "format:check": "prettier --check .",
12
- "type-check": "tsc --noEmit",
13
  "type-check:watch": "tsc --noEmit --watch",
 
14
  "validate": "bun run type-check && bun run lint && bun run format:check"
15
  },
16
  "dependencies": {
@@ -27,11 +28,12 @@
27
  },
28
  "devDependencies": {
29
  "@eslint/eslintrc": "^3",
30
- "@types/three": "^0.182.0",
31
  "@tailwindcss/postcss": "^4",
 
32
  "@types/node": "^20",
33
  "@types/react": "^19",
34
  "@types/react-dom": "^19",
 
35
  "eslint": "^9",
36
  "eslint-config-next": "15.3.1",
37
  "prettier": "^3.5.3",
 
9
  "lint": "next lint",
10
  "format": "prettier --write .",
11
  "format:check": "prettier --check .",
12
+ "type-check": "tsc --noEmit && tsc -p tsconfig.test.json --noEmit",
13
  "type-check:watch": "tsc --noEmit --watch",
14
+ "test": "bun test",
15
  "validate": "bun run type-check && bun run lint && bun run format:check"
16
  },
17
  "dependencies": {
 
28
  },
29
  "devDependencies": {
30
  "@eslint/eslintrc": "^3",
 
31
  "@tailwindcss/postcss": "^4",
32
+ "@types/bun": "^1.3.10",
33
  "@types/node": "^20",
34
  "@types/react": "^19",
35
  "@types/react-dom": "^19",
36
+ "@types/three": "^0.182.0",
37
  "eslint": "^9",
38
  "eslint-config-next": "15.3.1",
39
  "prettier": "^3.5.3",
src/app/[org]/[dataset]/[episode]/__tests__/fetch-data.test.ts ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import { computeColumnMinMax } from "@/app/[org]/[dataset]/[episode]/fetch-data";
3
+ import type { ChartRow } from "@/app/[org]/[dataset]/[episode]/fetch-data";
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // computeColumnMinMax
7
+ // Used by the stats panel to display per-column min/max for any dataset version.
8
+ // ---------------------------------------------------------------------------
9
+
10
+ describe("computeColumnMinMax — flat numeric values (v2.x / v3.0 style)", () => {
11
+ test("returns empty array for empty chart data groups", () => {
12
+ expect(computeColumnMinMax([])).toEqual([]);
13
+ });
14
+
15
+ test("returns empty array for groups with only timestamp columns", () => {
16
+ const groups: ChartRow[][] = [[{ timestamp: 0 }, { timestamp: 1 }]];
17
+ expect(computeColumnMinMax(groups)).toEqual([]);
18
+ });
19
+
20
+ test("computes min/max for a single flat series", () => {
21
+ const groups: ChartRow[][] = [
22
+ [
23
+ { timestamp: 0, "progress | sparse": 0.1 },
24
+ { timestamp: 0.5, "progress | sparse": 0.5 },
25
+ { timestamp: 1.0, "progress | sparse": 0.9 },
26
+ ],
27
+ ];
28
+ const result = computeColumnMinMax(groups);
29
+ expect(result).toHaveLength(1);
30
+ expect(result[0].column).toBe("progress | sparse");
31
+ expect(result[0].min).toBe(0.1);
32
+ expect(result[0].max).toBe(0.9);
33
+ });
34
+
35
+ test("rounds to 3 decimal places", () => {
36
+ const groups: ChartRow[][] = [
37
+ [
38
+ { timestamp: 0, col: 1.23456789 },
39
+ { timestamp: 1, col: 2.0 },
40
+ ],
41
+ ];
42
+ const result = computeColumnMinMax(groups);
43
+ expect(result[0].min).toBe(1.235); // rounded
44
+ expect(result[0].max).toBe(2.0);
45
+ });
46
+
47
+ test("ignores non-finite values (Infinity, NaN)", () => {
48
+ const groups: ChartRow[][] = [
49
+ [
50
+ { timestamp: 0, col: Infinity },
51
+ { timestamp: 0.5, col: 3.0 },
52
+ { timestamp: 1, col: NaN },
53
+ ],
54
+ ];
55
+ const result = computeColumnMinMax(groups);
56
+ expect(result[0].min).toBe(3.0);
57
+ expect(result[0].max).toBe(3.0);
58
+ });
59
+ });
60
+
61
+ describe("computeColumnMinMax — nested group values (grouped suffix format)", () => {
62
+ test("computes min/max for nested observation.state group (v2.x 6-DoF robot)", () => {
63
+ const groups: ChartRow[][] = [
64
+ [
65
+ {
66
+ timestamp: 0,
67
+ "observation.state": { "0": -0.5, "1": 0.2, "2": 1.5 },
68
+ },
69
+ {
70
+ timestamp: 0.1,
71
+ "observation.state": { "0": -0.3, "1": 0.8, "2": 0.7 },
72
+ },
73
+ ],
74
+ ];
75
+ const result = computeColumnMinMax(groups);
76
+ const colMap = Object.fromEntries(result.map((r) => [r.column, r]));
77
+
78
+ // observation.state | 0: min=-0.5, max=-0.3
79
+ expect(colMap["observation.state | 0"].min).toBe(-0.5);
80
+ expect(colMap["observation.state | 0"].max).toBe(-0.3);
81
+
82
+ // observation.state | 1: min=0.2, max=0.8
83
+ expect(colMap["observation.state | 1"].min).toBe(0.2);
84
+ expect(colMap["observation.state | 1"].max).toBe(0.8);
85
+
86
+ // observation.state | 2: min=0.7, max=1.5
87
+ expect(colMap["observation.state | 2"].min).toBe(0.7);
88
+ expect(colMap["observation.state | 2"].max).toBe(1.5);
89
+ });
90
+
91
+ test("handles multiple groups (action + state) across multiple chart data groups", () => {
92
+ const groups: ChartRow[][] = [
93
+ [
94
+ {
95
+ timestamp: 0,
96
+ "observation.state": { "0": 0.1, "1": 0.2 },
97
+ },
98
+ ],
99
+ [
100
+ {
101
+ timestamp: 0,
102
+ action: { "0": -1.0, "1": 1.0 },
103
+ },
104
+ ],
105
+ ];
106
+ const result = computeColumnMinMax(groups);
107
+ const colMap = Object.fromEntries(result.map((r) => [r.column, r]));
108
+
109
+ expect(colMap["observation.state | 0"]).toBeDefined();
110
+ expect(colMap["action | 0"].min).toBe(-1.0);
111
+ expect(colMap["action | 0"].max).toBe(-1.0);
112
+ expect(colMap["action | 1"].min).toBe(1.0);
113
+ });
114
+ });
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // Version-specific path construction integration tests
118
+ //
119
+ // These tests verify that the path templates for v2.0, v2.1, and v3.0 produce
120
+ // correct URLs when combined with buildVersionedUrl and formatStringWithVars.
121
+ // ---------------------------------------------------------------------------
122
+
123
+ import { buildVersionedUrl } from "@/utils/versionUtils";
124
+ import { formatStringWithVars } from "@/utils/parquetUtils";
125
+ import {
126
+ buildV3DataPath,
127
+ buildV3VideoPath,
128
+ buildV3EpisodesMetadataPath,
129
+ } from "@/utils/stringFormatting";
130
+ import { PADDING } from "@/utils/constants";
131
+
132
+ const DATASET_BASE = "https://huggingface.co/datasets";
133
+
134
+ function makeChunkAndIndex(episodeId: number, chunkSize: number) {
135
+ const episode_chunk = Math.floor(episodeId / chunkSize)
136
+ .toString()
137
+ .padStart(PADDING.CHUNK_INDEX, "0");
138
+ const episode_index = episodeId
139
+ .toString()
140
+ .padStart(PADDING.EPISODE_INDEX, "0");
141
+ return { episode_chunk, episode_index };
142
+ }
143
+
144
+ describe("v2.0 path construction (rabhishek100/so100_train_dataset style)", () => {
145
+ const repoId = "rabhishek100/so100_train_dataset";
146
+ const version = "v2.0";
147
+ const dataPath =
148
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
149
+ const videoPath =
150
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4";
151
+
152
+ test("episode 0 in chunk 0", () => {
153
+ const { episode_chunk, episode_index } = makeChunkAndIndex(0, 1000);
154
+ const path = formatStringWithVars(dataPath, {
155
+ episode_chunk,
156
+ episode_index,
157
+ });
158
+ const url = buildVersionedUrl(repoId, version, path);
159
+ expect(url).toBe(
160
+ `${DATASET_BASE}/${repoId}/resolve/main/data/000/episode_000000.parquet`,
161
+ );
162
+ });
163
+
164
+ test("episode 42 in chunk 0", () => {
165
+ const { episode_chunk, episode_index } = makeChunkAndIndex(42, 1000);
166
+ const path = formatStringWithVars(dataPath, {
167
+ episode_chunk,
168
+ episode_index,
169
+ });
170
+ expect(
171
+ formatStringWithVars(dataPath, { episode_chunk, episode_index }),
172
+ ).toBe("data/000/episode_000042.parquet");
173
+ const url = buildVersionedUrl(repoId, version, path);
174
+ expect(url).toContain("/data/000/episode_000042.parquet");
175
+ });
176
+
177
+ test("episode 1000 in chunk 1 (chunk boundary)", () => {
178
+ const { episode_chunk, episode_index } = makeChunkAndIndex(1000, 1000);
179
+ const path = formatStringWithVars(dataPath, {
180
+ episode_chunk,
181
+ episode_index,
182
+ });
183
+ expect(path).toBe("data/001/episode_001000.parquet");
184
+ });
185
+
186
+ test("v2.0 video URL for top camera", () => {
187
+ const { episode_chunk, episode_index } = makeChunkAndIndex(7, 1000);
188
+ const path = formatStringWithVars(videoPath, {
189
+ video_key: "observation.images.top",
190
+ episode_chunk,
191
+ episode_index,
192
+ });
193
+ const url = buildVersionedUrl(repoId, version, path);
194
+ expect(url).toBe(
195
+ `${DATASET_BASE}/${repoId}/resolve/main/videos/observation.images.top/chunk-000/episode_000007.mp4`,
196
+ );
197
+ });
198
+ });
199
+
200
+ describe("v2.1 path construction (youliangtan/so101-table-cleanup style)", () => {
201
+ // v2.1 uses the same path templates as v2.0
202
+ const dataPath =
203
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
204
+
205
+ test("episode 0 resolves correctly", () => {
206
+ const { episode_chunk, episode_index } = makeChunkAndIndex(0, 1000);
207
+ const path = formatStringWithVars(dataPath, {
208
+ episode_chunk,
209
+ episode_index,
210
+ });
211
+ expect(path).toBe("data/000/episode_000000.parquet");
212
+ });
213
+
214
+ test("episode in second chunk (chunk_size=1000, episode 1500)", () => {
215
+ const { episode_chunk, episode_index } = makeChunkAndIndex(1500, 1000);
216
+ const path = formatStringWithVars(dataPath, {
217
+ episode_chunk,
218
+ episode_index,
219
+ });
220
+ expect(path).toBe("data/001/episode_001500.parquet");
221
+ });
222
+
223
+ test("v2.1 URL is the same format as v2.0 (backward compatible)", () => {
224
+ const { episode_chunk, episode_index } = makeChunkAndIndex(5, 1000);
225
+ const v20path = formatStringWithVars(dataPath, {
226
+ episode_chunk,
227
+ episode_index,
228
+ });
229
+ const v21path = formatStringWithVars(dataPath, {
230
+ episode_chunk,
231
+ episode_index,
232
+ });
233
+ expect(v20path).toBe(v21path);
234
+ });
235
+ });
236
+
237
+ describe("v3.0 path construction (lerobot-data-collection/level12_rac_2_2026-02-07 style)", () => {
238
+ const repoId = "lerobot-data-collection/level12_rac_2_2026-02-07";
239
+ const version = "v3.0";
240
+
241
+ test("episode metadata path for first file", () => {
242
+ const path = buildV3EpisodesMetadataPath(0, 0);
243
+ const url = buildVersionedUrl(repoId, version, path);
244
+ expect(url).toBe(
245
+ `${DATASET_BASE}/${repoId}/resolve/main/meta/episodes/chunk-000/file-000.parquet`,
246
+ );
247
+ });
248
+
249
+ test("data path from episode metadata (chunk 0, file 2)", () => {
250
+ const path = buildV3DataPath(0, 2);
251
+ const url = buildVersionedUrl(repoId, version, path);
252
+ expect(url).toBe(
253
+ `${DATASET_BASE}/${repoId}/resolve/main/data/chunk-000/file-002.parquet`,
254
+ );
255
+ });
256
+
257
+ test("video path for top camera (chunk 0, file 0)", () => {
258
+ const path = buildV3VideoPath("observation.images.top", 0, 0);
259
+ const url = buildVersionedUrl(repoId, version, path);
260
+ expect(url).toBe(
261
+ `${DATASET_BASE}/${repoId}/resolve/main/videos/observation.images.top/chunk-000/file-000.mp4`,
262
+ );
263
+ });
264
+
265
+ test("video path for wrist camera with non-zero file index (per-camera metadata)", () => {
266
+ // v3.0 supports per-camera video segmentation — each camera can have different file indices
267
+ const path = buildV3VideoPath("observation.images.wrist", 0, 3);
268
+ expect(path).toBe("videos/observation.images.wrist/chunk-000/file-003.mp4");
269
+ });
270
+
271
+ test("data path for large dataset spanning multiple chunks", () => {
272
+ // Episode in chunk 1, file 5 based on episode metadata
273
+ const path = buildV3DataPath(1, 5);
274
+ expect(path).toBe("data/chunk-001/file-005.parquet");
275
+ });
276
+ });
277
+
278
+ // ---------------------------------------------------------------------------
279
+ // v3.0 episode metadata row parsing (parseEpisodeRowSimple-equivalent logic)
280
+ // Tests that the BigInt conversion and field extraction work correctly with
281
+ // realistic parquet row shapes from v3.0 datasets.
282
+ // ---------------------------------------------------------------------------
283
+
284
+ import { bigIntToNumber } from "@/utils/typeGuards";
285
+
286
+ describe("v3.0 episode metadata row parsing helpers", () => {
287
+ const toBigIntSafe = (value: unknown): number => {
288
+ if (typeof value === "bigint") return Number(value);
289
+ if (typeof value === "number") return value;
290
+ if (typeof value === "string") return parseInt(value) || 0;
291
+ return 0;
292
+ };
293
+
294
+ const toNumSafe = (value: unknown): number => {
295
+ if (typeof value === "number") return value;
296
+ if (typeof value === "bigint") return Number(value);
297
+ if (typeof value === "string") return parseFloat(value) || 0;
298
+ return 0;
299
+ };
300
+
301
+ test("parses named-key row (v3.0 primary format)", () => {
302
+ // Simulates a row from meta/episodes/chunk-000/file-000.parquet
303
+ const row: Record<string, unknown> = {
304
+ episode_index: 0n,
305
+ "data/chunk_index": 0n,
306
+ "data/file_index": 2n,
307
+ dataset_from_index: 0n,
308
+ dataset_to_index: 200n,
309
+ length: 200n,
310
+ "videos/observation.images.top/chunk_index": 0n,
311
+ "videos/observation.images.top/file_index": 0n,
312
+ "videos/observation.images.top/from_timestamp": 0.0,
313
+ "videos/observation.images.top/to_timestamp": 4.0,
314
+ };
315
+
316
+ expect(toBigIntSafe(row["episode_index"])).toBe(0);
317
+ expect(toBigIntSafe(row["data/file_index"])).toBe(2);
318
+ expect(toBigIntSafe(row["dataset_from_index"])).toBe(0);
319
+ expect(toBigIntSafe(row["dataset_to_index"])).toBe(200);
320
+ expect(toBigIntSafe(row["length"])).toBe(200);
321
+ expect(toNumSafe(row["videos/observation.images.top/from_timestamp"])).toBe(
322
+ 0.0,
323
+ );
324
+ expect(toNumSafe(row["videos/observation.images.top/to_timestamp"])).toBe(
325
+ 4.0,
326
+ );
327
+ });
328
+
329
+ test("parses numeric-key row (fallback format)", () => {
330
+ // Fallback when column names are not available (older v3 datasets)
331
+ const row: Record<string, unknown> = {
332
+ "0": 5, // episode_index
333
+ "1": 0, // data_chunk_index
334
+ "2": 3, // data_file_index
335
+ "3": 600, // dataset_from_index
336
+ "4": 800, // dataset_to_index
337
+ "5": 0, // video_chunk_index
338
+ "6": 3, // video_file_index
339
+ "7": 12.0, // video_from_timestamp
340
+ "8": 16.0, // video_to_timestamp
341
+ "9": 200, // length
342
+ };
343
+
344
+ const toNum = (v: unknown, fallback = 0): number =>
345
+ typeof v === "number" ? v : typeof v === "bigint" ? Number(v) : fallback;
346
+
347
+ expect(toNum(row["0"])).toBe(5); // episode_index
348
+ expect(toNum(row["2"])).toBe(3); // data_file_index
349
+ expect(toNum(row["3"])).toBe(600); // dataset_from_index
350
+ expect(toNum(row["4"])).toBe(800); // dataset_to_index
351
+ expect(toNum(row["8"], 30)).toBe(16.0); // video_to_timestamp
352
+ });
353
+
354
+ test("bigIntToNumber converts all BigInt parquet columns correctly", () => {
355
+ // v3.0 integer columns come out of hyparquet as BigInt
356
+ expect(bigIntToNumber(0n, 0)).toBe(0);
357
+ expect(bigIntToNumber(200n, 0)).toBe(200);
358
+ expect(bigIntToNumber(1234567n, 0)).toBe(1234567);
359
+ // Float columns remain as regular numbers
360
+ expect(bigIntToNumber(4.0, 0)).toBe(4.0);
361
+ });
362
+
363
+ test("video segmentation timestamps are correctly derived for multiple episodes", () => {
364
+ // Each episode has its own video segment; timestamps accumulate per episode
365
+ const episodes = [
366
+ { from_timestamp: 0.0, to_timestamp: 4.0, length: 200 },
367
+ { from_timestamp: 4.0, to_timestamp: 8.2, length: 210 },
368
+ { from_timestamp: 8.2, to_timestamp: 12.0, length: 190 },
369
+ ];
370
+
371
+ episodes.forEach((ep) => {
372
+ const duration = ep.to_timestamp - ep.from_timestamp;
373
+ expect(duration).toBeGreaterThan(0);
374
+ expect(ep.from_timestamp).toBeLessThan(ep.to_timestamp);
375
+ });
376
+
377
+ // Segments are contiguous (each episode starts where the previous ends)
378
+ for (let i = 1; i < episodes.length; i++) {
379
+ expect(episodes[i].from_timestamp).toBeCloseTo(
380
+ episodes[i - 1].to_timestamp,
381
+ 5,
382
+ );
383
+ }
384
+ });
385
+ });
src/utils/__tests__/dataProcessing.test.ts ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ groupRowBySuffix,
4
+ buildSuffixGroupsMap,
5
+ computeGroupStats,
6
+ groupByScale,
7
+ flattenScaleGroups,
8
+ processChartDataGroups,
9
+ } from "@/utils/dataProcessing";
10
+ import { CHART_CONFIG } from "@/utils/constants";
11
+
12
+ const DELIM = CHART_CONFIG.SERIES_NAME_DELIMITER; // " | "
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // groupRowBySuffix
16
+ // ---------------------------------------------------------------------------
17
+ describe("groupRowBySuffix", () => {
18
+ test("passes through timestamp unchanged", () => {
19
+ const result = groupRowBySuffix({ timestamp: 1.5 });
20
+ expect(result.timestamp).toBe(1.5);
21
+ });
22
+
23
+ test("keeps single-prefix suffix keys as flat entries with full original name", () => {
24
+ // `action | 0`, `action | 1`, `action | 2` each have a UNIQUE prefix per suffix,
25
+ // so they stay flat (no nesting). Nesting only occurs when multiple prefixes
26
+ // share the same numeric suffix (e.g. state | 0 AND action | 0).
27
+ const row = {
28
+ [`action${DELIM}0`]: 0.1,
29
+ [`action${DELIM}1`]: 0.2,
30
+ [`action${DELIM}2`]: 0.3,
31
+ timestamp: 0,
32
+ };
33
+ const result = groupRowBySuffix(row);
34
+ expect(result[`action${DELIM}0`]).toBe(0.1);
35
+ expect(result[`action${DELIM}1`]).toBe(0.2);
36
+ expect(result[`action${DELIM}2`]).toBe(0.3);
37
+ });
38
+
39
+ test("keeps keys without delimiter at top level", () => {
40
+ const row = { progress: 0.75, timestamp: 2.0 };
41
+ const result = groupRowBySuffix(row);
42
+ expect(result["progress"]).toBe(0.75);
43
+ });
44
+
45
+ test("preserves single-member suffix as full original key", () => {
46
+ // A key like "observation.state | 0" that is alone in its suffix group
47
+ // should remain at the top level with its full original name
48
+ const row = { [`solo_col${DELIM}joint`]: 1.0 };
49
+ const result = groupRowBySuffix(row);
50
+ expect(result[`solo_col${DELIM}joint`]).toBe(1.0);
51
+ });
52
+
53
+ test("groups by suffix when multiple prefixes share the same suffix (v2.x state+action)", () => {
54
+ // `observation.state | 0` and `action | 0` both have suffix "0",
55
+ // so they are grouped under the key "0" as a nested object { "observation.state": ..., "action": ... }.
56
+ const row = {
57
+ [`observation.state${DELIM}0`]: 0.1,
58
+ [`observation.state${DELIM}1`]: 0.2,
59
+ [`action${DELIM}0`]: 0.5,
60
+ [`action${DELIM}1`]: 0.6,
61
+ timestamp: 0.5,
62
+ };
63
+ const result = groupRowBySuffix(row);
64
+ // Both suffix "0" groups: observation.state and action
65
+ const group0 = result["0"] as Record<string, number>;
66
+ const group1 = result["1"] as Record<string, number>;
67
+ expect(group0["observation.state"]).toBe(0.1);
68
+ expect(group0["action"]).toBe(0.5);
69
+ expect(group1["observation.state"]).toBe(0.2);
70
+ expect(group1["action"]).toBe(0.6);
71
+ });
72
+ });
73
+
74
+ // ---------------------------------------------------------------------------
75
+ // buildSuffixGroupsMap
76
+ // ---------------------------------------------------------------------------
77
+ describe("buildSuffixGroupsMap", () => {
78
+ test("groups keys by their suffix", () => {
79
+ const keys = [
80
+ `action${DELIM}0`,
81
+ `action${DELIM}1`,
82
+ `observation.state${DELIM}0`,
83
+ ];
84
+ const map = buildSuffixGroupsMap(keys);
85
+ expect(map["action"]).toBeUndefined(); // suffix is "0" and "1"
86
+ expect(map["0"]).toContain(`action${DELIM}0`);
87
+ expect(map["0"]).toContain(`observation.state${DELIM}0`);
88
+ expect(map["1"]).toContain(`action${DELIM}1`);
89
+ });
90
+
91
+ test("keys without delimiter fall back to the key itself", () => {
92
+ const map = buildSuffixGroupsMap(["progress"]);
93
+ expect(map["progress"]).toEqual(["progress"]);
94
+ });
95
+
96
+ test("returns empty object for empty input", () => {
97
+ expect(buildSuffixGroupsMap([])).toEqual({});
98
+ });
99
+ });
100
+
101
+ // ---------------------------------------------------------------------------
102
+ // computeGroupStats
103
+ // ---------------------------------------------------------------------------
104
+ describe("computeGroupStats", () => {
105
+ test("computes correct min and max across all rows for each group", () => {
106
+ const chartData = [
107
+ { "action | 0": 1.0, "action | 1": -2.0 },
108
+ { "action | 0": 3.0, "action | 1": 0.5 },
109
+ ];
110
+ const groups = [["action | 0", "action | 1"]];
111
+ const stats = computeGroupStats(chartData, groups);
112
+ expect(stats["action | 0"].min).toBe(-2.0);
113
+ expect(stats["action | 0"].max).toBe(3.0);
114
+ });
115
+
116
+ test("ignores NaN values", () => {
117
+ const chartData = [{ col: NaN }, { col: 5 }, { col: 2 }];
118
+ const stats = computeGroupStats(chartData, [["col"]]);
119
+ expect(stats["col"].min).toBe(2);
120
+ expect(stats["col"].max).toBe(5);
121
+ });
122
+
123
+ test("returns Infinity/-Infinity for all-NaN group (group skipped in groupByScale)", () => {
124
+ const chartData = [{ col: NaN }];
125
+ const stats = computeGroupStats(chartData, [["col"]]);
126
+ expect(stats["col"].min).toBe(Infinity);
127
+ expect(stats["col"].max).toBe(-Infinity);
128
+ });
129
+ });
130
+
131
+ // ---------------------------------------------------------------------------
132
+ // groupByScale
133
+ // ---------------------------------------------------------------------------
134
+ describe("groupByScale", () => {
135
+ test("groups series with similar scale together", () => {
136
+ // Two series both in range ~[0, 1] — should be grouped
137
+ const suffixGroups = [["a"], ["b"]];
138
+ const stats = {
139
+ a: { min: 0.1, max: 1.0 },
140
+ b: { min: 0.2, max: 0.9 },
141
+ };
142
+ const result = groupByScale(suffixGroups, stats);
143
+ const groups = Object.values(result);
144
+ // Both a and b have similar log-scale range, expect them merged
145
+ expect(groups.some((g) => g.length === 2)).toBe(true);
146
+ });
147
+
148
+ test("keeps series with vastly different scales separate", () => {
149
+ // One series in [0,1], another in [0, 1000]
150
+ const suffixGroups = [["small"], ["large"]];
151
+ const stats = {
152
+ small: { min: 0.001, max: 1.0 },
153
+ large: { min: 100, max: 1000 },
154
+ };
155
+ const result = groupByScale(suffixGroups, stats);
156
+ // Each should be in its own group
157
+ expect(Object.keys(result).length).toBe(2);
158
+ });
159
+
160
+ test("skips groups with non-finite stats", () => {
161
+ const suffixGroups = [["bad"]];
162
+ const stats = { bad: { min: Infinity, max: -Infinity } };
163
+ const result = groupByScale(suffixGroups, stats);
164
+ expect(Object.keys(result).length).toBe(0);
165
+ });
166
+ });
167
+
168
+ // ---------------------------------------------------------------------------
169
+ // flattenScaleGroups
170
+ // ---------------------------------------------------------------------------
171
+ describe("flattenScaleGroups", () => {
172
+ test("returns each scale group as a flat array of keys", () => {
173
+ const scaleGroups = { a: [["a", "b"], ["c"]] };
174
+ const result = flattenScaleGroups(scaleGroups);
175
+ expect(result).toEqual([["a", "b", "c"]]);
176
+ });
177
+
178
+ test("splits large groups exceeding MAX_SERIES_PER_GROUP", () => {
179
+ const MAX = CHART_CONFIG.MAX_SERIES_PER_GROUP; // 6
180
+ const bigGroup = Array.from({ length: MAX + 2 }, (_, i) => [`key_${i}`]);
181
+ const scaleGroups = { key_0: bigGroup };
182
+ const result = flattenScaleGroups(scaleGroups);
183
+ // Should be split into 2 sub-groups
184
+ expect(result.length).toBe(2);
185
+ expect(result[0].length).toBe(MAX);
186
+ expect(result[1].length).toBe(2);
187
+ });
188
+
189
+ test("groups with more sub-arrays come first (sorted by length desc)", () => {
190
+ const scaleGroups = {
191
+ a: [["a"]], // 1 sub-group
192
+ b: [["b"], ["c"]], // 2 sub-groups
193
+ };
194
+ const result = flattenScaleGroups(scaleGroups);
195
+ // b (2 sub-groups) should come before a (1 sub-group)
196
+ expect(result[0]).toContain("b");
197
+ });
198
+ });
199
+
200
+ // ---------------------------------------------------------------------------
201
+ // processChartDataGroups — end-to-end pipeline
202
+ // ---------------------------------------------------------------------------
203
+ describe("processChartDataGroups", () => {
204
+ test("returns an empty array for empty chart data", () => {
205
+ const result = processChartDataGroups(["timestamp"], []);
206
+ expect(result).toEqual([]);
207
+ });
208
+
209
+ test("groups v2.x style action+state series correctly", () => {
210
+ const seriesNames = [
211
+ "timestamp",
212
+ `observation.state${DELIM}0`,
213
+ `observation.state${DELIM}1`,
214
+ `action${DELIM}0`,
215
+ `action${DELIM}1`,
216
+ ];
217
+ const chartData = [
218
+ {
219
+ timestamp: 0,
220
+ [`observation.state${DELIM}0`]: 0.1,
221
+ [`observation.state${DELIM}1`]: 0.2,
222
+ [`action${DELIM}0`]: 0.5,
223
+ [`action${DELIM}1`]: 0.6,
224
+ },
225
+ {
226
+ timestamp: 0.1,
227
+ [`observation.state${DELIM}0`]: 0.15,
228
+ [`observation.state${DELIM}1`]: 0.25,
229
+ [`action${DELIM}0`]: 0.55,
230
+ [`action${DELIM}1`]: 0.65,
231
+ },
232
+ ];
233
+ const result = processChartDataGroups(seriesNames, chartData);
234
+ // All four series share similar scale, so likely merged into 1-2 groups
235
+ expect(result.length).toBeGreaterThanOrEqual(1);
236
+ // Each element is an array of keys
237
+ const allKeys = result.flat();
238
+ expect(allKeys).toContain(`observation.state${DELIM}0`);
239
+ expect(allKeys).toContain(`action${DELIM}0`);
240
+ });
241
+
242
+ test("handles single series without delimiter", () => {
243
+ const seriesNames = ["timestamp", "progress"];
244
+ const chartData = [
245
+ { timestamp: 0, progress: 0.0 },
246
+ { timestamp: 1, progress: 0.5 },
247
+ { timestamp: 2, progress: 1.0 },
248
+ ];
249
+ const result = processChartDataGroups(seriesNames, chartData);
250
+ expect(result.length).toBe(1);
251
+ expect(result[0]).toContain("progress");
252
+ });
253
+ });
src/utils/__tests__/parquetUtils.test.ts ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ formatStringWithVars,
4
+ arrayToCSV,
5
+ getRows,
6
+ } from "@/utils/parquetUtils";
7
+ import { PADDING } from "@/utils/constants";
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // formatStringWithVars — used to build v2.x data / video paths at runtime
11
+ // ---------------------------------------------------------------------------
12
+ describe("formatStringWithVars", () => {
13
+ // v2.0 dataset path templates (real format from rabhishek100/so100_train_dataset)
14
+ test("v2.0 data_path template with pre-padded vars", () => {
15
+ const template =
16
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
17
+ const episodeId = 42;
18
+ const chunkSize = 1000;
19
+ const episode_chunk = Math.floor(episodeId / chunkSize)
20
+ .toString()
21
+ .padStart(PADDING.CHUNK_INDEX, "0");
22
+ const episode_index = episodeId
23
+ .toString()
24
+ .padStart(PADDING.EPISODE_INDEX, "0");
25
+ expect(
26
+ formatStringWithVars(template, { episode_chunk, episode_index }),
27
+ ).toBe("data/000/episode_000042.parquet");
28
+ });
29
+
30
+ // v2.1 dataset path templates (same format as v2.0)
31
+ test("v2.1 data_path template — identical format to v2.0", () => {
32
+ const template =
33
+ "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet";
34
+ const episode_chunk = (1).toString().padStart(PADDING.CHUNK_INDEX, "0");
35
+ const episode_index = (1500)
36
+ .toString()
37
+ .padStart(PADDING.EPISODE_INDEX, "0");
38
+ expect(
39
+ formatStringWithVars(template, { episode_chunk, episode_index }),
40
+ ).toBe("data/001/episode_001500.parquet");
41
+ });
42
+
43
+ // v2.x video_path template
44
+ test("v2.x video_path template with video_key, chunk, episode", () => {
45
+ const template =
46
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4";
47
+ const episode_chunk = (0).toString().padStart(PADDING.CHUNK_INDEX, "0");
48
+ const episode_index = (7).toString().padStart(PADDING.EPISODE_INDEX, "0");
49
+ expect(
50
+ formatStringWithVars(template, {
51
+ video_key: "observation.images.top",
52
+ episode_chunk,
53
+ episode_index,
54
+ }),
55
+ ).toBe("videos/observation.images.top/chunk-000/episode_000007.mp4");
56
+ });
57
+
58
+ test("leaves unmatched placeholders as 'undefined'", () => {
59
+ // When a variable is missing the replacement returns "undefined" (String(undefined))
60
+ const result = formatStringWithVars("data/{missing_key}.parquet", {});
61
+ expect(result).toBe("data/undefined.parquet");
62
+ });
63
+
64
+ test("handles template without format specifier", () => {
65
+ expect(formatStringWithVars("{a}/{b}", { a: "foo", b: "bar" })).toBe(
66
+ "foo/bar",
67
+ );
68
+ });
69
+
70
+ test("strips :Nd format specifier, uses pre-padded string value", () => {
71
+ // The function does NOT zero-pad; the caller is responsible for padding
72
+ expect(formatStringWithVars("{x:06d}", { x: "000042" })).toBe("000042");
73
+ });
74
+ });
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // arrayToCSV
78
+ // ---------------------------------------------------------------------------
79
+ describe("arrayToCSV", () => {
80
+ test("converts 2D array to CSV string", () => {
81
+ const data = [
82
+ [1, 2, 3],
83
+ [4, 5, 6],
84
+ ];
85
+ expect(arrayToCSV(data)).toBe("1,2,3\n4,5,6");
86
+ });
87
+
88
+ test("handles single row", () => {
89
+ expect(arrayToCSV([[10, 20]])).toBe("10,20");
90
+ });
91
+
92
+ test("handles string values", () => {
93
+ expect(
94
+ arrayToCSV([
95
+ ["a", "b"],
96
+ ["c", "d"],
97
+ ]),
98
+ ).toBe("a,b\nc,d");
99
+ });
100
+
101
+ test("handles empty array", () => {
102
+ expect(arrayToCSV([])).toBe("");
103
+ });
104
+ });
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // getRows — used to build the data table view from flat parquet column data
108
+ // ---------------------------------------------------------------------------
109
+ describe("getRows", () => {
110
+ test("returns empty array when currentFrameData is empty", () => {
111
+ const cols = [{ key: "state", value: ["s0", "s1"] }];
112
+ expect(getRows([], cols)).toEqual([]);
113
+ });
114
+
115
+ test("constructs rows from flat data with equal-length columns", () => {
116
+ // state: [0.1, 0.2], action: [0.5, 0.6] — flat layout: [s0, s1, a0, a1]
117
+ const cols = [
118
+ { key: "observation.state", value: ["s0", "s1"] },
119
+ { key: "action", value: ["a0", "a1"] },
120
+ ];
121
+ const flat = [0.1, 0.2, 0.5, 0.6];
122
+ const rows = getRows(flat, cols);
123
+ expect(rows.length).toBe(2);
124
+ expect(rows[0]).toEqual([0.1, 0.5]);
125
+ expect(rows[1]).toEqual([0.2, 0.6]);
126
+ });
127
+
128
+ test("null-pads shorter columns (action has fewer dims than state)", () => {
129
+ // state: 3 dims, action: 2 dims — row 2 should have null for action
130
+ const cols = [
131
+ { key: "state", value: ["s0", "s1", "s2"] },
132
+ { key: "action", value: ["a0", "a1"] },
133
+ ];
134
+ const flat = [0.1, 0.2, 0.3, 0.5, 0.6]; // s0,s1,s2,a0,a1
135
+ const rows = getRows(flat, cols);
136
+ expect(rows.length).toBe(3);
137
+ expect(rows[2][1]).toEqual({ isNull: true });
138
+ });
139
+
140
+ test("handles single-column data (v2.x progress series)", () => {
141
+ const cols = [{ key: "progress", value: ["p0"] }];
142
+ const flat = [0.75];
143
+ const rows = getRows(flat, cols);
144
+ expect(rows.length).toBe(1);
145
+ expect(rows[0]).toEqual([0.75]);
146
+ });
147
+ });
src/utils/__tests__/stringFormatting.test.ts ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ padNumber,
4
+ formatEpisodeChunk,
5
+ formatEpisodeIndex,
6
+ formatFileIndex,
7
+ formatChunkIndex,
8
+ buildV3VideoPath,
9
+ buildV3DataPath,
10
+ buildV3EpisodesMetadataPath,
11
+ } from "@/utils/stringFormatting";
12
+
13
// These utilities are the foundation of v3.0 path construction.
// v2.x uses formatStringWithVars + manual padStart instead.

describe("padNumber", () => {
  test("pads single digit to 3", () => {
    expect(padNumber(1, 3)).toBe("001");
  });
  test("pads zero to 6", () => {
    expect(padNumber(0, 6)).toBe("000000");
  });
  // padStart-style semantics: a value wider than the target length is
  // returned unchanged, never truncated.
  test("does not truncate numbers longer than length", () => {
    expect(padNumber(1234, 3)).toBe("1234");
  });
  test("pads to exact length when already equal", () => {
    expect(padNumber(42, 2)).toBe("42");
  });
});

describe("formatEpisodeChunk — 3-digit padding (v2.x chunk_index, v3 chunk_index)", () => {
  test("chunk 0 → '000'", () => {
    expect(formatEpisodeChunk(0)).toBe("000");
  });
  test("chunk 1 → '001'", () => {
    expect(formatEpisodeChunk(1)).toBe("001");
  });
  test("chunk 42 → '042'", () => {
    expect(formatEpisodeChunk(42)).toBe("042");
  });
  // 999 is the widest value that still fits three digits without padding.
  test("chunk 999 → '999'", () => {
    expect(formatEpisodeChunk(999)).toBe("999");
  });
});

describe("formatEpisodeIndex — 6-digit padding (v2.x episode_index)", () => {
  test("index 0 → '000000'", () => {
    expect(formatEpisodeIndex(0)).toBe("000000");
  });
  test("index 42 → '000042'", () => {
    expect(formatEpisodeIndex(42)).toBe("000042");
  });
  test("index 999999 → '999999'", () => {
    expect(formatEpisodeIndex(999999)).toBe("999999");
  });
});

describe("formatFileIndex — 3-digit padding (v3.0 file_index)", () => {
  test("file 0 → '000'", () => {
    expect(formatFileIndex(0)).toBe("000");
  });
  test("file 5 → '005'", () => {
    expect(formatFileIndex(5)).toBe("005");
  });
  test("file 100 → '100'", () => {
    expect(formatFileIndex(100)).toBe("100");
  });
});

describe("formatChunkIndex — 3-digit padding (v3.0 chunk_index)", () => {
  test("chunk 0 → '000'", () => {
    expect(formatChunkIndex(0)).toBe("000");
  });
  test("chunk 12 → '012'", () => {
    expect(formatChunkIndex(12)).toBe("012");
  });
});
78
+
79
// v3.0 specific path builders
describe("buildV3VideoPath", () => {
  test("single camera, chunk 0, file 0", () => {
    expect(buildV3VideoPath("observation.image", 0, 0)).toBe(
      "videos/observation.image/chunk-000/file-000.mp4",
    );
  });

  // Camera keys may be dot-nested; they are embedded in the path verbatim.
  test("nested camera key, non-zero chunk and file", () => {
    expect(buildV3VideoPath("observation.images.wrist", 2, 5)).toBe(
      "videos/observation.images.wrist/chunk-002/file-005.mp4",
    );
  });

  test("two-camera SO101 dataset style", () => {
    expect(buildV3VideoPath("observation.images.top", 0, 1)).toBe(
      "videos/observation.images.top/chunk-000/file-001.mp4",
    );
  });
});

describe("buildV3DataPath", () => {
  test("chunk 0, file 0", () => {
    expect(buildV3DataPath(0, 0)).toBe("data/chunk-000/file-000.parquet");
  });
  test("chunk 1, file 3", () => {
    expect(buildV3DataPath(1, 3)).toBe("data/chunk-001/file-003.parquet");
  });
  test("large indices", () => {
    expect(buildV3DataPath(10, 99)).toBe("data/chunk-010/file-099.parquet");
  });
});

describe("buildV3EpisodesMetadataPath", () => {
  test("chunk 0, file 0 (default for most datasets)", () => {
    expect(buildV3EpisodesMetadataPath(0, 0)).toBe(
      "meta/episodes/chunk-000/file-000.parquet",
    );
  });
  test("chunk 0, file 2 (multiple metadata files)", () => {
    expect(buildV3EpisodesMetadataPath(0, 2)).toBe(
      "meta/episodes/chunk-000/file-002.parquet",
    );
  });
});
src/utils/__tests__/typeGuards.test.ts ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test } from "bun:test";
2
+ import {
3
+ isBigInt,
4
+ bigIntToNumber,
5
+ isNumeric,
6
+ isValidTaskIndex,
7
+ toString,
8
+ isNonEmptyString,
9
+ isObject,
10
+ hasPropertyOfType,
11
+ } from "@/utils/typeGuards";
12
+
13
+ describe("isBigInt", () => {
14
+ test("returns true for BigInt", () => {
15
+ expect(isBigInt(42n)).toBe(true);
16
+ });
17
+ test("returns false for number", () => {
18
+ expect(isBigInt(42)).toBe(false);
19
+ });
20
+ test("returns false for string", () => {
21
+ expect(isBigInt("42")).toBe(false);
22
+ });
23
+ test("returns false for null", () => {
24
+ expect(isBigInt(null)).toBe(false);
25
+ });
26
+ });
27
+
28
+ describe("bigIntToNumber", () => {
29
+ test("converts BigInt to number", () => {
30
+ expect(bigIntToNumber(42n)).toBe(42);
31
+ });
32
+ test("passes through a regular number unchanged", () => {
33
+ expect(bigIntToNumber(3.14)).toBe(3.14);
34
+ });
35
+ test("returns default fallback (0) for non-numeric value", () => {
36
+ expect(bigIntToNumber("hello")).toBe(0);
37
+ });
38
+ test("returns custom fallback for non-numeric value", () => {
39
+ expect(bigIntToNumber(null, -1)).toBe(-1);
40
+ });
41
+ test("converts 0n correctly", () => {
42
+ expect(bigIntToNumber(0n)).toBe(0);
43
+ });
44
+ // Parquet files from v3.0 datasets return BigInt for integer columns
45
+ test("handles large BigInt values from parquet (e.g., frame counts)", () => {
46
+ expect(bigIntToNumber(1000000n)).toBe(1000000);
47
+ });
48
+ });
49
+
50
+ describe("isNumeric", () => {
51
+ test("returns true for number", () => {
52
+ expect(isNumeric(1.5)).toBe(true);
53
+ });
54
+ test("returns true for BigInt (as seen in parquet columns)", () => {
55
+ expect(isNumeric(100n)).toBe(true);
56
+ });
57
+ test("returns false for string", () => {
58
+ expect(isNumeric("5")).toBe(false);
59
+ });
60
+ test("returns false for null", () => {
61
+ expect(isNumeric(null)).toBe(false);
62
+ });
63
+ test("returns false for boolean", () => {
64
+ expect(isNumeric(true)).toBe(false);
65
+ });
66
+ });
67
+
68
+ describe("isValidTaskIndex", () => {
69
+ test("returns true for 0", () => {
70
+ expect(isValidTaskIndex(0)).toBe(true);
71
+ });
72
+ test("returns true for positive integer", () => {
73
+ expect(isValidTaskIndex(5)).toBe(true);
74
+ });
75
+ test("returns true for BigInt 0n (v3 parquet style)", () => {
76
+ expect(isValidTaskIndex(0n)).toBe(true);
77
+ });
78
+ test("returns false for negative number", () => {
79
+ expect(isValidTaskIndex(-1)).toBe(false);
80
+ });
81
+ test("returns false for float", () => {
82
+ expect(isValidTaskIndex(1.5)).toBe(false);
83
+ });
84
+ test("returns false for null", () => {
85
+ expect(isValidTaskIndex(null)).toBe(false);
86
+ });
87
+ test("returns false for undefined", () => {
88
+ expect(isValidTaskIndex(undefined)).toBe(false);
89
+ });
90
+ });
91
+
92
+ describe("toString", () => {
93
+ test("returns string as-is", () => {
94
+ expect(toString("hello")).toBe("hello");
95
+ });
96
+ test("returns empty string for null", () => {
97
+ expect(toString(null)).toBe("");
98
+ });
99
+ test("returns empty string for undefined", () => {
100
+ expect(toString(undefined)).toBe("");
101
+ });
102
+ test("converts number to string", () => {
103
+ expect(toString(42)).toBe("42");
104
+ });
105
+ test("converts BigInt to string", () => {
106
+ expect(toString(7n)).toBe("7");
107
+ });
108
+ });
109
+
110
+ describe("isNonEmptyString", () => {
111
+ test("returns true for non-empty string", () => {
112
+ expect(isNonEmptyString("hello")).toBe(true);
113
+ });
114
+ test("returns false for empty string", () => {
115
+ expect(isNonEmptyString("")).toBe(false);
116
+ });
117
+ test("returns false for number", () => {
118
+ expect(isNonEmptyString(5)).toBe(false);
119
+ });
120
+ test("returns false for null", () => {
121
+ expect(isNonEmptyString(null)).toBe(false);
122
+ });
123
+ });
124
+
125
+ describe("isObject", () => {
126
+ test("returns true for plain object", () => {
127
+ expect(isObject({ a: 1 })).toBe(true);
128
+ });
129
+ test("returns false for null (typeof null === 'object' trap)", () => {
130
+ expect(isObject(null)).toBe(false);
131
+ });
132
+ test("returns false for array", () => {
133
+ expect(isObject([1, 2])).toBe(false);
134
+ });
135
+ test("returns false for string", () => {
136
+ expect(isObject("hello")).toBe(false);
137
+ });
138
+ test("returns true for empty object", () => {
139
+ expect(isObject({})).toBe(true);
140
+ });
141
+ });
142
+
143
+ describe("hasPropertyOfType", () => {
144
+ test("returns true when property exists and passes type guard", () => {
145
+ expect(
146
+ hasPropertyOfType(
147
+ { x: 42 },
148
+ "x",
149
+ (v): v is number => typeof v === "number",
150
+ ),
151
+ ).toBe(true);
152
+ });
153
+ test("returns false when property exists but fails type guard", () => {
154
+ expect(
155
+ hasPropertyOfType(
156
+ { x: "hello" },
157
+ "x",
158
+ (v): v is number => typeof v === "number",
159
+ ),
160
+ ).toBe(false);
161
+ });
162
+ test("returns false when property does not exist", () => {
163
+ expect(
164
+ hasPropertyOfType(
165
+ { a: 1 },
166
+ "b",
167
+ (v): v is number => typeof v === "number",
168
+ ),
169
+ ).toBe(false);
170
+ });
171
+ test("returns false for non-object input", () => {
172
+ expect(
173
+ hasPropertyOfType(null, "x", (v): v is number => typeof v === "number"),
174
+ ).toBe(false);
175
+ });
176
+ });
src/utils/__tests__/versionUtils.test.ts ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { describe, expect, test, mock, afterEach } from "bun:test";
2
+ import { buildVersionedUrl } from "@/utils/versionUtils";
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // buildVersionedUrl — pure function, no mocking needed
6
+ // ---------------------------------------------------------------------------
7
+ describe("buildVersionedUrl", () => {
8
+ test("builds URL for v2.0 dataset data path", () => {
9
+ const url = buildVersionedUrl(
10
+ "rabhishek100/so100_train_dataset",
11
+ "v2.0",
12
+ "data/000/episode_000000.parquet",
13
+ );
14
+ expect(url).toBe(
15
+ "https://huggingface.co/datasets/rabhishek100/so100_train_dataset/resolve/main/data/000/episode_000000.parquet",
16
+ );
17
+ });
18
+
19
+ test("builds URL for v2.1 dataset video path", () => {
20
+ const url = buildVersionedUrl(
21
+ "youliangtan/so101-table-cleanup",
22
+ "v2.1",
23
+ "videos/observation.images.top/chunk-000/episode_000007.mp4",
24
+ );
25
+ expect(url).toBe(
26
+ "https://huggingface.co/datasets/youliangtan/so101-table-cleanup/resolve/main/videos/observation.images.top/chunk-000/episode_000007.mp4",
27
+ );
28
+ });
29
+
30
+ test("builds URL for v3.0 episode metadata", () => {
31
+ const url = buildVersionedUrl(
32
+ "lerobot-data-collection/level12_rac_2_2026-02-07",
33
+ "v3.0",
34
+ "meta/episodes/chunk-000/file-000.parquet",
35
+ );
36
+ expect(url).toBe(
37
+ "https://huggingface.co/datasets/lerobot-data-collection/level12_rac_2_2026-02-07/resolve/main/meta/episodes/chunk-000/file-000.parquet",
38
+ );
39
+ });
40
+
41
+ test("builds URL for v3.0 data chunk", () => {
42
+ const url = buildVersionedUrl(
43
+ "lerobot-data-collection/level12_rac_2_2026-02-07",
44
+ "v3.0",
45
+ "data/chunk-001/file-003.parquet",
46
+ );
47
+ expect(url).toBe(
48
+ "https://huggingface.co/datasets/lerobot-data-collection/level12_rac_2_2026-02-07/resolve/main/data/chunk-001/file-003.parquet",
49
+ );
50
+ });
51
+
52
+ test("builds URL for meta/info.json", () => {
53
+ const url = buildVersionedUrl("myorg/mydataset", "v3.0", "meta/info.json");
54
+ expect(url).toBe(
55
+ "https://huggingface.co/datasets/myorg/mydataset/resolve/main/meta/info.json",
56
+ );
57
+ });
58
+ });
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // getDatasetVersionAndInfo — tested with mocked fetch
62
+ // ---------------------------------------------------------------------------
63
+ describe("getDatasetVersionAndInfo", () => {
64
+ const originalFetch = globalThis.fetch;
65
+
66
+ afterEach(() => {
67
+ globalThis.fetch = originalFetch;
68
+ });
69
+
70
+ test("accepts v2.0 codebase_version", async () => {
71
+ const infoV20 = {
72
+ codebase_version: "v2.0",
73
+ robot_type: "so100",
74
+ total_episodes: 50,
75
+ total_frames: 5000,
76
+ total_tasks: 1,
77
+ chunks_size: 1000,
78
+ data_files_size_in_mb: 10,
79
+ video_files_size_in_mb: 500,
80
+ fps: 30,
81
+ splits: { train: "0:50" },
82
+ data_path: "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet",
83
+ video_path:
84
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4",
85
+ features: {
86
+ "observation.images.top": {
87
+ dtype: "video",
88
+ shape: [480, 640, 3],
89
+ names: null,
90
+ },
91
+ "observation.state": {
92
+ dtype: "float32",
93
+ shape: [1, 6],
94
+ names: ["j0", "j1", "j2", "j3", "j4", "j5"],
95
+ },
96
+ action: {
97
+ dtype: "float32",
98
+ shape: [1, 6],
99
+ names: ["j0", "j1", "j2", "j3", "j4", "j5"],
100
+ },
101
+ },
102
+ };
103
+
104
+ globalThis.fetch = mock(() =>
105
+ Promise.resolve(new Response(JSON.stringify(infoV20), { status: 200 })),
106
+ ) as unknown as typeof fetch;
107
+
108
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
109
+ const result = await getDatasetVersionAndInfo(
110
+ "rabhishek100/so100_train_dataset",
111
+ );
112
+ expect(result.version).toBe("v2.0");
113
+ expect(result.info.total_episodes).toBe(50);
114
+ });
115
+
116
+ test("accepts v2.1 codebase_version", async () => {
117
+ const infoV21 = {
118
+ codebase_version: "v2.1",
119
+ robot_type: "so101",
120
+ total_episodes: 100,
121
+ total_frames: 10000,
122
+ total_tasks: 1,
123
+ chunks_size: 1000,
124
+ data_files_size_in_mb: 20,
125
+ video_files_size_in_mb: 1000,
126
+ fps: 30,
127
+ splits: { train: "0:100" },
128
+ data_path: "data/{episode_chunk:03d}/episode_{episode_index:06d}.parquet",
129
+ video_path:
130
+ "videos/{video_key}/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.mp4",
131
+ features: {
132
+ "observation.images.top": {
133
+ dtype: "video",
134
+ shape: [480, 640, 3],
135
+ names: null,
136
+ },
137
+ "observation.state": { dtype: "float32", shape: [1, 6], names: null },
138
+ action: { dtype: "float32", shape: [1, 6], names: null },
139
+ },
140
+ };
141
+
142
+ globalThis.fetch = mock(() =>
143
+ Promise.resolve(new Response(JSON.stringify(infoV21), { status: 200 })),
144
+ ) as unknown as typeof fetch;
145
+
146
+ // Use fresh import to bypass cache — or just call with a different repoId
147
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
148
+ const result = await getDatasetVersionAndInfo(
149
+ "youliangtan/so101-table-cleanup",
150
+ );
151
+ expect(result.version).toBe("v2.1");
152
+ });
153
+
154
+ test("accepts v3.0 codebase_version", async () => {
155
+ const infoV30 = {
156
+ codebase_version: "v3.0",
157
+ robot_type: "openarm",
158
+ total_episodes: 200,
159
+ total_frames: 40000,
160
+ total_tasks: 1,
161
+ chunks_size: 100,
162
+ data_files_size_in_mb: 50,
163
+ video_files_size_in_mb: 2000,
164
+ fps: 50,
165
+ splits: { train: "0:200" },
166
+ data_path: null,
167
+ video_path: null,
168
+ features: {
169
+ "observation.images.top": {
170
+ dtype: "video",
171
+ shape: [480, 640, 3],
172
+ names: null,
173
+ },
174
+ "observation.state": { dtype: "float32", shape: [1, 14], names: null },
175
+ action: { dtype: "float32", shape: [1, 14], names: null },
176
+ },
177
+ };
178
+
179
+ globalThis.fetch = mock(() =>
180
+ Promise.resolve(new Response(JSON.stringify(infoV30), { status: 200 })),
181
+ ) as unknown as typeof fetch;
182
+
183
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
184
+ const result = await getDatasetVersionAndInfo(
185
+ "lerobot-data-collection/level12_rac_2_2026-02-07",
186
+ );
187
+ expect(result.version).toBe("v3.0");
188
+ expect(result.info.total_episodes).toBe(200);
189
+ });
190
+
191
+ test("throws for unsupported version", async () => {
192
+ const infoUnsupported = {
193
+ codebase_version: "v1.0",
194
+ features: { dummy: { dtype: "float32", shape: [1], names: null } },
195
+ };
196
+
197
+ globalThis.fetch = mock(() =>
198
+ Promise.resolve(
199
+ new Response(JSON.stringify(infoUnsupported), { status: 200 }),
200
+ ),
201
+ ) as unknown as typeof fetch;
202
+
203
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
204
+ await expect(getDatasetVersionAndInfo("old/dataset")).rejects.toThrow(
205
+ "not supported",
206
+ );
207
+ });
208
+
209
+ test("throws when info.json has no features field", async () => {
210
+ globalThis.fetch = mock(() =>
211
+ Promise.resolve(
212
+ new Response(JSON.stringify({ codebase_version: "v3.0" }), {
213
+ status: 200,
214
+ }),
215
+ ),
216
+ ) as unknown as typeof fetch;
217
+
218
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
219
+ await expect(getDatasetVersionAndInfo("broken/dataset")).rejects.toThrow();
220
+ });
221
+
222
+ test("throws when fetch fails (network error)", async () => {
223
+ globalThis.fetch = mock(() =>
224
+ Promise.resolve(new Response("Not Found", { status: 404 })),
225
+ ) as unknown as typeof fetch;
226
+
227
+ const { getDatasetVersionAndInfo } = await import("@/utils/versionUtils");
228
+ await expect(
229
+ getDatasetVersionAndInfo("nonexistent/repo"),
230
+ ).rejects.toThrow();
231
+ });
232
+ });
tsconfig.json CHANGED
@@ -23,5 +23,5 @@
23
  }
24
  },
25
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26
- "exclude": ["node_modules"]
27
  }
 
23
  }
24
  },
25
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26
+ "exclude": ["node_modules", "**/__tests__/**"]
27
  }
tsconfig.test.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "target": "ES2020",
5
+ "lib": ["ES2020", "dom"],
6
+ "types": ["bun-types"],
7
+ "incremental": false
8
+ },
9
+ "include": ["**/__tests__/**/*.ts", "src/**/*.ts"]
10
+ }