- {/* Sidebar */}
-
-
- {/* Content */}
-
- {isLoading &&
}
-
-
-
-
-
-
-
+ 3D Replay
+ {activeTab === "urdf" && (
+
+ )}
+
+ )}
+
- {/* Videos */}
- {videosInfo.length && (
-
setVideosReady(true)}
+ {/* Body: sidebar + content */}
+
+ {/* Sidebar — only on Episodes tab */}
+ {activeTab === "episodes" && (
+
)}
- {/* Language Instruction */}
- {task && (
-
-
-
- Language Instruction:
-
-
-
- {task.split("\n").map((instruction, index) => (
-
- {instruction}
-
- ))}
-
-
- )}
+ {/* Main content */}
+
+ {isLoading &&
}
- {/* Graph */}
-
- setChartsReady(true)}
- />
-
+ {activeTab === "episodes" && (
+ <>
+
+
+ {/* Videos */}
+ {videosInfo.length > 0 && (
+
setVideosReady(true)}
+ />
+ )}
+
+ {/* Language Instruction */}
+ {task && (
+
+
+
+ Language Instruction:
+
+
+
+ {task
+ .split("\n")
+ .map((instruction: string, index: number) => (
+
+ {instruction}
+
+ ))}
+
+
+ )}
+
+ {/* Graph */}
+
+ setChartsReady(true)}
+ />
+
+
+
+ >
+ )}
+
+ {activeTab === "statistics" && (
+
+ )}
+
+ {activeTab === "frames" && (
+
+ )}
+
+ {activeTab === "insights" && (
+ }>
+
+
+ )}
+
+ {activeTab === "filtering" && (
+ }>
+ {
+ setSidebarFlaggedOnly(true);
+ handleTabChange("episodes");
+ }}
+ />
+
+ )}
+
+ {activeTab === "urdf" && (
+ }>
+
+
+ )}
+
);
diff --git a/src/app/[org]/[dataset]/[episode]/fetch-data.ts b/src/app/[org]/[dataset]/[episode]/fetch-data.ts
index b08cc97cf3c2a9fa7c3bdb5473bff7fb3aee80aa..99755696c2d43cdc0a56d9b067f714e4dc5ec805 100644
--- a/src/app/[org]/[dataset]/[episode]/fetch-data.ts
+++ b/src/app/[org]/[dataset]/[episode]/fetch-data.ts
@@ -1,32 +1,109 @@
import {
- fetchJson,
+ DatasetMetadata,
fetchParquetFile,
formatStringWithVars,
- readParquetColumn,
readParquetAsObjects,
} from "@/utils/parquetUtils";
import { pick } from "@/utils/pick";
-import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
+import {
+ getDatasetVersionAndInfo,
+ buildVersionedUrl,
+} from "@/utils/versionUtils";
import { PADDING, CHART_CONFIG, EXCLUDED_COLUMNS } from "@/utils/constants";
import {
processChartDataGroups,
groupRowBySuffix,
} from "@/utils/dataProcessing";
-import { extractLanguageInstructions } from "@/utils/languageInstructions";
import {
buildV3VideoPath,
buildV3DataPath,
buildV3EpisodesMetadataPath,
} from "@/utils/stringFormatting";
import { bigIntToNumber } from "@/utils/typeGuards";
-import type {
- DatasetMetadata,
- EpisodeData,
- EpisodeMetadataV3,
- VideoInfo,
- AdjacentEpisodeVideos,
- ChartDataGroup,
-} from "@/types";
+import type { VideoInfo, AdjacentEpisodeVideos } from "@/types";
+
+const SERIES_NAME_DELIMITER = CHART_CONFIG.SERIES_NAME_DELIMITER;
+
+export type CameraInfo = { name: string; width: number; height: number }; // filled in getEpisodeData: name = video feature key, height = shape[0], width = shape[1]
+
+export type DatasetDisplayInfo = { // dataset-level summary carried on EpisodeData.datasetInfo
+ repoId: string; // `${org}/${dataset}`
+ total_frames: number; // copied from the dataset info
+ total_episodes: number; // copied from the dataset info
+ fps: number; // copied from the dataset info
+ robot_type: string | null; // null when the dataset info has no robot_type
+ codebase_version: string;
+ total_tasks: number; // 0 when the dataset info has no total_tasks
+ dataset_size_mb: number; // data_files_size_in_mb + video_files_size_in_mb, rounded to 1 decimal
+ cameras: CameraInfo[]; // one entry per feature with dtype "video" and at least 2 shape dimensions
+};
+
+export type ChartRow = Record // NOTE(review): type arguments look truncated in this view — confirm; computeColumnMinMax reads rows as a "timestamp" key plus values that are numbers or nested objects of numbers
+>;
+
+export type ColumnMinMax = { // one entry of the array returned by computeColumnMinMax
+ column: string; // row key, or "<key> | <subKey>" for a value nested inside a group object
+ min: number; // rounded to 3 decimals
+ max: number; // rounded to 3 decimals
+};
+
+export type EpisodeLengthInfo = { // length of a single episode
+ episodeIndex: number;
+ lengthSeconds: number; // presumably frames / fps — TODO confirm in loadAllEpisodeLengthsV3
+ frames: number;
+};
+
+export type EpisodeLengthStats = { // presumably what loadAllEpisodeLengthsV3 resolves to — TODO confirm
+ shortestEpisodes: EpisodeLengthInfo[];
+ longestEpisodes: EpisodeLengthInfo[];
+ allEpisodeLengths: EpisodeLengthInfo[];
+ meanEpisodeLength: number; // NOTE(review): unit (seconds vs. frames) is not visible here — confirm
+ medianEpisodeLength: number;
+ stdEpisodeLength: number;
+ episodeLengthHistogram: { binLabel: string; count: number }[]; // one { binLabel, count } entry per histogram bin
+};
+
+export type EpisodeFrameInfo = { // seek positions of one episode inside a video file
+ episodeIndex: number;
+ videoUrl: string;
+ firstFrameTime: number; // presumably a seek time in seconds within videoUrl — TODO confirm
+ lastFrameTime: number | null; // null = seek to video.duration on client
+};
+
+export type EpisodeFramesData = {
+ cameras: string[];
+ framesByCamera: Record; // NOTE(review): type arguments look truncated in this view; presumably keyed by the names in `cameras` — confirm
+};
+
+export type EpisodeData = { // shape of the object built by getEpisodeDataV2 / getEpisodeDataV3 for one episode
+ datasetInfo: DatasetDisplayInfo; // getEpisodeData overwrites the placeholder robot_type / size / cameras fields afterwards
+ episodeId: number;
+ videosInfo: VideoInfo[];
+ chartDataGroups: ChartRow[][]; // one ChartRow[] per chart group
+ flatChartData: Record[]; // the ungrouped rows that chartDataGroups is derived from
+ episodes: number[]; // in the V3 path: 0 .. total_episodes - 1
+ ignoredColumns: string[];
+ duration: number; // in the V3 path: length / fps, else video_to_timestamp - video_from_timestamp
+ task?: string; // language instructions joined with "\n", else the task looked up via task_index
+};
+
+type EpisodeMetadataV3 = { // per-episode metadata row as normalised by parseEpisodeRowSimple (bigint values converted to number)
+ episode_index: number;
+ data_chunk_index: number;
+ data_file_index: number;
+ dataset_from_index: number;
+ dataset_to_index: number;
+ video_chunk_index: number; // with named keys: taken from the first "videos/.../chunk_index" stream found
+ video_file_index: number;
+ video_from_timestamp: number;
+ video_to_timestamp: number; // parseEpisodeRowSimple falls back to 30 when missing or 0
+ length: number; // getEpisodeDataV3 divides this by fps to get the duration
+ [key: string]: string | number; // per-camera "videos/..." keys copied over from the raw row
+};
+
+type ColumnDef = { // chart series names derived from one feature column
+ key: string; // feature / parquet column name
+ value: string[]; // series names; `${key}${SERIES_NAME_DELIMITER}${name}` when the feature declares names
+};
export async function getEpisodeData(
org: string,
@@ -35,10 +112,10 @@ export async function getEpisodeData(
): Promise {
const repoId = `${org}/${dataset}`;
try {
- // Check for compatible dataset version (v3.0, v2.1, or v2.0)
- const version = await getDatasetVersion(repoId);
- const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
- const info = await fetchJson(jsonUrl);
+ console.time(`[perf] getDatasetVersionAndInfo`);
+ const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
+ console.timeEnd(`[perf] getDatasetVersionAndInfo`);
+ const info = rawInfo as unknown as DatasetMetadata;
if (info.video_path === null) {
throw new Error(
@@ -46,19 +123,39 @@ export async function getEpisodeData(
);
}
- // Handle different versions
- if (version === "v3.0") {
- return await getEpisodeDataV3(repoId, version, info, episodeId);
- } else {
- return await getEpisodeDataV2(repoId, version, info, episodeId);
- }
+ console.time(`[perf] getEpisodeData (${version})`);
+ const result =
+ version === "v3.0"
+ ? await getEpisodeDataV3(repoId, version, info, episodeId)
+ : await getEpisodeDataV2(repoId, version, info, episodeId);
+ console.timeEnd(`[perf] getEpisodeData (${version})`);
+
+ // Extract camera resolutions from features
+ const cameras: CameraInfo[] = Object.entries(rawInfo.features)
+ .filter(([, f]) => f.dtype === "video" && f.shape.length >= 2)
+ .map(([name, f]) => ({ name, height: f.shape[0], width: f.shape[1] }));
+
+ result.datasetInfo = {
+ ...result.datasetInfo,
+ robot_type: rawInfo.robot_type ?? null,
+ codebase_version: rawInfo.codebase_version,
+ total_tasks: rawInfo.total_tasks ?? 0,
+ dataset_size_mb:
+ Math.round(
+ ((rawInfo.data_files_size_in_mb ?? 0) +
+ (rawInfo.video_files_size_in_mb ?? 0)) *
+ 10,
+ ) / 10,
+ cameras,
+ };
+
+ return result;
} catch (err) {
console.error("Error loading episode data:", err);
throw err;
}
}
-// Get video info for adjacent episodes (for preloading)
export async function getAdjacentEpisodesVideoInfo(
org: string,
dataset: string,
@@ -67,9 +164,8 @@ export async function getAdjacentEpisodesVideoInfo(
): Promise {
const repoId = `${org}/${dataset}`;
try {
- const version = await getDatasetVersion(repoId);
- const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
- const info = await fetchJson(jsonUrl);
+ const { version, info: rawInfo } = await getDatasetVersionAndInfo(repoId);
+ const info = rawInfo as unknown as DatasetMetadata;
const totalEpisodes = info.total_episodes;
const adjacentVideos: AdjacentEpisodeVideos[] = [];
@@ -142,12 +238,16 @@ async function getEpisodeDataV2(
): Promise {
const episode_chunk = Math.floor(0 / 1000);
- // Dataset information
- const datasetInfo = {
+ const datasetInfo: DatasetDisplayInfo = {
repoId,
total_frames: info.total_frames,
total_episodes: info.total_episodes,
fps: info.fps,
+ robot_type: null,
+ codebase_version: version,
+ total_tasks: 0,
+ dataset_size_mb: 0,
+ cameras: [],
};
// Generate list of episodes
@@ -197,22 +297,17 @@ async function getEpisodeDataV2(
const filteredColumns = columnNames.filter(
(column) => !excludedColumns.includes(column.key),
);
- const filteredColumnNames = [
- "timestamp",
- ...filteredColumns.map((column) => column.key),
- ];
-
- const columns = filteredColumns.map(({ key }) => {
- let column_names = info.features[key].names;
- while (typeof column_names === "object") {
+ const columns: ColumnDef[] = filteredColumns.map(({ key }) => {
+ let column_names: unknown = info.features[key].names;
+ while (typeof column_names === "object" && column_names !== null) {
if (Array.isArray(column_names)) break;
- column_names = Object.values(column_names ?? {})[0];
+ column_names = Object.values(column_names)[0];
}
return {
key,
value: Array.isArray(column_names)
? column_names.map(
- (name) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${name}`,
+ (name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`,
)
: Array.from(
{ length: columnNames.find((c) => c.key === key)?.length ?? 1 },
@@ -233,32 +328,38 @@ async function getEpisodeDataV2(
);
const arrayBuffer = await fetchParquetFile(parquetUrl);
+ const allData = await readParquetAsObjects(arrayBuffer, []);
- // Extract task - first check for language instructions (preferred), then fallback to task field or tasks.jsonl
+ // Extract task from language_instruction fields, task field, or tasks.jsonl
let task: string | undefined;
- let allData: Record[] = [];
- // Load data first
- try {
- allData = await readParquetAsObjects(arrayBuffer, []);
- } catch {
- // Could not read parquet data
- }
+ if (allData.length > 0) {
+ const firstRow = allData[0];
+ const languageInstructions: string[] = [];
- // First check for language_instruction fields in the data (preferred)
- task = extractLanguageInstructions(allData);
+ if (typeof firstRow.language_instruction === "string") {
+ languageInstructions.push(firstRow.language_instruction);
+ }
- // If no language instructions found, try direct task field
- if (
- !task &&
- allData.length > 0 &&
- typeof allData[0].task === "string" &&
- allData[0].task
- ) {
+ let instructionNum = 2;
+ while (
+ typeof firstRow[`language_instruction_${instructionNum}`] === "string"
+ ) {
+ languageInstructions.push(
+ firstRow[`language_instruction_${instructionNum}`] as string,
+ );
+ instructionNum++;
+ }
+
+ if (languageInstructions.length > 0) {
+ task = languageInstructions.join("\n");
+ }
+ }
+
+ if (!task && allData.length > 0 && typeof allData[0].task === "string") {
task = allData[0].task;
}
- // If still no task found, try loading from tasks.jsonl metadata file (v2.x format)
if (!task && allData.length > 0) {
try {
const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
@@ -266,7 +367,6 @@ async function getEpisodeDataV2(
if (tasksResponse.ok) {
const tasksText = await tasksResponse.text();
- // Parse JSONL format (one JSON object per line)
const tasksData = tasksText
.split("\n")
.filter((line) => line.trim())
@@ -274,13 +374,11 @@ async function getEpisodeDataV2(
if (tasksData && tasksData.length > 0) {
const taskIndex = allData[0].task_index;
-
- // Convert BigInt to number for comparison
const taskIndexNum =
typeof taskIndex === "bigint" ? Number(taskIndex) : taskIndex;
-
- // Find task by task_index
- const taskData = tasksData.find((t) => t.task_index === taskIndexNum);
+ const taskData = tasksData.find(
+ (t: Record) => t.task_index === taskIndexNum,
+ );
if (taskData) {
task = taskData.task;
}
@@ -291,20 +389,25 @@ async function getEpisodeDataV2(
}
}
- const data = await readParquetColumn(arrayBuffer, filteredColumnNames);
- // Flatten and map to array of objects for chartData
+ // Build chart data from already-parsed allData (no second parquet parse)
const seriesNames = [
"timestamp",
...columns.map(({ value }) => value).flat(),
];
- const chartData = data.map((row) => {
- const flatRow = row.flat();
+ const chartData = allData.map((row) => {
const obj: Record = {};
- seriesNames.forEach((key, idx) => {
- const value = flatRow[idx];
- obj[key] = typeof value === "number" ? value : Number(value) || 0;
- });
+ obj["timestamp"] = Number(row.timestamp);
+ for (const col of columns) {
+ const rawVal = row[col.key];
+ if (Array.isArray(rawVal)) {
+ rawVal.forEach((v: unknown, i: number) => {
+ if (i < col.value.length) obj[col.value[i]] = Number(v);
+ });
+ } else if (rawVal !== undefined) {
+ obj[col.value[0]] = Number(rawVal);
+ }
+ }
return obj;
});
@@ -338,6 +441,7 @@ async function getEpisodeDataV2(
episodeId,
videosInfo,
chartDataGroups,
+ flatChartData: chartData,
episodes,
ignoredColumns,
duration,
@@ -352,15 +456,18 @@ async function getEpisodeDataV3(
info: DatasetMetadata,
episodeId: number,
): Promise {
- // Create dataset info structure (like v2.x)
- const datasetInfo = {
+ const datasetInfo: DatasetDisplayInfo = {
repoId,
total_frames: info.total_frames,
total_episodes: info.total_episodes,
fps: info.fps,
+ robot_type: null,
+ codebase_version: version,
+ total_tasks: 0,
+ dataset_size_mb: 0,
+ cameras: [],
};
- // Generate episodes list based on total_episodes from dataset info
const episodes = Array.from({ length: info.total_episodes }, (_, i) => i);
// Load episode metadata to get timestamps for episode 0
@@ -379,25 +486,19 @@ async function getEpisodeDataV3(
);
// Load episode data for charts
- const { chartDataGroups, ignoredColumns, task } = await loadEpisodeDataV3(
- repoId,
- version,
- info,
- episodeMetadata,
- );
+ const { chartDataGroups, flatChartData, ignoredColumns, task } =
+ await loadEpisodeDataV3(repoId, version, info, episodeMetadata);
- // Calculate duration from episode length and FPS if available
- const episodeLength = bigIntToNumber(episodeMetadata.length);
- const duration = episodeLength
- ? episodeLength / info.fps
- : (episodeMetadata.video_to_timestamp || 0) -
- (episodeMetadata.video_from_timestamp || 0);
+ const duration = episodeMetadata.length
+ ? episodeMetadata.length / info.fps
+ : episodeMetadata.video_to_timestamp - episodeMetadata.video_from_timestamp;
return {
datasetInfo,
episodeId,
videosInfo,
chartDataGroups,
+ flatChartData,
episodes,
ignoredColumns,
duration,
@@ -412,7 +513,8 @@ async function loadEpisodeDataV3(
info: DatasetMetadata,
episodeMetadata: EpisodeMetadataV3,
): Promise<{
- chartDataGroups: ChartDataGroup[];
+ chartDataGroups: ChartRow[][];
+ flatChartData: Record[];
ignoredColumns: string[];
task?: string;
}> {
@@ -427,9 +529,11 @@ async function loadEpisodeDataV3(
const fullData = await readParquetAsObjects(arrayBuffer, []);
// Extract the episode-specific data slice
- // Convert BigInt to number if needed
- const fromIndex = Number(episodeMetadata.dataset_from_index || 0);
- const toIndex = Number(episodeMetadata.dataset_to_index || fullData.length);
+ const fromIndex = bigIntToNumber(episodeMetadata.dataset_from_index, 0);
+ const toIndex = bigIntToNumber(
+ episodeMetadata.dataset_to_index,
+ fullData.length,
+ );
// Find the starting index of this parquet file by checking the first row's index
// This handles the case where episodes are split across multiple parquet files
@@ -445,29 +549,57 @@ async function loadEpisodeDataV3(
const episodeData = fullData.slice(localFromIndex, localToIndex);
if (episodeData.length === 0) {
- return { chartDataGroups: [], ignoredColumns: [], task: undefined };
+ return {
+ chartDataGroups: [],
+ flatChartData: [],
+ ignoredColumns: [],
+ task: undefined,
+ };
}
// Convert to the same format as v2.x for compatibility with existing chart code
- const { chartDataGroups, ignoredColumns } = processEpisodeDataForCharts(
- episodeData,
- info,
- episodeMetadata,
- );
+ const { chartDataGroups, flatChartData, ignoredColumns } =
+ processEpisodeDataForCharts(episodeData, info, episodeMetadata);
// First check for language_instruction fields in the data (preferred)
- // Check multiple rows: first, middle, and last
- const sampleIndices = [
- 0,
- Math.floor(episodeData.length / 2),
- episodeData.length - 1,
- ];
- let task = extractLanguageInstructions(episodeData, sampleIndices);
-
- // If no language instructions found, fall back to tasks metadata
- if (!task) {
+ let task: string | undefined;
+ if (episodeData.length > 0) {
+ const languageInstructions: string[] = [];
+
+ const extractInstructions = (row: Record) => {
+ if (typeof row.language_instruction === "string") {
+ languageInstructions.push(row.language_instruction);
+ }
+ let num = 2;
+ while (typeof row[`language_instruction_${num}`] === "string") {
+ languageInstructions.push(
+ row[`language_instruction_${num}`] as string,
+ );
+ num++;
+ }
+ };
+
+ extractInstructions(episodeData[0]);
+
+ // If no instructions in first row, check middle and last rows
+ if (languageInstructions.length === 0 && episodeData.length > 1) {
+ for (const idx of [
+ Math.floor(episodeData.length / 2),
+ episodeData.length - 1,
+ ]) {
+ extractInstructions(episodeData[idx]);
+ if (languageInstructions.length > 0) break;
+ }
+ }
+
+ if (languageInstructions.length > 0) {
+ task = languageInstructions.join("\n");
+ }
+ }
+
+ // Fall back to tasks metadata parquet
+ if (!task && episodeData.length > 0) {
try {
- // Load tasks metadata
const tasksUrl = buildVersionedUrl(
repoId,
version,
@@ -476,53 +608,28 @@ async function loadEpisodeDataV3(
const tasksArrayBuffer = await fetchParquetFile(tasksUrl);
const tasksData = await readParquetAsObjects(tasksArrayBuffer, []);
- if (
- episodeData.length > 0 &&
- tasksData &&
- tasksData.length > 0 &&
- "task_index" in episodeData[0]
- ) {
- const taskIndex = episodeData[0].task_index;
+ if (tasksData.length > 0) {
+ const taskIndexNum = bigIntToNumber(episodeData[0].task_index, -1);
- // Convert BigInt to number for comparison
- const taskIndexNum =
- typeof taskIndex === "bigint"
- ? Number(taskIndex)
- : typeof taskIndex === "number"
- ? taskIndex
- : undefined;
-
- // Look up task by index
- if (
- taskIndexNum !== undefined &&
- taskIndexNum >= 0 &&
- taskIndexNum < tasksData.length
- ) {
+ if (taskIndexNum >= 0 && taskIndexNum < tasksData.length) {
const taskData = tasksData[taskIndexNum];
- // Extract task from various possible fields
- if (
- taskData &&
- "__index_level_0__" in taskData &&
- typeof taskData.__index_level_0__ === "string"
- ) {
- task = taskData.__index_level_0__;
- } else if (
- taskData &&
- "task" in taskData &&
- typeof taskData.task === "string"
- ) {
- task = taskData.task;
- }
+ const rawTask = taskData.__index_level_0__ ?? taskData.task;
+ task = typeof rawTask === "string" ? rawTask : undefined;
}
}
} catch {
- // Could not load tasks metadata - dataset might not have language tasks
+ // Could not load tasks metadata
}
}
- return { chartDataGroups, ignoredColumns, task };
+ return { chartDataGroups, flatChartData, ignoredColumns, task };
} catch {
- return { chartDataGroups: [], ignoredColumns: [], task: undefined };
+ return {
+ chartDataGroups: [],
+ flatChartData: [],
+ ignoredColumns: [],
+ task: undefined,
+ };
}
}
@@ -531,16 +638,11 @@ function processEpisodeDataForCharts(
episodeData: Record[],
info: DatasetMetadata,
episodeMetadata?: EpisodeMetadataV3,
-): { chartDataGroups: ChartDataGroup[]; ignoredColumns: string[] } {
- // Get numeric column features (not currently used but kept for reference)
- // const columnNames = Object.entries(info.features)
- // .filter(
- // ([, value]) =>
- // ["float32", "int32"].includes(value.dtype) &&
- // value.shape.length === 1,
- // )
- // .map(([key, value]) => ({ key, value }));
-
+): {
+ chartDataGroups: ChartRow[][];
+ flatChartData: Record[];
+ ignoredColumns: string[];
+} {
// Convert parquet data to chart format
let seriesNames: string[] = [];
@@ -576,7 +678,7 @@ function processEpisodeDataForCharts(
const excludedColumns = EXCLUDED_COLUMNS.V3 as readonly string[];
// Create columns structure similar to V2.1 for proper hierarchical naming
- const columns = Object.entries(info.features)
+ const columns: ColumnDef[] = Object.entries(info.features)
.filter(
([key, value]) =>
["float32", "int32"].includes(value.dtype) &&
@@ -584,16 +686,16 @@ function processEpisodeDataForCharts(
!excludedColumns.includes(key),
)
.map(([key, feature]) => {
- let column_names = feature.names;
- while (typeof column_names === "object") {
+ let column_names: unknown = feature.names;
+ while (typeof column_names === "object" && column_names !== null) {
if (Array.isArray(column_names)) break;
- column_names = Object.values(column_names ?? {})[0];
+ column_names = Object.values(column_names)[0];
}
return {
key,
value: Array.isArray(column_names)
? column_names.map(
- (name) => `${key}${CHART_CONFIG.SERIES_NAME_DELIMITER}${name}`,
+ (name: string) => `${key}${SERIES_NAME_DELIMITER}${name}`,
)
: Array.from(
{ length: feature.shape[0] || 1 },
@@ -714,45 +816,6 @@ function processEpisodeDataForCharts(
// Process chart data into organized groups using utility function
const chartGroups = processChartDataGroups(seriesNames, chartData);
- // Utility function to group row keys by suffix (same as V2.1)
- function groupRowBySuffix(row: Record): {
- timestamp: number;
- [key: string]: number | Record;
- } {
- const result: {
- timestamp: number;
- [key: string]: number | Record;
- } = {
- timestamp: 0,
- };
- const suffixGroups: Record> = {};
- for (const [key, value] of Object.entries(row)) {
- if (key === "timestamp") {
- result.timestamp = value;
- continue;
- }
- const parts = key.split(CHART_CONFIG.SERIES_NAME_DELIMITER);
- if (parts.length === 2) {
- const [prefix, suffix] = parts;
- if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
- suffixGroups[suffix][prefix] = value;
- } else {
- result[key] = value;
- }
- }
- for (const [suffix, group] of Object.entries(suffixGroups)) {
- const keys = Object.keys(group);
- if (keys.length === 1) {
- // Use the full original name as the key
- const fullName = `${keys[0]}${CHART_CONFIG.SERIES_NAME_DELIMITER}${suffix}`;
- result[fullName] = group[keys[0]];
- } else {
- result[suffix] = group;
- }
- }
- return result;
- }
-
const chartDataGroups = chartGroups.map((group) =>
chartData.map((row) => {
const grouped = groupRowBySuffix(pick(row, [...group, "timestamp"]));
@@ -765,7 +828,7 @@ function processEpisodeDataForCharts(
}),
);
- return { chartDataGroups, ignoredColumns };
+ return { chartDataGroups, flatChartData: chartData, ignoredColumns };
}
// Video info extraction with segmentation for v3.0
@@ -786,18 +849,22 @@ function extractVideoInfoV3WithSegmentation(
key.startsWith(`videos/${videoKey}/`),
);
- let chunkIndex, fileIndex, segmentStart, segmentEnd;
+ let chunkIndex: number,
+ fileIndex: number,
+ segmentStart: number,
+ segmentEnd: number;
+
+ const toNum = (v: string | number): number =>
+ typeof v === "string" ? parseFloat(v) || 0 : v;
if (cameraSpecificKeys.length > 0) {
- // Use camera-specific metadata
- const chunkValue = episodeMetadata[`videos/${videoKey}/chunk_index`];
- const fileValue = episodeMetadata[`videos/${videoKey}/file_index`];
- chunkIndex = bigIntToNumber(chunkValue, 0);
- fileIndex = bigIntToNumber(fileValue, 0);
- segmentStart = episodeMetadata[`videos/${videoKey}/from_timestamp`] || 0;
- segmentEnd = episodeMetadata[`videos/${videoKey}/to_timestamp`] || 30;
+ chunkIndex = toNum(episodeMetadata[`videos/${videoKey}/chunk_index`]);
+ fileIndex = toNum(episodeMetadata[`videos/${videoKey}/file_index`]);
+ segmentStart =
+ toNum(episodeMetadata[`videos/${videoKey}/from_timestamp`]) || 0;
+ segmentEnd =
+ toNum(episodeMetadata[`videos/${videoKey}/to_timestamp`]) || 30;
} else {
- // Fallback to generic video metadata
chunkIndex = episodeMetadata.video_chunk_index || 0;
fileIndex = episodeMetadata.video_file_index || 0;
segmentStart = episodeMetadata.video_from_timestamp || 0;
@@ -899,70 +966,83 @@ function parseEpisodeRowSimple(
// Check if this is v3.0 format with named keys
if ("episode_index" in row) {
// v3.0 format - use named keys
- const episodeData: Record = {
- episode_index: bigIntToNumber(row["episode_index"], 0),
- data_chunk_index: bigIntToNumber(row["data/chunk_index"], 0),
- data_file_index: bigIntToNumber(row["data/file_index"], 0),
- dataset_from_index: bigIntToNumber(row["dataset_from_index"], 0),
- dataset_to_index: bigIntToNumber(row["dataset_to_index"], 0),
- length: bigIntToNumber(row["length"], 0),
+ // Convert BigInt values to numbers
+ const toBigIntSafe = (value: unknown): number => {
+ if (typeof value === "bigint") return Number(value);
+ if (typeof value === "number") return value;
+ if (typeof value === "string") return parseInt(value) || 0;
+ return 0;
+ };
+
+ const toNumSafe = (value: unknown): number => {
+ if (typeof value === "number") return value;
+ if (typeof value === "bigint") return Number(value);
+ if (typeof value === "string") return parseFloat(value) || 0;
+ return 0;
};
// Handle video metadata - look for video-specific keys
const videoKeys = Object.keys(row).filter(
(key) => key.includes("videos/") && key.includes("/chunk_index"),
);
+ let videoChunkIndex = 0,
+ videoFileIndex = 0,
+ videoFromTs = 0,
+ videoToTs = 30;
if (videoKeys.length > 0) {
- // Use the first video stream for basic info
- const firstVideoKey = videoKeys[0];
- const videoBaseName = firstVideoKey.replace("/chunk_index", "");
-
- episodeData.video_chunk_index = bigIntToNumber(
- row[`${videoBaseName}/chunk_index`],
- 0,
- );
- episodeData.video_file_index = bigIntToNumber(
- row[`${videoBaseName}/file_index`],
- 0,
- );
- episodeData.video_from_timestamp = bigIntToNumber(
- row[`${videoBaseName}/from_timestamp`],
- 0,
- );
- episodeData.video_to_timestamp = bigIntToNumber(
- row[`${videoBaseName}/to_timestamp`],
- 0,
- );
- } else {
- // Fallback video values
- episodeData.video_chunk_index = 0;
- episodeData.video_file_index = 0;
- episodeData.video_from_timestamp = 0;
- episodeData.video_to_timestamp = 30;
+ const videoBaseName = videoKeys[0].replace("/chunk_index", "");
+ videoChunkIndex = toBigIntSafe(row[`${videoBaseName}/chunk_index`]);
+ videoFileIndex = toBigIntSafe(row[`${videoBaseName}/file_index`]);
+ videoFromTs = toNumSafe(row[`${videoBaseName}/from_timestamp`]);
+ videoToTs = toNumSafe(row[`${videoBaseName}/to_timestamp`]) || 30;
}
- // Store the raw row data to preserve per-camera metadata
- // This allows extractVideoInfoV3WithSegmentation to access camera-specific timestamps
+ const episodeData: EpisodeMetadataV3 = {
+ episode_index: toBigIntSafe(row["episode_index"]),
+ data_chunk_index: toBigIntSafe(row["data/chunk_index"]),
+ data_file_index: toBigIntSafe(row["data/file_index"]),
+ dataset_from_index: toBigIntSafe(row["dataset_from_index"]),
+ dataset_to_index: toBigIntSafe(row["dataset_to_index"]),
+ length: toBigIntSafe(row["length"]),
+ video_chunk_index: videoChunkIndex,
+ video_file_index: videoFileIndex,
+ video_from_timestamp: videoFromTs,
+ video_to_timestamp: videoToTs,
+ };
+
+ // Store per-camera metadata for extractVideoInfoV3WithSegmentation
Object.keys(row).forEach((key) => {
if (key.startsWith("videos/")) {
- episodeData[key] = bigIntToNumber(row[key]);
+ const val = row[key];
+ episodeData[key] =
+ typeof val === "bigint"
+ ? Number(val)
+ : typeof val === "number" || typeof val === "string"
+ ? val
+ : 0;
}
});
return episodeData as EpisodeMetadataV3;
} else {
// Fallback to numeric keys for compatibility
+ const toNum = (v: unknown, fallback = 0): number =>
+ typeof v === "number"
+ ? v
+ : typeof v === "bigint"
+ ? Number(v)
+ : fallback;
return {
- episode_index: bigIntToNumber(row["0"], 0),
- data_chunk_index: bigIntToNumber(row["1"], 0),
- data_file_index: bigIntToNumber(row["2"], 0),
- dataset_from_index: bigIntToNumber(row["3"], 0),
- dataset_to_index: bigIntToNumber(row["4"], 0),
- video_chunk_index: bigIntToNumber(row["5"], 0),
- video_file_index: bigIntToNumber(row["6"], 0),
- video_from_timestamp: bigIntToNumber(row["7"], 0),
- video_to_timestamp: bigIntToNumber(row["8"], 30),
- length: bigIntToNumber(row["9"], 30),
+ episode_index: toNum(row["0"]),
+ data_chunk_index: toNum(row["1"]),
+ data_file_index: toNum(row["2"]),
+ dataset_from_index: toNum(row["3"]),
+ dataset_to_index: toNum(row["4"]),
+ video_chunk_index: toNum(row["5"]),
+ video_file_index: toNum(row["6"]),
+ video_from_timestamp: toNum(row["7"]),
+ video_to_timestamp: toNum(row["8"], 30),
+ length: toNum(row["9"], 30),
};
}
}
@@ -984,6 +1064,995 @@ function parseEpisodeRowSimple(
return fallback;
}
+// ─── Stats computation ───────────────────────────────────────────
+
+/**
+ * Compute per-column min/max values from the current episode's chart data.
+ */
+export function computeColumnMinMax(
+ chartDataGroups: ChartRow[][],
+): ColumnMinMax[] {
+ const stats: Record = {};
+
+ for (const group of chartDataGroups) {
+ for (const row of group) {
+ for (const [key, value] of Object.entries(row)) {
+ if (key === "timestamp") continue;
+ if (typeof value === "number" && isFinite(value)) {
+ if (!stats[key]) {
+ stats[key] = { min: value, max: value };
+ } else {
+ if (value < stats[key].min) stats[key].min = value;
+ if (value > stats[key].max) stats[key].max = value;
+ }
+ } else if (typeof value === "object" && value !== null) {
+ // Nested group like { joint_0: 1.2, joint_1: 3.4 }
+ for (const [subKey, subVal] of Object.entries(value)) {
+ const fullKey = `${key} | ${subKey}`;
+ if (typeof subVal === "number" && isFinite(subVal)) {
+ if (!stats[fullKey]) {
+ stats[fullKey] = { min: subVal, max: subVal };
+ } else {
+ if (subVal < stats[fullKey].min) stats[fullKey].min = subVal;
+ if (subVal > stats[fullKey].max) stats[fullKey].max = subVal;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return Object.entries(stats).map(([column, { min, max }]) => ({
+ column,
+ min: Math.round(min * 1000) / 1000,
+ max: Math.round(max * 1000) / 1000,
+ }));
+}
+
+/**
+ * Load all episode lengths from the episodes metadata parquet files (v3.0; only chunk-000 is scanned).
+ * Returns the 5 shortest/longest episodes, all lengths in seconds, mean/median/std and a histogram, or null if unavailable.
+ */
+export async function loadAllEpisodeLengthsV3(
+ repoId: string,
+ version: string,
+ fps: number,
+): Promise {
+ try {
+ const allEpisodes: { index: number; length: number }[] = [];
+ let fileIndex = 0;
+ const chunkIndex = 0;
+
+ while (true) {
+ const path = `meta/episodes/chunk-${chunkIndex.toString().padStart(3, "0")}/file-${fileIndex.toString().padStart(3, "0")}.parquet`;
+ const url = buildVersionedUrl(repoId, version, path);
+ try {
+ const buf = await fetchParquetFile(url);
+ const rows = await readParquetAsObjects(buf, []);
+ if (rows.length === 0 && fileIndex > 0) break;
+ for (const row of rows) {
+ const parsed = parseEpisodeRowSimple(row);
+ allEpisodes.push({
+ index: parsed.episode_index,
+ length: parsed.length,
+ });
+ }
+ fileIndex++;
+ } catch {
+ break;
+ }
+ }
+
+ if (allEpisodes.length === 0) return null;
+
+ const withSeconds = allEpisodes.map((ep) => ({
+ episodeIndex: ep.index,
+ frames: ep.length,
+ lengthSeconds: Math.round((ep.length / fps) * 100) / 100,
+ }));
+
+ const sortedByLength = [...withSeconds].sort(
+ (a, b) => a.lengthSeconds - b.lengthSeconds,
+ );
+ const shortestEpisodes = sortedByLength.slice(0, 5);
+ const longestEpisodes = sortedByLength.slice(-5).reverse();
+
+ const lengths = withSeconds.map((e) => e.lengthSeconds);
+ const sum = lengths.reduce((a, b) => a + b, 0);
+ const mean = Math.round((sum / lengths.length) * 100) / 100;
+
+ const sorted = [...lengths].sort((a, b) => a - b);
+ const mid = Math.floor(sorted.length / 2);
+ const median =
+ sorted.length % 2 === 0
+ ? Math.round(((sorted[mid - 1] + sorted[mid]) / 2) * 100) / 100
+ : sorted[mid];
+
+ const variance =
+ lengths.reduce((acc, l) => acc + (l - mean) ** 2, 0) / lengths.length;
+ const std = Math.round(Math.sqrt(variance) * 100) / 100;
+
+ // Build histogram
+ const histMin = Math.min(...lengths);
+ const histMax = Math.max(...lengths);
+
+ if (histMax === histMin) {
+ return {
+ shortestEpisodes,
+ longestEpisodes,
+ allEpisodeLengths: withSeconds,
+ meanEpisodeLength: mean,
+ medianEpisodeLength: median,
+ stdEpisodeLength: std,
+ episodeLengthHistogram: [
+ { binLabel: `${histMin.toFixed(1)}s`, count: lengths.length },
+ ],
+ };
+ }
+
+ const p1 = sorted[Math.floor(sorted.length * 0.01)];
+ const p99 = sorted[Math.ceil(sorted.length * 0.99) - 1];
+ const range = p99 - p1 || 1;
+
+ const targetBins = Math.max(
+ 10,
+ Math.min(50, Math.ceil(Math.log2(lengths.length) + 1)),
+ );
+ const rawBinWidth = range / targetBins;
+ const magnitude = Math.pow(10, Math.floor(Math.log10(rawBinWidth)));
+ const niceSteps = [1, 2, 2.5, 5, 10];
+ const niceBinWidth =
+ niceSteps.map((s) => s * magnitude).find((w) => w >= rawBinWidth) ??
+ rawBinWidth;
+
+ const niceMin = Math.floor(p1 / niceBinWidth) * niceBinWidth;
+ const niceMax = Math.ceil(p99 / niceBinWidth) * niceBinWidth;
+ const actualBinCount = Math.max(
+ 1,
+ Math.round((niceMax - niceMin) / niceBinWidth),
+ );
+ const bins = Array.from({ length: actualBinCount }, () => 0);
+
+ for (const len of lengths) {
+ let binIdx = Math.floor((len - niceMin) / niceBinWidth);
+ if (binIdx < 0) binIdx = 0;
+ if (binIdx >= actualBinCount) binIdx = actualBinCount - 1;
+ bins[binIdx]++;
+ }
+
+ const histogram = bins.map((count, i) => {
+ const lo = niceMin + i * niceBinWidth;
+ const hi = lo + niceBinWidth;
+ return { binLabel: `${lo.toFixed(1)}–${hi.toFixed(1)}s`, count };
+ });
+
+ return {
+ shortestEpisodes,
+ longestEpisodes,
+ allEpisodeLengths: withSeconds,
+ meanEpisodeLength: mean,
+ medianEpisodeLength: median,
+ stdEpisodeLength: std,
+ episodeLengthHistogram: histogram,
+ };
+ } catch {
+ return null;
+ }
+}
+
+/**
+ * Load video frame info for all episodes across all cameras.
+ * Returns camera names + a map of camera → EpisodeFrameInfo[].
+ */
+export async function loadAllEpisodeFrameInfo(
+ repoId: string,
+ version: string,
+ info: DatasetMetadata,
+): Promise {
+ const videoFeatures = Object.entries(info.features).filter(
+ ([, f]) => f.dtype === "video",
+ );
+ if (videoFeatures.length === 0) return { cameras: [], framesByCamera: {} };
+
+ const cameras = videoFeatures.map(([key]) => key);
+ const framesByCamera: Record = {};
+ for (const cam of cameras) framesByCamera[cam] = [];
+
+ if (version === "v3.0") {
+ let fileIndex = 0;
+ while (true) {
+ const path = `meta/episodes/chunk-000/file-${fileIndex.toString().padStart(3, "0")}.parquet`;
+ try {
+ const buf = await fetchParquetFile(
+ buildVersionedUrl(repoId, version, path),
+ );
+ const rows = await readParquetAsObjects(buf, []);
+ if (rows.length === 0 && fileIndex > 0) break;
+ for (const row of rows) {
+ const epIdx = Number(row["episode_index"] ?? 0);
+ for (const cam of cameras) {
+ const cIdx = Number(
+ row[`videos/${cam}/chunk_index`] ?? row["video_chunk_index"] ?? 0,
+ );
+ const fIdx = Number(
+ row[`videos/${cam}/file_index`] ?? row["video_file_index"] ?? 0,
+ );
+ const fromTs = Number(
+ row[`videos/${cam}/from_timestamp`] ??
+ row["video_from_timestamp"] ??
+ 0,
+ );
+ const toTs = Number(
+ row[`videos/${cam}/to_timestamp`] ??
+ row["video_to_timestamp"] ??
+ 30,
+ );
+ const videoPath = `videos/${cam}/chunk-${cIdx.toString().padStart(3, "0")}/file-${fIdx.toString().padStart(3, "0")}.mp4`;
+ framesByCamera[cam].push({
+ episodeIndex: epIdx,
+ videoUrl: buildVersionedUrl(repoId, version, videoPath),
+ firstFrameTime: fromTs,
+ lastFrameTime: Math.max(0, toTs - 0.05),
+ });
+ }
+ }
+ fileIndex++;
+ } catch {
+ break;
+ }
+ }
+ return { cameras, framesByCamera };
+ }
+
+ // v2.x — construct URLs from template
+ for (let i = 0; i < info.total_episodes; i++) {
+ const chunk = Math.floor(i / (info.chunks_size || 1000));
+ for (const cam of cameras) {
+ const videoPath = formatStringWithVars(info.video_path, {
+ video_key: cam,
+ episode_chunk: chunk.toString().padStart(3, "0"),
+ episode_index: i.toString().padStart(6, "0"),
+ });
+ framesByCamera[cam].push({
+ episodeIndex: i,
+ videoUrl: buildVersionedUrl(repoId, version, videoPath),
+ firstFrameTime: 0,
+ lastFrameTime: null,
+ });
+ }
+ }
+ return { cameras, framesByCamera };
+}
+
+// ─── Cross-episode action variance ──────────────────────────────
+
+export type LowMovementEpisode = {
+ episodeIndex: number;
+ totalMovement: number;
+};
+
+export type AggVelocityStat = {
+ name: string;
+ std: number; // normalized by motor range
+ maxAbs: number; // normalized by motor range
+ bins: number[];
+ lo: number; // normalized by motor range
+ hi: number; // normalized by motor range
+ motorRange: number;
+ inactive?: boolean; // true if p95(|Δa|) < 0.1% of motor range in every sampled episode
+ discrete?: boolean; // true if motor has very few unique values (e.g. open/close gripper)
+};
+
+export type AggAutocorrelation = {
+ chartData: Record[];
+ suggestedChunk: number | null;
+ shortKeys: string[];
+};
+
+export type SpeedDistEntry = {
+ episodeIndex: number;
+ speed: number;
+};
+
+export type AggAlignment = {
+ ccData: { lag: number; max: number; mean: number; min: number }[];
+ meanPeakLag: number;
+ meanPeakCorr: number;
+ maxPeakLag: number;
+ maxPeakCorr: number;
+ minPeakLag: number;
+ minPeakCorr: number;
+ lagRangeMin: number;
+ lagRangeMax: number;
+ numPairs: number;
+};
+
+export type JerkyEpisode = {
+ episodeIndex: number;
+ meanAbsDelta: number;
+};
+
+export type CrossEpisodeVarianceData = {
+ actionNames: string[];
+ timeBins: number[];
+ variance: number[][];
+ numEpisodes: number;
+ lowMovementEpisodes: LowMovementEpisode[];
+ aggVelocity: AggVelocityStat[];
+ aggAutocorrelation: AggAutocorrelation | null;
+ speedDistribution: SpeedDistEntry[];
+ jerkyEpisodes: JerkyEpisode[];
+ aggAlignment: AggAlignment | null;
+};
+
+export async function loadCrossEpisodeActionVariance(
+ repoId: string,
+ version: string,
+ info: DatasetMetadata,
+ fps: number,
+ maxEpisodes = 500,
+ numTimeBins = 50,
+): Promise {
+ const actionEntry = Object.entries(info.features).find(
+ ([key, f]) => key === "action" && f.shape.length === 1,
+ );
+ if (!actionEntry) {
+ console.warn(
+ "[cross-ep] No action feature found. Available features:",
+ Object.entries(info.features)
+ .map(([k, f]) => `${k}(${f.dtype}, shape=${JSON.stringify(f.shape)})`)
+ .join(", "),
+ );
+ return null;
+ }
+
+ const [actionKey, actionMeta] = actionEntry;
+ const actionDim = actionMeta.shape[0];
+
+ let names: unknown = actionMeta.names;
+ while (typeof names === "object" && names !== null && !Array.isArray(names)) {
+ names = Object.values(names)[0];
+ }
+ const actionNames = Array.isArray(names)
+ ? (names as string[]).map((n) => `${actionKey}${SERIES_NAME_DELIMITER}${n}`)
+ : Array.from(
+ { length: actionDim },
+ (_, i) => `${actionKey}${SERIES_NAME_DELIMITER}${i}`,
+ );
+
+ // State feature for alignment computation
+ const stateEntry = Object.entries(info.features).find(
+ ([key, f]) => key === "observation.state" && f.shape.length === 1,
+ );
+ const stateKey = stateEntry?.[0] ?? null;
+ const stateDim = stateEntry?.[1].shape[0] ?? 0;
+
+ // Collect episode metadata
+ type EpMeta = {
+ index: number;
+ chunkIdx: number;
+ fileIdx: number;
+ from: number;
+ to: number;
+ };
+ const allEps: EpMeta[] = [];
+
+ if (version === "v3.0") {
+ let fileIndex = 0;
+ while (true) {
+ const path = `meta/episodes/chunk-000/file-${fileIndex.toString().padStart(3, "0")}.parquet`;
+ try {
+ const buf = await fetchParquetFile(
+ buildVersionedUrl(repoId, version, path),
+ );
+ const rows = await readParquetAsObjects(buf, []);
+ if (rows.length === 0 && fileIndex > 0) break;
+ for (const row of rows) {
+ const parsed = parseEpisodeRowSimple(row);
+ allEps.push({
+ index: parsed.episode_index,
+ chunkIdx: parsed.data_chunk_index,
+ fileIdx: parsed.data_file_index,
+ from: parsed.dataset_from_index,
+ to: parsed.dataset_to_index,
+ });
+ }
+ fileIndex++;
+ } catch {
+ break;
+ }
+ }
+ } else {
+ for (let i = 0; i < info.total_episodes; i++) {
+ allEps.push({ index: i, chunkIdx: 0, fileIdx: 0, from: 0, to: 0 });
+ }
+ }
+
+ if (allEps.length < 2) {
+ console.warn(
+ `[cross-ep] Only ${allEps.length} episode(s) found in metadata, need ≥2`,
+ );
+ return null;
+ }
+ console.log(
+ `[cross-ep] Found ${allEps.length} episodes in metadata, sampling up to ${maxEpisodes}`,
+ );
+
+ // Sample episodes evenly
+ const sampled =
+ allEps.length <= maxEpisodes
+ ? allEps
+ : Array.from(
+ { length: maxEpisodes },
+ (_, i) =>
+ allEps[Math.round((i * (allEps.length - 1)) / (maxEpisodes - 1))],
+ );
+
+ // Load action (and state) data per episode
+ const episodeActions: { index: number; actions: number[][] }[] = [];
+ const episodeStates: (number[][] | null)[] = [];
+
+ if (version === "v3.0") {
+ const byFile = new Map();
+ for (const ep of sampled) {
+ const key = `${ep.chunkIdx}-${ep.fileIdx}`;
+ if (!byFile.has(key)) byFile.set(key, []);
+ byFile.get(key)!.push(ep);
+ }
+
+ for (const [, eps] of byFile) {
+ const ep0 = eps[0];
+ const dataPath = `data/chunk-${ep0.chunkIdx.toString().padStart(3, "0")}/file-${ep0.fileIdx.toString().padStart(3, "0")}.parquet`;
+ try {
+ const buf = await fetchParquetFile(
+ buildVersionedUrl(repoId, version, dataPath),
+ );
+ const rows = await readParquetAsObjects(buf, []);
+ const fileStart =
+ rows.length > 0 && rows[0].index !== undefined
+ ? Number(rows[0].index)
+ : 0;
+
+ for (const ep of eps) {
+ const localFrom = Math.max(0, ep.from - fileStart);
+ const localTo = Math.min(rows.length, ep.to - fileStart);
+ const actions: number[][] = [];
+ const states: number[][] = [];
+ for (let r = localFrom; r < localTo; r++) {
+ const raw = rows[r]?.[actionKey];
+ if (Array.isArray(raw)) actions.push(raw.map(Number));
+ if (stateKey) {
+ const sRaw = rows[r]?.[stateKey];
+ if (Array.isArray(sRaw)) states.push(sRaw.map(Number));
+ }
+ }
+ if (actions.length > 0) {
+ episodeActions.push({ index: ep.index, actions });
+ episodeStates.push(
+ stateKey && states.length === actions.length ? states : null,
+ );
+ }
+ }
+ } catch {
+ /* skip file */
+ }
+ }
+ } else {
+ const chunkSize = info.chunks_size || 1000;
+ for (const ep of sampled) {
+ const chunk = Math.floor(ep.index / chunkSize);
+ const dataPath = formatStringWithVars(info.data_path, {
+ episode_chunk: chunk.toString().padStart(3, "0"),
+ episode_index: ep.index.toString().padStart(6, "0"),
+ });
+ try {
+ const buf = await fetchParquetFile(
+ buildVersionedUrl(repoId, version, dataPath),
+ );
+ const rows = await readParquetAsObjects(buf, []);
+ const actions: number[][] = [];
+ const states: number[][] = [];
+ for (const row of rows) {
+ const raw = row[actionKey];
+ if (Array.isArray(raw)) {
+ actions.push(raw.map(Number));
+ } else {
+ const vec: number[] = [];
+ for (let d = 0; d < actionDim; d++) {
+ const v = row[`${actionKey}.${d}`] ?? row[d];
+ vec.push(typeof v === "number" ? v : Number(v) || 0);
+ }
+ actions.push(vec);
+ }
+ if (stateKey) {
+ const sRaw = row[stateKey];
+ if (Array.isArray(sRaw)) states.push(sRaw.map(Number));
+ }
+ }
+ if (actions.length > 0) {
+ episodeActions.push({ index: ep.index, actions });
+ episodeStates.push(
+ stateKey && states.length === actions.length ? states : null,
+ );
+ }
+ } catch {
+ /* skip */
+ }
+ }
+ }
+
+ if (episodeActions.length < 2) {
+ console.warn(
+ `[cross-ep] Only ${episodeActions.length} episode(s) had loadable action data out of ${sampled.length} sampled`,
+ );
+ return null;
+ }
+ console.log(
+ `[cross-ep] Loaded action data for ${episodeActions.length}/${sampled.length} episodes`,
+ );
+
+ // Resample each episode to numTimeBins and compute variance
+ const timeBins = Array.from(
+ { length: numTimeBins },
+ (_, i) => i / (numTimeBins - 1),
+ );
+ const sums = Array.from(
+ { length: numTimeBins },
+ () => new Float64Array(actionDim),
+ );
+ const sumsSq = Array.from(
+ { length: numTimeBins },
+ () => new Float64Array(actionDim),
+ );
+ const counts = new Uint32Array(numTimeBins);
+
+ for (const { actions: epActions } of episodeActions) {
+ const T = epActions.length;
+ for (let b = 0; b < numTimeBins; b++) {
+ const srcIdx = Math.min(Math.round(timeBins[b] * (T - 1)), T - 1);
+ const row = epActions[srcIdx];
+ for (let d = 0; d < actionDim; d++) {
+ const v = row[d] ?? 0;
+ sums[b][d] += v;
+ sumsSq[b][d] += v * v;
+ }
+ counts[b]++;
+ }
+ }
+
+ const variance: number[][] = [];
+ for (let b = 0; b < numTimeBins; b++) {
+ const row: number[] = [];
+ const n = counts[b];
+ for (let d = 0; d < actionDim; d++) {
+ if (n < 2) {
+ row.push(0);
+ continue;
+ }
+ const mean = sums[b][d] / n;
+ row.push(sumsSq[b][d] / n - mean * mean);
+ }
+ variance.push(row);
+ }
+
+ // Per-episode average movement per frame: mean L2 norm of frame-to-frame action deltas
+ const movementScores: LowMovementEpisode[] = episodeActions.map(
+ ({ index, actions: ep }) => {
+ if (ep.length < 2) return { episodeIndex: index, totalMovement: 0 };
+ let total = 0;
+ for (let t = 1; t < ep.length; t++) {
+ let sumSq = 0;
+ for (let d = 0; d < actionDim; d++) {
+ const delta = (ep[t][d] ?? 0) - (ep[t - 1][d] ?? 0);
+ sumSq += delta * delta;
+ }
+ total += Math.sqrt(sumSq);
+ }
+ const avgPerFrame = total / (ep.length - 1);
+ return {
+ episodeIndex: index,
+ totalMovement: Math.round(avgPerFrame * 10000) / 10000,
+ };
+ },
+ );
+
+ movementScores.sort((a, b) => a.totalMovement - b.totalMovement);
+ const lowMovementEpisodes = movementScores.slice(0, 10);
+
+ // Precompute per-dimension normalization: motor range (max − min) and unique value count
+ const motorRanges: number[] = new Array(actionDim);
+ const motorUniqueCount: number[] = new Array(actionDim);
+ const DISCRETE_THRESHOLD = 4; // ≤ this many unique values → discrete motor
+ for (let d = 0; d < actionDim; d++) {
+ let lo = Infinity,
+ hi = -Infinity;
+ const uniqueVals = new Set();
+ for (const { actions: ep } of episodeActions) {
+ for (let t = 0; t < ep.length; t++) {
+ const v = ep[t][d] ?? 0;
+ if (v < lo) lo = v;
+ if (v > hi) hi = v;
+ if (uniqueVals.size <= DISCRETE_THRESHOLD) uniqueVals.add(v);
+ }
+ }
+ motorRanges[d] = hi - lo || 1;
+ motorUniqueCount[d] = uniqueVals.size;
+ }
+
+ // Per-episode, per-dimension activity: p95(|Δa|) >= 0.1% of motor range
+ const ACTIVITY_THRESHOLD = 0.001; // 0.1% of motor range
+ // activeMap[episodeIdx][dimIdx] = true if motor d is active in that episode
+ const activeMap: boolean[][] = episodeActions.map(({ actions: ep }) => {
+ const flags: boolean[] = new Array(actionDim);
+ for (let d = 0; d < actionDim; d++) {
+ if (ep.length < 2) {
+ flags[d] = false;
+ continue;
+ }
+ const absDeltas: number[] = [];
+ for (let t = 1; t < ep.length; t++) {
+ absDeltas.push(Math.abs((ep[t][d] ?? 0) - (ep[t - 1][d] ?? 0)));
+ }
+ absDeltas.sort((a, b) => a - b);
+ const p95 = absDeltas[Math.floor(absDeltas.length * 0.95)];
+ flags[d] = p95 >= motorRanges[d] * ACTIVITY_THRESHOLD;
+ }
+ return flags;
+ });
+ // A motor is globally inactive only if inactive in all episodes
+ const globallyActive: boolean[] = new Array(actionDim);
+ for (let d = 0; d < actionDim; d++) {
+ globallyActive[d] = activeMap.some((flags) => flags[d]);
+ }
+
+ // Aggregated velocity stats: pool deltas from all episodes, normalized by motor range
+ const shortName = (k: string) => {
+ const p = k.split(SERIES_NAME_DELIMITER);
+ return p.length > 1 ? p[p.length - 1] : k;
+ };
+
+ const aggVelocity: AggVelocityStat[] = (() => {
+ const binCount = 30;
+ const results: AggVelocityStat[] = [];
+ for (let d = 0; d < actionDim; d++) {
+ const motorRange = motorRanges[d];
+ const inactive = !globallyActive[d];
+ // Collect all deltas (unfiltered); used only as a fallback when no episode is active for this motor
+ const allDeltas: number[] = [];
+ // Collect only deltas from active episodes; preferred for both the stats and the histogram
+ const activeDeltas: number[] = [];
+ for (let ei = 0; ei < episodeActions.length; ei++) {
+ const ep = episodeActions[ei].actions;
+ for (let t = 1; t < ep.length; t++) {
+ const delta = (ep[t][d] ?? 0) - (ep[t - 1][d] ?? 0);
+ allDeltas.push(delta);
+ if (activeMap[ei][d]) activeDeltas.push(delta);
+ }
+ }
+ const deltas = activeDeltas.length > 0 ? activeDeltas : allDeltas;
+ const nUnique = motorUniqueCount[d];
+ const discrete = nUnique <= DISCRETE_THRESHOLD;
+ if (deltas.length === 0) {
+ results.push({
+ name: shortName(actionNames[d]),
+ std: 0,
+ maxAbs: 0,
+ bins: new Array(binCount).fill(0),
+ lo: 0,
+ hi: 0,
+ motorRange,
+ inactive,
+ discrete,
+ });
+ continue;
+ }
+ let sum = 0,
+ maxAbsRaw = 0,
+ loRaw = Infinity,
+ hiRaw = -Infinity;
+ for (const v of deltas) {
+ sum += v;
+ const a = Math.abs(v);
+ if (a > maxAbsRaw) maxAbsRaw = a;
+ if (v < loRaw) loRaw = v;
+ if (v > hiRaw) hiRaw = v;
+ }
+ const mean = sum / deltas.length;
+ let varSum = 0;
+ for (const v of deltas) varSum += (v - mean) ** 2;
+ const rawStd = Math.sqrt(varSum / deltas.length);
+ const std = rawStd / motorRange;
+ const maxAbs = maxAbsRaw / motorRange;
+ const lo = loRaw / motorRange;
+ const hi = hiRaw / motorRange;
+ const range = hi - lo || 1;
+ const binW = range / binCount;
+ const bins = new Array(binCount).fill(0);
+ for (const v of deltas) {
+ const normV = v / motorRange;
+ let b = Math.floor((normV - lo) / binW);
+ if (b >= binCount) b = binCount - 1;
+ bins[b]++;
+ }
+ results.push({
+ name: shortName(actionNames[d]),
+ std,
+ maxAbs,
+ bins,
+ lo,
+ hi,
+ motorRange,
+ inactive,
+ discrete,
+ });
+ }
+ return results;
+ })();
+
+ // Aggregated autocorrelation: average per-episode ACFs
+ const aggAutocorrelation: AggAutocorrelation | null = (() => {
+ const maxLag = Math.min(
+ 100,
+ Math.floor(
+ episodeActions.reduce(
+ (min, e) => Math.min(min, e.actions.length),
+ Infinity,
+ ) / 2,
+ ),
+ );
+ if (maxLag < 2) return null;
+
+ const avgAcf: number[][] = Array.from({ length: actionDim }, () =>
+ new Array(maxLag).fill(0),
+ );
+ let epCount = 0;
+
+ for (const { actions: ep } of episodeActions) {
+ if (ep.length < maxLag * 2) continue;
+ epCount++;
+ for (let d = 0; d < actionDim; d++) {
+ const vals = ep.map((row) => row[d] ?? 0);
+ const n = vals.length;
+ const m = vals.reduce((a, b) => a + b, 0) / n;
+ const centered = vals.map((v) => v - m);
+ const vari = centered.reduce((a, v) => a + v * v, 0);
+ if (vari === 0) continue;
+ for (let lag = 1; lag <= maxLag; lag++) {
+ let s = 0;
+ for (let t = 0; t < n - lag; t++)
+ s += centered[t] * centered[t + lag];
+ avgAcf[d][lag - 1] += s / vari;
+ }
+ }
+ }
+
+ if (epCount === 0) return null;
+ for (let d = 0; d < actionDim; d++)
+ for (let l = 0; l < maxLag; l++) avgAcf[d][l] /= epCount;
+
+ const shortKeys = actionNames.map(shortName);
+ const chartData = Array.from({ length: maxLag }, (_, lag) => {
+ const row: Record = {
+ lag: lag + 1,
+ time: (lag + 1) / fps,
+ };
+ shortKeys.forEach((k, d) => {
+ row[k] = avgAcf[d][lag];
+ });
+ return row;
+ });
+
+ // Suggested chunk: median (across dimensions) of the first lag where the averaged ACF drops below 0.5
+ const lags = avgAcf
+ .map((acf) => {
+ const i = acf.findIndex((v) => v < 0.5);
+ return i >= 0 ? i + 1 : null;
+ })
+ .filter(Boolean) as number[];
+ const suggestedChunk =
+ lags.length > 0
+ ? lags.sort((a, b) => a - b)[Math.floor(lags.length / 2)]
+ : null;
+
+ return { chartData, suggestedChunk, shortKeys };
+ })();
+
+ // Per-episode jerkiness: mean |Δa| across dimensions active in that episode, normalized by motor range
+ const jerkyEpisodes: JerkyEpisode[] = episodeActions
+ .map(({ index, actions: ep }, ei) => {
+ let sum = 0,
+ count = 0;
+ for (let t = 1; t < ep.length; t++) {
+ for (let d = 0; d < actionDim; d++) {
+ if (!activeMap[ei][d]) continue; // skip motors inactive in this episode
+ sum +=
+ Math.abs((ep[t][d] ?? 0) - (ep[t - 1][d] ?? 0)) / motorRanges[d];
+ count++;
+ }
+ }
+ return { episodeIndex: index, meanAbsDelta: count > 0 ? sum / count : 0 };
+ })
+ .sort((a, b) => b.meanAbsDelta - a.meanAbsDelta);
+
+ // Speed distribution: all episode movement scores (not just lowest 10)
+ const speedDistribution: SpeedDistEntry[] = movementScores.map((s) => ({
+ episodeIndex: s.episodeIndex,
+ speed: s.totalMovement,
+ }));
+
+ // Aggregated state-action alignment across episodes
+ const aggAlignment: AggAlignment | null = (() => {
+ if (!stateKey || stateDim === 0) return null;
+
+ let sNms: unknown = stateEntry![1].names;
+ while (typeof sNms === "object" && sNms !== null && !Array.isArray(sNms))
+ sNms = Object.values(sNms)[0];
+ const stateNames = Array.isArray(sNms)
+ ? (sNms as string[])
+ : Array.from({ length: stateDim }, (_, i) => `${i}`);
+ const actionSuffixes = actionNames.map((n) => {
+ const p = n.split(SERIES_NAME_DELIMITER);
+ return p[p.length - 1];
+ });
+
+ // Match pairs by suffix, fall back to index
+ const pairs: [number, number][] = [];
+ for (let ai = 0; ai < actionDim; ai++) {
+ const si = stateNames.findIndex((s) => s === actionSuffixes[ai]);
+ if (si >= 0) pairs.push([ai, si]);
+ }
+ if (pairs.length === 0) {
+ const count = Math.min(actionDim, stateDim);
+ for (let i = 0; i < count; i++) pairs.push([i, i]);
+ }
+ if (pairs.length === 0) return null;
+
+ const maxLag = 30;
+ const numLags = 2 * maxLag + 1;
+ const corrSums = pairs.map(() => new Float64Array(numLags));
+ const corrCounts = pairs.map(() => new Uint32Array(numLags));
+
+ for (let ei = 0; ei < episodeActions.length; ei++) {
+ const states = episodeStates[ei];
+ if (!states) continue;
+ const { actions } = episodeActions[ei];
+ const n = Math.min(actions.length, states.length);
+ if (n < 10) continue;
+
+ for (let pi = 0; pi < pairs.length; pi++) {
+ const [ai, si] = pairs[pi];
+ const aVals = actions.slice(0, n).map((r) => r[ai] ?? 0);
+ const sDeltas = Array.from(
+ { length: n - 1 },
+ (_, t) => (states[t + 1][si] ?? 0) - (states[t][si] ?? 0),
+ );
+ const effN = Math.min(aVals.length, sDeltas.length);
+ const aM = aVals.slice(0, effN).reduce((a, b) => a + b, 0) / effN;
+ const sM = sDeltas.slice(0, effN).reduce((a, b) => a + b, 0) / effN;
+
+ for (let li = 0; li < numLags; li++) {
+ const lag = -maxLag + li;
+ let sum = 0,
+ aV = 0,
+ sV = 0;
+ for (let t = 0; t < effN; t++) {
+ const sIdx = t + lag;
+ if (sIdx < 0 || sIdx >= sDeltas.length) continue;
+ const a = aVals[t] - aM,
+ s = sDeltas[sIdx] - sM;
+ sum += a * s;
+ aV += a * a;
+ sV += s * s;
+ }
+ const d = Math.sqrt(aV * sV);
+ if (d > 0) {
+ corrSums[pi][li] += sum / d;
+ corrCounts[pi][li]++;
+ }
+ }
+ }
+ }
+
+ const avgCorrs = pairs.map((_, pi) =>
+ Array.from({ length: numLags }, (_, li) =>
+ corrCounts[pi][li] > 0 ? corrSums[pi][li] / corrCounts[pi][li] : 0,
+ ),
+ );
+
+ const ccData = Array.from({ length: numLags }, (_, li) => {
+ const lag = -maxLag + li;
+ const vals = avgCorrs.map((pc) => pc[li]);
+ return {
+ lag,
+ max: Math.max(...vals),
+ mean: vals.reduce((a, b) => a + b, 0) / vals.length,
+ min: Math.min(...vals),
+ };
+ });
+
+ let meanPeakLag = 0,
+ meanPeakCorr = -Infinity;
+ let maxPeakLag = 0,
+ maxPeakCorr = -Infinity;
+ let minPeakLag = 0,
+ minPeakCorr = -Infinity;
+ for (const row of ccData) {
+ if (row.max > maxPeakCorr) {
+ maxPeakCorr = row.max;
+ maxPeakLag = row.lag;
+ }
+ if (row.mean > meanPeakCorr) {
+ meanPeakCorr = row.mean;
+ meanPeakLag = row.lag;
+ }
+ if (row.min > minPeakCorr) {
+ minPeakCorr = row.min;
+ minPeakLag = row.lag;
+ }
+ }
+
+ const perPairPeakLags = avgCorrs.map((pc) => {
+ let best = -Infinity,
+ bestLag = 0;
+ for (let li = 0; li < pc.length; li++) {
+ if (pc[li] > best) {
+ best = pc[li];
+ bestLag = -maxLag + li;
+ }
+ }
+ return bestLag;
+ });
+
+ return {
+ ccData,
+ meanPeakLag,
+ meanPeakCorr,
+ maxPeakLag,
+ maxPeakCorr,
+ minPeakLag,
+ minPeakCorr,
+ lagRangeMin: Math.min(...perPairPeakLags),
+ lagRangeMax: Math.max(...perPairPeakLags),
+ numPairs: pairs.length,
+ };
+ })();
+
+ return {
+ actionNames,
+ timeBins,
+ variance,
+ numEpisodes: episodeActions.length,
+ lowMovementEpisodes,
+ aggVelocity,
+ aggAutocorrelation,
+ speedDistribution,
+ jerkyEpisodes,
+ aggAlignment,
+ };
+}
+
+// Load only flatChartData for a specific episode (used by URDF viewer episode switching)
+export async function loadEpisodeFlatChartData(
+ repoId: string,
+ version: string,
+ info: DatasetMetadata,
+ episodeId: number,
+): Promise[]> {
+ const episodeMetadata = await loadEpisodeMetadataV3Simple(
+ repoId,
+ version,
+ episodeId,
+ );
+ const { flatChartData } = await loadEpisodeDataV3(
+ repoId,
+ version,
+ info,
+ episodeMetadata,
+ );
+ return flatChartData;
+}
+
// Safe wrapper for UI error display
export async function getEpisodeDataSafe(
org: string,
@@ -993,10 +2062,8 @@ export async function getEpisodeDataSafe(
try {
const data = await getEpisodeData(org, dataset, episodeId);
return { data };
- } catch (err) {
- // Only expose the error message, not stack or sensitive info
- const errorMessage =
- err instanceof Error ? err.message : String(err) || "Unknown error";
- return { error: errorMessage };
+ } catch (err: unknown) {
+ const message = err instanceof Error ? err.message : String(err);
+ return { error: message || "Unknown error" };
}
}
diff --git a/src/app/[org]/[dataset]/[episode]/page.tsx b/src/app/[org]/[dataset]/[episode]/page.tsx
index 4ae7324d93373b48442a8f8b71fe5f2d0c3459cb..1c631cf4d0dcca28fb44f706eebc7d53a3ed405d 100644
--- a/src/app/[org]/[dataset]/[episode]/page.tsx
+++ b/src/app/[org]/[dataset]/[episode]/page.tsx
@@ -27,7 +27,7 @@ export default async function EpisodePage({
const { data, error } = await getEpisodeDataSafe(org, dataset, episodeNumber);
return (
-
+
);
}
diff --git a/src/app/explore/page.tsx b/src/app/explore/page.tsx
index 064bbd2774d6c659246e46d7be246d1045f6feca..e4ce14e38dd03e4f8454c57a73fa7d5ea2764df1 100644
--- a/src/app/explore/page.tsx
+++ b/src/app/explore/page.tsx
@@ -2,7 +2,7 @@ import React from "react";
import ExploreGrid from "./explore-grid";
import { fetchJson, formatStringWithVars } from "@/utils/parquetUtils";
import { getDatasetVersion, buildVersionedUrl } from "@/utils/versionUtils";
-import type { DatasetMetadata } from "@/types";
+import type { DatasetMetadata } from "@/utils/parquetUtils";
export default async function ExplorePage({
searchParams,
@@ -10,7 +10,7 @@ export default async function ExplorePage({
searchParams: Promise<{ p?: string }>;
}) {
const params = await searchParams;
- let datasets: any[] = [];
+ let datasets: { id: string }[] = [];
let currentPage = 1;
let totalPages = 1;
try {
@@ -40,7 +40,7 @@ export default async function ExplorePage({
// Fetch episode 0 data for each dataset
const datasetWithVideos = (
await Promise.all(
- datasets.map(async (ds: any) => {
+ datasets.map(async (ds) => {
try {
const [org, dataset] = ds.id.split("/");
const repoId = `${org}/${dataset}`;
diff --git a/src/app/globals.css b/src/app/globals.css
index b64eea66be356bca19e6d5c16e455ec88e68cd92..094a8539628c18d32fa3da0b0b3e67832ee43923 100644
--- a/src/app/globals.css
+++ b/src/app/globals.css
@@ -19,6 +19,11 @@
}
}
+html {
+ /* Scale all rem-based sizes (text, padding, buttons) up ~12% */
+ font-size: 18px;
+}
+
body {
background: var(--background);
color: var(--foreground);
diff --git a/src/app/layout.tsx b/src/app/layout.tsx
index 740ed31a86dc527b8e61f9e728b3af9e89e3bb92..79b9bc96cd5e10da10417b89e505998e3493c1a4 100644
--- a/src/app/layout.tsx
+++ b/src/app/layout.tsx
@@ -5,8 +5,8 @@ import "./globals.css";
const inter = Inter({ subsets: ["latin"] });
export const metadata: Metadata = {
- title: "LeRobot Dataset Visualizer",
- description: "Visualization of LeRobot Datasets",
+ title: "LeRobot Dataset Tool and Visualizer",
+ description: "Tool and Visualizer for LeRobot Datasets",
};
export default function RootLayout({
diff --git a/src/app/page.tsx b/src/app/page.tsx
index 00f92b63c5ecd76bb3472d8a097c89ba03b22a67..840eb9fad71887e49c355747b48fc8a3fba6b974 100644
--- a/src/app/page.tsx
+++ b/src/app/page.tsx
@@ -4,6 +4,18 @@ import Link from "next/link";
import { useRouter } from "next/navigation";
import { useSearchParams } from "next/navigation";
+declare global {
+ interface Window {
+ YT?: {
+ Player: new (
+ id: string,
+ config: Record,
+ ) => { destroy?: () => void };
+ };
+ onYouTubeIframeAPIReady?: () => void;
+ }
+}
+
export default function Home() {
return (
@@ -53,18 +65,19 @@ function HomeInner() {
}
}, [searchParams, router]);
- const playerRef = useRef(null);
+ const playerRef = useRef<{ destroy?: () => void } | null>(null);
useEffect(() => {
// Load YouTube IFrame API if not already present
- if (!(window as any).YT) {
+ if (!window.YT) {
const tag = document.createElement("script");
tag.src = "https://www.youtube.com/iframe_api";
document.body.appendChild(tag);
}
let interval: NodeJS.Timeout;
- (window as any).onYouTubeIframeAPIReady = () => {
- playerRef.current = new (window as any).YT.Player("yt-bg-player", {
+ window.onYouTubeIframeAPIReady = () => {
+ if (!window.YT) return;
+ playerRef.current = new window.YT.Player("yt-bg-player", {
videoId: "Er8SPJsIYr0",
playerVars: {
autoplay: 1,
@@ -79,7 +92,14 @@ function HomeInner() {
start: 0,
},
events: {
- onReady: (event: any) => {
+ onReady: (event: {
+ target: {
+ playVideo: () => void;
+ mute: () => void;
+ seekTo: (t: number) => void;
+ getCurrentTime: () => number;
+ };
+ }) => {
event.target.playVideo();
event.target.mute();
interval = setInterval(() => {
@@ -101,7 +121,7 @@ function HomeInner() {
const inputRef = useRef(null);
- const handleGo = (e: React.FormEvent) => {
+ const handleGo = (e: { preventDefault: () => void }) => {
e.preventDefault();
const value = inputRef.current?.value.trim();
if (value) {
@@ -120,16 +140,8 @@ function HomeInner() {
{/* Centered Content */}