Spaces:

lerobot
/

visualize_dataset

Running on CPU Upgrade

App Files Files Community

aractingi committed on Sep 4, 2025

Commit

aa2bc0d

1 Parent(s): 5fe6175

Refactor episode data loading to support v3.0 structure and enhance video player segmentation features. Added debug logging for version checks and data loading processes (will remove later).

Browse files

Files changed (5) hide show

src/app/[org]/[dataset]/[episode]/episode-viewer.tsx +0 -8
src/app/[org]/[dataset]/[episode]/fetch-data.ts +880 -192
src/components/videos-player.tsx +86 -12
src/utils/parquetUtils.ts +13 -3
src/utils/versionUtils.ts +47 -5

src/app/[org]/[dataset]/[episode]/episode-viewer.tsx CHANGED Viewed

@@ -214,14 +214,6 @@ function EpisodeViewerInner({ data }: { data: any }) {
             onChartsReady={() => setChartsReady(true)}
           />
-          {ignoredColumns.length > 0 && (
-            <p className="mt-2 text-orange-700">
-              Columns{" "}
-              <span className="font-mono">{ignoredColumns.join(", ")}</span> are
-              NOT shown since the visualizer currently does not support 2D or 3D
-              data.
-            </p>
-          )}
         </div>
         <PlaybackBar />

             onChartsReady={() => setChartsReady(true)}
           />
         </div>
         <PlaybackBar />

src/app/[org]/[dataset]/[episode]/fetch-data.ts CHANGED Viewed

@@ -20,253 +20,941 @@ export async function getEpisodeData(
 ) {
   const repoId = `${org}/${dataset}`;
   try {
-    const episode_chunk = Math.floor(0 / 1000);
-    // Check for compatible dataset version (v2.1 or v2.0)
     const version = await getDatasetVersion(repoId);
     const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
     const info = await fetchJson<DatasetMetadata>(jsonUrl);
-    // Dataset information
-    const datasetInfo = {
-      repoId,
-      total_frames: info.total_frames,
-      total_episodes: info.total_episodes,
-      fps: info.fps,
-    };
-    // Generate list of episodes
-    const episodes =
-      process.env.EPISODES === undefined
-        ? Array.from(
-            { length: datasetInfo.total_episodes },
-            // episode id starts from 0
-            (_, i) => i,
-          )
-        : process.env.EPISODES
-            .split(/\s+/)
-            .map((x) => parseInt(x.trim(), 10))
-            .filter((x) => !isNaN(x));
-    // Videos information
-    const videosInfo = Object.entries(info.features)
-      .filter(([key, value]) => value.dtype === "video")
-      .map(([key, _]) => {
-        const videoPath = formatStringWithVars(info.video_path, {
-          video_key: key,
-          episode_chunk: episode_chunk.toString().padStart(3, "0"),
-          episode_index: episodeId.toString().padStart(6, "0"),
-        });
-        return {
-          filename: key,
-          url: buildVersionedUrl(repoId, version, videoPath),
-        };
-      });
-    // Column data
-    const columnNames = Object.entries(info.features)
-      .filter(
-        ([key, value]) =>
-          ["float32", "int32"].includes(value.dtype) &&
-          value.shape.length === 1,
-      )
-      .map(([key, { shape }]) => ({ key, length: shape[0] }));
-    // Exclude specific columns
-    const excludedColumns = [
-      "timestamp",
-      "frame_index",
-      "episode_index",
-      "index",
-      "task_index",
-    ];
-    const filteredColumns = columnNames.filter(
-      (column) => !excludedColumns.includes(column.key),
-    );
-    const filteredColumnNames = [
-      "timestamp",
-      ...filteredColumns.map((column) => column.key),
-    ];
-    const columns = filteredColumns.map(({ key }) => {
-      let column_names = info.features[key].names;
-      while (typeof column_names === "object") {
-        if (Array.isArray(column_names)) break;
-        column_names = Object.values(column_names ?? {})[0];
-      }
       return {
-        key,
-        value: Array.isArray(column_names)
-          ? column_names.map((name) => `${key}${SERIES_NAME_DELIMITER}${name}`)
-          : Array.from(
-              { length: columnNames.find((c) => c.key === key)?.length ?? 1 },
-              (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
-            ),
       };
     });
-    const parquetUrl = buildVersionedUrl(
-      repoId,
-      version,
-      formatStringWithVars(info.data_path, {
-        episode_chunk: episode_chunk.toString().padStart(3, "0"),
-        episode_index: episodeId.toString().padStart(6, "0"),
-      })
-    );
-    const arrayBuffer = await fetchParquetFile(parquetUrl);
-    const data = await readParquetColumn(arrayBuffer, filteredColumnNames);
-    // Flatten and map to array of objects for chartData
-    const seriesNames = [
       "timestamp",
-      ...columns.map(({ value }) => value).flat(),
     ];
-    const chartData = data.map((row) => {
-      const flatRow = row.flat();
-      const obj: Record<string, number> = {};
-      seriesNames.forEach((key, idx) => {
-        obj[key] = flatRow[idx];
       });
-      return obj;
-    });
-    // List of columns that are ignored (e.g., 2D or 3D data)
-    const ignoredColumns = Object.entries(info.features)
       .filter(
         ([key, value]) =>
-          ["float32", "int32"].includes(value.dtype) && value.shape.length > 1,
       )
-      .map(([key]) => key);
-    // 1. Group all numeric keys by suffix (excluding 'timestamp')
-    const numericKeys = seriesNames.filter((k) => k !== "timestamp");
-    const suffixGroupsMap: Record<string, string[]> = {};
     for (const key of numericKeys) {
-      const parts = key.split(SERIES_NAME_DELIMITER);
-      const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
-      if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
-      suffixGroupsMap[suffix].push(key);
-    }
-    const suffixGroups = Object.values(suffixGroupsMap);
-    // 2. Compute min/max for each suffix group as a whole
-    const groupStats: Record<string, { min: number; max: number }> = {};
-    suffixGroups.forEach((group) => {
-      let min = Infinity,
-        max = -Infinity;
-      for (const row of chartData) {
-        for (const key of group) {
-          const v = row[key];
-          if (typeof v === "number" && !isNaN(v)) {
-            if (v < min) min = v;
-            if (v > max) max = v;
-          }
         }
       }
-      // Use the first key in the group as the group id
-      groupStats[group[0]] = { min, max };
-    });
-    // 3. Group suffix groups by similar scale (treat each suffix group as a unit)
-    const scaleGroups: Record<string, string[][]> = {};
-    const used = new Set<string>();
-    const SCALE_THRESHOLD = 2;
-    for (const group of suffixGroups) {
-      const groupId = group[0];
-      if (used.has(groupId)) continue;
-      const { min, max } = groupStats[groupId];
-      if (!isFinite(min) || !isFinite(max)) continue;
-      const logMin = Math.log10(Math.abs(min) + 1e-9);
-      const logMax = Math.log10(Math.abs(max) + 1e-9);
-      const unit: string[][] = [group];
-      used.add(groupId);
-      for (const other of suffixGroups) {
-        const otherId = other[0];
-        if (used.has(otherId) || otherId === groupId) continue;
-        const { min: omin, max: omax } = groupStats[otherId];
-        if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
-        const ologMin = Math.log10(Math.abs(omin) + 1e-9);
-        const ologMax = Math.log10(Math.abs(omax) + 1e-9);
-        if (
-          Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
-          Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
-        ) {
-          unit.push(other);
-          used.add(otherId);
         }
       }
-      scaleGroups[groupId] = unit;
     }
-    // 4. Flatten scaleGroups into chartGroups (array of arrays of keys)
-    const chartGroups: string[][] = Object.values(scaleGroups)
-      .sort((a, b) => b.length - a.length)
-      .flatMap((suffixGroupArr) => {
-        // suffixGroupArr is array of suffix groups (each is array of keys)
-        const merged = suffixGroupArr.flat();
-        if (merged.length > 6) {
-          const subgroups = [];
-          for (let i = 0; i < merged.length; i += 6) {
-            subgroups.push(merged.slice(i, i + 6));
-          }
-          return subgroups;
-        }
-        return [merged];
-      });
-    const duration = chartData[chartData.length - 1].timestamp;
-    // Utility: group row keys by suffix
-    function groupRowBySuffix(row: Record<string, number>): Record<string, any> {
-      const result: Record<string, any> = {};
-      const suffixGroups: Record<string, Record<string, number>> = {};
       for (const [key, value] of Object.entries(row)) {
         if (key === "timestamp") {
           result["timestamp"] = value;
           continue;
         }
-        const parts = key.split(SERIES_NAME_DELIMITER);
-        if (parts.length === 2) {
-          const [prefix, suffix] = parts;
-          if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
-          suffixGroups[suffix][prefix] = value;
         } else {
           result[key] = value;
         }
       }
-      for (const [suffix, group] of Object.entries(suffixGroups)) {
         const keys = Object.keys(group);
         if (keys.length === 1) {
-          // Use the full original name as the key
-          const fullName = `${keys[0]}${SERIES_NAME_DELIMITER}${suffix}`;
-          result[fullName] = group[keys[0]];
         } else {
           result[suffix] = group;
         }
       }
-      return result;
     }
-    const chartDataGroups = chartGroups.map((group) =>
-      chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"])))
-    );
     return {
-      datasetInfo,
-      episodeId,
-      videosInfo,
-      chartDataGroups,
-      episodes,
-      ignoredColumns,
-      duration,
     };
-  } catch (err) {
-    console.error("Error loading episode data:", err);
-    throw err;
   }
 }
 // Safe wrapper for UI error display
 export async function getEpisodeDataSafe(

 ) {
   const repoId = `${org}/${dataset}`;
   try {
+    // Check for compatible dataset version (v3.0, v2.1, or v2.0)
     const version = await getDatasetVersion(repoId);
+    console.log(`[DEBUG] Detected dataset version: ${version} for ${repoId}`);
     const jsonUrl = buildVersionedUrl(repoId, version, "meta/info.json");
     const info = await fetchJson<DatasetMetadata>(jsonUrl);
+    // Handle different versions
+    if (version === "v3.0") {
+      console.log(`[DEBUG] Using v3.0 data loader for ${repoId}`);
+      return await getEpisodeDataV3(repoId, version, info, episodeId);
+    } else {
+      console.log(`[DEBUG] Using v2.x data loader for ${repoId} (version: ${version})`);
+      return await getEpisodeDataV2(repoId, version, info, episodeId);
+    }
+  } catch (err) {
+    console.error("Error loading episode data:", err);
+    throw err;
+  }
+}
/**
 * Legacy loader for v2.x datasets.
 *
 * Builds everything the episode viewer needs for one episode: a dataset
 * summary, the list of selectable episodes, one video URL per video feature,
 * and the 1-D numeric series read from the episode's parquet file, split into
 * chart groups of similarly scaled values.
 *
 * @param repoId - Hub repository id (`${org}/${dataset}`).
 * @param version - Dataset version string forwarded to `buildVersionedUrl`.
 * @param info - Parsed `meta/info.json` of the dataset.
 * @param episodeId - Index of the episode to load (episode ids start at 0).
 * @returns The viewer payload. The returned promise rejects if the parquet
 *   file cannot be fetched or read.
 */
async function getEpisodeDataV2(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeId: number,
) {
  // Episode files live in chunk directories; derive the chunk from the episode
  // id so that episodes past the first chunk resolve to their own directory.
  // NOTE(review): 1000 is the LeRobot v2.x default chunk size — confirm against
  // `chunks_size` in info.json if DatasetMetadata exposes it.
  const EPISODES_PER_CHUNK = 1000;
  const episode_chunk = Math.floor(episodeId / EPISODES_PER_CHUNK);
  const chunkLabel = episode_chunk.toString().padStart(3, "0");
  const episodeLabel = episodeId.toString().padStart(6, "0");

  // Dataset information
  const datasetInfo = {
    repoId,
    total_frames: info.total_frames,
    total_episodes: info.total_episodes,
    fps: info.fps,
  };

  // Episode list: every episode by default, or the whitespace-separated ids
  // given in the EPISODES environment variable (non-numeric tokens are dropped).
  const episodes =
    process.env.EPISODES === undefined
      ? Array.from({ length: datasetInfo.total_episodes }, (_, i) => i)
      : process.env.EPISODES
          .split(/\s+/)
          .map((x) => parseInt(x.trim(), 10))
          .filter((x) => !isNaN(x));

  // One entry per feature whose dtype is "video".
  const videosInfo = Object.entries(info.features)
    .filter(([, value]) => value.dtype === "video")
    .map(([key]) => {
      const videoPath = formatStringWithVars(info.video_path, {
        video_key: key,
        episode_chunk: chunkLabel,
        episode_index: episodeLabel,
      });
      return {
        filename: key,
        url: buildVersionedUrl(repoId, version, videoPath),
      };
    });

  // Only 1-D float32/int32 features can be charted.
  const columnNames = Object.entries(info.features)
    .filter(
      ([, value]) =>
        ["float32", "int32"].includes(value.dtype) &&
        value.shape.length === 1,
    )
    .map(([key, { shape }]) => ({ key, length: shape[0] }));

  // Bookkeeping columns that are never plotted ("timestamp" is re-added as the x axis).
  const excludedColumns = [
    "timestamp",
    "frame_index",
    "episode_index",
    "index",
    "task_index",
  ];
  const filteredColumns = columnNames.filter(
    (column) => !excludedColumns.includes(column.key),
  );
  const filteredColumnNames = [
    "timestamp",
    ...filteredColumns.map((column) => column.key),
  ];

  // Expand each column into one series name per element. `names` may be
  // nested objects; descend through the first value until an array (or a
  // non-object) is reached, and fall back to positional names otherwise.
  const columns = filteredColumns.map(({ key }) => {
    let column_names = info.features[key].names;
    while (typeof column_names === "object") {
      if (Array.isArray(column_names)) break;
      column_names = Object.values(column_names ?? {})[0];
    }
    return {
      key,
      value: Array.isArray(column_names)
        ? column_names.map((name) => `${key}${SERIES_NAME_DELIMITER}${name}`)
        : Array.from(
            { length: columnNames.find((c) => c.key === key)?.length ?? 1 },
            (_, i) => `${key}${SERIES_NAME_DELIMITER}${i}`,
          ),
    };
  });

  const parquetUrl = buildVersionedUrl(
    repoId,
    version,
    formatStringWithVars(info.data_path, {
      episode_chunk: chunkLabel,
      episode_index: episodeLabel,
    }),
  );
  const arrayBuffer = await fetchParquetFile(parquetUrl);
  const data = await readParquetColumn(arrayBuffer, filteredColumnNames);

  // Flatten each row and map it positionally onto the series names.
  const seriesNames = [
    "timestamp",
    ...columns.map(({ value }) => value).flat(),
  ];
  const chartData = data.map((row) => {
    const flatRow = row.flat();
    const obj: Record<string, number> = {};
    seriesNames.forEach((key, idx) => {
      obj[key] = flatRow[idx];
    });
    return obj;
  });

  // Numeric columns with more than one dimension are reported, not plotted.
  const ignoredColumns = Object.entries(info.features)
    .filter(
      ([, value]) =>
        ["float32", "int32"].includes(value.dtype) && value.shape.length > 1,
    )
    .map(([key]) => key);

  // 1. Group all numeric keys by suffix (excluding 'timestamp')
  const numericKeys = seriesNames.filter((k) => k !== "timestamp");
  const suffixGroupsMap: Record<string, string[]> = {};
  for (const key of numericKeys) {
    const parts = key.split(SERIES_NAME_DELIMITER);
    const suffix = parts[1] || parts[0]; // fallback to key if no delimiter
    if (!suffixGroupsMap[suffix]) suffixGroupsMap[suffix] = [];
    suffixGroupsMap[suffix].push(key);
  }
  const suffixGroups = Object.values(suffixGroupsMap);

  // 2. Compute min/max for each suffix group as a whole; the first key of the
  //    group serves as the group id.
  const groupStats: Record<string, { min: number; max: number }> = {};
  suffixGroups.forEach((group) => {
    let min = Infinity,
      max = -Infinity;
    for (const row of chartData) {
      for (const key of group) {
        const v = row[key];
        if (typeof v === "number" && !isNaN(v)) {
          if (v < min) min = v;
          if (v > max) max = v;
        }
      }
    }
    groupStats[group[0]] = { min, max };
  });

  // 3. Merge suffix groups whose min and max are within SCALE_THRESHOLD
  //    orders of magnitude of each other (each suffix group moves as a unit).
  const scaleGroups: Record<string, string[][]> = {};
  const used = new Set<string>();
  const SCALE_THRESHOLD = 2;
  for (const group of suffixGroups) {
    const groupId = group[0];
    if (used.has(groupId)) continue;
    const { min, max } = groupStats[groupId];
    if (!isFinite(min) || !isFinite(max)) continue;
    const logMin = Math.log10(Math.abs(min) + 1e-9);
    const logMax = Math.log10(Math.abs(max) + 1e-9);
    const unit: string[][] = [group];
    used.add(groupId);
    for (const other of suffixGroups) {
      const otherId = other[0];
      if (used.has(otherId) || otherId === groupId) continue;
      const { min: omin, max: omax } = groupStats[otherId];
      if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
      const ologMin = Math.log10(Math.abs(omin) + 1e-9);
      const ologMax = Math.log10(Math.abs(omax) + 1e-9);
      if (
        Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
        Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
      ) {
        unit.push(other);
        used.add(otherId);
      }
    }
    scaleGroups[groupId] = unit;
  }

  // 4. Flatten scaleGroups into chartGroups (at most 6 series per chart).
  const chartGroups: string[][] = Object.values(scaleGroups)
    .sort((a, b) => b.length - a.length)
    .flatMap((suffixGroupArr) => {
      const merged = suffixGroupArr.flat();
      if (merged.length > 6) {
        const subgroups: string[][] = [];
        for (let i = 0; i < merged.length; i += 6) {
          subgroups.push(merged.slice(i, i + 6));
        }
        return subgroups;
      }
      return [merged];
    });

  // An empty parquet file yields no rows; report a zero duration instead of
  // dereferencing a missing last row.
  const duration =
    chartData.length > 0 ? chartData[chartData.length - 1].timestamp : 0;

  // Utility: group row keys by suffix. Series that share a suffix are nested
  // as { suffix: { prefix: value } }; a suffix with a single prefix keeps its
  // full original name.
  function groupRowBySuffix(row: Record<string, number>): Record<string, any> {
    const result: Record<string, any> = {};
    const suffixGroups: Record<string, Record<string, number>> = {};
    for (const [key, value] of Object.entries(row)) {
      if (key === "timestamp") {
        result["timestamp"] = value;
        continue;
      }
      const parts = key.split(SERIES_NAME_DELIMITER);
      if (parts.length === 2) {
        const [prefix, suffix] = parts;
        if (!suffixGroups[suffix]) suffixGroups[suffix] = {};
        suffixGroups[suffix][prefix] = value;
      } else {
        result[key] = value;
      }
    }
    for (const [suffix, group] of Object.entries(suffixGroups)) {
      const keys = Object.keys(group);
      if (keys.length === 1) {
        const fullName = `${keys[0]}${SERIES_NAME_DELIMITER}${suffix}`;
        result[fullName] = group[keys[0]];
      } else {
        result[suffix] = group;
      }
    }
    return result;
  }

  const chartDataGroups = chartGroups.map((group) =>
    chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"]))),
  );

  return {
    datasetInfo,
    episodeId,
    videosInfo,
    chartDataGroups,
    episodes,
    ignoredColumns,
    duration,
  };
}
/**
 * Loader for v3.0 datasets, with per-episode video segmentation.
 *
 * Fetches the metadata of the requested episode, derives the video entries
 * from it via `extractVideoInfoV3WithSegmentation`, and loads the chart data
 * via `loadEpisodeDataV3`. The returned object has the same keys as the v2.x
 * loader's result.
 *
 * @param repoId - Hub repository id (`${org}/${dataset}`).
 * @param version - Dataset version string forwarded to the helpers.
 * @param info - Parsed `meta/info.json` of the dataset.
 * @param episodeId - Index of the episode to load.
 * @returns The viewer payload; `duration` is the length of the episode's
 *   video segment (`video_to_timestamp - video_from_timestamp`).
 */
async function getEpisodeDataV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeId: number,
) {
  console.log(`[DEBUG] Loading v3.0 episode data for ${repoId}, episode ${episodeId}`);

  // Dataset summary, same shape as the v2.x loader produces.
  const datasetInfo = {
    repoId,
    total_frames: info.total_frames,
    total_episodes: info.total_episodes,
    fps: info.fps,
  };

  // Episode list: every episode by default, or the whitespace-separated ids
  // given in the EPISODES environment variable — the same rule the v2.x
  // loader applies, so the filter behaves identically for both formats.
  const episodes =
    process.env.EPISODES === undefined
      ? Array.from({ length: info.total_episodes }, (_, i) => i)
      : process.env.EPISODES
          .split(/\s+/)
          .map((x) => parseInt(x.trim(), 10))
          .filter((x) => !isNaN(x));
  console.log(`[DEBUG] Available episodes: ${episodes.length}`);

  // Metadata of the requested episode (video segment timestamps, data file location).
  const episodeMetadata = await loadEpisodeMetadataV3Simple(repoId, version, episodeId);

  // Video entries restricted to this episode's segment.
  const videosInfo = extractVideoInfoV3WithSegmentation(repoId, version, info, episodeMetadata);

  // Chart series for this episode.
  const { chartDataGroups, ignoredColumns } = await loadEpisodeDataV3(
    repoId,
    version,
    info,
    episodeMetadata,
  );

  return {
    datasetInfo,
    episodeId,
    videosInfo,
    chartDataGroups,
    episodes,
    ignoredColumns,
    // Actual episode duration taken from the video segment boundaries.
    duration: episodeMetadata.video_to_timestamp - episodeMetadata.video_from_timestamp,
  };
}
/**
 * Loads the chart data of one v3.0 episode.
 *
 * Downloads the parquet file addressed by the episode's chunk and file
 * indices, slices the rows between `dataset_from_index` and
 * `dataset_to_index`, and hands them to `processEpisodeDataForCharts`.
 * Any failure while fetching, reading or processing is logged and results in
 * empty chart data rather than a rejection.
 *
 * NOTE(review): the slice applies the dataset_* indices directly to the rows
 * of the loaded file — confirm this holds for files other than the first one.
 *
 * @param repoId - Hub repository id.
 * @param version - Dataset version string forwarded to `buildVersionedUrl`.
 * @param info - Parsed `meta/info.json` of the dataset.
 * @param episodeMetadata - Metadata record of the episode to load.
 * @returns Chart groups and the list of ignored columns (both empty on failure
 *   or when the slice contains no rows).
 */
async function loadEpisodeDataV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeMetadata: any,
): Promise<{ chartDataGroups: any[]; ignoredColumns: string[] }> {
  const pad3 = (value: { toString(): string }): string =>
    value.toString().padStart(3, "0");

  console.log(`[DEBUG] Loading v3.0 data for episode ${episodeMetadata.episode_index}`);

  // Locate the parquet file from the chunk/file indices (missing indices mean 0).
  const chunkIdx = episodeMetadata.data_chunk_index || 0;
  const fileIdx = episodeMetadata.data_file_index || 0;
  const dataPath = `data/chunk-${pad3(chunkIdx)}/file-${pad3(fileIdx)}.parquet`;

  console.log(`[DEBUG] Loading data from: ${dataPath}`);
  console.log(`[DEBUG] Data range: ${episodeMetadata.dataset_from_index} to ${episodeMetadata.dataset_to_index}`);

  try {
    const buffer = await fetchParquetFile(buildVersionedUrl(repoId, version, dataPath));
    const fullData = await readParquetColumn(buffer, []);

    console.log(`[DEBUG] Loaded ${fullData.length} total data rows`);

    // The indices may arrive as BigInt; Number() normalises both cases.
    const fromIndex = Number(episodeMetadata.dataset_from_index || 0);
    const toIndex = Number(episodeMetadata.dataset_to_index || fullData.length);

    console.log(`[DEBUG] Converting indices: ${episodeMetadata.dataset_from_index} → ${fromIndex}, ${episodeMetadata.dataset_to_index} → ${toIndex}`);

    const episodeData = fullData.slice(fromIndex, toIndex);

    console.log(`[DEBUG] Episode data slice: ${episodeData.length} rows (${fromIndex} to ${toIndex})`);

    if (episodeData.length > 0) {
      // Same output format as the v2.x loader, so the chart code is shared.
      const processed = processEpisodeDataForCharts(episodeData, info, episodeMetadata);
      return {
        chartDataGroups: processed.chartDataGroups,
        ignoredColumns: processed.ignoredColumns,
      };
    }

    console.log(`[DEBUG] No data found for episode ${episodeMetadata.episode_index}`);
    return { chartDataGroups: [], ignoredColumns: [] };
  } catch (error) {
    // Best effort: the viewer still renders videos when chart data is unavailable.
    console.error(`[DEBUG] Failed to load episode data:`, error);
    return { chartDataGroups: [], ignoredColumns: [] };
  }
}
/**
 * Converts the rows of one v3.0 episode into the chart payload of the viewer.
 *
 * Steps:
 *  1. Detect the series from the first row (array values become one series
 *     per element, named `feature[i]`).
 *  2. Build one flat record per row with a `timestamp` spread evenly over the
 *     episode's video segment.
 *  3. Group series by name, merge groups of similar scale, and split the
 *     result into charts of at most 6 series.
 *
 * Rows may be keyed by feature name or by column position; positional keys
 * are translated through a fixed position-to-name table.
 * NOTE(review): that table is hard-coded — verify it against the column order
 * of the datasets being displayed.
 *
 * @param episodeData - Rows belonging to the episode.
 * @param info - Parsed `meta/info.json` of the dataset.
 * @param episodeMetadata - Optional episode metadata; when given, its
 *   `video_from_timestamp` / `video_to_timestamp` define the time axis.
 * @returns `chartDataGroups` (one array of row records per chart) and
 *   `ignoredColumns` (numeric features with more than 2 dimensions plus the
 *   bookkeeping columns that are never charted).
 */
function processEpisodeDataForCharts(
  episodeData: any[],
  info: DatasetMetadata,
  episodeMetadata?: any,
): { chartDataGroups: any[]; ignoredColumns: string[] } {
  // Trailing element index of a flattened array series, e.g. "[3]".
  const ARRAY_INDEX_SUFFIX = /\[\d+\]$/;

  // 1-D numeric features; only used to name series when there are no rows.
  const columnNames = Object.entries(info.features)
    .filter(
      ([, value]) =>
        ["float32", "int32"].includes(value.dtype) &&
        value.shape.length === 1,
    )
    .map(([key, value]) => ({ key, value }));

  // Position-to-name table for rows keyed by column position.
  const v3IndexToFeatureMap: Record<string, string> = {
    "0": "observation.state",
    "1": "action",
    "2": "timestamp",
    "3": "episode_index",
    "4": "frame_index",
    "5": "next.reward",
    "6": "next.done",
    "7": "index",
    "8": "task_index",
  };

  // Columns to exclude from charts
  const excludedColumns = ["index", "task_index", "episode_index", "frame_index"];

  // Resolves a raw row key to its feature name, or null when the column must
  // not become a series. The timestamp test runs on the resolved name so that
  // a positional timestamp column cannot overwrite the computed time axis.
  const resolveFeatureName = (rawKey: string): string | null => {
    const featureName = v3IndexToFeatureMap[rawKey] || rawKey;
    if (featureName === "timestamp" || excludedColumns.includes(featureName)) {
      return null;
    }
    return featureName;
  };

  let seriesNames: string[];
  if (episodeData.length > 0) {
    // Inspect the first row to learn which series exist.
    const firstRow = episodeData[0];
    const allKeys: string[] = [];
    Object.entries(firstRow || {}).forEach(([key, value]) => {
      const featureName = resolveFeatureName(key);
      if (featureName === null) return;
      if (Array.isArray(value) && value.length > 0) {
        // One series per array element.
        value.forEach((_, idx) => {
          allKeys.push(`${featureName}[${idx}]`);
        });
      } else if (typeof value === "number" && !isNaN(value)) {
        allKeys.push(featureName);
      } else if (typeof value === "bigint") {
        allKeys.push(featureName);
      }
    });
    seriesNames = ["timestamp", ...allKeys];
    console.log(`[DEBUG] Detected series:`, allKeys);
    console.log(`[DEBUG] First row sample:`, firstRow);
  } else {
    // No rows to inspect: fall back to the declared features.
    seriesNames = ["timestamp", ...columnNames.map(({ key }) => key)];
  }

  // Length of the time axis: the video segment when metadata is available
  // (with 30 / 0 standing in for missing bounds), otherwise the row count.
  const videoDuration = episodeMetadata
    ? (episodeMetadata.video_to_timestamp || 30) -
      (episodeMetadata.video_from_timestamp || 0)
    : episodeData.length;
  const lastRowIndex = Math.max(episodeData.length - 1, 1);

  const chartData = episodeData.map((row, index) => {
    const obj: Record<string, number> = {};
    // Spread the rows evenly over the video duration.
    obj["timestamp"] = (index / lastRowIndex) * videoDuration;

    if (index === 0) {
      console.log(`[DEBUG] Data row keys:`, Object.keys(row || {}));
      console.log(`[DEBUG] Available features:`, Object.keys(info.features));
    }

    if (row && typeof row === "object") {
      Object.entries(row).forEach(([key, value]) => {
        const featureName = resolveFeatureName(key);
        if (featureName === null) return;
        if (Array.isArray(value)) {
          value.forEach((val, idx) => {
            obj[`${featureName}[${idx}]`] =
              typeof val === "number" ? val : Number(val);
          });
        } else if (typeof value === "number" && !isNaN(value)) {
          obj[featureName] = value;
        } else if (typeof value === "bigint") {
          obj[featureName] = Number(value);
        } else if (typeof value === "boolean") {
          // Booleans are stored as 0/1.
          obj[featureName] = value ? 1 : 0;
        }
      });
    }
    return obj;
  });

  // 2-D data is flattened above, so only 3-D+ numeric features are ignored,
  // together with the manually excluded bookkeeping columns.
  const ignoredColumns = [
    ...Object.entries(info.features)
      .filter(
        ([, value]) =>
          ["float32", "int32"].includes(value.dtype) && value.shape.length > 2,
      )
      .map(([key]) => key),
    ...excludedColumns,
  ];

  const numericKeys = seriesNames.filter((k) => k !== "timestamp");
  const suffixGroupsMap: Record<string, string[]> = {};

  // A dotted scalar series (e.g. "next.reward") switches grouping to the
  // part after the first dot; otherwise series group by their base name.
  const hasHierarchicalKeys = numericKeys.some(
    (key) => key.includes(".") && !key.includes("["),
  );
  for (const key of numericKeys) {
    const cleanKey = key.replace(ARRAY_INDEX_SUFFIX, "");
    let groupKey: string;
    if (hasHierarchicalKeys) {
      const parts = cleanKey.split(".");
      // Keys without a dot form their own group under the full key.
      groupKey = parts.length >= 2 ? parts.slice(1).join(".") : key;
    } else {
      groupKey = cleanKey;
    }
    if (!suffixGroupsMap[groupKey]) {
      suffixGroupsMap[groupKey] = [];
    }
    suffixGroupsMap[groupKey].push(key);
  }
  const suffixGroups = Object.values(suffixGroupsMap);
  console.log(`[DEBUG] Created suffix groups:`, suffixGroupsMap);

  // Min/max per group; the first key of a group serves as the group id.
  const groupStats: Record<string, { min: number; max: number }> = {};
  suffixGroups.forEach((group) => {
    let min = Infinity,
      max = -Infinity;
    for (const row of chartData) {
      for (const key of group) {
        const v = row[key];
        if (typeof v === "number" && !isNaN(v)) {
          if (v < min) min = v;
          if (v > max) max = v;
        }
      }
    }
    groupStats[group[0]] = { min, max };
  });

  // Merge groups whose min and max lie within SCALE_THRESHOLD orders of magnitude.
  const scaleGroups: Record<string, string[][]> = {};
  const used = new Set<string>();
  const SCALE_THRESHOLD = 2;
  for (const group of suffixGroups) {
    const groupId = group[0];
    if (used.has(groupId)) continue;
    const { min, max } = groupStats[groupId];
    if (!isFinite(min) || !isFinite(max)) continue;
    const logMin = Math.log10(Math.abs(min) + 1e-9);
    const logMax = Math.log10(Math.abs(max) + 1e-9);
    const unit: string[][] = [group];
    used.add(groupId);
    for (const other of suffixGroups) {
      const otherId = other[0];
      if (used.has(otherId) || otherId === groupId) continue;
      const { min: omin, max: omax } = groupStats[otherId];
      if (!isFinite(omin) || !isFinite(omax) || omin === omax) continue;
      const ologMin = Math.log10(Math.abs(omin) + 1e-9);
      const ologMax = Math.log10(Math.abs(omax) + 1e-9);
      if (
        Math.abs(logMin - ologMin) <= SCALE_THRESHOLD &&
        Math.abs(logMax - ologMax) <= SCALE_THRESHOLD
      ) {
        unit.push(other);
        used.add(otherId);
      }
    }
    scaleGroups[groupId] = unit;
  }

  // Flatten into chartGroups (at most 6 series per chart).
  const chartGroups: string[][] = Object.values(scaleGroups)
    .sort((a, b) => b.length - a.length)
    .flatMap((suffixGroupArr) => {
      const merged = suffixGroupArr.flat();
      if (merged.length > 6) {
        const subgroups: string[][] = [];
        for (let i = 0; i < merged.length; i += 6) {
          subgroups.push(merged.slice(i, i + 6));
        }
        return subgroups;
      }
      return [merged];
    });

  // Groups the series of one row by suffix. Series sharing a suffix across
  // several prefixes are nested as { suffix: { prefix: value } }; a suffix
  // with a single prefix keeps its full original name. The array index stays
  // part of the suffix so that elements of one array never overwrite each other.
  function groupRowBySuffix(row: Record<string, number>): Record<string, any> {
    const result: Record<string, any> = {};
    const rowIsHierarchical = Object.keys(row).some(
      (key) => key.includes(".") && !key.includes("[") && key !== "timestamp",
    );
    if (!rowIsHierarchical) {
      // Nothing to regroup: pass every entry through.
      for (const [key, value] of Object.entries(row)) {
        result[key] = value;
      }
      return result;
    }

    const prefixGroups: Record<string, Record<string, number>> = {};
    for (const [key, value] of Object.entries(row)) {
      if (key === "timestamp") {
        result["timestamp"] = value;
        continue;
      }
      const indexSuffix = key.match(ARRAY_INDEX_SUFFIX)?.[0] ?? "";
      const cleanKey = indexSuffix ? key.slice(0, -indexSuffix.length) : key;
      const parts = cleanKey.split(".");
      if (parts.length >= 2) {
        const prefix = parts[0];
        const suffix = parts.slice(1).join(".") + indexSuffix;
        if (!prefixGroups[suffix]) {
          prefixGroups[suffix] = {};
        }
        prefixGroups[suffix][prefix] = value;
      } else {
        // Non-hierarchical keys go directly to the result.
        result[key] = value;
      }
    }
    for (const [suffix, group] of Object.entries(prefixGroups)) {
      const keys = Object.keys(group);
      if (keys.length === 1) {
        result[`${keys[0]}.${suffix}`] = group[keys[0]];
      } else {
        result[suffix] = group;
      }
    }
    return result;
  }

  const chartDataGroups = chartGroups.map((group) =>
    chartData.map((row) => groupRowBySuffix(pick(row, [...group, "timestamp"]))),
  );

  console.log(`[DEBUG] Generated ${chartDataGroups.length} chart groups`);
  console.log(`[DEBUG] Chart groups structure:`, chartGroups);
  if (chartDataGroups.length > 0 && chartDataGroups[0].length > 0) {
    console.log(`[DEBUG] Sample chart data:`, chartDataGroups[0][0]);
  }

  return { chartDataGroups, ignoredColumns };
}
/**
 * Simplified video info extraction for v3.0.
 *
 * Produces one descriptor per feature in `info.features` whose `dtype` is
 * "video". Every descriptor points at the fixed path
 * `videos/<feature key>/chunk-000/file-000.mp4` and is flagged as not
 * segmented, so the whole file is shown.
 *
 * @param repoId  Dataset repository id, forwarded to `buildVersionedUrl`.
 * @param version Dataset version string, forwarded to `buildVersionedUrl`.
 * @param info    Dataset metadata; only `info.features` is read.
 * @returns Array of `{ filename, url, isSegmented: false }` objects.
 */
function extractSimpleVideoInfoV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
): any[] {
  return Object.entries(info.features)
    .filter((entry) => entry[1].dtype === "video")
    .map((entry) => {
      const cameraKey = entry[0];
      return {
        filename: cameraKey,
        // Always the first chunk/file in this simplified variant.
        url: buildVersionedUrl(
          repoId,
          version,
          `videos/${cameraKey}/chunk-000/file-000.mp4`,
        ),
        isSegmented: false,
      };
    });
}
/**
 * Video info extraction with segmentation for v3.0.
 *
 * Produces one descriptor per feature in `info.features` whose `dtype` is
 * "video". All descriptors share the same chunk/file indices and timestamps,
 * which are read from `episodeMetadata`:
 *   - `video_chunk_index` / `video_file_index` (falsy values become 0)
 *   - `video_from_timestamp` (falsy values become 0)
 *   - `video_to_timestamp` (falsy values become 30)
 *
 * @param repoId          Dataset repository id, forwarded to `buildVersionedUrl`.
 * @param version         Dataset version string, forwarded to `buildVersionedUrl`.
 * @param info            Dataset metadata; only `info.features` is read.
 * @param episodeMetadata Parsed episode row providing the fields listed above.
 * @returns Array of `{ filename, url, isSegmented: true, segmentStart,
 *          segmentEnd, segmentDuration }` objects.
 */
function extractVideoInfoV3WithSegmentation(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeMetadata: any,
): any[] {
  // Renders an index as a zero-padded, three-character path component.
  const zeroPad3 = (index: any): string => index.toString().padStart(3, "0");

  const videoKeys = Object.entries(info.features)
    .filter((entry) => entry[1].dtype === "video")
    .map((entry) => entry[0]);

  const videosInfo: any[] = [];
  for (const videoKey of videoKeys) {
    const chunkIndex = episodeMetadata.video_chunk_index || 0;
    const fileIndex = episodeMetadata.video_file_index || 0;
    const fullUrl = buildVersionedUrl(
      repoId,
      version,
      `videos/${videoKey}/chunk-${zeroPad3(chunkIndex)}/file-${zeroPad3(fileIndex)}.mp4`,
    );

    console.log(`[DEBUG] Video URL for ${videoKey}: ${fullUrl}`);
    console.log(`[DEBUG] Chunk index: ${chunkIndex}, File index: ${fileIndex}`);
    console.log(`[DEBUG] Timestamps: ${episodeMetadata.video_from_timestamp} to ${episodeMetadata.video_to_timestamp}`);

    // The segment bounds come from the episode metadata, with defaults for falsy values.
    const segmentStart = episodeMetadata.video_from_timestamp || 0;
    const segmentEnd = episodeMetadata.video_to_timestamp || 30;

    videosInfo.push({
      filename: videoKey,
      url: fullUrl,
      isSegmented: true,
      segmentStart,
      segmentEnd,
      segmentDuration: segmentEnd - segmentStart,
    });
  }

  console.log(`[DEBUG] Created segmented video info:`, videosInfo);
  return videosInfo;
}
/**
 * Metadata loading for v3.0 episodes.
 *
 * Fetches `meta/episodes/chunk-000/file-000.parquet`, reads all of its
 * columns and returns the parsed row describing `episodeId`.
 *
 * Lookup order:
 *   1. the first row whose parsed `episode_index` equals `episodeId`;
 *   2. otherwise the row stored at position `episodeId`, if that is a valid
 *      position in the file;
 *   3. otherwise an error is thrown.
 *
 * @param repoId    Dataset repository id, forwarded to `buildVersionedUrl`.
 * @param version   Dataset version string, forwarded to `buildVersionedUrl`.
 * @param episodeId Episode index to look up.
 * @returns The result of `parseEpisodeRowSimple` for the selected row.
 * @throws Error when the file contains no rows or the episode cannot be
 *         located; fetch/parse failures are logged and rethrown unchanged.
 */
async function loadEpisodeMetadataV3Simple(
  repoId: string,
  version: string,
  episodeId: number,
): Promise<any> {
  console.log(`[DEBUG] Loading v3.0 metadata for episode ${episodeId}`);
  const episodesMetadataUrl = buildVersionedUrl(
    repoId,
    version,
    "meta/episodes/chunk-000/file-000.parquet"
  );
  try {
    const arrayBuffer = await fetchParquetFile(episodesMetadataUrl);
    const episodesData = await readParquetColumn(arrayBuffer, []);
    console.log(`[DEBUG] Loaded ${episodesData.length} episode rows`);
    if (episodesData.length === 0) {
      throw new Error("No episode metadata found");
    }

    // Find the row for the requested episode. The parsed index is coerced
    // with Number() because a bigint index (e.g. 5n) is never strictly equal
    // to the numeric episodeId, which previously made every lookup for an
    // episode other than 0 fall through to the positional fallback.
    // NOTE(review): assumes the parquet reader may yield bigint for integer
    // columns - confirm against the reader configuration.
    const matchIndex = episodesData.findIndex(
      (row) => Number(parseEpisodeRowSimple(row, false).episode_index) === episodeId,
    );

    let episodeRow: any;
    if (matchIndex !== -1) {
      episodeRow = episodesData[matchIndex];
      console.log(`[DEBUG] Found episode ${episodeId} at row ${matchIndex}`);
    } else if (
      Number.isInteger(episodeId) &&
      episodeId >= 0 &&
      episodeId < episodesData.length
    ) {
      // Fallback: use the row at position episodeId. Only valid positions are
      // accepted so that a negative or fractional id raises an error instead
      // of yielding fabricated default metadata.
      episodeRow = episodesData[episodeId];
      console.log(`[DEBUG] Using fallback row ${episodeId} for episode ${episodeId}`);
    } else {
      throw new Error(`Episode ${episodeId} not found in metadata`);
    }

    // Convert the row to a usable format (logging enabled for the final parse).
    return parseEpisodeRowSimple(episodeRow, true);
  } catch (error) {
    console.error(`Failed to load episode metadata:`, error);
    throw error;
  }
}
/**
 * Simple parser for an episode metadata row.
 *
 * Reads the first ten positional columns of `row` (keys '0' through '9',
 * which also covers plain arrays) and maps them onto named fields. The
 * column-to-field mapping is a best guess and may need adjusting to the
 * actual data.
 *
 * Every field is normalised to a finite `number`: bigint values are
 * converted, numeric strings are parsed, and anything missing or non-numeric
 * is replaced by the field's default (0, or 30 for `video_to_timestamp` and
 * `length`). A legitimate 0 is kept as 0.
 * NOTE(review): bigint handling assumes the parquet reader can surface
 * integer columns as bigint - confirm against the reader in use.
 *
 * @param row           Raw row; any non-object value yields the all-defaults result.
 * @param enableLogging When true (default), debug information is printed.
 * @returns Object with `episode_index`, `data_chunk_index`, `data_file_index`,
 *          `dataset_from_index`, `dataset_to_index`, `video_chunk_index`,
 *          `video_file_index`, `video_from_timestamp`, `video_to_timestamp`
 *          and `length`.
 */
function parseEpisodeRowSimple(row: any, enableLogging: boolean = true): any {
  // Coerces a raw cell to a finite number, or returns `fallback` when the
  // cell is absent or not numeric.
  const toNumber = (value: unknown, fallback: number): number => {
    if (typeof value === "bigint") {
      return Number(value);
    }
    if (typeof value === "number") {
      return Number.isFinite(value) ? value : fallback;
    }
    if (typeof value === "string" && value.trim() !== "") {
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : fallback;
    }
    return fallback;
  };

  if (enableLogging) {
    console.log(`[DEBUG] Parsing episode row with keys:`, Object.keys(row || {}));
    console.log(`[DEBUG] Row type:`, typeof row);
  }

  // `typeof null` is 'object', hence the additional truthiness check.
  if (row && typeof row === 'object') {
    const episodeData = {
      episode_index: toNumber(row['0'], 0), // First column likely episode index
      data_chunk_index: toNumber(row['1'], 0), // Data chunk index
      data_file_index: toNumber(row['2'], 0), // Data file index
      dataset_from_index: toNumber(row['3'], 0), // Dataset start index
      dataset_to_index: toNumber(row['4'], 0), // Dataset end index
      video_chunk_index: toNumber(row['5'], 0), // Video chunk index
      video_file_index: toNumber(row['6'], 0), // Video file index
      video_from_timestamp: toNumber(row['7'], 0), // Video from timestamp
      video_to_timestamp: toNumber(row['8'], 30), // Video to timestamp
      length: toNumber(row['9'], 30), // Episode length
    };
    if (enableLogging) {
      console.log(`[DEBUG] Raw row values:`);
      console.log(`  Row['0'] (episode_index): ${row['0']}`);
      console.log(`  Row['1'] (data_chunk_index): ${row['1']}`);
      console.log(`  Row['2'] (data_file_index): ${row['2']}`);
      console.log(`  Row['3'] (dataset_from_index): ${row['3']}`);
      console.log(`  Row['4'] (dataset_to_index): ${row['4']}`);
      console.log(`  Row['5'] (video_chunk_index): ${row['5']}`);
      console.log(`  Row['6'] (video_file_index): ${row['6']}`);
      console.log(`  Row['7'] (video_from_timestamp): ${row['7']}`);
      console.log(`  Row['8'] (video_to_timestamp): ${row['8']}`);
      console.log(`  Row['9'] (length): ${row['9']}`);
      console.log(`[DEBUG] Parsed episode data:`, episodeData);
    }
    return episodeData;
  }

  // Fallback if the row is not an object at all.
  const fallback = {
    episode_index: 0,
    data_chunk_index: 0,
    data_file_index: 0,
    dataset_from_index: 0,
    dataset_to_index: 0,
    video_chunk_index: 0,
    video_file_index: 0,
    video_from_timestamp: 0,
    video_to_timestamp: 30,
    length: 30,
  };
  if (enableLogging) {
    console.log(`[DEBUG] Using fallback episode data:`, fallback);
  }
  return fallback;
}
/**
 * Parse an episode metadata row into a structured object.
 *
 * Placeholder implementation: when `row` is an array, its first ten positions
 * are mapped onto named fields in a fixed order; the mapping is approximate
 * and may need adjusting to the real column layout. Any other value is
 * returned untouched.
 *
 * @param row Raw row, either a positional array or an already-keyed object.
 * @returns A keyed object for array input, otherwise `row` itself.
 */
function parseEpisodeRow(row: any): any {
  // Already-keyed rows (or anything that is not an array) pass straight through.
  if (!Array.isArray(row)) {
    return row;
  }

  // Field names listed in the positional order of the row's columns.
  const columnOrder = [
    "episode_index",
    "data_chunk_index",
    "data_file_index",
    "dataset_from_index",
    "dataset_to_index",
    "video_chunk_index",
    "video_file_index",
    "video_from_timestamp",
    "video_to_timestamp",
    "length",
  ];

  const structured: Record<string, any> = {};
  columnOrder.forEach((fieldName, position) => {
    structured[fieldName] = row[position];
  });
  return structured;
}
/**
 * Extract video information for the v3.0 format.
 *
 * Produces one descriptor per feature in `info.features` whose `dtype` is
 * "video". For each such feature the chunk index, file index and segment
 * timestamps are looked up in `episodeMetadata` under the keys
 * `videos/<key>/chunk_index`, `videos/<key>/file_index`,
 * `videos/<key>/from_timestamp` and `videos/<key>/to_timestamp`; a falsy
 * value is treated as 0.
 *
 * @param repoId          Dataset repository id, forwarded to `buildVersionedUrl`.
 * @param version         Dataset version string, forwarded to `buildVersionedUrl`.
 * @param info            Dataset metadata; only `info.features` is read.
 * @param episodeMetadata Per-episode metadata keyed as described above.
 * @returns Promise of `{ filename, url, isSegmented: true, segmentStart,
 *          segmentEnd, segmentDuration }` objects. Nothing is awaited
 *          inside; the promise resolves with the mapped array.
 */
async function extractVideoInfoV3(
  repoId: string,
  version: string,
  info: DatasetMetadata,
  episodeMetadata: any,
): Promise<any[]> {
  // Renders an index as a zero-padded, three-character path component.
  const zeroPad3 = (index: any): string => index.toString().padStart(3, "0");

  const videoKeys = Object.entries(info.features)
    .filter((entry) => entry[1].dtype === "video")
    .map((entry) => entry[0]);

  return videoKeys.map((videoKey) => {
    // Per-camera metadata lookups; falsy values collapse to 0.
    const chunkIndex = episodeMetadata[`videos/${videoKey}/chunk_index`] || 0;
    const fileIndex = episodeMetadata[`videos/${videoKey}/file_index`] || 0;
    const segmentStart = episodeMetadata[`videos/${videoKey}/from_timestamp`] || 0;
    const segmentEnd = episodeMetadata[`videos/${videoKey}/to_timestamp`] || 0;

    // Path layout: videos/<camera key>/chunk-XXX/file-XXX.mp4
    const relativePath = `videos/${videoKey}/chunk-${zeroPad3(chunkIndex)}/file-${zeroPad3(fileIndex)}.mp4`;

    return {
      filename: videoKey,
      url: buildVersionedUrl(repoId, version, relativePath),
      isSegmented: true,
      segmentStart,
      segmentEnd,
      segmentDuration: segmentEnd - segmentStart,
    };
  });
}
+// DISABLED: Complex episode data loading for simplified v3.0 implementation
+/*
+async function loadEpisodeDataV3(
+  episodeMetadata: any,
+): Promise<{ chartDataGroups: any[]; ignoredColumns: string[]; duration: number }> {
+  // Complex data loading disabled for simplified implementation
+  throw new Error("Complex data loading disabled in simplified v3.0 implementation");
 }
+*/
 // Safe wrapper for UI error display
 export async function getEpisodeDataSafe(

src/components/videos-player.tsx CHANGED Viewed

@@ -7,6 +7,10 @@ import { FaExpand, FaCompress, FaTimes, FaEye } from "react-icons/fa";
 type VideoInfo = {
   filename: string;
   url: string;
 };
 type VideoPlayerProps = {
@@ -142,27 +146,87 @@ export const VideosPlayer = ({
     }
   }, [hiddenVideos, showHiddenMenu, enlargedVideo]);
-  // Sync video times
   useEffect(() => {
-    videoRefs.current.forEach((video) => {
       if (video && Math.abs(video.currentTime - currentTime) > 0.2) {
-        video.currentTime = currentTime;
       }
     });
-  }, [currentTime]);
   // Handle time update
   const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
     const video = e.target as HTMLVideoElement;
     if (video && video.duration) {
-      setCurrentTime(video.currentTime);
     }
   };
-  // Handle video ready
   useEffect(() => {
     let videosReadyCount = 0;
-    const onCanPlayThrough = () => {
       videosReadyCount += 1;
       if (videosReadyCount === videosInfo.length) {
         if (typeof onVideosReady === "function") {
@@ -172,13 +236,15 @@ export const VideosPlayer = ({
       }
     };
-    videoRefs.current.forEach((video) => {
       if (video) {
         // If already ready, call the handler immediately
         if (video.readyState >= 4) {
-          onCanPlayThrough();
         } else {
-          video.addEventListener("canplaythrough", onCanPlayThrough);
         }
       }
     });
@@ -186,11 +252,18 @@ export const VideosPlayer = ({
     return () => {
       videoRefs.current.forEach((video) => {
         if (video) {
-          video.removeEventListener("canplaythrough", onCanPlayThrough);
         }
       });
     };
-  }, []);
   return (
     <>
@@ -323,6 +396,7 @@ export const VideosPlayer = ({
                 }}
                 muted
                 loop
                 className={`w-full object-contain ${isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""}`}
                 onTimeUpdate={
                   idx === firstVisibleIdx ? handleTimeUpdate : undefined

 type VideoInfo = {
   filename: string;
   url: string;
+  isSegmented?: boolean;
+  segmentStart?: number;
+  segmentEnd?: number;
+  segmentDuration?: number;
 };
 type VideoPlayerProps = {
     }
   }, [hiddenVideos, showHiddenMenu, enlargedVideo]);
+  // Sync video times (with segment awareness)
   useEffect(() => {
+    videoRefs.current.forEach((video, index) => {
       if (video && Math.abs(video.currentTime - currentTime) > 0.2) {
+        const videoInfo = videosInfo[index];
+        if (videoInfo?.isSegmented) {
+          // For segmented videos, map the global time to segment time
+          const segmentStart = videoInfo.segmentStart || 0;
+          const segmentDuration = videoInfo.segmentDuration || 0;
+          if (segmentDuration > 0) {
+            // Map currentTime (0 to segmentDuration) to video time (segmentStart to segmentEnd)
+            const segmentTime = segmentStart + currentTime;
+            video.currentTime = segmentTime;
+          }
+        } else {
+          // For non-segmented videos, use direct time mapping
+          video.currentTime = currentTime;
+        }
       }
     });
+  }, [currentTime, videosInfo]);
   // Handle time update
   const handleTimeUpdate = (e: React.SyntheticEvent<HTMLVideoElement>) => {
     const video = e.target as HTMLVideoElement;
     if (video && video.duration) {
+      // Find the video info for this video element
+      const videoIndex = videoRefs.current.findIndex(ref => ref === video);
+      const videoInfo = videosInfo[videoIndex];
+      if (videoInfo?.isSegmented) {
+        // For segmented videos, map the video time back to global time (0 to segmentDuration)
+        const segmentStart = videoInfo.segmentStart || 0;
+        const globalTime = Math.max(0, video.currentTime - segmentStart);
+        setCurrentTime(globalTime);
+      } else {
+        // For non-segmented videos, use direct time mapping
+        setCurrentTime(video.currentTime);
+      }
     }
   };
+  // Handle video ready and setup segmentation
   useEffect(() => {
     let videosReadyCount = 0;
+    const onCanPlayThrough = (videoIndex: number) => {
+      const video = videoRefs.current[videoIndex];
+      const videoInfo = videosInfo[videoIndex];
+      // Setup video segmentation for v3.0 chunked videos
+      if (video && videoInfo?.isSegmented) {
+        const segmentStart = videoInfo.segmentStart || 0;
+        const segmentEnd = videoInfo.segmentEnd || video.duration || 0;
+        console.log(`[VIDEO DEBUG] Setting up segmentation for ${videoInfo.filename}: ${segmentStart}s to ${segmentEnd}s`);
+        // Set initial time to segment start if not already set
+        if (video.currentTime < segmentStart || video.currentTime > segmentEnd) {
+          video.currentTime = segmentStart;
+        }
+        // Add event listener to handle segment boundaries
+        const handleTimeUpdate = () => {
+          if (video.currentTime > segmentEnd) {
+            video.currentTime = segmentStart;
+            if (!video.loop) {
+              video.pause();
+            }
+          }
+        };
+        video.addEventListener('timeupdate', handleTimeUpdate);
+        // Store cleanup function
+        (video as any)._segmentCleanup = () => {
+          video.removeEventListener('timeupdate', handleTimeUpdate);
+        };
+      }
       videosReadyCount += 1;
       if (videosReadyCount === videosInfo.length) {
         if (typeof onVideosReady === "function") {
       }
     };
+    videoRefs.current.forEach((video, index) => {
       if (video) {
         // If already ready, call the handler immediately
         if (video.readyState >= 4) {
+          onCanPlayThrough(index);
         } else {
+          const readyHandler = () => onCanPlayThrough(index);
+          video.addEventListener("canplaythrough", readyHandler);
+          (video as any)._readyHandler = readyHandler;
         }
       }
     });
     return () => {
       videoRefs.current.forEach((video) => {
         if (video) {
+          // Remove ready handler
+          if ((video as any)._readyHandler) {
+            video.removeEventListener("canplaythrough", (video as any)._readyHandler);
+          }
+          // Remove segment handler
+          if ((video as any)._segmentCleanup) {
+            (video as any)._segmentCleanup();
+          }
         }
       });
     };
+  }, [videosInfo, onVideosReady, setIsPlaying]);
   return (
     <>
                 }}
                 muted
                 loop
+                preload="auto"
                 className={`w-full object-contain ${isEnlarged ? "max-h-[90vh] max-w-[90vw]" : ""}`}
                 onTimeUpdate={
                   idx === firstVisibleIdx ? handleTimeUpdate : undefined

src/utils/parquetUtils.ts CHANGED Viewed

@@ -52,11 +52,21 @@ export async function readParquetColumn(
   fileBuffer: ArrayBuffer,
   columns: string[],
 ): Promise<any[]> {
-  return new Promise((resolve) => {
     parquetRead({
       file: fileBuffer,
-      columns,
-      onComplete: (data: any[]) => resolve(data),
     });
   });
 }

   fileBuffer: ArrayBuffer,
   columns: string[],
 ): Promise<any[]> {
+  return new Promise((resolve, reject) => {
     parquetRead({
       file: fileBuffer,
+      columns: columns.length > 0 ? columns : undefined, // Let hyparquet read all columns if empty array
+      onComplete: (data: any[]) => {
+        console.log('[DEBUG] Parquet read completed, data length:', data.length);
+        if (data.length > 0) {
+          console.log('[DEBUG] First data row structure:', typeof data[0], Object.keys(data[0] || {}));
+        }
+        resolve(data);
+      },
+      onError: (error: any) => {
+        console.error('[DEBUG] Parquet read error:', error);
+        reject(error);
+      }
     });
   });
 }

src/utils/versionUtils.ts CHANGED Viewed

@@ -41,27 +41,69 @@ async function checkVersionExists(repoId: string, version: string): Promise<bool
   }
 }
 /**
  * Determines the best available version for a dataset.
  * Prefers v3.0, falls back to v2.1, then v2.0, or throws an error if none exist.
  */
 export async function getDatasetVersion(repoId: string): Promise<string> {
-  // Check for v3.0 first
-  if (await checkVersionExists(repoId, "v3.0")) {
-    return "v3.0";
   }
   // Check for v2.1
-  if (await checkVersionExists(repoId, "v2.1")) {
     return "v2.1";
   }
   // Fall back to v2.0
-  if (await checkVersionExists(repoId, "v2.0")) {
     return "v2.0";
   }
   // If none of the supported versions exist, throw an error
   throw new Error(
     `Dataset ${repoId} is not compatible with this visualizer. ` +
     "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +

   }
 }
/**
 * Checks if a dataset has v3.0 chunked structure.
 *
 * Issues a HEAD request (with caching disabled) for
 * `meta/episodes/chunk-000/file-000.parquet` under the dataset's `v3.0`
 * revision. The request is aborted after 10 seconds.
 *
 * @param repoId Dataset repository id used to build the probe URL.
 * @returns `true` when the response status is OK; `false` for a non-OK
 *          status or when the request fails or is aborted.
 */
async function checkV3ChunkedStructure(repoId: string): Promise<boolean> {
  const testUrl = `${DATASET_URL}/${repoId}/resolve/v3.0/meta/episodes/chunk-000/file-000.parquet`;
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 10000);
  try {
    const response = await fetch(testUrl, {
      method: "HEAD",
      cache: "no-store",
      signal: controller.signal
    });
    return response.ok;
  } catch {
    // Any failure (network error, abort) is treated as "structure absent".
    return false;
  } finally {
    // Always release the timer, including when fetch rejects; otherwise it
    // stays pending until the 10 s timeout elapses.
    clearTimeout(timeoutId);
  }
}
 /**
  * Determines the best available version for a dataset.
  * Prefers v3.0, falls back to v2.1, then v2.0, or throws an error if none exist.
  */
 export async function getDatasetVersion(repoId: string): Promise<string> {
+  console.log(`[VERSION DEBUG] Checking versions for ${repoId}`);
+  // Check for v3.0 first - must have both info.json AND chunked episode structure
+  const hasV3Info = await checkVersionExists(repoId, "v3.0");
+  console.log(`[VERSION DEBUG] v3.0 info.json exists: ${hasV3Info}`);
+  if (hasV3Info) {
+    const hasV3Structure = await checkV3ChunkedStructure(repoId);
+    console.log(`[VERSION DEBUG] v3.0 chunked structure exists: ${hasV3Structure}`);
+    if (hasV3Structure) {
+      console.log(`[VERSION DEBUG] Using v3.0 for ${repoId}`);
+      return "v3.0";
+    }
   }
   // Check for v2.1
+  const hasV21 = await checkVersionExists(repoId, "v2.1");
+  console.log(`[VERSION DEBUG] v2.1 exists: ${hasV21}`);
+  if (hasV21) {
+    console.log(`[VERSION DEBUG] Using v2.1 for ${repoId}`);
     return "v2.1";
   }
   // Fall back to v2.0
+  const hasV20 = await checkVersionExists(repoId, "v2.0");
+  console.log(`[VERSION DEBUG] v2.0 exists: ${hasV20}`);
+  if (hasV20) {
+    console.log(`[VERSION DEBUG] Using v2.0 for ${repoId}`);
     return "v2.0";
   }
   // If none of the supported versions exist, throw an error
+  console.log(`[VERSION DEBUG] No compatible versions found for ${repoId}`);
   throw new Error(
     `Dataset ${repoId} is not compatible with this visualizer. ` +
     "This tool only works with dataset versions 3.0, 2.1, or 2.0. " +