import {
  asyncBufferFromUrl,
  cachedAsyncBuffer,
  parquetRead,
  parquetReadObjects,
  type AsyncBuffer,
} from "hyparquet";

/**
 * Dataset-level metadata record.
 *
 * NOTE(review): the type arguments of the `Record` fields (`splits`,
 * `features[*].names`, `features[*].info`) were missing from the source this
 * was restored from; the arguments below are the widest reasonable guess and
 * should be confirmed against the real metadata schema.
 */
export interface DatasetMetadata {
  codebase_version: string;
  robot_type: string;
  total_episodes: number;
  total_frames: number;
  total_tasks: number;
  total_videos: number;
  total_chunks: number;
  chunks_size: number;
  fps: number;
  splits: Record<string, string>;
  // presumably path templates suitable for formatStringWithVars — TODO confirm
  data_path: string;
  video_path: string;
  features: Record<
    string,
    {
      dtype: string;
      shape: number[];
      names: string[] | Record<string, unknown> | null;
      info?: Record<string, unknown>;
    }
  >;
}

/**
 * Fetches `url` (bypassing the HTTP cache) and parses the response as JSON.
 *
 * The result is cast to `T` without any runtime validation; callers are
 * responsible for checking the shape if the source is untrusted.
 *
 * @param url - Address of the JSON document.
 * @returns The parsed body, typed as `T`.
 * @throws Error when the response status is not OK.
 */
export async function fetchJson<T>(url: string): Promise<T> {
  const res = await fetch(url, { cache: "no-store" });
  if (!res.ok) {
    throw new Error(
      `Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`,
    );
  }
  return res.json() as Promise<T>;
}

/**
 * Substitutes `{name}` and `{name:03d}`-style placeholders in `format` with
 * the matching entries of `vars`.
 *
 * A zero-flagged integer width (for example `:03d`) is honoured: values that
 * look like integers are left-padded with zeros to that width, with the sign
 * kept in front. Values that are already at least that wide (e.g. strings the
 * caller padded beforehand) are returned unchanged. Width specs without a
 * leading zero are accepted but not padded.
 *
 * A placeholder whose key is absent from `vars` is replaced by the text
 * `"undefined"`, because the value is stringified with `String(...)`.
 *
 * @param format - Template containing `{key}` / `{key:NNd}` placeholders.
 * @param vars - Values to substitute, looked up by placeholder name.
 * @returns The template with every placeholder replaced.
 */
export function formatStringWithVars(
  format: string,
  vars: Record<string, unknown>,
): string {
  return format.replace(
    /{(\w+)(?::(\d+)d)?}/g,
    (_match: string, key: string, widthSpec: string | undefined) => {
      const text = String(vars[key]);
      const wantsZeroPad = widthSpec !== undefined && widthSpec.startsWith("0");
      // Only integer-looking values are padded; anything else passes through.
      if (!wantsZeroPad || !/^-?\d+$/.test(text)) {
        return text;
      }
      const width = Number.parseInt(widthSpec, 10);
      return text.startsWith("-")
        ? `-${text.slice(1).padStart(width - 1, "0")}`
        : text.padStart(width, "0");
    },
  );
}

type ParquetFile = ArrayBuffer | AsyncBuffer;

// Resolved buffers, keyed by URL.
const parquetFileCache = new Map<string, ParquetFile>();
// Requests still being opened, keyed by URL, so that concurrent callers share
// one request instead of each opening their own buffer.
const pendingParquetFiles = new Map<string, Promise<ParquetFile>>();

/**
 * Opens the Parquet file at `url` as a cached async buffer.
 *
 * Results are memoised per URL for the lifetime of the module. Calls made
 * while an earlier call for the same URL is still in flight share that
 * request. A failed request is not cached, so a later call retries.
 *
 * @param url - Address of the Parquet file.
 * @returns A buffer that can be passed to the read helpers in this module.
 */
export async function fetchParquetFile(url: string): Promise<ParquetFile> {
  const cached = parquetFileCache.get(url);
  if (cached) return cached;

  const inFlight = pendingParquetFiles.get(url);
  if (inFlight) return inFlight;

  const pending: Promise<ParquetFile> = asyncBufferFromUrl({
    url,
    requestInit: { cache: "no-store" },
  }).then((file) => {
    const wrapped = cachedAsyncBuffer(file);
    parquetFileCache.set(url, wrapped);
    return wrapped;
  });
  pendingParquetFiles.set(url, pending);

  try {
    return await pending;
  } finally {
    // Success is now served from parquetFileCache; failure must not stick.
    pendingParquetFiles.delete(url);
  }
}

/**
 * Reads the requested columns from a Parquet file as an array of rows.
 *
 * @param fileBuffer - File to read from.
 * @param columns - Column names to read; an empty array reads every column.
 * @param options - Optional `rowStart` / `rowEnd` forwarded to `parquetRead`.
 * @returns The rows handed to `parquetRead`'s `onComplete` callback.
 * @throws Rejects with whatever error `parquetRead` reports.
 */
export async function readParquetColumn(
  fileBuffer: ParquetFile,
  columns: string[],
  options?: { rowStart?: number; rowEnd?: number },
): Promise<unknown[][]> {
  return new Promise<unknown[][]>((resolve, reject) => {
    // A synchronous throw inside the executor rejects automatically; the
    // explicit .catch forwards asynchronous failures, which would otherwise
    // leave this promise pending forever.
    parquetRead({
      file: fileBuffer,
      columns: columns.length > 0 ? columns : undefined,
      rowStart: options?.rowStart,
      rowEnd: options?.rowEnd,
      onComplete: (data: unknown[][]) => {
        resolve(data);
      },
    }).catch(reject);
  });
}

/**
 * Reads rows from a Parquet file as objects keyed by column name.
 *
 * @param fileBuffer - File to read from.
 * @param columns - Column names to read; empty (the default) reads every column.
 * @param options - Optional `rowStart` / `rowEnd` forwarded to `parquetReadObjects`.
 * @returns One object per row.
 */
export async function readParquetAsObjects(
  fileBuffer: ParquetFile,
  columns: string[] = [],
  options?: { rowStart?: number; rowEnd?: number },
): Promise<Record<string, unknown>[]> {
  return parquetReadObjects({
    file: fileBuffer,
    columns: columns.length > 0 ? columns : undefined,
    rowStart: options?.rowStart,
    rowEnd: options?.rowEnd,
  }) as Promise<Record<string, unknown>[]>;
}

// Renders one CSV field, quoting it when it contains a delimiter, quote or line break.
function escapeCsvField(field: unknown): string {
  // null/undefined become empty, matching Array.prototype.join.
  const text = field == null ? "" : String(field);
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}

/**
 * Converts a 2D array to a CSV string: fields joined by commas, rows joined
 * by `\n`.
 *
 * Fields containing a comma, double quote, or line break are wrapped in
 * double quotes with embedded quotes doubled; all other fields are emitted
 * unchanged.
 *
 * @param data - Rows of cells.
 * @returns The CSV text, without a trailing newline.
 */
export function arrayToCSV(data: (number | string)[][]): string {
  return data.map((row) => row.map(escapeCsvField).join(",")).join("\n");
}

type ColumnInfo = { key: string; value: string[] };

/**
 * Reshapes a flat per-frame value array into table rows, one cell per column.
 *
 * `currentFrameData` is the concatenation of every column's values in column
 * order (e.g. `[state0..stateN, action0..actionM]`). Row `i` holds the `i`-th
 * value of each column. Columns may differ in length; cells past the end of a
 * shorter column are filled with `{ isNull: true }`.
 *
 * @param currentFrameData - Flat values for the current frame.
 * @param columns - Column descriptors; only `value.length` is used here.
 * @returns The rows, or `[]` when `currentFrameData` is missing or empty.
 */
export function getRows(
  currentFrameData: unknown[],
  columns: ColumnInfo[],
): Array<Array<{ isNull: true } | unknown>> {
  if (!currentFrameData || currentFrameData.length === 0) {
    return [];
  }

  const rows: Array<Array<{ isNull: true } | unknown>> = [];
  // The longest column decides how many rows the table has.
  const nRows = Math.max(...columns.map((column) => column.value.length));

  for (let rowIndex = 0; rowIndex < nRows; rowIndex += 1) {
    const row: Array<{ isNull: true } | unknown> = [];
    const nullCell = { isNull: true };
    // idx walks the flat array: the row offset plus the lengths of all
    // preceding columns.
    let idx = rowIndex;
    for (const column of columns) {
      const nColumn = column.value.length;
      row.push(rowIndex < nColumn ? currentFrameData[idx] : nullCell);
      idx += nColumn;
    }
    rows.push(row);
  }

  return rows;
}