|
|
import { parquetRead, parquetReadObjects } from "hyparquet"; |
|
|
import { buildProxyUrl } from './apiHelpers'; |
|
|
|
|
|
/**
 * Parsed shape of a dataset's metadata/info JSON.
 * NOTE(review): field meanings inferred from names — confirm against the
 * dataset's info.json spec (looks like a LeRobot-style dataset layout).
 */
export interface DatasetMetadata {
  // Version string of the codebase that produced the dataset.
  codebase_version: string;
  // Robot platform identifier.
  robot_type: string;
  // Dataset-wide counts.
  total_episodes: number;
  total_frames: number;
  total_tasks: number;
  total_videos: number;
  total_chunks: number;
  // Number of episodes per chunk — presumably; verify against producer.
  chunks_size: number;
  // Recording frame rate.
  fps: number;
  // Split name -> episode-range expression (e.g. "train" -> "0:100") — TODO confirm.
  splits: Record<string, string>;
  // Path templates with "{var}" / "{var:06d}" placeholders; resolved via
  // formatStringWithVars in this module.
  data_path: string;
  video_path: string;
  // Per-feature schema: dtype, tensor shape, and optional axis names / extra info.
  features: Record<
    string,
    {
      dtype: string;
      shape: any[];
      names: any[] | Record<string, any> | null;
      info?: Record<string, any>;
    }
  >;
}
|
|
|
|
|
export async function fetchJson<T>(url: string): Promise<T> { |
|
|
|
|
|
const proxyUrl = await buildProxyUrl(url); |
|
|
const res = await fetch(proxyUrl); |
|
|
if (!res.ok) { |
|
|
const errorData = await res.json().catch(() => ({})); |
|
|
throw new Error( |
|
|
errorData.error || `Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`, |
|
|
); |
|
|
} |
|
|
return res.json() as Promise<T>; |
|
|
} |
|
|
|
|
|
export function formatStringWithVars( |
|
|
format: string, |
|
|
vars: Record<string, any>, |
|
|
): string { |
|
|
return format.replace(/{(\w+)(?::\d+d)?}/g, (_, key) => vars[key]); |
|
|
} |
|
|
|
|
|
|
|
|
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> { |
|
|
|
|
|
const proxyUrl = await buildProxyUrl(url); |
|
|
const res = await fetch(proxyUrl); |
|
|
|
|
|
if (!res.ok) { |
|
|
const errorData = await res.json().catch(() => ({})); |
|
|
throw new Error(errorData.error || `Failed to fetch ${url}: ${res.status} ${res.statusText}`); |
|
|
} |
|
|
|
|
|
return res.arrayBuffer(); |
|
|
} |
|
|
|
|
|
|
|
|
export async function readParquetColumn( |
|
|
fileBuffer: ArrayBuffer, |
|
|
columns: string[], |
|
|
): Promise<any[]> { |
|
|
return new Promise((resolve, reject) => { |
|
|
try { |
|
|
parquetRead({ |
|
|
file: fileBuffer, |
|
|
columns: columns.length > 0 ? columns : undefined, |
|
|
onComplete: (data: any[]) => { |
|
|
resolve(data); |
|
|
} |
|
|
}); |
|
|
} catch (error) { |
|
|
reject(error); |
|
|
} |
|
|
}); |
|
|
} |
|
|
|
|
|
|
|
|
export async function readParquetAsObjects( |
|
|
fileBuffer: ArrayBuffer, |
|
|
columns: string[] = [], |
|
|
): Promise<Record<string, any>[]> { |
|
|
return parquetReadObjects({ |
|
|
file: fileBuffer, |
|
|
columns: columns.length > 0 ? columns : undefined, |
|
|
}); |
|
|
} |
|
|
|
|
|
|
|
|
export function arrayToCSV(data: (number | string)[][]): string { |
|
|
return data.map((row) => row.join(",")).join("\n"); |
|
|
} |
|
|
|
|
|
|
|
|
export function getRows(currentFrameData: any[], columns: any[]) { |
|
|
if (!currentFrameData || currentFrameData.length === 0) { |
|
|
return []; |
|
|
} |
|
|
|
|
|
const rows = []; |
|
|
const nRows = Math.max(...columns.map((column) => column.value.length)); |
|
|
let rowIndex = 0; |
|
|
|
|
|
while (rowIndex < nRows) { |
|
|
const row = []; |
|
|
|
|
|
const nullCell = { isNull: true }; |
|
|
|
|
|
let idx = rowIndex; |
|
|
|
|
|
for (const column of columns) { |
|
|
const nColumn = column.value.length; |
|
|
row.push(rowIndex < nColumn ? currentFrameData[idx] : nullCell); |
|
|
idx += nColumn; |
|
|
} |
|
|
|
|
|
rowIndex += 1; |
|
|
rows.push(row); |
|
|
} |
|
|
|
|
|
return rows; |
|
|
} |
|
|
|