Spaces:
Running
Running
File size: 3,748 Bytes
1f94f4a 8a37195 a990603 8a37195 2b90078 8a37195 4f26d07 8a37195 4f26d07 8a37195 1f94f4a 28ef794 1f94f4a 28ef794 1f94f4a 8a37195 1f94f4a 8a37195 1f94f4a a990603 aa2bc0d eff9a58 a990603 1f94f4a a990603 eff9a58 4f26d07 eff9a58 8a37195 c8b127a 1f94f4a c8b127a 1f94f4a a990603 c8b127a 1f94f4a a990603 c8b127a 8a37195 a990603 8a37195 4f26d07 8a37195 4f26d07 8a37195 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | import {
asyncBufferFromUrl,
cachedAsyncBuffer,
parquetRead,
parquetReadObjects,
type AsyncBuffer,
} from "hyparquet";
/**
 * Typed shape of a dataset metadata document.
 *
 * NOTE(review): the snake_case field names presumably mirror the keys of a
 * JSON metadata file loaded through `fetchJson` — confirm against the caller.
 */
export interface DatasetMetadata {
  codebase_version: string;
  robot_type: string;

  // Dataset-wide counters.
  total_episodes: number;
  total_frames: number;
  total_tasks: number;
  total_videos: number;
  total_chunks: number;
  chunks_size: number;
  fps: number;

  splits: Record<string, string>;

  // NOTE(review): presumably path templates containing `{name}` /
  // `{name:03d}` placeholders — confirm against where they are formatted.
  data_path: string;
  video_path: string;

  /** Per-feature description, keyed by feature name. */
  features: Record<
    string,
    {
      dtype: string;
      shape: Array<number>;
      names: Array<string> | Record<string, unknown> | null;
      info?: Record<string, unknown>;
    }
  >;
}
/**
 * Downloads `url` (bypassing the HTTP cache via `cache: "no-store"`) and
 * returns the parsed JSON body.
 *
 * The result is asserted to be `T`; no runtime validation is performed.
 *
 * @param url - Address of the JSON document.
 * @returns The parsed body, typed as `T`.
 * @throws Error when the response status is not OK; the message contains the
 *   URL, status code and status text.
 */
export async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url, { cache: "no-store" });
  if (response.ok) {
    return response.json() as Promise<T>;
  }
  const { status, statusText } = response;
  throw new Error(`Failed to fetch JSON ${url}: ${status} ${statusText}`);
}
/**
 * Substitutes `{name}` and Python-style `{name:0Nd}` placeholders in `format`
 * with the matching entries of `vars`.
 *
 * - `{name}` is replaced by `String(vars.name)`.
 * - `{name:03d}` is additionally left-padded with zeros up to the requested
 *   width (a leading `-` counts towards the width, as in Python). Values that
 *   are already at least that wide — e.g. strings the caller padded itself —
 *   are returned unchanged.
 * - `{name:3d}` (no leading zero flag) is substituted without padding.
 * - Placeholders whose name is not an own key of `vars` are left untouched
 *   instead of being rendered as the text "undefined".
 *
 * @param format - Template string containing placeholders.
 * @param vars - Values to substitute, keyed by placeholder name.
 * @returns The formatted string.
 */
export function formatStringWithVars(
  format: string,
  vars: Record<string, string | number>,
): string {
  return format.replace(
    /{(\w+)(?::(\d+)d)?}/g,
    (placeholder: string, key: string, spec: string | undefined) => {
      // Own-key check so that names such as "toString" do not resolve through
      // the prototype chain.
      if (!Object.prototype.hasOwnProperty.call(vars, key)) {
        return placeholder;
      }
      const text = String(vars[key]);
      if (spec === undefined || !spec.startsWith("0")) {
        return text;
      }
      const width = Number(spec);
      return text.startsWith("-")
        ? `-${text.slice(1).padStart(width - 1, "0")}`
        : text.padStart(width, "0");
    },
  );
}
/** Anything the Parquet readers in this module accept as their `file` input. */
type ParquetFile = ArrayBuffer | AsyncBuffer;

/**
 * Per-URL cache of Parquet buffers.
 *
 * The cache stores the in-flight promise rather than the resolved buffer so
 * that concurrent requests for the same URL share a single
 * `asyncBufferFromUrl` call instead of each opening their own.
 */
const parquetFileCache = new Map<string, Promise<AsyncBuffer>>();

/**
 * Opens the Parquet file at `url` as an async buffer wrapped with
 * `cachedAsyncBuffer`, reusing a previously opened buffer for the same URL.
 *
 * A failed attempt is evicted from the cache so that a later call retries
 * instead of replaying the stored rejection.
 *
 * @param url - Location of the Parquet file.
 * @returns The (possibly shared) buffer for `url`.
 */
export async function fetchParquetFile(url: string): Promise<ParquetFile> {
  const cached = parquetFileCache.get(url);
  if (cached) return cached;

  const openBuffer = async (): Promise<AsyncBuffer> => {
    const file = await asyncBufferFromUrl({
      url,
      requestInit: { cache: "no-store" },
    });
    return cachedAsyncBuffer(file);
  };

  const pending = openBuffer();
  parquetFileCache.set(url, pending);
  // Evict on failure, but only if the entry is still ours. The caller still
  // observes the rejection through `pending` itself.
  pending.catch(() => {
    if (parquetFileCache.get(url) === pending) {
      parquetFileCache.delete(url);
    }
  });
  return pending;
}
/**
 * Reads rows from a Parquet file through `parquetRead` and resolves with the
 * data handed to its `onComplete` callback.
 *
 * @param fileBuffer - Parquet input (see `ParquetFile`).
 * @param columns - Columns to read; an empty array means "no column filter".
 * @param options - Optional `rowStart` / `rowEnd` bounds forwarded to
 *   `parquetRead`.
 * @returns The rows delivered to `onComplete`.
 * @throws Rejects with whatever `parquetRead` throws or rejects with.
 */
export async function readParquetColumn(
  fileBuffer: ParquetFile,
  columns: string[],
  options?: { rowStart?: number; rowEnd?: number },
): Promise<unknown[][]> {
  return new Promise<unknown[][]>((resolve, reject) => {
    // A synchronous throw inside the executor is converted into a rejection by
    // the Promise constructor itself. The explicit `.catch` covers the
    // asynchronous case: `parquetRead` returns a promise, and without
    // forwarding its rejection this promise would stay pending forever.
    parquetRead({
      file: fileBuffer,
      columns: columns.length > 0 ? columns : undefined,
      rowStart: options?.rowStart,
      rowEnd: options?.rowEnd,
      onComplete: (data: unknown[][]) => {
        resolve(data);
      },
    }).catch(reject);
  });
}
/**
 * Reads rows from a Parquet file via `parquetReadObjects`, returning one
 * plain object per row.
 *
 * @param fileBuffer - Parquet input (see `ParquetFile`).
 * @param columns - Columns to read; an empty array (the default) means
 *   "no column filter".
 * @param options - Optional `rowStart` / `rowEnd` bounds forwarded to
 *   `parquetReadObjects`.
 * @returns The rows produced by `parquetReadObjects`.
 */
export async function readParquetAsObjects(
  fileBuffer: ParquetFile,
  columns: string[] = [],
  options?: { rowStart?: number; rowEnd?: number },
): Promise<Record<string, unknown>[]> {
  const selectedColumns = columns.length === 0 ? undefined : columns;
  const rowStart = options?.rowStart;
  const rowEnd = options?.rowEnd;

  const rows = parquetReadObjects({
    file: fileBuffer,
    columns: selectedColumns,
    rowStart,
    rowEnd,
  });
  return rows as Promise<Record<string, unknown>[]>;
}
/**
 * Serialises a 2D array as CSV: one line per row, fields separated by commas,
 * rows separated by `\n`.
 *
 * Fields containing a comma, a double quote or a line break are wrapped in
 * double quotes with embedded quotes doubled (RFC 4180), so such values no
 * longer shift columns or split rows. All other fields are emitted exactly as
 * `Array.prototype.join` would render them.
 *
 * @param data - Rows of cell values.
 * @returns The CSV text; an empty string for an empty array.
 */
export function arrayToCSV(data: (number | string)[][]): string {
  const toField = (cell: number | string): string => {
    // `join` renders null/undefined as an empty field; keep that behaviour for
    // values that slip past the static type at runtime.
    const text = cell == null ? "" : String(cell);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  return data.map((row) => row.map(toField).join(",")).join("\n");
}
/** A named group of values; only the length of `value` is used by `getRows`. */
type ColumnInfo = { key: string; value: string[] };

/**
 * Reshapes a flat frame array into table rows, one cell per column group.
 *
 * `currentFrameData` is read as the concatenation of the column groups, in
 * order (e.g. `[state0, ..., stateN, action0, ..., actionM]`). Row `r` takes
 * the `r`-th entry of every group. Groups shorter than the longest one are
 * padded with an `{ isNull: true }` marker.
 *
 * @param currentFrameData - Flat values for a single frame.
 * @param columns - Column groups describing how the flat array is segmented.
 * @returns The rows; an empty array when `currentFrameData` is missing or
 *   empty.
 */
export function getRows(currentFrameData: unknown[], columns: ColumnInfo[]) {
  if (!currentFrameData || currentFrameData.length === 0) {
    return [];
  }

  const groupSizes = columns.map((column) => column.value.length);
  const rowCount = Math.max(...groupSizes);
  const table: Array<Array<{ isNull: true } | unknown>> = [];

  for (let r = 0; r < rowCount; r += 1) {
    // Groups may differ in length, so the shorter ones are null-padded.
    const padding = { isNull: true };
    // Start index of the current group inside the flat array.
    let groupStart = 0;
    const cells = groupSizes.map((size) => {
      const cell = r < size ? currentFrameData[groupStart + r] : padding;
      groupStart += size;
      return cell;
    });
    table.push(cells);
  }

  return table;
}
|