File size: 3,686 Bytes
5c6cc05 0b43131 5c6cc05 f791e9c 5c6cc05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import { parquetRead, parquetReadObjects } from "hyparquet";
/**
 * Shape of a dataset's metadata document (presumably the LeRobot
 * `meta/info.json` schema, judging by the field names — confirm against the
 * dataset source). Consumed via `fetchJson<DatasetMetadata>(...)`.
 */
export interface DatasetMetadata {
// Dataset-format version string.
codebase_version: string;
// Identifier of the robot that produced the data.
robot_type: string;
// Aggregate counts over the whole dataset.
total_episodes: number;
total_frames: number;
total_tasks: number;
total_videos: number;
total_chunks: number;
// Number of episodes per chunk directory.
chunks_size: number;
// Recording frame rate (frames per second).
fps: number;
// Split name -> episode-range expression (e.g. "train" -> "0:100") —
// NOTE(review): exact value format not visible here; verify against producer.
splits: Record<string, string>;
// Path templates with {placeholders} resolved by formatStringWithVars().
data_path: string;
video_path: string;
// Per-feature descriptors keyed by feature name (e.g. states, actions, cameras).
features: Record<
string,
{
// Element data type (e.g. "float32") — stored as a plain string.
dtype: string;
// Tensor shape of the feature.
shape: any[];
// Human-readable names for each dimension, or null when absent.
names: any[] | Record<string, any> | null;
// Optional extra info (e.g. video codec details) — schema not visible here.
info?: Record<string, any>;
}
>;
}
/**
 * Fetch a URL and parse the response body as JSON.
 *
 * Forwards the Hugging Face token from the environment (set in Space secrets)
 * as a Bearer Authorization header when present, so gated/private resources
 * can be fetched.
 *
 * @param url - Absolute URL of the JSON document.
 * @returns The parsed body, asserted to type T (no runtime validation).
 * @throws Error when the HTTP response is not ok.
 */
export async function fetchJson<T>(url: string): Promise<T> {
  const token = process.env.HF_TOKEN;
  const requestHeaders: HeadersInit = token
    ? { Authorization: `Bearer ${token}` }
    : {};
  const response = await fetch(url, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(
      `Failed to fetch JSON ${url}: ${response.status} ${response.statusText}`,
    );
  }
  return response.json() as Promise<T>;
}
/**
 * Substitute `{key}` and Python-style `{key:06d}` placeholders in a path
 * template with values from `vars`.
 *
 * Bug fix: the previous regex matched width specs like `{episode_index:06d}`
 * but discarded the width, yielding `episode_42` where the dataset layout
 * expects zero-padded `episode_000042`. The width is now captured and applied
 * via padStart. Unknown keys are left as-is instead of becoming "undefined".
 *
 * @param format - Template string containing `{name}` or `{name:Nd}` tokens.
 * @param vars - Values to substitute, keyed by placeholder name.
 * @returns The template with all known placeholders replaced.
 */
export function formatStringWithVars(
  format: string,
  vars: Record<string, any>,
): string {
  return format.replace(/{(\w+)(?::(\d+)d)?}/g, (match, key, width) => {
    const value = vars[key];
    if (value === undefined) {
      // Leave unknown placeholders untouched rather than emitting "undefined".
      return match;
    }
    // Zero-pad to the requested width when a `:Nd` spec is present.
    return width ? String(value).padStart(Number(width), "0") : String(value);
  });
}
/**
 * Download a Parquet file and return its raw bytes.
 *
 * Authenticates against the remote host with the HF_TOKEN environment
 * variable (set in Space secrets) when available.
 *
 * @param url - Absolute URL of the Parquet file.
 * @returns The response body as an ArrayBuffer.
 * @throws Error when the HTTP response is not ok.
 */
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
  const token = process.env.HF_TOKEN;
  const requestHeaders: HeadersInit = token
    ? { Authorization: `Bearer ${token}` }
    : {};
  const response = await fetch(url, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }
  return response.arrayBuffer();
}
/**
 * Read specific columns from an in-memory Parquet file.
 *
 * Bug fix: `parquetRead` returns a Promise, so parse failures surface as an
 * async rejection — the old `try/catch` only caught synchronous throws,
 * leaving the wrapping Promise pending forever on a corrupt file. The
 * returned promise's rejection is now forwarded via `.catch(reject)`.
 *
 * @param fileBuffer - Raw Parquet bytes (e.g. from fetchParquetFile).
 * @param columns - Column names to read; an empty array reads all columns.
 * @returns Row data as produced by hyparquet's onComplete callback.
 */
export async function readParquetColumn(
  fileBuffer: ArrayBuffer,
  columns: string[],
): Promise<any[]> {
  return new Promise((resolve, reject) => {
    try {
      parquetRead({
        file: fileBuffer,
        columns: columns.length > 0 ? columns : undefined, // Let hyparquet read all columns if empty array
        onComplete: (data: any[]) => {
          resolve(data);
        },
      }).catch(reject); // forward async parse failures instead of hanging
    } catch (error) {
      reject(error);
    }
  });
}
/**
 * Read an in-memory Parquet file and return one object per row, keyed by
 * column name.
 *
 * @param fileBuffer - Raw Parquet bytes.
 * @param columns - Column names to read; defaults to all columns.
 * @returns Array of row objects from hyparquet.
 */
export async function readParquetAsObjects(
  fileBuffer: ArrayBuffer,
  columns: string[] = [],
): Promise<Record<string, any>[]> {
  // hyparquet interprets `undefined` as "read every column".
  const selection = columns.length > 0 ? columns : undefined;
  return parquetReadObjects({ file: fileBuffer, columns: selection });
}
/**
 * Convert a 2D array of cells to a CSV string.
 *
 * Robustness fix: cells containing a comma, double quote, or newline are now
 * quoted per RFC 4180 (embedded quotes doubled); previously such cells would
 * corrupt the row structure. Plain cells are emitted unchanged.
 *
 * @param data - Rows of string/number cells.
 * @returns Newline-joined CSV text (no trailing newline).
 */
export function arrayToCSV(data: (number | string)[][]): string {
  const escapeCell = (cell: number | string): string => {
    const text = String(cell);
    // Quote only when needed so simple output stays byte-identical.
    return /[",\n\r]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  return data.map((row) => row.map(escapeCell).join(",")).join("\n");
}
/**
 * Reassemble per-row cells from a flat frame buffer.
 *
 * `currentFrameData` is the concatenation of every column's values in order
 * (e.g. [state0..stateN, action0..actionM]). Columns may have different
 * lengths; shorter columns are padded with a `{ isNull: true }` marker so the
 * result is a rectangular 2D array with one entry per column per row.
 *
 * @param currentFrameData - Flat concatenated values for all columns.
 * @param columns - Column descriptors; only `column.value.length` is read.
 * @returns Array of rows, each row holding one cell per column.
 */
export function getRows(currentFrameData: any[], columns: any[]) {
  if (!currentFrameData || currentFrameData.length === 0) {
    return [];
  }
  const totalRows = Math.max(...columns.map((c) => c.value.length));
  const result = [];
  for (let r = 0; r < totalRows; r++) {
    // Fresh pad marker per row, matching how rows are built independently.
    const nullCell = { isNull: true };
    const row = [];
    // Running start index of the current column inside the flat buffer.
    let offset = 0;
    for (const column of columns) {
      const columnLength = column.value.length;
      row.push(r < columnLength ? currentFrameData[offset + r] : nullCell);
      offset += columnLength;
    }
    result.push(row);
  }
  return result;
}
|