Spaces:
Running
Running
| // Hugging Face Dataset Server API Service | |
| import type { | |
| DatasetRowsResponse, | |
| DatasetFirstRowsResponse, | |
| DatasetSplitsResponse, | |
| DatasetError, | |
| } from "./types"; | |
| import yaml from "js-yaml"; | |
// Root URL of the Hugging Face datasets-server API. The request helpers in this
// file build their endpoint URLs by appending a path and query string to it.
| const BASE_URL = "https://datasets-server.huggingface.co"; | |
/**
 * Fetch the first 100 rows of a dataset via the `/first-rows` endpoint.
 *
 * @param dataset Dataset identifier, sent URL-encoded as the `dataset` query parameter.
 * @param config Configuration name (defaults to "default").
 * @param split Split name (defaults to "train").
 * @param token Optional access token; when set it is sent as a `Bearer` Authorization header.
 * @returns The parsed JSON body of a successful response.
 * @throws Error with the server-provided `error` message when the response is not OK,
 *   or with a status-text fallback when the error body is absent or not valid JSON.
 */
export async function fetchFirstRows(
  dataset: string,
  config: string = "default",
  split: string = "train",
  token?: string
): Promise<DatasetFirstRowsResponse> {
  const url = `${BASE_URL}/first-rows?dataset=${encodeURIComponent(
    dataset
  )}&config=${encodeURIComponent(config)}&split=${encodeURIComponent(split)}`;

  const headers: HeadersInit = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // Proxies and gateways may answer with HTML or an empty body. A failed
    // parse must not replace the HTTP failure with an unrelated SyntaxError.
    let serverMessage: string | undefined;
    try {
      const body: DatasetError = await response.json();
      serverMessage = body?.error;
    } catch {
      serverMessage = undefined;
    }
    throw new Error(
      serverMessage || `Failed to fetch dataset: ${response.statusText}`
    );
  }
  return response.json();
}
/**
 * Fetch paginated rows from a dataset via the `/rows` endpoint.
 *
 * @param dataset Dataset identifier, sent URL-encoded as the `dataset` query parameter.
 * @param config Configuration name (defaults to "default").
 * @param split Split name (defaults to "train").
 * @param offset Starting row index (0-based)
 * @param length Number of rows to fetch (max 100)
 * @param token Optional access token; when set it is sent as a `Bearer` Authorization header.
 * @returns The parsed JSON body of a successful response.
 * @throws Error with the server-provided `error` message when the response is not OK,
 *   or with a status-text fallback when the error body is absent or not valid JSON.
 */
export async function fetchRows(
  dataset: string,
  config: string = "default",
  split: string = "train",
  offset: number = 0,
  length: number = 100,
  token?: string
): Promise<DatasetRowsResponse> {
  const url = `${BASE_URL}/rows?dataset=${encodeURIComponent(
    dataset
  )}&config=${encodeURIComponent(config)}&split=${encodeURIComponent(
    split
  )}&offset=${offset}&length=${length}`;

  const headers: HeadersInit = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // The error payload is best-effort: if it cannot be parsed as JSON, report
    // the HTTP status text instead of leaking a JSON SyntaxError to the caller.
    let serverMessage: string | undefined;
    try {
      const body: DatasetError = await response.json();
      serverMessage = body?.error;
    } catch {
      serverMessage = undefined;
    }
    throw new Error(
      serverMessage || `Failed to fetch dataset: ${response.statusText}`
    );
  }
  return response.json();
}
/**
 * List available splits for a dataset via the `/splits` endpoint.
 *
 * @param dataset Dataset identifier, sent URL-encoded as the `dataset` query parameter.
 * @param token Optional access token; when set it is sent as a `Bearer` Authorization header.
 * @returns The parsed JSON body of a successful response.
 * @throws Error with the server-provided `error` message when the response is not OK,
 *   or with a status-text fallback when the error body is absent or not valid JSON.
 */
export async function fetchSplits(
  dataset: string,
  token?: string
): Promise<DatasetSplitsResponse> {
  const url = `${BASE_URL}/splits?dataset=${encodeURIComponent(dataset)}`;

  const headers: HeadersInit = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // An unparseable error body (HTML page, empty body) should degrade to the
    // status-text message rather than raise a JSON SyntaxError.
    let serverMessage: string | undefined;
    try {
      const body: DatasetError = await response.json();
      serverMessage = body?.error;
    } catch {
      serverMessage = undefined;
    }
    throw new Error(
      serverMessage || `Failed to fetch splits: ${response.statusText}`
    );
  }
  return response.json();
}
/**
 * Ask the `/is-valid` endpoint whether a dataset can be served.
 *
 * @param dataset Dataset identifier, sent URL-encoded as the `dataset` query parameter.
 * @param token Optional access token; when set it is sent as a `Bearer` Authorization header.
 * @returns `true` only when the response is OK and its JSON body has `valid`
 *   strictly equal to `true`; `false` for any non-OK response.
 */
export async function checkDatasetValid(
  dataset: string,
  token?: string
): Promise<boolean> {
  const requestHeaders: HeadersInit = token
    ? { Authorization: `Bearer ${token}` }
    : {};
  const endpoint = `${BASE_URL}/is-valid?dataset=${encodeURIComponent(dataset)}`;

  const res = await fetch(endpoint, { headers: requestHeaders });
  if (res.ok) {
    const payload = await res.json();
    return payload.valid === true;
  }
  return false;
}
/**
 * Fetch all rows from a jurisdiction config (for term/definition data).
 *
 * Pages through `fetchRows` in batches of 100, collecting the `row` payload of
 * every entry, until the running offset reaches the `num_rows_total` reported
 * by the latest response.
 *
 * @param dataset Dataset identifier, forwarded to `fetchRows`.
 * @param config Configuration name, forwarded to `fetchRows`.
 * @param split Split name (defaults to "train").
 * @param token Optional access token, forwarded to `fetchRows`.
 * @returns Every collected row, in the order the pages were returned.
 * @throws Whatever `fetchRows` throws for a failed page request.
 */
export async function fetchAllRows(
  dataset: string,
  config: string,
  split: string = "train",
  token?: string
): Promise<any[]> {
  const batchSize = 100;
  const allRows: any[] = [];
  let offset = 0;

  while (true) {
    const response = await fetchRows(
      dataset,
      config,
      split,
      offset,
      batchSize,
      token
    );
    const rows = response.rows.map((r) => r.row);

    // An empty page cannot advance the offset. Stop here instead of requesting
    // the same page forever when the reported total is larger than what the
    // server actually returns.
    if (rows.length === 0) {
      break;
    }

    allRows.push(...rows);
    offset += rows.length;
    if (offset >= response.num_rows_total) {
      break;
    }
  }

  return allRows;
}
/**
 * Download the raw text of a file stored in a dataset repository on huggingface.co.
 *
 * The URL has the form `https://huggingface.co/datasets/<dataset>/raw/<branch>/<filePath>`.
 * `dataset` and `filePath` are percent-encoded segment by segment so their `/`
 * separators survive while characters such as spaces, `#`, `?` or `%` cannot
 * truncate or corrupt the URL. Pass raw, not pre-encoded, values.
 *
 * @param dataset Repository id, e.g. "owner/dataset-name".
 * @param filePath Path of the file inside the repository, e.g. "config.yaml".
 * @param branch Branch name (defaults to "main"); encoded as a single URL component.
 * @param token Optional access token; when set it is sent as a `Bearer` Authorization header.
 * @returns The response body as text.
 * @throws Error containing the HTTP status and status text when the response is not OK.
 */
export async function fetchDatasetFileRaw(
  dataset: string, // "owner/dataset-name"
  filePath: string, // "config.yaml"
  branch = "main",
  token?: string
): Promise<string> {
  // Encode each segment separately so "/" keeps its meaning as a path separator.
  const encodeSegments = (path: string): string =>
    path
      .split("/")
      .map((segment) => encodeURIComponent(segment))
      .join("/");

  const url = `https://huggingface.co/datasets/${encodeSegments(
    dataset
  )}/raw/${encodeURIComponent(branch)}/${encodeSegments(filePath)}`;

  const headers: HeadersInit = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const res = await fetch(url, { headers });
  if (!res.ok) {
    throw new Error(`Failed to fetch file: ${res.status} ${res.statusText}`);
  }
  return res.text();
}
/**
 * Shape that `fetchDatasetConfigYaml` casts the parsed `config.yaml` document to.
 *
 * The keys mirror the YAML file verbatim (snake_case). Nothing in this module
 * validates them at runtime, so treat the field types as a declaration of intent.
 */
export interface DatasetConfigYaml {
  // Free-text description of the dataset.
  description: string;

  // The two jurisdictions the dataset covers.
  // NOTE(review): presumably the sides being compared; confirm with the producer of config.yaml.
  jurisdiction1: string;
  jurisdiction2: string;

  // Model / provider pair recorded for scoring.
  scoring_model: string;
  scoring_provider: string;

  // Model / provider pair recorded for synthesis.
  synthesis_model: string;
  synthesis_provider: string;

  // Identifiers of the grading templates, one per numbered slot.
  grading_templates_1_uid: string;
  grading_templates_2_uid: string;

  // Optional; may be absent from the YAML. Format is not established here.
  generation_date?: string;
}
/**
 * Download a YAML file from a dataset repository and parse it as the dataset config.
 *
 * @param dataset Repository id, forwarded to `fetchDatasetFileRaw`.
 * @param filePath Path of the YAML file inside the repository (defaults to "config.yaml").
 * @param branch Branch name (defaults to "main").
 * @param token Optional access token, forwarded to `fetchDatasetFileRaw`.
 * @returns The parsed document when it is a YAML mapping; `null` when the file
 *   is empty or parses to a scalar or a list. Individual fields are NOT validated.
 * @throws Whatever `fetchDatasetFileRaw` throws when the download fails, and
 *   whatever `yaml.load` throws for malformed YAML.
 */
export async function fetchDatasetConfigYaml(
  dataset: string,
  filePath = "config.yaml",
  branch = "main",
  token?: string
): Promise<DatasetConfigYaml | null> {
  const text = await fetchDatasetFileRaw(dataset, filePath, branch, token);

  // NOTE(security): the text comes from a remote repository. js-yaml 4.x `load`
  // uses a safe default schema; on js-yaml 3.x `load` is unsafe and `safeLoad`
  // should be used instead. Confirm the installed major version.
  const parsed: unknown = yaml.load(text);

  // `yaml.load` yields undefined for an empty document and may yield a scalar or
  // an array. None of those is a config mapping, so honor the `| null` in the
  // return type rather than casting them blindly.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }
  return parsed as DatasetConfigYaml;
}