// term-comparison/src/lib/huggingfaceApi.ts
// Tobias Brugger
// add config files loading and display
// 9976904
// Hugging Face Dataset Server API Service
import type {
DatasetRowsResponse,
DatasetFirstRowsResponse,
DatasetSplitsResponse,
DatasetError,
} from "./types";
import yaml from "js-yaml";
// Base URL of the Hugging Face datasets-server API. The endpoint paths used in
// this file (/first-rows, /rows, /splits, /is-valid) are appended to it.
const BASE_URL = "https://datasets-server.huggingface.co";
/**
 * Fetch the first rows of a dataset split from the `/first-rows` endpoint.
 *
 * @param dataset Dataset identifier, sent as the `dataset` query parameter
 * @param config Config name (defaults to "default")
 * @param split Split name (defaults to "train")
 * @param token Optional access token, sent as an `Authorization: Bearer` header
 * @returns The parsed JSON body of a successful response
 * @throws Error with the server-provided `error` message when the error body
 *   is JSON and contains one; otherwise a message built from the HTTP status text
 */
export async function fetchFirstRows(
  dataset: string,
  config: string = "default",
  split: string = "train",
  token?: string
): Promise<DatasetFirstRowsResponse> {
  const url = `${BASE_URL}/first-rows?dataset=${encodeURIComponent(
    dataset
  )}&config=${encodeURIComponent(config)}&split=${encodeURIComponent(split)}`;

  const headers: Record<string, string> = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // The error body is not guaranteed to be JSON (e.g. an HTML page from a
    // proxy). A parse failure must not replace the HTTP error with a
    // SyntaxError, so fall back to the status text in that case.
    let serverMessage: string | undefined;
    try {
      const body: DatasetError = await response.json();
      serverMessage = body?.error;
    } catch {
      serverMessage = undefined;
    }
    throw new Error(
      serverMessage || `Failed to fetch dataset: ${response.statusText}`
    );
  }
  return response.json();
}
/**
 * Fetch one page of rows of a dataset split from the `/rows` endpoint.
 *
 * @param dataset Dataset identifier, sent as the `dataset` query parameter
 * @param config Config name (defaults to "default")
 * @param split Split name (defaults to "train")
 * @param offset Starting row index (0-based)
 * @param length Number of rows to fetch (max 100)
 * @param token Optional access token, sent as an `Authorization: Bearer` header
 * @returns The parsed JSON body of a successful response
 * @throws Error with the server-provided `error` message when the error body
 *   is JSON and contains one; otherwise a message built from the HTTP status text
 */
export async function fetchRows(
  dataset: string,
  config: string = "default",
  split: string = "train",
  offset: number = 0,
  length: number = 100,
  token?: string
): Promise<DatasetRowsResponse> {
  const url = `${BASE_URL}/rows?dataset=${encodeURIComponent(
    dataset
  )}&config=${encodeURIComponent(config)}&split=${encodeURIComponent(
    split
  )}&offset=${offset}&length=${length}`;

  const headers: Record<string, string> = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // The error body is not guaranteed to be JSON (e.g. an HTML page from a
    // proxy). A parse failure must not replace the HTTP error with a
    // SyntaxError, so fall back to the status text in that case.
    let serverMessage: string | undefined;
    try {
      const body: DatasetError = await response.json();
      serverMessage = body?.error;
    } catch {
      serverMessage = undefined;
    }
    throw new Error(
      serverMessage || `Failed to fetch dataset: ${response.statusText}`
    );
  }
  return response.json();
}
/**
 * List available splits for a dataset via the `/splits` endpoint.
 *
 * @param dataset Dataset identifier, sent as the `dataset` query parameter
 * @param token Optional access token, sent as an `Authorization: Bearer` header
 * @returns The parsed JSON body of a successful response
 * @throws Error with the server-provided `error` message when the error body
 *   is JSON and contains one; otherwise a message built from the HTTP status text
 */
export async function fetchSplits(
  dataset: string,
  token?: string
): Promise<DatasetSplitsResponse> {
  const url = `${BASE_URL}/splits?dataset=${encodeURIComponent(dataset)}`;

  const headers: Record<string, string> = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // The error body is not guaranteed to be JSON (e.g. an HTML page from a
    // proxy). A parse failure must not replace the HTTP error with a
    // SyntaxError, so fall back to the status text in that case.
    let serverMessage: string | undefined;
    try {
      const body: DatasetError = await response.json();
      serverMessage = body?.error;
    } catch {
      serverMessage = undefined;
    }
    throw new Error(
      serverMessage || `Failed to fetch splits: ${response.statusText}`
    );
  }
  return response.json();
}
/**
 * Check if a dataset is valid and accessible via the `/is-valid` endpoint.
 *
 * @param dataset Dataset identifier, sent as the `dataset` query parameter
 * @param token Optional access token, sent as an `Authorization: Bearer` header
 * @returns `true` when the response is OK and its JSON body reports the
 *   dataset as usable; `false` for a non-OK response or an unrecognised body
 */
export async function checkDatasetValid(
  dataset: string,
  token?: string
): Promise<boolean> {
  const url = `${BASE_URL}/is-valid?dataset=${encodeURIComponent(dataset)}`;

  const headers: Record<string, string> = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    return false;
  }

  const data: unknown = await response.json();
  // A JSON body of `null` or a scalar carries no validity flags; treat it as
  // "not valid" instead of throwing on property access.
  if (data === null || typeof data !== "object") {
    return false;
  }
  const flags = data as Record<string, unknown>;

  // `valid` is the flag this function has always checked.
  // NOTE(review): the current dataset viewer docs describe the /is-valid body
  // as capability flags (`preview`, `viewer`, ...) rather than `valid`;
  // `viewer`/`preview` are accepted here on that basis — confirm against the
  // live API response.
  return flags.valid === true || flags.viewer === true || flags.preview === true;
}
/**
 * Fetch all rows from a jurisdiction config (for term/definition data).
 *
 * Pages through `fetchRows` in batches of 100 and collects each entry's `row`
 * value until the accumulated count reaches the `num_rows_total` reported by
 * the response.
 *
 * @param dataset Dataset identifier
 * @param config Config name
 * @param split Split name (defaults to "train")
 * @param token Optional access token forwarded to `fetchRows`
 * @returns Every collected `row` value, in the order the pages returned them
 * @throws Whatever `fetchRows` throws for a failed page request
 */
export async function fetchAllRows(
  dataset: string,
  config: string,
  split: string = "train",
  token?: string
): Promise<any[]> {
  const batchSize = 100;
  const allRows: any[] = [];
  let offset = 0;

  for (;;) {
    const response = await fetchRows(
      dataset,
      config,
      split,
      offset,
      batchSize,
      token
    );
    const rows = response.rows.map((r) => r.row);

    // An empty page would leave `offset` unchanged; if the reported total is
    // still larger than `offset` the loop would never terminate, so stop here.
    if (rows.length === 0) {
      break;
    }

    allRows.push(...rows);
    offset += rows.length;

    if (offset >= response.num_rows_total) {
      break;
    }
  }

  return allRows;
}
/**
 * Download the raw text of a file stored in a Hugging Face dataset repository.
 *
 * The URL has the form
 * `https://huggingface.co/datasets/<dataset>/raw/<branch>/<filePath>`.
 * `dataset` and `filePath` are percent-encoded one path segment at a time, so
 * their `/` separators are preserved while characters such as spaces, `#` or
 * `?` cannot corrupt the URL. `branch` is encoded as a single segment.
 *
 * @param dataset Repository id, e.g. "owner/dataset-name"
 * @param filePath Path of the file inside the repository, e.g. "config.yaml"
 * @param branch Branch or revision name (defaults to "main")
 * @param token Optional access token, sent as an `Authorization: Bearer` header
 * @returns The response body as text
 * @throws Error containing the HTTP status code and status text when the
 *   response is not OK
 */
export async function fetchDatasetFileRaw(
  dataset: string, // "owner/dataset-name"
  filePath: string, // "config.yaml"
  branch = "main",
  token?: string
): Promise<string> {
  // Encode each segment separately so that "/" keeps acting as a separator.
  const encodeSegments = (path: string): string =>
    path.split("/").map(encodeURIComponent).join("/");

  const url = `https://huggingface.co/datasets/${encodeSegments(
    dataset
  )}/raw/${encodeURIComponent(branch)}/${encodeSegments(filePath)}`;

  const headers: Record<string, string> = {};
  if (token) {
    headers["Authorization"] = `Bearer ${token}`;
  }

  const res = await fetch(url, { headers });
  if (!res.ok) {
    throw new Error(`Failed to fetch file: ${res.status} ${res.statusText}`);
  }
  return res.text();
}
/**
 * Shape that `fetchDatasetConfigYaml` casts the parsed YAML file to.
 *
 * No runtime validation of individual fields is visible in this file, so the
 * declared types describe the expected file layout rather than a guarantee.
 * NOTE(review): the per-field notes below are inferred from the field names
 * only — confirm against whatever produces the config file.
 */
export interface DatasetConfigYaml {
// presumably the model and provider used for scoring — TODO confirm
scoring_model: string;
scoring_provider: string;
// presumably the model and provider used for synthesis — TODO confirm
synthesis_model: string;
synthesis_provider: string;
// presumably identify the two jurisdictions being compared — TODO confirm
jurisdiction1: string;
jurisdiction2: string;
description: string;
// presumably identifiers of the grading templates — TODO confirm
grading_templates_1_uid: string;
grading_templates_2_uid: string;
// The only field declared optional.
generation_date?: string;
}
/**
 * Download a YAML config file from a dataset repository and parse it.
 *
 * @param dataset Repository id, forwarded to `fetchDatasetFileRaw`
 * @param filePath Path of the YAML file (defaults to "config.yaml")
 * @param branch Branch or revision name (defaults to "main")
 * @param token Optional access token forwarded to `fetchDatasetFileRaw`
 * @returns The parsed document cast to `DatasetConfigYaml`, or `null` when the
 *   file does not parse to a YAML mapping (empty file, scalar, or list)
 * @throws Whatever `fetchDatasetFileRaw` throws for a failed download, and
 *   whatever `yaml.load` throws for malformed YAML
 */
export async function fetchDatasetConfigYaml(
  dataset: string,
  filePath = "config.yaml",
  branch = "main",
  token?: string
): Promise<DatasetConfigYaml | null> {
  const text = await fetchDatasetFileRaw(dataset, filePath, branch, token);

  // NOTE(review): the file content comes from a remote repository. js-yaml's
  // `load` uses a safe default schema in v4, whereas v3 required `safeLoad`
  // — confirm the installed js-yaml version.
  const parsed: unknown = yaml.load(text);

  // Only a mapping can be a config object. Without this check an empty file
  // or a bare scalar/list would be returned typed as DatasetConfigYaml.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }

  // Individual fields are not validated here; the cast reflects the expected
  // file layout only.
  return parsed as DatasetConfigYaml;
}