import { useState, useEffect } from "react";
import { api, getErrorMessage } from "../api";
import type { DatasetInfo, DatasetLoadResponse, DatasetPreviewDoc } from "../types";
import StatusMessage from "./StatusMessage";
import MetricCard from "./MetricCard";
import Toggle from "./Toggle";
import Select from "./Select";
import Switch from "./Switch";
import LogViewer from "./LogViewer";

interface Props {
  /**
   * Optional callback invoked with the result of `api.getStats()` after a
   * dataset load request succeeds. The parameter stays `any` so existing
   * callers with a concrete stats parameter type keep compiling.
   */
  onStatsUpdate?: (stats: any) => void;
}

/**
 * Dataset panel: fetches dataset metadata on mount, lets the user preview
 * documents, and triggers a dataset load with configurable options.
 */
export default function DatasetPanel({ onStatsUpdate }: Props) {
  // Explicit state types: without them `useState(null)` / `useState([])` infer
  // `null` / `never[]`, and the setters reject the API results stored below.
  const [info, setInfo] = useState<DatasetInfo | null>(null);
  const [error, setError] = useState("");

  // Load config
  const [source, setSource] = useState<"raw" | "embeddings">("raw");
  const [maxDocs, setMaxDocs] = useState(500);
  const [minTextLen, setMinTextLen] = useState(100);
  const [sourceFilter, setSourceFilter] = useState("");
  const [loadAll, setLoadAll] = useState(true);
  const [buildIndex, setBuildIndex] = useState(true);
  const [loading, setLoading] = useState(false);
  const [loadResult, setLoadResult] = useState<DatasetLoadResponse | null>(null);
  const [showAdvanced, setShowAdvanced] = useState(false);

  // Preview
  const [previewDocs, setPreviewDocs] = useState<DatasetPreviewDoc[]>([]);
  const [previewLoading, setPreviewLoading] = useState(false);

  // Fetch dataset metadata once on mount; a failure is surfaced via `error`.
  useEffect(() => {
    api
      .datasetInfo()
      .then(setInfo)
      .catch((err) => {
        setError(getErrorMessage(err));
      });
  }, []);

  /**
   * Requests preview documents and stores them in `previewDocs`.
   * An empty `sourceFilter` is sent as `undefined` rather than "".
   */
  async function handlePreview() {
    setPreviewLoading(true);
    setError("");
    try {
      // First argument is presumably the number of documents to preview;
      // verify against the `api.datasetPreview` signature.
      const res = await api.datasetPreview(10, sourceFilter || undefined);
      setPreviewDocs(res.documents);
    } catch (err) {
      setError(getErrorMessage(err));
    } finally {
      setPreviewLoading(false);
    }
  }

  /**
   * Sends the dataset load request built from the current form state, stores
   * the response in `loadResult`, then refreshes stats for the parent if an
   * `onStatsUpdate` callback was supplied.
   */
  async function handleLoad() {
    setLoading(true);
    setError("");
    setLoadResult(null);
    try {
      const res = await api.datasetLoad({
        source,
        // With "load all" on, the user-entered limits are ignored in favour
        // of a fixed 100000-document cap and no minimum text length.
        max_docs: loadAll ? 100000 : maxDocs,
        min_text_length: loadAll ? 0 : minTextLen,
        source_filter: sourceFilter || undefined,
        build_index: buildIndex,
      });
      setLoadResult(res);
      if (onStatsUpdate) {
        // Best-effort refresh: a stats failure only logs a warning and does
        // not mark the (already successful) load as failed.
        try {
          const s = await api.getStats();
          onStatsUpdate(s);
        } catch (e) {
          console.warn("Failed to refresh stats after load:", e);
        }
      }
    } catch (err) {
      setError(getErrorMessage(err));
    } finally {
      setLoading(false);
    }
  }

  return (
{/* Info */}

Epstein Files Dataset

Load documents from the publicly released U.S. House Oversight Committee Epstein Files via HuggingFace. Two sources available:

{info && (
setSource("raw")}>
Raw Text Documents {info.raw_texts.size_mb} MB
{info.raw_texts.description}
Columns: {info.raw_texts.columns?.join(", ")}
setSource("embeddings")}>
Pre-computed Embeddings {info.embeddings.vector_dim}d
{info.embeddings.description}
Model: {info.embeddings.model}
)} setSource(v as "raw" | "embeddings")} />
{/* Load actions + advanced config */}

Load Dataset

{source === "raw" && ( )}
{showAdvanced && (
{!loadAll && ( <>
setMaxDocs(+e.target.value)} min={10} max={100000} />
{source === "raw" && (
setMinTextLen(+e.target.value)} min={0} max={10000} />
)} )} {source === "raw" && (