File size: 3,924 Bytes
6cdce85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
'use client';

import {
    createContext,
    useContext,
    useState,
    useEffect,
    useCallback,
    useMemo,
    ReactNode,
} from 'react';
import { datasetLoader, FilterOptions, LoadExamplesResult } from './loader';
import type { DatasetExample, CodingProblem } from '@/types';

/** Name of a dataset split accepted by the provider's loading and query helpers. */
type Split = 'train' | 'validation' | 'test';

/**
 * Shape of the value exposed through `DatasetContext` and returned by
 * `useDataset()`.
 */
interface DatasetContextValue {
    /**
     * `true` from mount until the provider's initial load (split info plus
     * the initial splits) has settled, whether it succeeded or failed.
     */
    isLoading: boolean;
    /** Splits whose preload has completed and been recorded by the provider. */
    loadedSplits: Set<Split>;
    /**
     * Counts keyed by split name. Seeded from `datasetLoader.getSplitInfo()`
     * and overwritten per split with the loaded example count by `loadSplit`.
     */
    splitCounts: Record<string, number>;
    /**
     * Preloads `split` through `datasetLoader` unless the loader already
     * reports it as loaded, then records it in `loadedSplits`.
     */
    loadSplit: (split: Split) => Promise<void>;
    /**
     * Delegates to `datasetLoader.filterExamples`. Returns
     * `{ examples: [], total: 0 }` when `split` is not loaded yet. The
     * provider defaults `limit` to 50 and `offset` to 0 when omitted.
     */
    filterExamples: (
        split: Split,
        filters: FilterOptions,
        limit?: number,
        offset?: number
    ) => LoadExamplesResult;
    /** Delegates to `datasetLoader.getCodingProblems(split)`. */
    getCodingProblems: (split: Split) => CodingProblem[];
    /** Delegates to `datasetLoader.getAllExamples(split)`. */
    getAllExamples: (split: Split) => DatasetExample[];
}

/**
 * React context carrying the dataset API. The default value is `null`, which
 * lets `useDataset()` detect that no `DatasetProvider` is mounted above the
 * caller.
 */
const DatasetContext = createContext<DatasetContextValue | null>(null);

/** Props accepted by `DatasetProvider`. */
interface DatasetProviderProps {
    /** Subtree that gains access to the dataset context. */
    children: ReactNode;
    /**
     * Splits to preload when the provider mounts. When omitted, the provider
     * falls back to `['train', 'test', 'validation']`. The value is only read
     * by the mount effect, so later changes to this prop are not picked up.
     */
    initialSplits?: Split[];
}

/**
 * Returns a set containing `split` in addition to everything in `prev`.
 * Hands back `prev` itself when the split is already present so that React
 * can bail out of the state update instead of re-rendering.
 */
function withLoadedSplit(prev: Set<Split>, split: Split): Set<Split> {
    return prev.has(split) ? prev : new Set(prev).add(split);
}

/**
 * Provides dataset loading state and query helpers to the component tree.
 *
 * On mount it fetches split info from `datasetLoader` and preloads every
 * split in `initialSplits` in parallel. Failures during that initial load are
 * logged to the console and leave `isLoading` set to `false`.
 *
 * @param children - Subtree that can call `useDataset()`.
 * @param initialSplits - Splits to preload on mount; defaults to
 *   `['train', 'test', 'validation']`. Only the value seen at mount is used.
 */
export function DatasetProvider({
    children,
    initialSplits = ['train', 'test', 'validation'],
}: DatasetProviderProps) {
    const [isLoading, setIsLoading] = useState(true);
    const [loadedSplits, setLoadedSplits] = useState<Set<Split>>(new Set());
    const [splitCounts, setSplitCounts] = useState<Record<string, number>>({});

    // Load initial splits on mount
    useEffect(() => {
        // Flipped by the cleanup below so a load that outlives this effect
        // (unmount, or StrictMode's mount/unmount/mount cycle in development)
        // neither touches state nor keeps issuing further preloads.
        let cancelled = false;

        const loadInitialData = async () => {
            setIsLoading(true);
            try {
                // Load split info first
                const info = await datasetLoader.getSplitInfo();
                if (cancelled) {
                    return;
                }
                setSplitCounts(info);

                // Load initial splits in parallel
                await Promise.all(
                    initialSplits.map(async (split) => {
                        await datasetLoader.preloadSplit(split);
                        if (!cancelled) {
                            setLoadedSplits((prev) =>
                                withLoadedSplit(prev, split)
                            );
                        }
                    })
                );
            } catch (error) {
                console.error('Failed to load dataset:', error);
            } finally {
                if (!cancelled) {
                    setIsLoading(false);
                }
            }
        };

        // Errors are handled inside loadInitialData; `void` marks the
        // promise as intentionally not awaited.
        void loadInitialData();

        return () => {
            cancelled = true;
        };
        // Intentionally mount-only: the default `initialSplits` array is a new
        // reference on every render, so listing it here would reload forever.
    }, []);

    const loadSplit = useCallback(async (split: Split) => {
        if (datasetLoader.isLoaded(split)) {
            // Loader already has the data; just make sure state reflects it.
            setLoadedSplits((prev) => withLoadedSplit(prev, split));
            return;
        }

        await datasetLoader.preloadSplit(split);
        setLoadedSplits((prev) => withLoadedSplit(prev, split));

        // Update counts after loading
        const examples = datasetLoader.getAllExamples(split);
        setSplitCounts((prev) => ({ ...prev, [split]: examples.length }));
    }, []);

    const filterExamples = useCallback(
        (
            split: Split,
            filters: FilterOptions,
            limit: number = 50,
            offset: number = 0
        ): LoadExamplesResult => {
            // Querying a split that has not been loaded yields an empty page
            // rather than reaching into the loader.
            if (!datasetLoader.isLoaded(split)) {
                return { examples: [], total: 0 };
            }
            return datasetLoader.filterExamples(split, filters, limit, offset);
        },
        []
    );

    const getCodingProblems = useCallback((split: Split): CodingProblem[] => {
        return datasetLoader.getCodingProblems(split);
    }, []);

    const getAllExamples = useCallback((split: Split): DatasetExample[] => {
        return datasetLoader.getAllExamples(split);
    }, []);

    // Memoized so consumers only re-render when a piece of dataset state
    // actually changes, not every time this provider's parent re-renders.
    const value = useMemo<DatasetContextValue>(
        () => ({
            isLoading,
            loadedSplits,
            splitCounts,
            loadSplit,
            filterExamples,
            getCodingProblems,
            getAllExamples,
        }),
        [
            isLoading,
            loadedSplits,
            splitCounts,
            loadSplit,
            filterExamples,
            getCodingProblems,
            getAllExamples,
        ]
    );

    return (
        <DatasetContext.Provider value={value}>
            {children}
        </DatasetContext.Provider>
    );
}

/**
 * Reads the dataset API from the nearest `DatasetProvider`.
 *
 * @returns The current `DatasetContextValue`.
 * @throws Error when called outside of a `DatasetProvider`, i.e. when the
 *   context still holds its `null` default.
 */
export function useDataset() {
    const dataset = useContext(DatasetContext);
    if (dataset) {
        return dataset;
    }
    throw new Error('useDataset must be used within a DatasetProvider');
}