Spaces:
Running
Running
| import { AsyncDuckDB, AsyncDuckDBConnection } from "@duckdb/duckdb-wasm"; | |
| import { useCallback, useEffect, useRef, useState } from "react"; | |
// https://duckdb.org/docs/api/wasm/query#arrow-table-to-json
// TODO: import the arrow lib and use the correct type
/**
 * Converts an Arrow query result into an array of plain row objects.
 *
 * Calls `toArray()` on the result and `toJSON()` on every row it yields.
 * A missing result (`null`/`undefined`), or a `toArray()` that returns
 * nothing, produces an empty array instead of throwing.
 *
 * @param arrowResult - Object exposing `toArray()`, whose rows expose `toJSON()`.
 * @returns One JSON object per row; `[]` when there is nothing to convert.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const arrowResultToJson = (arrowResult: any) => {
  // Optional chaining makes the `[]` fallback reachable: `map` alone always
  // returns a (truthy) array, so the fallback can only matter for nullish input.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return arrowResult?.toArray()?.map((row: any) => row.toJSON()) ?? [];
};
/** Turns an arbitrary thrown value into a string suitable for the `error` state. */
const describeError = (err: unknown): string =>
  err instanceof Error ? err.message : String(err);

/**
 * React hook that loads a Parquet file into a DuckDB-WASM table named
 * `dataset` and runs SQL queries against it.
 *
 * Whenever `db` or `datasetUrl` changes the table is (re)created from the URL;
 * whenever the dataset is loaded and `datasetQuery` is non-empty the query is
 * executed and its rows are exposed as `dataset`.
 *
 * @param db - DuckDB instance, or `null` while it is not available yet.
 * @param options.datasetUrl - URL of the Parquet file; an empty string resets the db.
 * @param options.datasetQuery - SQL currently selected by the caller.
 * @param options.setDatasetQuery - Setter used to install the default query or clear it.
 * @param options.onQueryChanged - Optional callback invoked after every query change made by this hook.
 * @returns `loading`/`querying` flags, the query result rows (`dataset`), the last
 *   error message, and the `clearDataset`, `loadDataset` and `performQuery` actions.
 *   `loadDataset` returns a cancel function when it starts a load.
 */
export const useParquetTable = (
  db: AsyncDuckDB | null,
  {
    datasetUrl,
    datasetQuery,
    setDatasetQuery: _setDatasetQuery,
    onQueryChanged,
  }: {
    datasetUrl: string;
    datasetQuery: string;
    setDatasetQuery: (query: string) => void;
    onQueryChanged?: (query: string) => void;
  }
) => {
  const [datasetLoaded, setDatasetLoaded] = useState(false);
  const [loading, setLoading] = useState(false);
  const [querying, setQuerying] = useState(false);
  const [dataset, setDataset] = useState<Record<string, unknown>[] | null>(
    null
  );
  const [error, setError] = useState<string | null>(null);

  // Keep the latest callback/query in refs so the memoised callbacks below do
  // not need to be recreated (and the dataset reloaded) when they change.
  const onQueryChangedRef = useRef(onQueryChanged);
  useEffect(() => {
    onQueryChangedRef.current = onQueryChanged;
  }, [onQueryChanged]);

  const datasetQueryRef = useRef(datasetQuery);
  useEffect(() => {
    datasetQueryRef.current = datasetQuery;
  }, [datasetQuery]);

  // Identifies the most recent performQuery call so that a slow, older query
  // cannot overwrite the result of a newer one.
  const latestQueryIdRef = useRef(0);

  const setDatasetQuery = useCallback(
    (query: string) => {
      _setDatasetQuery(query);
      onQueryChangedRef.current?.(query);
    },
    [_setDatasetQuery, onQueryChangedRef]
  );

  const clearDataset = useCallback(() => {
    setDataset(null);
    setError(null);
  }, []);

  const loadDataset = useCallback((): (() => void) | undefined => {
    if (!db) {
      return undefined;
    }

    if (!datasetUrl) {
      console.log("Resetting db");
      setDataset(null);
      setError(null);
      setDatasetLoaded(false);
      setDatasetQuery("");
      // Wrapped so a rejected reset is logged instead of left unhandled.
      Promise.resolve(db.reset()).catch((err: unknown) => console.error(err));
      return undefined;
    }

    setLoading(true);
    setDataset(null);
    setError(null);
    setDatasetLoaded(false);
    console.log("Loading", datasetUrl);

    const status: { conn: AsyncDuckDBConnection | null; killed: boolean } = {
      conn: null,
      killed: false,
    };

    // Closes the connection at most once, whichever of cleanup/finally runs first.
    const releaseConnection = () => {
      const conn = status.conn;
      status.conn = null;
      if (conn) {
        Promise.resolve(conn.close()).catch((err: unknown) =>
          console.error(err)
        );
      }
    };

    const run = async () => {
      try {
        const conn = await db.connect();
        if (status.killed) {
          await conn.close();
          return;
        }
        status.conn = conn;

        // Double any single quote so the URL cannot terminate the SQL string literal.
        const escapedUrl = datasetUrl.replace(/'/g, "''");
        // we get httpfs for free with cors restrictions
        // we get parquet downloaded as we load it
        // https://duckdb.org/docs/api/wasm/extensions
        // https://duckdb.org/docs/api/wasm/data_ingestion#parquet
        await conn.query(
          `LOAD parquet;LOAD httpfs;DROP TABLE IF EXISTS dataset;CREATE TABLE dataset AS SELECT * FROM '${escapedUrl}';`
        );
        if (status.killed) {
          return;
        }

        setDatasetLoaded(true);
        if (!datasetQueryRef.current) {
          console.log("Setting default query");
          setDatasetQuery(`SELECT * FROM dataset LIMIT 10;`);
        }
      } catch (err: unknown) {
        // A cancelled load must not clobber the state of whatever replaced it.
        if (status.killed) {
          return;
        }
        console.error(err);
        setError(describeError(err));
        setDataset(null);
        setDatasetLoaded(false);
        setDatasetQuery("");
      } finally {
        releaseConnection();
        if (!status.killed) {
          setLoading(false);
        }
      }
    };
    void run();

    return () => {
      status.killed = true;
      if (status.conn) {
        console.log("Closing connection");
      }
      releaseConnection();
      // The cancelled run no longer touches state, so clear the flag here;
      // a follow-up load sets it back to true immediately.
      setLoading(false);
    };
  }, [db, datasetUrl, setDatasetQuery, datasetQueryRef]);

  const performQuery = useCallback(
    (query: string) => {
      if (!db || !datasetLoaded) {
        return;
      }

      latestQueryIdRef.current += 1;
      const queryId = latestQueryIdRef.current;
      const isLatest = () => latestQueryIdRef.current === queryId;

      setQuerying(true);

      const run = async () => {
        let conn: AsyncDuckDBConnection | null = null;
        try {
          conn = await db.connect();
          const result = await conn.query(query);
          if (isLatest()) {
            setDataset(arrowResultToJson(result));
            setError(null);
          }
        } catch (err: unknown) {
          if (isLatest()) {
            console.error(err);
            setError(describeError(err));
          }
        } finally {
          // Always release the connection, even when the query failed.
          if (conn) {
            try {
              await conn.close();
            } catch (closeErr: unknown) {
              console.error(closeErr);
            }
          }
          // Only the newest query owns the `querying` flag.
          if (isLatest()) {
            setQuerying(false);
          }
        }
      };
      void run();
    },
    [db, datasetLoaded]
  );

  useEffect(() => {
    if (datasetLoaded && datasetQuery) {
      performQuery(datasetQuery);
    }
  }, [datasetLoaded, datasetQuery, performQuery]);

  // Return loadDataset's cancel function so React runs it when the inputs
  // change or the component unmounts.
  useEffect(() => loadDataset(), [loadDataset]);

  return {
    loading,
    dataset,
    error,
    clearDataset,
    loadDataset,
    querying,
    performQuery,
  };
};