Spaces:
Running
Running
use @carlopi's trick to fake glob expansion
Browse files
https://github.com/duckdb/duckdb/pull/10051#issuecomment-1865825012
(h/t @severo)
- docs/data/presse.parquet.sh +3 -11
docs/data/presse.parquet.sh
CHANGED
|
@@ -5,19 +5,11 @@ CREATE TABLE presse AS (
|
|
| 5 |
SELECT title
|
| 6 |
, author
|
| 7 |
, LPAD((REGEXP_EXTRACT(date, '1[0-9][0-9][0-9]') || '-01-01'), 10, '0')::DATE AS year
|
| 8 |
-
FROM read_parquet(
|
| 9 |
-
|
| 10 |
-
for i in $(seq 1 320); do
|
| 11 |
-
echo " 'https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main/gallica_presse_$i.parquet'," >> $TMPDIR/presse.sql
|
| 12 |
-
done
|
| 13 |
-
|
| 14 |
-
echo """ ])
|
| 15 |
);
|
| 16 |
-
|
| 17 |
COPY presse TO '$TMPDIR/presse.parquet' (FORMAT 'parquet', COMPRESSION 'GZIP');
|
| 18 |
-
"""
|
| 19 |
-
|
| 20 |
-
duckdb < $TMPDIR/presse.sql
|
| 21 |
|
| 22 |
# isatty
|
| 23 |
if [ -t 1 ]; then
|
|
|
|
| 5 |
SELECT title
|
| 6 |
, author
|
| 7 |
, LPAD((REGEXP_EXTRACT(date, '1[0-9][0-9][0-9]') || '-01-01'), 10, '0')::DATE AS year
|
| 8 |
+
FROM read_parquet(
|
| 9 |
+
[('https://huggingface.co/datasets/PleIAs/French-PD-Newspapers/resolve/main/gallica_presse_{:d}.parquet').format(n) for n in range(1, 321)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
);
|
|
|
|
| 11 |
COPY presse TO '$TMPDIR/presse.parquet' (FORMAT 'parquet', COMPRESSION 'GZIP');
|
| 12 |
+
""" | duckdb
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# isatty
|
| 15 |
if [ -t 1 ]; then
|