Spaces:
Runtime error
Runtime error
🦆 Add SQL Query Tab (DuckDB)
Browse files- app.py +53 -0
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -6,6 +6,8 @@ import glob
|
|
| 6 |
import time
|
| 7 |
import plotly.express as px
|
| 8 |
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Config
|
| 11 |
MAIN_DATASET = "gionuibk/hyperliquidL2Book"
|
|
@@ -232,6 +234,57 @@ else:
|
|
| 232 |
else:
|
| 233 |
st.dataframe(ds_subset, use_container_width=True)
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
|
| 236 |
if st.button("Refresh"):
|
| 237 |
st.rerun()
|
|
|
|
| 6 |
import time
|
| 7 |
import plotly.express as px
|
| 8 |
from concurrent.futures import ThreadPoolExecutor
|
| 9 |
+
import duckdb
|
| 10 |
+
|
| 11 |
|
| 12 |
# Config
|
| 13 |
MAIN_DATASET = "gionuibk/hyperliquidL2Book"
|
|
|
|
| 234 |
else:
|
| 235 |
st.dataframe(ds_subset, use_container_width=True)
|
| 236 |
|
| 237 |
+
# ===================== SQL QUERY TAB =====================
|
| 238 |
+
st.divider()
|
| 239 |
+
st.subheader("🦆 SQL Query (DuckDB)")
|
| 240 |
+
st.caption("Query any HF Parquet file remotely. **Fast** - runs on server, not your local machine.")
|
| 241 |
+
|
| 242 |
+
# Helper to build URL
|
| 243 |
+
def hf_parquet_url(repo_id, filename):
|
| 244 |
+
    return f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}"
|
| 245 |
+
|
| 246 |
+
# Dataset + File Selection
|
| 247 |
+
col_ds, col_file = st.columns(2)
|
| 248 |
+
with col_ds:
|
| 249 |
+
sql_dataset = st.selectbox("Dataset", ALL_DATASETS + ['gionuibk/hyperliquidL2Book-v2'], key="sql_ds")
|
| 250 |
+
with col_file:
|
| 251 |
+
# Fetch file list for selected dataset (cached)
|
| 252 |
+
@st.cache_data(ttl=600)
|
| 253 |
+
def get_parquet_files(ds):
|
| 254 |
+
try:
|
| 255 |
+
api = HfApi(token=HF_TOKEN)
|
| 256 |
+
files = api.list_repo_files(repo_id=ds, repo_type="dataset")
|
| 257 |
+
return [f for f in files if f.endswith('.parquet')][:100] # Limit to 100
|
| 258 |
+
except:
|
| 259 |
+
return []
|
| 260 |
+
|
| 261 |
+
available_files = get_parquet_files(sql_dataset)
|
| 262 |
+
sql_file = st.selectbox("File", available_files if available_files else ["(No files found)"], key="sql_file")
|
| 263 |
+
|
| 264 |
+
# SQL Input
|
| 265 |
+
example_url = hf_parquet_url(sql_dataset, sql_file) if sql_file and sql_file != "(No files found)" else "URL"
|
| 266 |
+
default_sql = f"SELECT * FROM read_parquet('{example_url}') LIMIT 10"
|
| 267 |
+
sql_input = st.text_area("SQL Query", value=default_sql, height=100)
|
| 268 |
+
|
| 269 |
+
# Execute Button
|
| 270 |
+
if st.button("🚀 Run Query", type="primary"):
|
| 271 |
+
if sql_input.strip():
|
| 272 |
+
with st.spinner("Executing query..."):
|
| 273 |
+
try:
|
| 274 |
+
con = duckdb.connect(':memory:')
|
| 275 |
+
con.execute("INSTALL httpfs; LOAD httpfs;")
|
| 276 |
+
result = con.execute(sql_input).fetchdf()
|
| 277 |
+
st.success(f"✅ Query returned {len(result)} rows.")
|
| 278 |
+
st.dataframe(result, use_container_width=True)
|
| 279 |
+
|
| 280 |
+
# Download button
|
| 281 |
+
csv = result.to_csv(index=False)
|
| 282 |
+
st.download_button("⬇️ Download CSV", csv, "query_result.csv", "text/csv")
|
| 283 |
+
except Exception as e:
|
| 284 |
+
st.error(f"❌ Query Error: {e}")
|
| 285 |
+
else:
|
| 286 |
+
st.warning("Please enter a SQL query.")
|
| 287 |
+
|
| 288 |
st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
|
| 289 |
if st.button("Refresh"):
|
| 290 |
st.rerun()
|
requirements.txt
CHANGED
|
@@ -4,3 +4,4 @@ huggingface_hub
|
|
| 4 |
fastparquet
|
| 5 |
pyarrow
|
| 6 |
plotly
|
|
|
|
|
|
| 4 |
fastparquet
|
| 5 |
pyarrow
|
| 6 |
plotly
|
| 7 |
+
duckdb
|