gionuibk commited on
Commit
6eb712d
·
verified ·
1 Parent(s): 340f32b

🦆 Add SQL Query Tab (DuckDB)

Browse files
Files changed (2) hide show
  1. app.py +53 -0
  2. requirements.txt +1 -0
app.py CHANGED
@@ -6,6 +6,8 @@ import glob
6
  import time
7
  import plotly.express as px
8
  from concurrent.futures import ThreadPoolExecutor
 
 
9
 
10
  # Config
11
  MAIN_DATASET = "gionuibk/hyperliquidL2Book"
@@ -232,6 +234,57 @@ else:
232
  else:
233
  st.dataframe(ds_subset, use_container_width=True)
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
236
  if st.button("Refresh"):
237
  st.rerun()
 
6
  import time
7
  import plotly.express as px
8
  from concurrent.futures import ThreadPoolExecutor
9
+ import duckdb
10
+
11
 
12
  # Config
13
  MAIN_DATASET = "gionuibk/hyperliquidL2Book"
 
234
  else:
235
  st.dataframe(ds_subset, use_container_width=True)
236
 
237
+ # ===================== SQL QUERY TAB =====================
+ # Ad-hoc DuckDB console: lets the user run SQL directly against the
+ # dataset's remote Parquet files from inside the Streamlit app.
238
+ st.divider()
239
+ # NOTE(review): "πŸ¦†" is mojibake (UTF-8 duck emoji decoded as cp1252);
+ # fix the page/file encoding rather than editing the literal here.
+ st.subheader("πŸ¦† SQL Query (DuckDB)")
240
+ st.caption("Query any HF Parquet file remotely. **Fast** - runs on server, not your local machine.")
241
+
242
# Helper: build the direct-download URL for a Parquet file in an HF dataset repo.
def hf_parquet_url(repo_id, filename):
    """Return the raw-file URL for *filename* inside dataset *repo_id*.

    Uses the `resolve/main` endpoint, which serves the file bytes directly
    so DuckDB's httpfs extension can read it over HTTPS.
    """
    # BUG FIX: the original interpolated a literal "(unknown)" instead of the
    # selected file, so every generated query pointed at a nonexistent path
    # and the `filename` parameter was silently unused.
    return f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}"
245
+
246
+ # Dataset + File Selection
247
+ # Two side-by-side pickers: dataset repo on the left, parquet file on the right.
+ col_ds, col_file = st.columns(2)
248
+ with col_ds:
249
+ # NOTE(review): ALL_DATASETS is defined elsewhere in app.py; presumably a
+ # list of HF repo-id strings — confirm before extending this concatenation.
+ sql_dataset = st.selectbox("Dataset", ALL_DATASETS + ['gionuibk/hyperliquidL2Book-v2'], key="sql_ds")
250
+ with col_file:
251
+ # Fetch file list for selected dataset (cached)
252
@st.cache_data(ttl=600)  # re-list at most every 10 minutes per dataset
def get_parquet_files(ds):
    """List up to 100 Parquet file paths in the HF dataset repo *ds*.

    Returns [] when the repo is private/missing or the Hub API call fails,
    so the caller can show a "(No files found)" placeholder instead of
    crashing the app.
    """
    try:
        api = HfApi(token=HF_TOKEN)
        files = api.list_repo_files(repo_id=ds, repo_type="dataset")
        # Cap the list so the file selectbox stays responsive on huge repos.
        return [f for f in files if f.endswith('.parquet')][:100]
    except Exception:
        # FIX: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Still best-effort by design.
        return []
260
+
261
+ available_files = get_parquet_files(sql_dataset)
262
+ # Fall back to a sentinel entry so the selectbox is never empty; downstream
+ # code checks for the literal "(No files found)" string before building URLs.
+ sql_file = st.selectbox("File", available_files if available_files else ["(No files found)"], key="sql_file")
263
+
264
# ---- SQL input -------------------------------------------------------------
# Pre-fill the editor with a working query against the currently selected file;
# fall back to a "URL" placeholder when no file is available.
example_url = hf_parquet_url(sql_dataset, sql_file) if sql_file and sql_file != "(No files found)" else "URL"
default_sql = f"SELECT * FROM read_parquet('{example_url}') LIMIT 10"
sql_input = st.text_area("SQL Query", value=default_sql, height=100)

# ---- Execute button ---------------------------------------------------------
# NOTE: emoji literals below restore the intended UTF-8 text that the scraped
# source showed as cp1252 mojibake.
if st.button("🚀 Run Query", type="primary"):
    if sql_input.strip():
        with st.spinner("Executing query..."):
            con = None
            try:
                con = duckdb.connect(':memory:')
                # httpfs lets DuckDB read Parquet straight over HTTPS.
                con.execute("INSTALL httpfs; LOAD httpfs;")
                result = con.execute(sql_input).fetchdf()
                st.success(f"✅ Query returned {len(result)} rows.")
                st.dataframe(result, use_container_width=True)

                # Offer the result as a CSV download.
                csv = result.to_csv(index=False)
                st.download_button("⬇️ Download CSV", csv, "query_result.csv", "text/csv")
            except Exception as e:
                st.error(f"❌ Query Error: {e}")
            finally:
                # BUG FIX: the original never closed the connection, leaking an
                # in-memory DuckDB handle on every button press.
                if con is not None:
                    con.close()
    else:
        st.warning("Please enter a SQL query.")
287
+
288
  st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
289
  if st.button("Refresh"):
290
  st.rerun()
requirements.txt CHANGED
@@ -4,3 +4,4 @@ huggingface_hub
4
  fastparquet
5
  pyarrow
6
  plotly
 
 
4
  fastparquet
5
  pyarrow
6
  plotly
7
+ duckdb