gionuibk commited on
Commit
6eb712d
·
verified ·
1 Parent(s): 340f32b

🦆 Add SQL Query Tab (DuckDB)

Browse files
Files changed (2) hide show
  1. app.py +53 -0
  2. requirements.txt +1 -0
app.py CHANGED
@@ -6,6 +6,8 @@ import glob
6
  import time
7
  import plotly.express as px
8
  from concurrent.futures import ThreadPoolExecutor
 
 
9
 
10
  # Config
11
  MAIN_DATASET = "gionuibk/hyperliquidL2Book"
@@ -232,6 +234,57 @@ else:
232
  else:
233
  st.dataframe(ds_subset, use_container_width=True)
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
236
  if st.button("Refresh"):
237
  st.rerun()
 
6
  import time
7
  import plotly.express as px
8
  from concurrent.futures import ThreadPoolExecutor
9
+ import duckdb
10
+
11
 
12
  # Config
13
  MAIN_DATASET = "gionuibk/hyperliquidL2Book"
 
234
  else:
235
  st.dataframe(ds_subset, use_container_width=True)
236
 
237
+ # ===================== SQL QUERY TAB =====================
+ # Ad-hoc DuckDB console: lets the user run SQL directly against the
+ # dataset's remote Parquet files from inside the Streamlit app.
238
+ st.divider()
239
+ # NOTE(review): "πŸ¦†" is mojibake (UTF-8 duck emoji decoded as cp1252);
+ # fix the page/file encoding rather than editing the literal here.
+ st.subheader("πŸ¦† SQL Query (DuckDB)")
240
+ st.caption("Query any HF Parquet file remotely. **Fast** - runs on server, not your local machine.")
241
+
242
# Helper: build the direct-download URL for a Parquet file in an HF dataset repo.
def hf_parquet_url(repo_id, filename):
    """Return the raw-file URL for *filename* inside dataset *repo_id*.

    Uses the `resolve/main` endpoint, which serves the file bytes directly
    so DuckDB's httpfs extension can read it over HTTPS.
    """
    # BUG FIX: the original interpolated a literal "(unknown)" instead of the
    # selected file, so every generated query pointed at a nonexistent path
    # and the `filename` parameter was silently unused.
    return f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}"
245
+
246
+ # Dataset + File Selection
247
+ # Two side-by-side pickers: dataset repo on the left, parquet file on the right.
+ col_ds, col_file = st.columns(2)
248
+ with col_ds:
249
+ # NOTE(review): ALL_DATASETS is defined elsewhere in app.py; presumably a
+ # list of HF repo-id strings — confirm before extending this concatenation.
+ sql_dataset = st.selectbox("Dataset", ALL_DATASETS + ['gionuibk/hyperliquidL2Book-v2'], key="sql_ds")
250
+ with col_file:
251
+ # Fetch file list for selected dataset (cached)
252
@st.cache_data(ttl=600)  # re-list at most every 10 minutes per dataset
def get_parquet_files(ds):
    """List up to 100 Parquet file paths in the HF dataset repo *ds*.

    Returns [] when the repo is private/missing or the Hub API call fails,
    so the caller can show a "(No files found)" placeholder instead of
    crashing the app.
    """
    try:
        api = HfApi(token=HF_TOKEN)
        files = api.list_repo_files(repo_id=ds, repo_type="dataset")
        # Cap the list so the file selectbox stays responsive on huge repos.
        return [f for f in files if f.endswith('.parquet')][:100]
    except Exception:
        # FIX: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Still best-effort by design.
        return []
260
+
261
+ available_files = get_parquet_files(sql_dataset)
262
+ # Fall back to a sentinel entry so the selectbox is never empty; downstream
+ # code checks for the literal "(No files found)" string before building URLs.
+ sql_file = st.selectbox("File", available_files if available_files else ["(No files found)"], key="sql_file")
263
+
264
# ---- SQL input -------------------------------------------------------------
# Pre-fill the editor with a working query against the currently selected file;
# fall back to a "URL" placeholder when no file is available.
example_url = hf_parquet_url(sql_dataset, sql_file) if sql_file and sql_file != "(No files found)" else "URL"
default_sql = f"SELECT * FROM read_parquet('{example_url}') LIMIT 10"
sql_input = st.text_area("SQL Query", value=default_sql, height=100)

# ---- Execute button ---------------------------------------------------------
# NOTE: emoji literals below restore the intended UTF-8 text that the scraped
# source showed as cp1252 mojibake.
if st.button("🚀 Run Query", type="primary"):
    if sql_input.strip():
        with st.spinner("Executing query..."):
            con = None
            try:
                con = duckdb.connect(':memory:')
                # httpfs lets DuckDB read Parquet straight over HTTPS.
                con.execute("INSTALL httpfs; LOAD httpfs;")
                result = con.execute(sql_input).fetchdf()
                st.success(f"✅ Query returned {len(result)} rows.")
                st.dataframe(result, use_container_width=True)

                # Offer the result as a CSV download.
                csv = result.to_csv(index=False)
                st.download_button("⬇️ Download CSV", csv, "query_result.csv", "text/csv")
            except Exception as e:
                st.error(f"❌ Query Error: {e}")
            finally:
                # BUG FIX: the original never closed the connection, leaking an
                # in-memory DuckDB handle on every button press.
                if con is not None:
                    con.close()
    else:
        st.warning("Please enter a SQL query.")
287
+
288
  st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
289
  if st.button("Refresh"):
290
  st.rerun()
requirements.txt CHANGED
@@ -4,3 +4,4 @@ huggingface_hub
4
  fastparquet
5
  pyarrow
6
  plotly
 
 
4
  fastparquet
5
  pyarrow
6
  plotly
7
+ duckdb