gionuibk committed on
Commit
7d7a1d7
·
verified ·
1 Parent(s): b5c025a

Deploy Dashboard

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. app.py +102 -0
  3. deploy_review.py +30 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base keeps the image small; 3.10 matches the app's dependencies.
FROM python:3.10-slim

WORKDIR /app

# Copy and install dependencies first so this layer is cached when only
# app.py changes; --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# Hugging Face Spaces routes traffic to port 7860 by convention.
EXPOSE 7860

# Bind to 0.0.0.0 so the server is reachable from outside the container.
CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download
import glob  # NOTE(review): unused in this file — candidate for removal
import time
import plotly.express as px
from concurrent.futures import ThreadPoolExecutor

# Config
DATASET_ID = "gionuibk/hyperliquidL2Book"  # HF dataset repo holding the S3 inventory shards
HF_TOKEN = os.environ.get("HF_TOKEN")  # presumably injected as a Space secret — verify against deploy script
CACHE_DIR = "/data/cache"  # local download cache for inventory parquet files
os.makedirs(CACHE_DIR, exist_ok=True)

st.set_page_config(page_title="HPLL Data Review", layout="wide", page_icon="πŸ“Š")
18
@st.cache_data(ttl=300)
def load_s3_inventory():
    """Download every inventory parquet shard from the dataset repo and
    concatenate them into one DataFrame.

    Returns an empty DataFrame when no shards exist (or none load).
    Results are cached by Streamlit for 5 minutes (ttl=300).
    """
    st.toast("Fetching Inventory...", icon="⏳")
    api = HfApi(token=HF_TOKEN)
    files = api.list_repo_files(repo_id=DATASET_ID, repo_type="dataset")

    # Filter for inventory parts
    inv_files = [f for f in files if f.startswith("config/inventory_parts/")]

    if not inv_files:
        return pd.DataFrame()

    def download_and_load(f):
        # Best-effort: one failed shard should not take down the dashboard.
        try:
            local = hf_hub_download(repo_id=DATASET_ID, filename=f, repo_type="dataset", local_dir=CACHE_DIR, token=HF_TOKEN)
            return pd.read_parquet(local)
        except Exception:
            return None

    # Shards are independent and the work is I/O bound, so fetch concurrently.
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = executor.map(download_and_load, inv_files)

    dfs = [r for r in results if r is not None]

    if dfs:
        full_df = pd.concat(dfs, ignore_index=True)
        # 'modified' is a Unix epoch in seconds; derive a calendar date for grouping.
        full_df['date'] = pd.to_datetime(full_df['modified'], unit='s').dt.date
        return full_df
    return pd.DataFrame()
50
+
51
# Main UI
st.title("📊 Hyperliquid S3 Backfill Control Center")

col1, col2 = st.columns([1, 1])

with col1:
    st.info("Reading from: **gionuibk/hyperliquidL2Book**")

# Load Data (cached for 5 minutes inside the loader)
df = load_s3_inventory()

if df.empty:
    st.warning("⚠️ No Inventory Data Found yet! Indexer V3 might still be running.")
else:
    # Key Metrics
    total_files = len(df)
    # Binary (1024-based) conversion; the prefix table below uses the same
    # divisor so the headline total and per-prefix sizes agree.
    total_size_gb = df['size'].sum() / 1024 / 1024 / 1024

    st.metric(label="Total S3 Files Indexed", value=f"{total_files:,}")
    st.metric(label="Total Size", value=f"{total_size_gb:.2f} GB")

    # Grouping
    st.subheader("📁 Prefix Breakdown")

    # Extract Root Prefix: bucket name + first path segment of the object key.
    df['root_prefix'] = df['bucket'].astype(str) + "/" + df['key'].str.split('/').str[0]

    counts = df.groupby('root_prefix').size().reset_index(name='count')
    sizes = df.groupby('root_prefix')['size'].sum().reset_index(name='bytes')
    # Same 1024-based divisor as the headline metric (was 1e9 — inconsistent).
    sizes['GB'] = sizes['bytes'] / 1024 ** 3

    stats = pd.merge(counts, sizes, on='root_prefix')
    st.dataframe(stats, use_container_width=True)

    # Visuals
    st.subheader("📅 Data Distribution by Date")
    daily_counts = df.groupby('date').size().reset_index(name='files')
    fig = px.bar(daily_counts, x='date', y='files', title="Daily Index Volume")
    st.plotly_chart(fig, use_container_width=True)

    # Raw Data Explorer
    with st.expander("🔎 Raw Inventory Search"):
        search = st.text_input("File filter (regex)", "")
        if search:
            subset = df[df['key'].str.contains(search, na=False)]
            st.dataframe(subset.head(100), use_container_width=True)
        else:
            st.dataframe(df.head(100), use_container_width=True)

st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
if st.button("Refresh"):
    st.rerun()
deploy_review.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Deploy the dashboard folder to a Hugging Face Docker Space."""
from huggingface_hub import HfApi, create_repo
import os
import sys

SPACE_ID = "gionuibk/HPLL-DataReview"
HF_TOKEN = os.environ.get("HF_TOKEN")
# Default to the directory this script lives in (was a hard-coded developer
# machine path, which broke deploys from any other checkout location);
# override with DEPLOY_FOLDER when deploying a different folder.
DEPLOY_FOLDER = os.environ.get("DEPLOY_FOLDER", os.path.dirname(os.path.abspath(__file__)))

if not HF_TOKEN:
    print("❌ HF_TOKEN missing")
    sys.exit(1)

print(f"🚀 Deploying Review Dashboard to {SPACE_ID}...")
api = HfApi(token=HF_TOKEN)

# Idempotent: creates the Space on first run, reuses it afterwards.
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="docker", exist_ok=True, token=HF_TOKEN)

print("🔑 Adding Secrets...")
try:
    api.add_space_secret(repo_id=SPACE_ID, key="HF_TOKEN", value=HF_TOKEN)
except Exception as e:
    # Best-effort (the secret may already exist) — but report instead of
    # silently swallowing every possible error with a bare except.
    print(f"⚠️ Could not set secret (may already exist): {e}")

print("📤 Uploading...")
api.upload_folder(
    folder_path=DEPLOY_FOLDER,
    repo_id=SPACE_ID,
    repo_type="space",
    path_in_repo=".",
    commit_message="Deploy Dashboard"
)
print("✅ Done! https://huggingface.co/spaces/gionuibk/HPLL-DataReview")
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# streamlit is pinned so the Space UI is reproducible; the rest float to the
# latest compatible release at build time.
streamlit==1.32.0
pandas
huggingface_hub
fastparquet
pyarrow
plotly