gionuibk committed on
Commit
7d7a1d7
·
verified ·
1 Parent(s): b5c025a

Deploy Dashboard

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. app.py +102 -0
  3. deploy_review.py +30 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base keeps the image small; 3.10 matches the app's dependencies.
FROM python:3.10-slim

WORKDIR /app

# Copy and install dependencies first so this layer is cached when only
# app.py changes; --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .

# Hugging Face Spaces routes traffic to port 7860 by convention.
EXPOSE 7860

# Bind to 0.0.0.0 so the server is reachable from outside the container.
CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import pandas as pd
import os
from huggingface_hub import HfApi, hf_hub_download
import glob  # NOTE(review): unused in this file — candidate for removal
import time
import plotly.express as px
from concurrent.futures import ThreadPoolExecutor

# Config
DATASET_ID = "gionuibk/hyperliquidL2Book"  # HF dataset repo holding the S3 inventory shards
HF_TOKEN = os.environ.get("HF_TOKEN")  # presumably injected as a Space secret — verify against deploy script
CACHE_DIR = "/data/cache"  # local download cache for inventory parquet files
os.makedirs(CACHE_DIR, exist_ok=True)

st.set_page_config(page_title="HPLL Data Review", layout="wide", page_icon="πŸ“Š")
18
@st.cache_data(ttl=300)
def load_s3_inventory():
    """Download every inventory parquet shard from the dataset repo and
    concatenate them into one DataFrame.

    Returns an empty DataFrame when no shards exist (or none load).
    Results are cached by Streamlit for 5 minutes (ttl=300).
    """
    st.toast("Fetching Inventory...", icon="⏳")
    api = HfApi(token=HF_TOKEN)
    files = api.list_repo_files(repo_id=DATASET_ID, repo_type="dataset")

    # Filter for inventory parts
    inv_files = [f for f in files if f.startswith("config/inventory_parts/")]

    if not inv_files:
        return pd.DataFrame()

    def download_and_load(f):
        # Best-effort: one failed shard should not take down the dashboard.
        try:
            local = hf_hub_download(repo_id=DATASET_ID, filename=f, repo_type="dataset", local_dir=CACHE_DIR, token=HF_TOKEN)
            return pd.read_parquet(local)
        except Exception:
            return None

    # Shards are independent and the work is I/O bound, so fetch concurrently.
    with ThreadPoolExecutor(max_workers=10) as executor:
        results = executor.map(download_and_load, inv_files)

    dfs = [r for r in results if r is not None]

    if dfs:
        full_df = pd.concat(dfs, ignore_index=True)
        # 'modified' is a Unix epoch in seconds; derive a calendar date for grouping.
        full_df['date'] = pd.to_datetime(full_df['modified'], unit='s').dt.date
        return full_df
    return pd.DataFrame()
50
+
51
# Main UI
st.title("📊 Hyperliquid S3 Backfill Control Center")

col1, col2 = st.columns([1, 1])

with col1:
    st.info("Reading from: **gionuibk/hyperliquidL2Book**")

# Load Data (cached for 5 minutes inside the loader)
df = load_s3_inventory()

if df.empty:
    st.warning("⚠️ No Inventory Data Found yet! Indexer V3 might still be running.")
else:
    # Key Metrics
    total_files = len(df)
    # Binary (1024-based) conversion; the prefix table below uses the same
    # divisor so the headline total and per-prefix sizes agree.
    total_size_gb = df['size'].sum() / 1024 / 1024 / 1024

    st.metric(label="Total S3 Files Indexed", value=f"{total_files:,}")
    st.metric(label="Total Size", value=f"{total_size_gb:.2f} GB")

    # Grouping
    st.subheader("📁 Prefix Breakdown")

    # Extract Root Prefix: bucket name + first path segment of the object key.
    df['root_prefix'] = df['bucket'].astype(str) + "/" + df['key'].str.split('/').str[0]

    counts = df.groupby('root_prefix').size().reset_index(name='count')
    sizes = df.groupby('root_prefix')['size'].sum().reset_index(name='bytes')
    # Same 1024-based divisor as the headline metric (was 1e9 — inconsistent).
    sizes['GB'] = sizes['bytes'] / 1024 ** 3

    stats = pd.merge(counts, sizes, on='root_prefix')
    st.dataframe(stats, use_container_width=True)

    # Visuals
    st.subheader("📅 Data Distribution by Date")
    daily_counts = df.groupby('date').size().reset_index(name='files')
    fig = px.bar(daily_counts, x='date', y='files', title="Daily Index Volume")
    st.plotly_chart(fig, use_container_width=True)

    # Raw Data Explorer
    with st.expander("🔎 Raw Inventory Search"):
        search = st.text_input("File filter (regex)", "")
        if search:
            subset = df[df['key'].str.contains(search, na=False)]
            st.dataframe(subset.head(100), use_container_width=True)
        else:
            st.dataframe(df.head(100), use_container_width=True)

st.write(f"Last updated: {time.strftime('%H:%M:%S')}")
if st.button("Refresh"):
    st.rerun()
deploy_review.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Deploy the dashboard folder to a Hugging Face Docker Space."""
from huggingface_hub import HfApi, create_repo
import os
import sys

SPACE_ID = "gionuibk/HPLL-DataReview"
HF_TOKEN = os.environ.get("HF_TOKEN")
# Default to the directory this script lives in (was a hard-coded developer
# machine path, which broke deploys from any other checkout location);
# override with DEPLOY_FOLDER when deploying a different folder.
DEPLOY_FOLDER = os.environ.get("DEPLOY_FOLDER", os.path.dirname(os.path.abspath(__file__)))

if not HF_TOKEN:
    print("❌ HF_TOKEN missing")
    sys.exit(1)

print(f"🚀 Deploying Review Dashboard to {SPACE_ID}...")
api = HfApi(token=HF_TOKEN)

# Idempotent: creates the Space on first run, reuses it afterwards.
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="docker", exist_ok=True, token=HF_TOKEN)

print("🔑 Adding Secrets...")
try:
    api.add_space_secret(repo_id=SPACE_ID, key="HF_TOKEN", value=HF_TOKEN)
except Exception as e:
    # Best-effort (the secret may already exist) — but report instead of
    # silently swallowing every possible error with a bare except.
    print(f"⚠️ Could not set secret (may already exist): {e}")

print("📤 Uploading...")
api.upload_folder(
    folder_path=DEPLOY_FOLDER,
    repo_id=SPACE_ID,
    repo_type="space",
    path_in_repo=".",
    commit_message="Deploy Dashboard"
)
print("✅ Done! https://huggingface.co/spaces/gionuibk/HPLL-DataReview")
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# streamlit is pinned so the Space UI is reproducible; the rest float to the
# latest compatible release at build time.
streamlit==1.32.0
pandas
huggingface_hub
fastparquet
pyarrow
plotly