Arrechenash commited on
Commit
89dd6ec
·
1 Parent(s): d6858c0

Refactor: Modularize data source and add chart page

Browse files

- Extracts data handling into `src/datasource.py`.
- Renames `src/streamlit_app.py` to `src/main.py`.
- Adds a new candlestick chart page at `src/pages/chart.py`.
- Updates dependencies and configurations.

.gitignore CHANGED
@@ -1,2 +1,4 @@
1
  .env
2
  *.parquet
 
 
 
1
  .env
2
  *.parquet
3
+ *.sqlite
4
+ *__pycache__
Dockerfile CHANGED
@@ -26,4 +26,4 @@ EXPOSE 8501
26
 
27
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1
28
 
29
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
26
 
27
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1
28
 
29
+ ENTRYPOINT ["streamlit", "run", "src/main.py", "--server.port=8501", "--server.address=0.0.0.0"]
Makefile CHANGED
@@ -11,7 +11,7 @@ fmt:
11
  .venv/bin/isort .
12
 
13
  run:
14
- APP_ENV=development .venv/bin/streamlit run src/streamlit_app.py
15
 
16
  deploy:
17
  @if ! git remote | grep -q '^hf$$'; then git remote add hf git@hf.co:spaces/Arrechenash/dashboard; fi
 
11
  .venv/bin/isort .
12
 
13
  run:
14
+ APP_ENV=development .venv/bin/streamlit run src/main.py
15
 
16
  deploy:
17
  @if ! git remote | grep -q '^hf$$'; then git remote add hf git@hf.co:spaces/Arrechenash/dashboard; fi
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: blue
5
  colorTo: green
6
  sdk: streamlit
7
  sdk_version: "1.28.0"
8
- app_file: src/streamlit_app.py
9
  ---
10
 
11
  # dashboard
@@ -16,7 +16,7 @@ pip install -r requirements.txt
16
 
17
  ## Run dashboard
18
 
19
- streamlit run src/streamlit_app.py
20
 
21
  ## Knowledge
22
 
 
5
  colorTo: green
6
  sdk: streamlit
7
  sdk_version: "1.28.0"
8
+ app_file: src/main.py
9
  ---
10
 
11
  # dashboard
 
16
 
17
  ## Run dashboard
18
 
19
+ streamlit run src/main.py
20
 
21
  ## Knowledge
22
 
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
  pandas
 
2
  duckdb
3
- streamlit
 
 
 
 
 
1
  pandas
2
+ pyarrow
3
  duckdb
4
+ streamlit
5
+ plotly
6
+ python-dotenv
7
+ alpaca-py
8
+ requests-cache
src/datasource.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Shared data-source helpers: Alpaca market-data client plus DuckDB queries
over the stocks parquet dataset (local in development, HF dataset otherwise)."""

import os
# NOTE(review): datetime/timedelta appear unused in this module — confirm
# before removing (kept to avoid breaking `from datasource import ...` users).
from datetime import datetime, timedelta

import duckdb
import requests_cache
import streamlit as st
from alpaca.data.historical import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit
from dotenv import load_dotenv

# Pull ALPACA_* credentials from a local .env file (no-op when absent).
load_dotenv()

ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
ALPACA_SECRET_KEY = os.getenv("ALPACA_SECRET_KEY")

# Fail fast in the UI when credentials are missing; st.stop() halts the page run.
if not ALPACA_API_KEY or not ALPACA_SECRET_KEY:
    st.error(
        "API keys missing. Set ALPACA_API_KEY and ALPACA_SECRET_KEY in your .env or as secrets/environment variables."
    )
    st.stop()

# Transparently cache Alpaca HTTP responses for 120 seconds.
requests_cache.install_cache("alpaca_api_cache", expire_after=120)


# Data source: local parquet file in development, HF dataset URL otherwise.
url = (
    "stocks.parquet"
    if os.getenv("APP_ENV") == "development"
    else "hf://datasets/Arrechenash/stocks/stocks.parquet"
)
34
+
35
+
36
@st.cache_data
def load_symbols():
    """Return the sorted list of distinct ticker symbols in the parquet dataset."""
    sql = f"SELECT DISTINCT symbol FROM read_parquet('{url}') ORDER BY symbol"
    frame = duckdb.query(sql).to_df()
    return frame["symbol"].tolist()
45
+
46
+
47
@st.cache_data
def get_data(filters=None):
    """Load the parquet dataset, optionally AND-ing the given SQL filter
    clauses, newest date first."""
    sql = f"SELECT * FROM read_parquet('{url}')"
    if filters:
        sql = sql + " WHERE " + " AND ".join(filters)
    sql = sql + " ORDER BY date DESC"
    return duckdb.query(sql).to_df()
53
+
54
+
55
@st.cache_resource
def get_client():
    """Build the Alpaca historical-data client once per process
    (st.cache_resource shares it across reruns)."""
    client = StockHistoricalDataClient(ALPACA_API_KEY, ALPACA_SECRET_KEY)
    return client
58
+
59
+
60
def get_stock_bars(symbol_or_symbols, date_start, date_end):
    """Fetch 5-minute bars for the given symbol(s) between two dates,
    returned as the Alpaca response's DataFrame."""
    five_minutes = TimeFrame(amount=5, unit=TimeFrameUnit.Minute)
    request = StockBarsRequest(
        symbol_or_symbols=symbol_or_symbols,
        timeframe=five_minutes,
        start=str(date_start),
        end=str(date_end),
    )
    response = get_client().get_stock_bars(request)
    return response.df
src/{streamlit_app.py → main.py} RENAMED
@@ -1,39 +1,10 @@
1
- import os
2
-
3
  import duckdb
4
  import streamlit as st
5
 
6
- # Page configuration
7
- st.set_page_config(layout="wide")
8
-
9
- # Data source
10
- url = (
11
- "stocks.parquet"
12
- if os.getenv("APP_ENV") == "development"
13
- else "hf://datasets/Arrechenash/stocks/stocks.parquet"
14
- )
15
-
16
-
17
- @st.cache_data
18
- def get_data(filters=None):
19
- query = f"SELECT * FROM read_parquet('{url}')"
20
- if filters:
21
- query += " WHERE " + " AND ".join(filters)
22
- return duckdb.query(query + " ORDER BY date DESC").to_df()
23
-
24
-
25
- @st.cache_data
26
- def load_symbols():
27
- return (
28
- duckdb.query(
29
- f"SELECT DISTINCT symbol FROM read_parquet('{url}') ORDER BY symbol"
30
- )
31
- .to_df()["symbol"]
32
- .tolist()
33
- )
34
 
 
35
 
36
- # Initialize session state without forcing values onto widgets
37
  defaults = {
38
  "date": None,
39
  "symbols": [],
@@ -49,7 +20,8 @@ for key, value in defaults.items():
49
  if key not in st.session_state:
50
  st.session_state[key] = value
51
 
52
- # Sidebar filters
 
53
  with st.sidebar:
54
  st.session_state.date = st.date_input("Date", value=None)
55
  st.session_state.symbols = st.multiselect(
@@ -71,10 +43,8 @@ with st.sidebar:
71
  "Min rel volume", value=st.session_state.min_relvol
72
  )
73
 
74
- # Placeholder for results count (updated after df load)
75
  results_placeholder = st.empty()
76
 
77
- # Construct query filters
78
  f = st.session_state
79
  filters = []
80
  if f.date:
@@ -92,15 +62,12 @@ if f.min_gap:
92
  if f.min_run:
93
  filters.append(f"run_pct >= {f.min_run}")
94
 
95
- # Load data
96
  df = get_data(filters if filters else None)
97
 
98
- # Update the sidebar with results count
99
  with results_placeholder:
100
  st.markdown("---")
101
  st.markdown(f"**Results: {len(df)}**")
102
 
103
- # UI rendering
104
  if df.empty:
105
  st.info("No data found with current filters")
106
  else:
@@ -117,7 +84,7 @@ else:
117
  st.selectbox(
118
  "Y-axis",
119
  numeric_cols,
120
- key="y_axis", # ✅ No index or value conflicts
121
  )
122
 
123
  st.scatter_chart(
 
 
 
1
  import duckdb
2
  import streamlit as st
3
 
4
+ from datasource import get_data, load_symbols
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ st.set_page_config(layout="wide")
7
 
 
8
  defaults = {
9
  "date": None,
10
  "symbols": [],
 
20
  if key not in st.session_state:
21
  st.session_state[key] = value
22
 
23
+ st.title("Market Overview")
24
+
25
  with st.sidebar:
26
  st.session_state.date = st.date_input("Date", value=None)
27
  st.session_state.symbols = st.multiselect(
 
43
  "Min rel volume", value=st.session_state.min_relvol
44
  )
45
 
 
46
  results_placeholder = st.empty()
47
 
 
48
  f = st.session_state
49
  filters = []
50
  if f.date:
 
62
  if f.min_run:
63
  filters.append(f"run_pct >= {f.min_run}")
64
 
 
65
  df = get_data(filters if filters else None)
66
 
 
67
  with results_placeholder:
68
  st.markdown("---")
69
  st.markdown(f"**Results: {len(df)}**")
70
 
 
71
  if df.empty:
72
  st.info("No data found with current filters")
73
  else:
 
84
  st.selectbox(
85
  "Y-axis",
86
  numeric_cols,
87
+ key="y_axis",
88
  )
89
 
90
  st.scatter_chart(
src/pages/chart.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Candlestick chart page: 5-minute bars with VWAP, premarket high, and
volume, fetched via the shared datasource module."""

import pandas as pd
import plotly.graph_objs as go
import streamlit as st

from datasource import get_stock_bars

st.set_page_config(layout="wide")
st.title("Candlestick Chart")
st.sidebar.title("Filters")

# Sidebar inputs: ticker (normalized to upper case) and the date window.
symbol = st.sidebar.text_input("Ticker symbol", value="HTOO").upper()
date_start = st.sidebar.date_input("Start date", pd.Timestamp("2025-07-22"))
date_end = st.sidebar.date_input("End date", pd.Timestamp("2025-07-23"))
14
+
15
try:
    # Fetch 5-minute bars for the selected symbol/date range.
    bars = get_stock_bars(symbol, date_start, date_end)
    if bars.empty:
        st.warning("No data. Check symbol and dates.")
    else:
        # Flatten the index, convert timestamps to US market time, and
        # re-index on the converted timestamp.
        bars = bars.reset_index()
        bars["timestamp"] = bars["timestamp"].dt.tz_convert("America/New_York")
        bars = bars.set_index("timestamp")

        # VWAP from the typical price (H+L+C)/3, volume-weighted.
        # NOTE(review): the cumsum runs over the whole requested range, so
        # VWAP does not reset per trading session — confirm this is intended.
        bars["volume"] = pd.to_numeric(bars["volume"], errors="coerce").fillna(0)
        typical_price = (bars["high"] + bars["low"] + bars["close"]) / 3.0
        bars["vwap"] = (typical_price * bars["volume"]).cumsum() / bars[
            "volume"
        ].cumsum()

        # Highest high printed before the 09:30 regular-session open, if any
        # premarket bars exist in the window.
        premarket_mask = bars.index.time < pd.to_datetime("09:30:00").time()
        premarket_high = (
            bars.loc[premarket_mask, "high"].max() if premarket_mask.any() else None
        )

        # Plotly traces are fed plain lists; timestamps become naive strings
        # so the category axis lines up across all traces.
        timestamps = [ts.strftime("%Y-%m-%d %H:%M:%S") for ts in bars.index]
        open_vals = bars["open"].tolist()
        high_vals = bars["high"].tolist()
        low_vals = bars["low"].tolist()
        close_vals = bars["close"].tolist()
        vwap_vals = bars["vwap"].tolist()
        volume_vals = bars["volume"].tolist()

        # Price candlesticks on the primary y-axis.
        fig = go.Figure()
        fig.add_trace(
            go.Candlestick(
                x=timestamps,
                open=open_vals,
                high=high_vals,
                low=low_vals,
                close=close_vals,
                name="Candlestick",
            )
        )
        # VWAP overlay.
        fig.add_trace(
            go.Scatter(
                x=timestamps,
                y=vwap_vals,
                mode="lines",
                line=dict(color="yellow", width=1),
                name="VWAP",
            )
        )
        # Horizontal dashed line at the premarket high, when one exists.
        if premarket_high is not None and pd.notna(premarket_high):
            fig.add_trace(
                go.Scatter(
                    x=[timestamps[0], timestamps[-1]],
                    y=[premarket_high, premarket_high],
                    mode="lines",
                    line=dict(color="red", width=1, dash="dash"),
                    name="Premarket High",
                )
            )
        # Volume bars on the secondary y-axis (lower panel, see layout below).
        fig.add_trace(
            go.Bar(
                x=timestamps,
                y=volume_vals,
                yaxis="y2",
                marker=dict(color="rgba(200,200,200,0.5)"),
                name="Volume",
                opacity=0.5,
            )
        )

        # Shade the pre-market (04:00–09:30) and after-hours (16:00–20:00)
        # windows for every calendar day present in the data.
        bars_dates = pd.to_datetime(bars.index.date).unique()
        for day in bars_dates:
            dm = pd.Timestamp(day).strftime("%Y-%m-%d")
            fig.add_vrect(
                x0=f"{dm} 04:00:00",
                x1=f"{dm} 09:30:00",
                fillcolor="rgba(0, 200, 255, 0.10)",
                layer="below",
                line_width=0,
                annotation_text="Pre-market",
                annotation_position="top left",
            )
            fig.add_vrect(
                x0=f"{dm} 16:00:00",
                x1=f"{dm} 20:00:00",
                fillcolor="rgba(255, 200, 0, 0.08)",
                layer="below",
                line_width=0,
                annotation_text="After-hours",
                annotation_position="top left",
            )

        # Price occupies the top 70% of the figure, volume the bottom 25%.
        fig.update_layout(
            title=f"{symbol} 5-min",
            xaxis_title="Date/Time",
            yaxis_title="Price",
            xaxis_rangeslider_visible=False,
            yaxis=dict(domain=[0.3, 1]),
            yaxis2=dict(domain=[0, 0.25], title="Volume"),
            legend=dict(orientation="h"),
            margin=dict(t=40, b=20),
            hovermode="x unified",
            height=720,
        )

        st.plotly_chart(fig, use_container_width=True)
        # Preview table of the first 20 rows; the scalar premarket high is
        # broadcast into a column only for display purposes.
        preview_cols = ["open", "high", "low", "close", "volume", "vwap"]
        if premarket_high is not None and pd.notna(premarket_high):
            preview_cols.append("premarket_high")
            bars["premarket_high"] = premarket_high
        st.write("Data:")
        st.table(bars[preview_cols].reset_index().head(20))

# Broad catch is deliberate here: any fetch/plot failure is surfaced in the
# page with its full traceback instead of crashing the Streamlit run.
except Exception as e:
    st.error(f"Error fetching or plotting data: {e}")
    import traceback

    st.write("Full traceback:")
    st.code(traceback.format_exc())