Commit ·
a84c47e
0
Parent(s):
Initial clean deploy
Browse files- .env.example +2 -0
- .gitignore +13 -0
- .python-version +1 -0
- Dockerfile +46 -0
- README.md +0 -0
- backtester.py +246 -0
- create_mock_data.py +44 -0
- dashboard.py +310 -0
- hello.py +6 -0
- pyproject.toml +17 -0
- test_conversion.py +22 -0
- utils.py +86 -0
- uv.lock +0 -0
.env.example
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
HF_TOKEN=your_huggingface_token_here
|
| 2 |
+
TARGET_FILE=marketsession_post_polygon_2020-01-01_2025-12-01.parquet_with_premarketvolume900K_marketcap1B.parquet
|
.gitignore
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
| 11 |
+
# Data files
|
| 12 |
+
*.parquet
|
| 13 |
+
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
Dockerfile
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a specialized UV image for building
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder

# Enable bytecode compilation and set UV options
# (copy link mode avoids hardlink warnings when the cache mount is on
# a different filesystem than /app)
ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy

WORKDIR /app

# Install dependencies separately to leverage Docker layer caching
# This uses cache mounts for the uv cache and binds for configuration files
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project --no-dev

# Copy the rest of the application code
COPY . /app

# Sync the project (installs the current project package)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev


# --- Final Stage ---
FROM python:3.12-slim-bookworm

# Set environment variables
ENV PYTHONUNBUFFERED=1
# Put the builder's virtualenv first on PATH so 'panel' below resolves to it.
ENV PATH="/app/.venv/bin:$PATH"

WORKDIR /app

# Copy the synced environment from the builder
# (copies source code AND the .venv created in the builder stage)
COPY --from=builder /app /app

# Create necessary cache directories with appropriate permissions
# (Useful for certain cloud environments or local dockers)
RUN mkdir -p /.cache && chmod 777 /.cache

# Expose the dashboard port
EXPOSE 5010

# Run the Panel dashboard
# Using direct 'panel serve' as it's more robust in container environments
# NOTE(review): '--allow-websocket-origin *' accepts connections from any
# origin — acceptable for a demo deployment, lock down for production.
CMD ["panel", "serve", "dashboard.py", "--address", "0.0.0.0", "--port", "5010", "--allow-websocket-origin", "*"]
|
README.md
ADDED
|
File without changes
|
backtester.py
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def run_backtest(
    df,
    risk_per_trade,
    stop_loss_pct,
    take_profit_pct,
    initial_capital,
    start_date,
    end_date,
    max_trades_per_day,
    commission_amount=2.0,
):
    """Run the short-gap-up backtest on ``df`` and return a trade log.

    Each row of ``df`` is one candidate short entered at ``premarket_close``.
    The intraday ``marketsession_*`` checkpoint columns are scanned in
    chronological order for a stop-loss or take-profit hit; a trade still
    open after the last checkpoint exits at ``marketsession_close``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``datetime``, ``date``, ``premarket_close``,
        ``marketsession_close`` and (some of) the ``marketsession_*`` columns.
    risk_per_trade : float
        Fraction of current net capital committed (in dollars) per trade.
    stop_loss_pct, take_profit_pct : float
        Stop / target distances as fractions of the entry price.
    initial_capital : float
        Starting account balance in dollars.
    start_date, end_date : date-like
        Inclusive window applied to ``df['datetime']``.
    max_trades_per_day : int
        Hard cap on trades taken per calendar day.
    commission_amount : float, optional
        Commission in dollars per 200 shares, charged per side.

    Returns
    -------
    pandas.DataFrame
        One row per executed trade; empty if nothing matches the window.
    """
    # Restrict to the requested date window.
    start_ts = pd.Timestamp(start_date)
    end_ts = pd.Timestamp(end_date)

    mask = (df["datetime"] >= start_ts) & (df["datetime"] <= end_ts)
    sub_df = df[mask].copy()

    if sub_df.empty:
        return pd.DataFrame()

    dates = sorted(sub_df["date"].unique())

    trades = []
    capital_net = initial_capital
    capital_gross = initial_capital
    total_comm_accum = 0

    # Intraday checkpoint price columns, in chronological order.
    ms_columns = [
        "marketsession_1min",
        "marketsession_3min",
        "marketsession_5min",
        "marketsession_10min",
        "marketsession_15min",
        "marketsession_30min",
        "marketsession_60min",
        "marketsession_120min",
        "marketsession_high",
    ]

    for current_date in dates:
        countertradesperday = 0
        day_df = sub_df[sub_df["date"] == current_date]

        for _, row in day_df.iterrows():
            if row.get("Ticker") == "QMMM":  # specific exclusion from user script
                continue

            entry_price = row["premarket_close"]
            # Dollar position size, scaled to the current net capital.
            size = capital_net * risk_per_trade

            # Short setup: stop sits above the entry, target below it.
            stop_price = entry_price * (1 + stop_loss_pct)
            target_price = entry_price * (1 - take_profit_pct)

            exit_price = None
            exit_type = None

            for col in ms_columns:
                if col not in row or pd.isna(row[col]):
                    continue

                price = row[col]
                # Stop-loss: price moved against the short.
                if price >= stop_price:
                    exit_price = stop_price
                    exit_type = "stop"
                    break
                # Take-profit: price fell to the target.
                if price <= target_price:
                    exit_price = target_price
                    exit_type = "target"
                    break

            if exit_price is None:
                # Neither level was touched intraday: flatten at session close.
                exit_price = row["marketsession_close"]
                exit_type = "close"

            # P&L for a short position, proportional to the dollar size.
            pnl_gross = (entry_price - exit_price) / entry_price * size

            # Commission: commission_amount dollars per 200 shares, per side.
            # (size / entry_price approximates the share count.)
            commission_entry = commission_amount * size / entry_price / 200
            commission_exit = commission_entry  # same share count on exit

            total_comm = commission_entry + commission_exit
            pnl_net = pnl_gross - total_comm

            # Update running capital series.
            capital_net += pnl_net
            capital_gross += pnl_gross
            total_comm_accum += total_comm

            pnl_perc = (
                pnl_net / (capital_net - pnl_net) * 100
                if (capital_net - pnl_net) != 0
                else 0
            )

            trades.append(
                {
                    "date": current_date,
                    "ticker": row.get("Ticker"),
                    "entry_price": entry_price,
                    "exit_price": exit_price,
                    "exit_type": exit_type,
                    "size": size,
                    "pnl": pnl_net,
                    "pnl_gross": pnl_gross,
                    "pnl_perc": pnl_perc,
                    "capital_net": capital_net,
                    "capital_gross": capital_gross,
                    "comm": total_comm,
                    "cumulative_comm": total_comm_accum,
                }
            )

            # Account blow-up guard: stop trading once net capital falls
            # below half the starting balance.
            # BUGFIX: this check previously ran BEFORE the append above, so
            # the trade that breached the limit updated capital_net but never
            # appeared in the returned log, leaving the log inconsistent with
            # the capital series.
            if capital_net < initial_capital / 2:
                break

            countertradesperday += 1
            if countertradesperday >= max_trades_per_day:
                break

        if capital_net < initial_capital / 2:
            break

    return pd.DataFrame(trades)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def analyze_day_trading(trades_df):
    """
    Analyze day-trading performance from a ``run_backtest`` trade log.

    Parameters
    ----------
    trades_df : pandas.DataFrame
        Trade log with at least ``pnl``, ``pnl_gross``, ``pnl_perc``,
        ``capital_net``, ``capital_gross`` and ``comm`` columns.

    Returns
    -------
    tuple
        ``(results, df)`` — a dict of summary metrics and the trade log
        enriched with cumulative/drawdown columns.  For an empty log,
        returns ``({}, trades_df)``.
    """
    if trades_df.empty:
        return {}, trades_df

    df = trades_df.copy()

    # Per-trade outcome flag and cumulative equity curves.
    df["is_win"] = df["pnl"] > 0
    df["cumulative_pnl"] = df["pnl"].cumsum()
    df["cumulative_pnl_gross"] = df["pnl_gross"].cumsum()

    # Drawdown in dollars relative to the running peak of cumulative net P&L.
    df["running_max"] = df["cumulative_pnl"].cummax()
    df["drawdown"] = df["running_max"] - df["cumulative_pnl"]
    # NOTE(review): despite its name this is the per-trade gross return in
    # percent, not a peak-to-trough drawdown; the column name is kept because
    # the dashboard plots it under this key.
    df["drawdown_pct"] = (df["pnl_gross"] / df["capital_gross"]) * 100

    # Per-trade fractional return (pnl relative to capital before the trade).
    df["return"] = df["pnl_perc"] / 100

    total_trades = len(df)
    profitable_trades = sum(df["is_win"])
    losing_trades = total_trades - profitable_trades
    win_rate = profitable_trades / total_trades if total_trades > 0 else 0

    total_pnl = df["pnl"].sum()
    avg_pnl = df["pnl"].mean()
    max_pnl = df["pnl"].max()
    min_pnl = df["pnl"].min()

    avg_pnl_perc = df["pnl_perc"].mean()

    avg_win = df.loc[df["is_win"], "pnl"].mean() if profitable_trades > 0 else 0
    avg_loss = df.loc[~df["is_win"], "pnl"].mean() if losing_trades > 0 else 0

    risk_reward_ratio = abs(avg_win / avg_loss) if avg_loss != 0 else float("inf")

    max_drawdown = df["drawdown"].max()
    max_drawdown_perc = (
        max_drawdown / df["running_max"].max() * 100
        if df["running_max"].max() > 0
        else 0
    )

    # Annualized Sharpe-style ratio over per-trade returns (scales by
    # sqrt(252); no risk-free rate is subtracted).
    mean_return = df["return"].mean()
    std_return = df["return"].std()
    sharpe_ratio = (mean_return * 252**0.5) / std_return if std_return > 0 else 0

    expectancy = (win_rate * avg_win) + ((1 - win_rate) * avg_loss)

    total_profit = df.loc[df["is_win"], "pnl"].sum() if profitable_trades > 0 else 0
    # Fall back to 1 to avoid division by zero when there are no losers.
    total_loss = abs(df.loc[~df["is_win"], "pnl"].sum()) if losing_trades > 0 else 1
    profit_factor = total_profit / total_loss if total_loss > 0 else float("inf")

    # Starting capital reconstructed from the first trade's post-trade balance.
    initial_capital_inferred = (
        df.iloc[0]["capital_net"] - df.iloc[0]["pnl"] if not df.empty else 0
    )
    return_on_initial_capital = (
        (total_pnl / initial_capital_inferred * 100)
        if initial_capital_inferred != 0
        else 0
    )

    total_commissions = df["comm"].sum() if not df.empty else 0
    commission_impact_pct = (
        (total_commissions / total_pnl * 100) if total_pnl != 0 else 0
    )

    # NOTE: the previous version also built per-date and per-ticker groupby
    # tables here that were never used (commented out of the results dict);
    # that dead computation has been removed.
    results = {
        "total_trades": total_trades,
        "profitable_trades": profitable_trades,
        "losing_trades": losing_trades,
        "win_rate": win_rate,
        "total_pnl": total_pnl,
        "return_on_init_cap_pct": return_on_initial_capital,
        "total_commissions": total_commissions,
        "comm_to_pnl_pct": commission_impact_pct,
        "avg_pnl": avg_pnl,
        "max_pnl": max_pnl,
        "min_pnl": min_pnl,
        "avg_pnl_perc": avg_pnl_perc,
        "avg_win": avg_win,
        "avg_loss": avg_loss,
        "risk_reward_ratio": risk_reward_ratio,
        "max_drawdown": max_drawdown,
        "max_drawdown_perc": max_drawdown_perc,
        "sharpe_ratio": sharpe_ratio,
        "expectancy": expectancy,
        "profit_factor": profit_factor,
    }

    return results, df
|
create_mock_data.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import datetime
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def create_mock_data():
    """Write a small synthetic penny-stock parquet file for local testing.

    Generates one row per (day, ticker) pair with values chosen so that the
    dashboard's default filter query usually passes, then saves the frame
    under the exact file name ``utils.load_data`` looks for.
    """
    dates = pd.date_range(start="2024-10-01", end="2024-11-01", freq="D")
    tickers = ["AAPL", "TSLA", "AMZN", "GOOG", "MSFT"]

    rows = []
    for d in dates:
        for t in tickers:
            # Randomize values to meet filters most of the time
            entry = 10.0 + np.random.randn()
            # columns: premarket_change_from_perviousday_perc > 8 and premarket_close > 2 and `Shares Float`>1e6 and `Market Capitalization`<100e6
            # ("perviousday" is the dataset's actual, misspelled column name)
            rows.append(
                {
                    "datetime": d,
                    "Ticker": t,
                    "premarket_change_from_perviousday_perc": 10.0 + np.random.randn(),
                    "premarket_close": entry,
                    "Shares Float": 2e6,
                    "Market Capitalization": 50e6,
                    "marketsession_1min": entry * (1 - 0.01 * np.random.randn()),
                    "marketsession_3min": entry * (1 - 0.02 * np.random.randn()),
                    "marketsession_5min": entry * (1 - 0.03 * np.random.randn()),
                    "marketsession_10min": entry * (1 - 0.04 * np.random.randn()),
                    "marketsession_15min": entry * (1 - 0.05 * np.random.randn()),
                    "marketsession_30min": entry * (1 - 0.06 * np.random.randn()),
                    "marketsession_60min": entry * (1 - 0.07 * np.random.randn()),
                    "marketsession_120min": entry * (1 - 0.08 * np.random.randn()),
                    "marketsession_high": entry * 1.1,
                    "marketsession_close": entry * 0.9,
                }
            )

    df = pd.DataFrame(rows)
    filename = "marketsession_post_polygon_2020-01-01_2025-12-01.parquet_with_premarketvolume900K_marketcap1B.parquet"
    df.to_parquet(filename)
    # BUGFIX: the message previously printed a literal placeholder instead of
    # the generated file name.
    print(f"Mock data created: {filename}")


if __name__ == "__main__":
    create_mock_data()
|
dashboard.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import panel as pn
|
| 2 |
+
import hvplot.pandas
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from utils import load_data, DEFAULT_FILTER_QUERY
|
| 6 |
+
from backtester import run_backtest, analyze_day_trading
|
| 7 |
+
|
| 8 |
+
# Enable the Tabulator extension used by the metrics and trade-log tables.
pn.extension("tabulator")

# --- 1. Load Data ---
# Eager load at import time so the UI starts with data available.
print("Loading data... (this might take a moment if downloading)")
try:
    # Cache the data in memory so we don't reload on every callback
    # In a production app you might handle this differently
    GLOBAL_DF = load_data()
    print(f"Data loaded. Rows: {len(GLOBAL_DF)}")
except Exception as e:
    # Fall back to an empty frame so the dashboard still renders; the error
    # will surface again when the user presses "Run Backtest".
    GLOBAL_DF = pd.DataFrame()
    print(f"Error loading data: {e}")

# --- 2. Widgets ---
# Free-form pandas ``DataFrame.query`` string applied to the raw dataset.
query_input = pn.widgets.TextAreaInput(
    name="Filter Query (Pandas Syntax)",
    value=DEFAULT_FILTER_QUERY,
    height=100,
    sizing_mode="stretch_width",
)

# NOTE(review): despite the "%" labels, slider values are fractions
# (0.15 == 15%) and are passed straight through to run_backtest.
risk_per_trade_input = pn.widgets.FloatSlider(
    name="Risk Per Trade (%)", start=0.01, end=1.00, step=0.01, value=0.15
)
stop_loss_input = pn.widgets.FloatSlider(
    name="Stop Loss (%)", start=0.01, end=1.00, step=0.01, value=0.35
)
take_profit_input = pn.widgets.FloatSlider(
    name="Take Profit (%)", start=0.01, end=1.00, step=0.01, value=0.55
)
initial_capital_input = pn.widgets.FloatInput(
    name="Initial Capital ($)", value=10000.0, step=100
)
max_trades_input = pn.widgets.IntSlider(
    name="Max Trades Per Day", start=1, end=20, value=6
)
# Dollar commission per 200 shares, per side (see run_backtest).
commission_amount_input = pn.widgets.FloatInput(
    name="Commission Amount ($ per 200 shares)", value=2.0, step=0.1
)

# Date Range (default based on user script)
default_start = pd.Timestamp("2024-10-07").date()
default_end = pd.Timestamp("2025-12-01").date()  # Future date from user script
date_range_input = pn.widgets.DateRangeSlider(
    name="Date Range",
    start=pd.Timestamp("2020-01-01").date(),
    end=pd.Timestamp("2026-01-01").date(),
    value=(default_start, default_end),
)

# Primary action button; wired to on_click further down the module.
run_button = pn.widgets.Button(name="Run Backtest", button_type="primary")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# --- 3. Callbacks & Logic ---
|
| 63 |
+
def execute_backtest(event=None):
    """Reload data with the current filter query, run the backtest with the
    widget settings, and return a Panel layout (metrics table, plots, trade
    log) — or a Markdown pane describing the failure.

    ``event`` is unused; it exists so the function can also be wired
    directly as a widget callback.
    """
    # Determine if we need to reload data based on query
    # To be safe and simple, we reload if the user changed the query or if we just want to ensure consistency.
    # Given the file is small (parquet), we can reload or filter.
    # Note: load_data() handles reading and filtering.

    current_query = query_input.value

    # We will reload the data with the specific query
    # If this becomes slow, we can optimize to cache the unfiltered raw data and filter here.
    try:
        current_df = load_data(filter_query=current_query)
    except Exception as e:
        return pn.pane.Markdown(f"## Error loading data/applying query: {e}")

    if current_df.empty:
        return pn.pane.Markdown("## Error: No Data Loaded (Empty after filter)")

    # Get values from the sidebar widgets.
    rpt = risk_per_trade_input.value
    sl = stop_loss_input.value
    tp = take_profit_input.value
    init_cap = initial_capital_input.value
    max_trades = max_trades_input.value
    comm_amt = commission_amount_input.value
    start_date, end_date = date_range_input.value

    trades_df = run_backtest(
        current_df,
        rpt,
        sl,
        tp,
        init_cap,
        start_date,
        end_date,
        max_trades,
        commission_amount=comm_amt,
    )

    if trades_df.empty:
        return pn.pane.Markdown("## No trades found for this configuration")

    # Analyze: summary metrics dict + trade log enriched with cumulative cols.
    results, analysis_df = analyze_day_trading(trades_df)

    # --- Visuals ---

    # 1. Equity Curve (Net vs Gross)
    # NOTE(review): x="index" plots against the frame's RangeIndex, i.e. the
    # trade number — confirm this renders as intended in the hvplot version
    # pinned by the lockfile.
    equity_plot = analysis_df.hvplot.line(
        x="index",
        y=["capital_net", "capital_gross"],
        value_label="Capital ($)",
        title="Account Growth (Net vs Gross)",
        ylabel="Capital ($)",
        xlabel="Trade #",
        grid=True,
        height=400,
        responsive=True,
        color=["#4CAF50", "#2196F3"],
        hover_cols=["ticker", "pnl"],
    )

    # 1b. Capital & Profit over Days
    # End-of-day capital (last trade of the day) and summed daily P&L.
    daily_stats = analysis_df.groupby("date").agg({
        "capital_net": "last",
        "capital_gross": "last",
        "pnl": "sum",
        "pnl_gross": "sum"
    }).reset_index()

    capital_days_plot = daily_stats.hvplot.line(
        x="date",
        y=["capital_net", "capital_gross"],
        title="Capital over Days",
        ylabel="Capital ($)",
        grid=True,
        height=300,
        responsive=True,
        color=["#4CAF50", "#2196F3"],
    )

    profit_days_plot = daily_stats.hvplot.bar(
        x="date",
        y=["pnl", "pnl_gross"],
        title="Daily Profit (Net vs Gross)",
        ylabel="Profit ($)",
        grid=True,
        height=300,
        responsive=True,
        alpha=0.6,
        color=["#4CAF50", "#2196F3"],
        yformatter="%.0f",
    )

    # 2. Cumulative Commission
    comm_plot = analysis_df.hvplot.line(
        x="index",
        y="cumulative_comm",
        title="Cumulative Commissions Paid",
        ylabel="Total Commission ($)",
        xlabel="Trade #",
        grid=True,
        height=200,
        responsive=True,
        color="#FF9800",
    )

    # 2. Drawdown (dollar distance from the running equity peak)
    drawdown_plot = analysis_df.hvplot.area(
        y="drawdown",
        title="Drawdown",
        ylabel="Drawdown ($)",
        grid=True,
        height=200,
        responsive=True,
        color="red",
        alpha=0.3,
    )

    # 2b. Drawdown %
    # NOTE(review): the "drawdown_pct" column is actually per-trade gross
    # return in percent (see analyze_day_trading) — the chart title may be
    # misleading; confirm intent.
    drawdown_pct_plot = analysis_df.hvplot.area(
        y="drawdown_pct",
        title="Drawdown %",
        ylabel="Drawdown (%)",
        grid=True,
        height=200,
        responsive=True,
        color="red",
        alpha=0.3,
    )

    # 3. P&L Distribution
    pnl_dist_plot = analysis_df.hvplot.hist(
        y="pnl", title="P&L Distribution", bins=30, height=300, responsive=True
    )

    # 4. Ticker Performance (Top/Bottom 10)
    ticker_stats = analysis_df.groupby("ticker")["pnl"].sum().sort_values()
    if len(ticker_stats) > 20:
        # Show top 10 and bottom 10
        top = ticker_stats.tail(10)
        bottom = ticker_stats.head(10)
        subset = pd.concat([bottom, top])
    else:
        subset = ticker_stats

    ticker_plot = subset.hvplot.bar(
        title="P&L by Ticker (Best/Worst)", rot=45, height=400, responsive=True
    )

    # 5. Metrics Table
    # Format metrics for display (floats rounded to 2 decimals; any
    # DataFrame-valued entries are skipped).
    metrics_df = pd.DataFrame(
        [
            {"Metric": k, "Value": f"{v:.2f}" if isinstance(v, float) else v}
            for k, v in results.items()
            if not isinstance(v, pd.DataFrame)
        ]
    )

    metrics_table = pn.widgets.Tabulator(metrics_df, disabled=True, show_index=False)

    # 6. Trades Table (Paginated)
    # Display copy: float columns are zero-filled and truncated to ints for a
    # compact table (deliberate — see test_conversion.py).
    display_trades_df = trades_df.copy()
    for col in display_trades_df.select_dtypes(include=['float', 'float64']).columns:
        display_trades_df[col] = display_trades_df[col].fillna(0).astype(int)

    trades_table = pn.widgets.Tabulator(
        display_trades_df,
        pagination="local",
        page_size=10,
        sizing_mode="stretch_width",
    )

    # Layout: metrics on the left, stacked charts on the right, then the
    # distribution/ticker row and the paginated trade log.
    dashboard = pn.Column(
        pn.Row(
            pn.Column(metrics_table, width=300),
            pn.Column(
                equity_plot,
                drawdown_plot,
                drawdown_pct_plot,
                comm_plot,
                capital_days_plot,
                profit_days_plot,
            ),
        ),
        pn.Row(pnl_dist_plot, ticker_plot),
        pn.layout.Divider(),
        "### Trade Log",
        trades_table,
    )

    return dashboard
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
# Bind the function to the button
|
| 260 |
+
# We effectively want to replace the main content when button is clicked
|
| 261 |
+
# pn.bind is one way, or just updating a dynamic map.
|
| 262 |
+
# Simplest: use a Column that we clear and append to.
|
| 263 |
+
|
| 264 |
+
# Dynamic output container: cleared and repopulated on every button press.
output_area = pn.Column()


def on_click(event):
    """Button handler: show a spinner, run the backtest, render the result.

    Any exception raised while building the result is caught and rendered
    as a Markdown error pane instead of crashing the server callback.
    """
    output_area.clear()
    spinner = pn.indicators.LoadingSpinner(value=True, width=50, height=50)
    output_area.append(spinner)
    try:
        content = execute_backtest()
        output_area.clear()
        output_area.append(content)
    except Exception as err:
        output_area.clear()
        error_pane = pn.pane.Markdown(f"## Error during execution: {err}")
        output_area.append(error_pane)
| 277 |
+
|
| 278 |
+
|
| 279 |
+
# Wire the sidebar button to the backtest handler defined above.
run_button.on_click(on_click)

# --- Layout ---
# All configuration widgets live in the sidebar; results render in `main`.
sidebar = pn.Column(
    "## Configuration",
    query_input,
    risk_per_trade_input,
    stop_loss_input,
    take_profit_input,
    initial_capital_input,
    max_trades_input,
    commission_amount_input,
    date_range_input,
    run_button,
    pn.layout.Divider(),
    "**Note**: Ensure `HF_TOKEN` is set in `.env` to download data.",
)

template = pn.template.FastListTemplate(
    title="Penny Stock Short GAP UP Strategy Backtester",
    sidebar=[sidebar],
    main=[output_area],
    accent_base_color="#1f77b4",
    header_background="#1f77b4",
)

# Servable: `panel serve dashboard.py` picks this up (also the Docker CMD).
template.servable()

if __name__ == "__main__":
    # If run as script, start a local Bokeh server on the same port the
    # Dockerfile exposes.
    pn.serve(template, show=False, port=5010)
|
hello.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def main():
    """Entry point: print the project greeting."""
    greeting = "Hello from myantigravity1!"
    print(greeting)


if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "backtesting-penny-short"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Penny Stock Strategy Backtester with Panel Dashboard"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"datasets>=4.4.2",
|
| 9 |
+
"huggingface-hub>=1.2.3",
|
| 10 |
+
"matplotlib>=3.10.8",
|
| 11 |
+
"pandas>=2.3.3",
|
| 12 |
+
"python-dotenv>=1.2.1",
|
| 13 |
+
"panel>=1.3.8",
|
| 14 |
+
"hvplot>=0.9.2",
|
| 15 |
+
"pyarrow>=15.0.0",
|
| 16 |
+
"fastparquet>=2024.2.0"
|
| 17 |
+
]
|
test_conversion.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
import numpy as np

# Quick manual check of the float->int display conversion used by the
# dashboard's trade table: NaNs become 0, floats are truncated.
print("Creating mock dataframe...")
trades_df = pd.DataFrame(
    {
        "float_col": [1.1, 2.9, 3.5, np.nan],
        "int_col": [10, 20, 30, 40],
        "str_col": ["a", "b", "c", "d"],
    }
)

print("Original Types:")
print(trades_df.dtypes)
print(trades_df)

print("\nConverting...")
display_trades_df = trades_df.copy()
float_columns = display_trades_df.select_dtypes(include=["float", "float64"]).columns
for column in float_columns:
    converted = display_trades_df[column].fillna(0).astype(int)
    display_trades_df[column] = converted

print("\nResult Types:")
print(display_trades_df.dtypes)
print(display_trades_df)
|
utils.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def download_from_hf_dataset(file_path, dataset_name, token=None, repo_type="dataset"):
    """
    Download a file from a Hugging Face dataset repository.

    Uses ``HF_TOKEN`` from the environment when no token is supplied.  On
    any download failure this is best-effort: it falls back to an existing
    local copy of ``file_path`` if one is present, and returns ``None``
    when nothing is available.
    """
    if token is None:
        token = os.getenv("HF_TOKEN")

    try:
        local_path = hf_hub_download(
            repo_id=dataset_name,
            filename=file_path,
            repo_type=repo_type,
            local_dir=".",
            token=token,
        )
    except Exception as e:
        # Deliberately broad: any failure (network, auth, missing file)
        # triggers the local-copy fallback.
        print(f"Error downloading file: {str(e)}")
        if os.path.exists(file_path):
            print(f"Found local copy of {file_path}, using that.")
            return file_path
        return None

    print(
        f"Successfully downloaded {file_path} from {dataset_name} to {local_path}"
    )
    return local_path
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# Default stock-universe filter in pandas ``DataFrame.query`` syntax:
# premarket gap up of more than 8%, price above $2, float above 1M shares,
# market cap under $100M.  Backticks quote column names containing spaces.
# NOTE: "perviousday" is the dataset's actual (misspelled) column name.
DEFAULT_FILTER_QUERY = (
    "premarket_change_from_perviousday_perc > 8 and "
    "premarket_close > 2 and "
    "`Shares Float`>1e6 and "
    "`Market Capitalization`<100e6"
)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def load_data(filter_query=DEFAULT_FILTER_QUERY):
    """
    Load and preprocess the penny-stock dataset.

    Downloads the parquet file from the Hugging Face dataset repository
    (falling back to a local copy), optionally narrows the universe with
    ``filter_query`` (pandas ``DataFrame.query`` syntax), and derives
    ``datetime``/``date`` columns.

    Raises
    ------
    FileNotFoundError
        If the file can neither be downloaded nor found locally.
    Exception
        Re-raised from ``DataFrame.query`` when ``filter_query`` is invalid,
        so the dashboard can surface the message to the user.
    """
    token = os.getenv("HF_TOKEN")
    dataset_name = "AmirTrader/PennyStocks"

    # File name is taken from the environment when set, else the known default.
    default_file = "marketsession_post_polygon_2020-01-01_2025-12-01.parquet_with_premarketvolume900K_marketcap1B.parquet"
    target_file = os.getenv("TARGET_FILE", default_file)

    local_path = download_from_hf_dataset(
        file_path=target_file, dataset_name=dataset_name, token=token
    )

    if not local_path or not os.path.exists(local_path):
        raise FileNotFoundError(f"Could not find or download dataset: {target_file}")

    df = pd.read_parquet(local_path)

    # Narrow the tradable universe; bad queries are surfaced to the caller.
    if not filter_query:
        df = df.copy()
    else:
        try:
            df = df.query(filter_query).copy()
        except Exception as e:
            print(f"Error applying query '{filter_query}': {e}")
            # Fallback or re-raise? Let's re-raise to notify user in dashboard
            raise e

    # Normalize the timestamp column and derive a calendar-date column used
    # by the backtester's per-day grouping.
    if "datetime" in df.columns:
        df["datetime"] = pd.to_datetime(df["datetime"])
        df["date"] = df["datetime"].dt.date

    return df
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|