eaglelandsonce committed on
Commit
8342e14
·
verified ·
1 Parent(s): 2c7417c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +205 -122
app.py CHANGED
@@ -3,22 +3,24 @@ import math
3
  import tempfile
4
  from dataclasses import dataclass
5
  from functools import lru_cache
 
6
 
7
  import gradio as gr
 
 
8
  import matplotlib
9
  matplotlib.use("Agg")
10
  import matplotlib.pyplot as plt
 
11
  import numpy as np
12
  import pandas as pd
 
13
  import torch
14
  import torch.nn as nn
15
  import torch.nn.functional as F
16
-
17
  import lightning.pytorch as pl
18
  from torch.utils.data import DataLoader, TensorDataset
19
 
20
- from pandas_datareader import data as pdr
21
-
22
 
23
  DISCLAIMER = """
24
  **Disclaimer (Educational Use Only):**
@@ -29,11 +31,11 @@ Markets are risky; consult a qualified professional for investment guidance.
29
 
30
 
31
  # -----------------------------
32
- # Data & features
33
  # -----------------------------
34
  @dataclass
35
  class FeatureSpec:
36
- lookback_days: int = 365 * 2 # ~2 years
37
  sma_fast: int = 10
38
  sma_slow: int = 20
39
  rsi_period: int = 14
@@ -42,67 +44,148 @@ class FeatureSpec:
42
 
43
  def _rsi(close: pd.Series, period: int = 14) -> pd.Series:
44
  delta = close.diff()
45
- gain = (delta.where(delta > 0, 0)).rolling(period).mean()
46
- loss = (-delta.where(delta < 0, 0)).rolling(period).mean()
47
  rs = gain / (loss + 1e-9)
48
  return 100 - (100 / (1 + rs))
49
 
50
 
51
- @lru_cache(maxsize=64)
52
- def fetch_prices_stooq(ticker: str) -> pd.DataFrame:
53
  """
54
- Fetch daily OHLCV from Stooq (free). Returns ascending date index.
 
 
 
 
55
  """
56
- # Stooq expects lowercase tickers for US like "aapl" (it also works with uppercase sometimes).
57
  t = ticker.strip().lower()
58
- df = pdr.DataReader(t, "stooq") # newest->oldest
59
- df = df.sort_index() # oldest->newest
60
- df.index = pd.to_datetime(df.index)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  return df
62
 
63
 
64
- def build_features(df: pd.DataFrame, spec: FeatureSpec) -> pd.DataFrame:
65
  """
66
- Build simple features + binary target:
67
  target = 1 if next-day return > 0 else 0
 
68
  """
69
- out = df.copy()
70
- out["close"] = out["Close"].astype(float)
71
 
72
- out["ret_1"] = out["close"].pct_change()
73
- out["ret_5"] = out["close"].pct_change(5)
74
- out["sma_fast"] = out["close"].rolling(spec.sma_fast).mean()
75
- out["sma_slow"] = out["close"].rolling(spec.sma_slow).mean()
76
- out["sma_ratio"] = out["sma_fast"] / (out["sma_slow"] + 1e-9) - 1.0
77
 
78
- out["rsi"] = _rsi(out["close"], spec.rsi_period)
79
- out["vol"] = out["ret_1"].rolling(spec.vol_window).std()
 
80
 
81
- # next-day return and target
82
- out["ret_next"] = out["close"].pct_change().shift(-1)
83
- out["target"] = (out["ret_next"] > 0).astype(int)
84
 
85
- # drop rows with NaNs from rolling calcs
86
- out = out.dropna().copy()
 
87
 
88
- # select model columns
89
- feats = out[["ret_1", "ret_5", "sma_ratio", "rsi", "vol"]].copy()
90
- feats["target"] = out["target"].astype(int).values
91
- feats["close"] = out["close"].values
92
- return feats
93
 
 
 
 
94
 
95
- def make_dataset_for_tickers(tickers, spec: FeatureSpec) -> pd.DataFrame:
 
 
 
 
 
 
 
96
  frames = []
 
 
97
  for t in tickers:
98
- prices = fetch_prices_stooq(t)
99
- # keep last N days (plus some buffer for rolling windows)
100
- prices = prices.iloc[-(spec.lookback_days + 100):].copy()
101
- feats = build_features(prices, spec)
102
- feats["ticker"] = t.upper()
103
- feats["date"] = feats.index.astype(str)
104
- frames.append(feats.reset_index(drop=True))
105
- return pd.concat(frames, ignore_index=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
 
108
  # -----------------------------
@@ -140,25 +223,7 @@ class LitClassifier(pl.LightningModule):
140
  return torch.optim.Adam(self.parameters(), lr=self.lr)
141
 
142
 
143
- def fig_to_image(fig) -> np.ndarray:
144
- buf = io.BytesIO()
145
- fig.savefig(buf, format="png", bbox_inches="tight", dpi=160)
146
- plt.close(fig)
147
- buf.seek(0)
148
- return plt.imread(buf)
149
-
150
-
151
- def save_df_to_temp_csv(df: pd.DataFrame) -> str:
152
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", prefix="signals_dataset_")
153
- df.to_csv(tmp.name, index=False)
154
- return tmp.name
155
-
156
-
157
- # -----------------------------
158
- # “Signals” logic (educational)
159
- # -----------------------------
160
  def signal_from_prob(p_up: float, buy_th: float, sell_th: float) -> str:
161
- # p_up: probability next day is up
162
  if p_up >= buy_th:
163
  return "BUY (signal)"
164
  if p_up <= sell_th:
@@ -166,6 +231,9 @@ def signal_from_prob(p_up: float, buy_th: float, sell_th: float) -> str:
166
  return "HOLD (signal)"
167
 
168
 
 
 
 
169
  def run_app(
170
  tickers_text: str,
171
  lookback_days: int,
@@ -175,40 +243,31 @@ def run_app(
175
  seed: int,
176
  buy_threshold: float,
177
  sell_threshold: float,
 
178
  ):
179
  pl.seed_everything(int(seed), workers=True)
180
 
181
  tickers = [t.strip().upper() for t in tickers_text.split(",") if t.strip()]
182
  tickers = tickers[:10]
183
- if len(tickers) == 0:
184
- raise gr.Error("Please enter at least 1 ticker (comma-separated), e.g. AAPL, MSFT, NVDA")
185
 
186
  spec = FeatureSpec(lookback_days=int(lookback_days))
 
187
 
188
- # Build dataset (one table for all tickers)
189
- df = make_dataset_for_tickers(tuple(tickers), spec) # tuple so cache plays nice if you reuse
190
- df = df.replace([np.inf, -np.inf], np.nan).dropna().copy()
191
-
192
- # Train/val split by time-ish: last 20% as val PER ticker
193
- parts = []
194
- for t in tickers:
195
- dft = df[df["ticker"] == t].copy()
196
- n = len(dft)
197
- cut = max(int(n * 0.8), 1)
198
- dft["split"] = "train"
199
- dft.loc[dft.index[cut:], "split"] = "val"
200
- parts.append(dft)
201
- df = pd.concat(parts, ignore_index=True)
202
 
203
  feature_cols = ["ret_1", "ret_5", "sma_ratio", "rsi", "vol"]
204
 
205
- # Standardize features using TRAIN split stats
206
  train_df = df[df["split"] == "train"].copy()
207
  mu = train_df[feature_cols].mean()
208
  sd = train_df[feature_cols].std().replace(0, 1.0)
209
 
210
  df_std = df.copy()
211
  df_std[feature_cols] = (df_std[feature_cols] - mu) / sd
 
212
 
213
  # Torch tensors
214
  X_train = torch.tensor(df_std[df_std["split"] == "train"][feature_cols].values, dtype=torch.float32)
@@ -220,10 +279,18 @@ def run_app(
220
  train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=int(batch_size), shuffle=True)
221
  val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=int(batch_size), shuffle=False)
222
 
 
 
 
 
 
 
223
  model = LitClassifier(n_features=len(feature_cols), lr=float(lr))
224
 
225
  trainer = pl.Trainer(
226
  max_epochs=int(epochs),
 
 
227
  logger=False,
228
  enable_checkpointing=False,
229
  enable_progress_bar=False,
@@ -233,11 +300,13 @@ def run_app(
233
  trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
234
 
235
  # Inference: latest row per ticker
236
- out_rows = []
237
  model.eval()
 
238
  with torch.no_grad():
239
  for t in tickers:
240
- dft = df_std[df_std["ticker"] == t].copy()
 
 
241
  last = dft.iloc[-1]
242
  x = torch.tensor(last[feature_cols].values.astype(np.float32)).unsqueeze(0)
243
  logit = model(x).item()
@@ -246,63 +315,73 @@ def run_app(
246
  out_rows.append(
247
  {
248
  "ticker": t,
249
- "last_close": float(last["close"]),
 
250
  "p_up_next_day": round(float(p_up), 4),
251
  "signal": sig,
252
- "date": last["date"],
253
  }
254
  )
255
- signals_df = pd.DataFrame(out_rows).sort_values("p_up_next_day", ascending=False)
256
 
257
- # Simple backtest curve for FIRST ticker (val split only): long if p_up>=buy_th, short if p_up<=sell_th
 
 
 
 
 
258
  t0 = tickers[0]
259
- d0 = df_std[df_std["ticker"] == t0].copy()
260
- d0_val = d0[d0["split"] == "val"].copy()
261
- if len(d0_val) >= 5:
262
- X0 = torch.tensor(d0_val[feature_cols].values, dtype=torch.float32)
263
- logits = model(X0).detach().cpu().numpy()
264
  p = 1 / (1 + np.exp(-logits))
265
 
266
- pos = np.zeros_like(p)
267
  pos[p >= float(buy_threshold)] = 1.0
268
  pos[p <= float(sell_threshold)] = -1.0 # short (toy)
269
- # realized return is ret_next (already aligned)
270
- r = d0_val["ret_next"].values
271
- strat = pos * r
272
  equity = (1 + strat).cumprod()
273
 
274
  fig = plt.figure()
275
  plt.plot(equity)
276
- plt.title(f"Toy Backtest (VAL only) — {t0} | long/short by signal")
277
  plt.xlabel("Val days")
278
  plt.ylabel("Equity (start=1.0)")
279
  plt.grid(True, alpha=0.3)
280
  backtest_img = fig_to_image(fig)
281
- else:
282
- backtest_img = None
283
-
284
- # Data preview + download
285
- preview = df.head(20).copy()
286
- csv_path = save_df_to_temp_csv(df)
287
-
288
- summary = (
289
- f"Tickers: {', '.join(tickers)}\n"
290
- f"Rows total: {len(df)} (train={len(df[df['split']=='train'])}, val={len(df[df['split']=='val'])})\n"
291
- f"Model: Lightning MLP classifier (predict next-day up/down)\n"
292
- f"Signals are educational thresholds: BUY if p>= {buy_threshold}, SELL if p<= {sell_threshold}\n"
293
- )
294
 
295
- return signals_df, backtest_img, preview, csv_path, summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
 
297
 
 
 
 
 
298
  with gr.Blocks(title="Educational Stock Signals (Lightning)") as demo:
299
- gr.Markdown("# Educational Stock Signals (Top 10 tickers)\n" + DISCLAIMER)
300
 
301
- with gr.Row():
302
- tickers_text = gr.Textbox(
303
- value="AAPL, MSFT, NVDA, AMZN, GOOGL, META, TSLA, JPM, V, XOM",
304
- label="Tickers (comma-separated, up to 10)",
305
- )
306
 
307
  with gr.Row():
308
  lookback_days = gr.Slider(200, 2000, value=730, step=10, label="Lookback days (history window)")
@@ -316,24 +395,28 @@ with gr.Blocks(title="Educational Stock Signals (Lightning)") as demo:
316
  with gr.Row():
317
  buy_threshold = gr.Slider(0.50, 0.80, value=0.55, step=0.01, label="BUY threshold (p_up)")
318
  sell_threshold = gr.Slider(0.20, 0.50, value=0.45, step=0.01, label="SELL threshold (p_up)")
 
319
 
320
  run_btn = gr.Button("Build signals", variant="primary")
321
 
322
  with gr.Tab("Signals"):
323
- signals_df = gr.Dataframe(label="Signals (educational)", wrap=True)
324
 
325
  with gr.Tab("Backtest (toy)"):
326
- backtest_img = gr.Image(label="Toy equity curve (val only; first ticker)", type="numpy")
327
 
328
  with gr.Tab("Data"):
329
- preview_df = gr.Dataframe(label="Feature dataset preview (first 20 rows)", wrap=True)
330
- download_file = gr.File(label="Download full dataset CSV (features + target)")
331
- summary_txt = gr.Textbox(label="Run summary", lines=6)
332
 
333
  run_btn.click(
334
  fn=run_app,
335
- inputs=[tickers_text, lookback_days, lr, batch_size, epochs, seed, buy_threshold, sell_threshold],
336
- outputs=[signals_df, backtest_img, preview_df, download_file, summary_txt],
 
 
 
337
  )
338
 
339
  if __name__ == "__main__":
 
3
  import tempfile
4
  from dataclasses import dataclass
5
  from functools import lru_cache
6
+ from typing import List, Tuple
7
 
8
  import gradio as gr
9
+
10
+ # Headless plotting for HF Spaces
11
  import matplotlib
12
  matplotlib.use("Agg")
13
  import matplotlib.pyplot as plt
14
+
15
  import numpy as np
16
  import pandas as pd
17
+ import requests
18
  import torch
19
  import torch.nn as nn
20
  import torch.nn.functional as F
 
21
  import lightning.pytorch as pl
22
  from torch.utils.data import DataLoader, TensorDataset
23
 
 
 
24
 
25
  DISCLAIMER = """
26
  **Disclaimer (Educational Use Only):**
 
31
 
32
 
33
  # -----------------------------
34
+ # Feature engineering
35
  # -----------------------------
36
  @dataclass
37
  class FeatureSpec:
38
+ lookback_days: int = 730 # ~2 years
39
  sma_fast: int = 10
40
  sma_slow: int = 20
41
  rsi_period: int = 14
 
44
 
45
  def _rsi(close: pd.Series, period: int = 14) -> pd.Series:
46
  delta = close.diff()
47
+ gain = (delta.where(delta > 0, 0.0)).rolling(period).mean()
48
+ loss = (-delta.where(delta < 0, 0.0)).rolling(period).mean()
49
  rs = gain / (loss + 1e-9)
50
  return 100 - (100 / (1 + rs))
51
 
52
 
53
+ def _normalize_stooq_ticker(ticker: str) -> str:
 
54
  """
55
+ Stooq expects symbols like:
56
+ - aapl.us (US equities)
57
+ - msft.us
58
+ If user types AAPL, we convert to aapl.us.
59
+ If user already provides suffix (contains '.'), we keep it.
60
  """
 
61
  t = ticker.strip().lower()
62
+ if not t:
63
+ return t
64
+ if "." not in t:
65
+ # default: US equity
66
+ t = f"{t}.us"
67
+ return t
68
+
69
+
70
@lru_cache(maxsize=128)
def fetch_prices_stooq(ticker: str) -> pd.DataFrame:
    """Download daily price history for ``ticker`` from Stooq's CSV endpoint.

    Args:
        ticker: Symbol as typed by the user; it is normalized with
            ``_normalize_stooq_ticker`` before the request is made.

    Returns:
        DataFrame indexed by ``Date`` (ascending) containing the columns
        Stooq returned (Open, High, Low, Close and, when present, Volume),
        coerced to numeric, with rows lacking a ``Close`` value removed.

    Raises:
        ValueError: if the symbol is blank, no data comes back, or the
            expected OHLC columns are missing.
        requests.HTTPError: if Stooq answers with an error status.

    Note:
        Results are memoized per raw ``ticker`` string by ``lru_cache``; every
        caller receives the same DataFrame object, so copy before mutating.
    """
    sym = _normalize_stooq_ticker(ticker)
    if not sym:
        # Fail fast instead of sending a request that cannot succeed.
        raise ValueError(f"No data returned for ticker '{ticker}' (stooq symbol '{sym}').")

    # Let requests build the query string so characters typed by the user
    # (e.g. '&', '#', spaces, '^') are percent-encoded and cannot alter the
    # request's other parameters.
    r = requests.get(
        "https://stooq.com/q/d/l/",
        params={"s": sym, "i": "d"},
        timeout=25,
    )
    r.raise_for_status()

    df = pd.read_csv(io.StringIO(r.text))
    if df.empty or "Date" not in df.columns:
        raise ValueError(f"No data returned for ticker '{ticker}' (stooq symbol '{sym}').")

    df["Date"] = pd.to_datetime(df["Date"])
    df = df.set_index("Date").sort_index()

    # Basic validation: the feature pipeline needs at least the OHLC columns.
    needed = {"Open", "High", "Low", "Close"}
    if not needed.issubset(df.columns):
        raise ValueError(f"Unexpected Stooq columns for '{ticker}': {list(df.columns)}")

    # Ensure numeric; unparsable cells become NaN rather than raising.
    for c in ("Open", "High", "Low", "Close", "Volume"):
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    # Only Close is mandatory downstream, so only Close decides row removal.
    return df.dropna(subset=["Close"]).copy()
101
 
102
 
103
def build_features(prices: pd.DataFrame, spec: FeatureSpec) -> pd.DataFrame:
    """Build the model features and next-day target from a price table.

    Args:
        prices: Price history with a ``Close`` column, ordered oldest to
            newest. Other columns are ignored.
        spec: Window lengths for the indicators (``sma_fast``, ``sma_slow``,
            ``rsi_period``, ``vol_window``).

    Returns:
        DataFrame sharing ``prices``' index with the columns
        ``close, ret_1, ret_5, sma_ratio, rsi, vol, ret_next, target`` where
        ``target = 1`` if the next-day return is > 0 else 0. ``ret_next`` is
        kept for the toy backtest. Rows where any of these columns is NaN
        (indicator warm-up rows and the final row, which has no next-day
        return) are removed.
    """
    # Work only on Close: a NaN in an unused input column (e.g. Volume)
    # must not cause an otherwise valid row to be dropped below.
    df = prices[["Close"]].copy()
    df["close"] = df["Close"].astype(float)

    df["ret_1"] = df["close"].pct_change()
    df["ret_5"] = df["close"].pct_change(5)

    df["sma_fast"] = df["close"].rolling(spec.sma_fast).mean()
    df["sma_slow"] = df["close"].rolling(spec.sma_slow).mean()
    # Relative gap between the fast and slow averages; epsilon guards a zero divisor.
    df["sma_ratio"] = df["sma_fast"] / (df["sma_slow"] + 1e-9) - 1.0

    df["rsi"] = _rsi(df["close"], spec.rsi_period)
    df["vol"] = df["ret_1"].rolling(spec.vol_window).std()

    # Next-day realized return and label.
    df["ret_next"] = df["close"].pct_change().shift(-1)
    df["target"] = (df["ret_next"] > 0).astype(int)

    # Final dataset columns used by model + extras; NaN filtering is limited
    # to exactly these columns.
    columns = ["close", "ret_1", "ret_5", "sma_ratio", "rsi", "vol", "ret_next", "target"]
    out = df[columns].dropna().copy()
    return out
131
 
132
+
133
def make_dataset_for_tickers(tickers: List[str], spec: FeatureSpec) -> Tuple[pd.DataFrame, List[str]]:
    """Fetch prices and build one combined feature table for ``tickers``.

    Args:
        tickers: Symbols to load. Repeated symbols (compared upper-cased)
            are processed once so their rows are not duplicated.
        spec: Feature settings; ``lookback_days`` bounds the history used.

    Returns:
        A tuple ``(df, failed)``:
        - ``df``: combined dataset sorted by ticker then date, with columns
          date, ticker, close, ret_1, ret_5, sma_ratio, rsi, vol, ret_next, target
        - ``failed``: upper-cased tickers that raised during fetch/feature
          building or produced no usable rows.

    Raises:
        ValueError: if no ticker produced any usable rows.
    """
    frames = []
    failed = []
    seen = set()

    for t in tickers:
        label = t.upper()
        if label in seen:
            # Loading a ticker twice would duplicate every one of its rows.
            continue
        seen.add(label)

        try:
            prices = fetch_prices_stooq(t)
            # keep a window + buffer for rolling indicators
            prices = prices.iloc[-(spec.lookback_days + 120):].copy()
            feats = build_features(prices, spec)
        except Exception:
            # Best effort: one bad ticker must not abort the whole run;
            # it is reported back to the caller through `failed`.
            failed.append(label)
            continue

        if feats.empty:
            # Too little history to fill the indicator windows: report it
            # instead of silently contributing zero rows.
            failed.append(label)
            continue

        feats = feats.reset_index().rename(columns={"Date": "date"})
        feats["ticker"] = label
        frames.append(feats)

    if not frames:
        raise ValueError("No tickers returned usable data. Try different tickers (e.g., AAPL, MSFT).")

    df = pd.concat(frames, ignore_index=True)
    df["date"] = pd.to_datetime(df["date"])
    df = df.sort_values(["ticker", "date"]).reset_index(drop=True)
    return df, failed
162
+
163
+
164
def split_train_val_per_ticker(df: pd.DataFrame, train_frac: float = 0.8) -> pd.DataFrame:
    """Label each row ``"train"`` or ``"val"`` chronologically, per ticker.

    Args:
        df: Table with at least ``ticker`` and ``date`` columns.
        train_frac: Fraction of each ticker's rows (oldest first) assigned
            to the training split; at least one row is always ``"train"``.

    Returns:
        A new DataFrame (fresh 0..n-1 index) with the rows of each ticker
        sorted by date, tickers in order of first appearance, plus a
        ``split`` column. An empty input yields an empty frame that still
        has the ``split`` column.
    """
    if len(df) == 0:
        # With no groups, pd.concat([]) would raise
        # "No objects to concatenate"; return a well-formed empty result.
        empty = df.copy()
        empty["split"] = pd.Series(dtype="object")
        return empty

    parts = []
    for _, group in df.groupby("ticker", sort=False):
        group = group.sort_values("date").reset_index(drop=True)
        n_rows = len(group)
        n_train = max(int(n_rows * train_frac), 1)
        group["split"] = "train"
        if n_train < n_rows:
            # After reset_index the labels are 0..n-1, so this label slice
            # covers exactly the rows from position n_train to the end.
            group.loc[n_train:, "split"] = "val"
        parts.append(group)
    return pd.concat(parts, ignore_index=True)
175
+
176
+
177
def fig_to_image(fig) -> np.ndarray:
    """Render a Matplotlib figure to PNG in memory and return it as an array.

    Args:
        fig: The figure to render. It is closed by this function, whether
            or not rendering succeeds, and must not be reused afterwards.

    Returns:
        The decoded PNG image as returned by ``plt.imread``.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=160)
    finally:
        # Close even when savefig fails: pyplot keeps figures alive until
        # they are closed, which would leak memory in a long-running app.
        plt.close(fig)
    buf.seek(0)
    return plt.imread(buf)
183
+
184
+
185
def save_df_to_temp_csv(df: pd.DataFrame) -> str:
    """Write ``df`` to a new temporary CSV file and return the file's path.

    The file is named ``signals_dataset_*.csv``, is written without the
    index column, and is NOT deleted automatically (the caller hands the
    path to the download widget).
    """
    # Reserve a unique path, then close the handle before writing: leaving it
    # open leaks a file descriptor per call and prevents re-opening the file
    # by name on platforms that lock open files.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", prefix="signals_dataset_"
    ) as tmp:
        path = tmp.name
    df.to_csv(path, index=False)
    return path
189
 
190
 
191
  # -----------------------------
 
223
  return torch.optim.Adam(self.parameters(), lr=self.lr)
224
 
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  def signal_from_prob(p_up: float, buy_th: float, sell_th: float) -> str:
 
227
  if p_up >= buy_th:
228
  return "BUY (signal)"
229
  if p_up <= sell_th:
 
231
  return "HOLD (signal)"
232
 
233
 
234
+ # -----------------------------
235
+ # Main Gradio function
236
+ # -----------------------------
237
  def run_app(
238
  tickers_text: str,
239
  lookback_days: int,
 
243
  seed: int,
244
  buy_threshold: float,
245
  sell_threshold: float,
246
+ device_choice: str,
247
  ):
248
  pl.seed_everything(int(seed), workers=True)
249
 
250
  tickers = [t.strip().upper() for t in tickers_text.split(",") if t.strip()]
251
  tickers = tickers[:10]
252
+ if not tickers:
253
+ raise gr.Error("Enter at least 1 ticker, e.g. AAPL, MSFT, NVDA")
254
 
255
  spec = FeatureSpec(lookback_days=int(lookback_days))
256
+ df_raw, failed = make_dataset_for_tickers(tickers, spec)
257
 
258
+ # split per ticker
259
+ df = split_train_val_per_ticker(df_raw, train_frac=0.8)
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
  feature_cols = ["ret_1", "ret_5", "sma_ratio", "rsi", "vol"]
262
 
263
+ # Standardize using TRAIN split stats
264
  train_df = df[df["split"] == "train"].copy()
265
  mu = train_df[feature_cols].mean()
266
  sd = train_df[feature_cols].std().replace(0, 1.0)
267
 
268
  df_std = df.copy()
269
  df_std[feature_cols] = (df_std[feature_cols] - mu) / sd
270
+ df_std = df_std.replace([np.inf, -np.inf], np.nan).dropna().copy()
271
 
272
  # Torch tensors
273
  X_train = torch.tensor(df_std[df_std["split"] == "train"][feature_cols].values, dtype=torch.float32)
 
279
  train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=int(batch_size), shuffle=True)
280
  val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=int(batch_size), shuffle=False)
281
 
282
+ # Lightning Trainer device selection
283
+ want_cuda = (device_choice == "cuda")
284
+ has_cuda = torch.cuda.is_available()
285
+ using_cuda = want_cuda and has_cuda
286
+ accelerator = "gpu" if using_cuda else "cpu"
287
+
288
  model = LitClassifier(n_features=len(feature_cols), lr=float(lr))
289
 
290
  trainer = pl.Trainer(
291
  max_epochs=int(epochs),
292
+ accelerator=accelerator,
293
+ devices=1,
294
  logger=False,
295
  enable_checkpointing=False,
296
  enable_progress_bar=False,
 
300
  trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
301
 
302
  # Inference: latest row per ticker
 
303
  model.eval()
304
+ out_rows = []
305
  with torch.no_grad():
306
  for t in tickers:
307
+ dft = df_std[df_std["ticker"] == t].sort_values("date")
308
+ if dft.empty:
309
+ continue
310
  last = dft.iloc[-1]
311
  x = torch.tensor(last[feature_cols].values.astype(np.float32)).unsqueeze(0)
312
  logit = model(x).item()
 
315
  out_rows.append(
316
  {
317
  "ticker": t,
318
+ "date": last["date"].date().isoformat(),
319
+ "last_close": round(float(last["close"]), 4),
320
  "p_up_next_day": round(float(p_up), 4),
321
  "signal": sig,
 
322
  }
323
  )
 
324
 
325
+ signals_df = pd.DataFrame(out_rows)
326
+ if not signals_df.empty:
327
+ signals_df = signals_df.sort_values("p_up_next_day", ascending=False).reset_index(drop=True)
328
+
329
+ # Toy backtest for first ticker (val split only)
330
+ backtest_img = None
331
  t0 = tickers[0]
332
+ d0 = df_std[(df_std["ticker"] == t0) & (df_std["split"] == "val")].sort_values("date").copy()
333
+ if len(d0) >= 30:
334
+ X0 = torch.tensor(d0[feature_cols].values, dtype=torch.float32)
335
+ with torch.no_grad():
336
+ logits = model(X0).detach().cpu().numpy()
337
  p = 1 / (1 + np.exp(-logits))
338
 
339
+ pos = np.zeros_like(p, dtype=float)
340
  pos[p >= float(buy_threshold)] = 1.0
341
  pos[p <= float(sell_threshold)] = -1.0 # short (toy)
342
+
343
+ strat = pos * d0["ret_next"].values
 
344
  equity = (1 + strat).cumprod()
345
 
346
  fig = plt.figure()
347
  plt.plot(equity)
348
+ plt.title(f"Toy Backtest (VAL only) — {t0} | long/short by signal")
349
  plt.xlabel("Val days")
350
  plt.ylabel("Equity (start=1.0)")
351
  plt.grid(True, alpha=0.3)
352
  backtest_img = fig_to_image(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
+ # Data preview + download (download the unstandardized feature table with split)
355
+ export_df = df.copy()
356
+ export_df["date"] = export_df["date"].dt.date.astype(str)
357
+ export_df = export_df[["date", "ticker", "split", "close", "ret_1", "ret_5", "sma_ratio", "rsi", "vol", "ret_next", "target"]]
358
+ preview_df = export_df.head(25).round(6)
359
+ csv_path = save_df_to_temp_csv(export_df.round(8))
360
+
361
+ summary_lines = [
362
+ f"Using device: {'cuda' if using_cuda else 'cpu'}",
363
+ f"Tickers requested (max 10): {', '.join(tickers)}",
364
+ f"Rows: {len(export_df)} | train={int((export_df['split']=='train').sum())} | val={int((export_df['split']=='val').sum())}",
365
+ f"BUY if p_up >= {buy_threshold:.2f} | SELL if p_up <= {sell_threshold:.2f}",
366
+ ]
367
+ if failed:
368
+ summary_lines.append(f"Tickers with no data / error: {', '.join(failed)}")
369
+ summary = "\n".join(summary_lines)
370
 
371
+ return signals_df, backtest_img, preview_df, csv_path, summary
372
 
373
+
374
+ # -----------------------------
375
+ # Gradio UI
376
+ # -----------------------------
377
  with gr.Blocks(title="Educational Stock Signals (Lightning)") as demo:
378
+ gr.Markdown("# Educational Stock Signals (Lightning)\n" + DISCLAIMER)
379
 
380
+ tickers_text = gr.Textbox(
381
+ value="AAPL, MSFT, NVDA, AMZN, GOOGL, META, TSLA, JPM, V, XOM",
382
+ label="Tickers (comma-separated, up to 10)",
383
+ info="Tip: Stooq uses US symbols like AAPL -> aapl.us automatically. If needed, specify suffix (e.g., '7203.jp').",
384
+ )
385
 
386
  with gr.Row():
387
  lookback_days = gr.Slider(200, 2000, value=730, step=10, label="Lookback days (history window)")
 
395
  with gr.Row():
396
  buy_threshold = gr.Slider(0.50, 0.80, value=0.55, step=0.01, label="BUY threshold (p_up)")
397
  sell_threshold = gr.Slider(0.20, 0.50, value=0.45, step=0.01, label="SELL threshold (p_up)")
398
+ device_choice = gr.Radio(["cpu", "cuda"], value="cpu", label="Device (cuda only if available)")
399
 
400
  run_btn = gr.Button("Build signals", variant="primary")
401
 
402
  with gr.Tab("Signals"):
403
+ signals_out = gr.Dataframe(label="Signals (educational)", wrap=True)
404
 
405
  with gr.Tab("Backtest (toy)"):
406
+ backtest_out = gr.Image(label="Toy equity curve (val only; first ticker)", type="numpy")
407
 
408
  with gr.Tab("Data"):
409
+ preview_out = gr.Dataframe(label="Feature dataset preview", wrap=True)
410
+ download_out = gr.File(label="Download full dataset CSV (features + target + split)")
411
+ summary_out = gr.Textbox(label="Run summary", lines=8)
412
 
413
  run_btn.click(
414
  fn=run_app,
415
+ inputs=[
416
+ tickers_text, lookback_days, lr, batch_size, epochs, seed,
417
+ buy_threshold, sell_threshold, device_choice
418
+ ],
419
+ outputs=[signals_out, backtest_out, preview_out, download_out, summary_out],
420
  )
421
 
422
  if __name__ == "__main__":