eaglelandsonce committed on
Commit
6143392
·
verified ·
1 Parent(s): 0e30cfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -25
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import io
2
  import math
3
  import tempfile
@@ -20,6 +21,8 @@ import torch.nn.functional as F
20
  import lightning.pytorch as pl
21
  from torch.utils.data import DataLoader, TensorDataset
22
 
 
 
23
  import onnxruntime as ort
24
 
25
 
@@ -230,8 +233,12 @@ class OnnxWrapper(nn.Module):
230
  return torch.sigmoid(logits)
231
 
232
 
233
- def export_onnx_model(trained_model: LitClassifier, mu: np.ndarray, sd: np.ndarray, n_features: int) -> str:
234
- # Build wrapper on CPU for export
 
 
 
 
235
  wrapper = OnnxWrapper(trained_model.net.cpu().eval(), mu=mu, sd=sd).eval()
236
 
237
  dummy = torch.zeros(1, n_features, dtype=torch.float32)
@@ -246,12 +253,24 @@ def export_onnx_model(trained_model: LitClassifier, mu: np.ndarray, sd: np.ndarr
246
  output_names=["p_up"],
247
  dynamic_axes={"features": {0: "batch"}, "p_up": {0: "batch"}},
248
  opset_version=17,
 
249
  )
 
 
 
 
 
 
 
 
 
 
 
 
250
  return onnx_path
251
 
252
 
253
  def onnx_predict_probs(onnx_path: str, X: np.ndarray) -> np.ndarray:
254
- # CPU provider is the most compatible for Spaces
255
  sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
256
  input_name = sess.get_inputs()[0].name
257
  out = sess.run(None, {input_name: X.astype(np.float32)})
@@ -326,7 +345,7 @@ def run_app(
326
  )
327
  trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
328
 
329
- # ---- Export ONNX (includes preprocessing + sigmoid)
330
  onnx_path = export_onnx_model(model, mu=mu, sd=sd, n_features=n_features)
331
 
332
  # ---- Inference: latest row per ticker (compare Torch vs ONNX)
@@ -335,16 +354,14 @@ def run_app(
335
  torch_probs_for_onnx_compare = []
336
  onnx_inputs = []
337
 
338
- # Use RAW (unstandardized) latest feature row for ONNX input
339
  for t in tickers:
340
  dft_raw = df[df["ticker"] == t].sort_values("date")
341
  if dft_raw.empty:
342
  continue
343
  last_raw = dft_raw.iloc[-1]
344
- x_raw = last_raw[feature_cols].values.astype(np.float32) # raw features
345
  onnx_inputs.append(x_raw)
346
 
347
- # Torch probability (do same preprocessing here for comparison)
348
  x_std = (x_raw - mu) / sd
349
  x_t = torch.tensor(x_std, dtype=torch.float32).unsqueeze(0)
350
  with torch.no_grad():
@@ -354,10 +371,9 @@ def run_app(
354
 
355
  onnx_probs = np.array([])
356
  if len(onnx_inputs) > 0:
357
- X_onnx = np.stack(onnx_inputs, axis=0) # shape (n_tickers, n_features)
358
  onnx_probs = onnx_predict_probs(onnx_path, X_onnx)
359
 
360
- # Build final table
361
  idx = 0
362
  for t in tickers:
363
  dft_raw = df[df["ticker"] == t].sort_values("date")
@@ -367,8 +383,12 @@ def run_app(
367
 
368
  p_torch = float(torch_probs_for_onnx_compare[idx])
369
  p_onnx = float(onnx_probs[idx]) if len(onnx_probs) else float("nan")
370
- sig = signal_from_prob(p_onnx if not math.isnan(p_onnx) else p_torch,
371
- float(buy_threshold), float(sell_threshold))
 
 
 
 
372
 
373
  out_rows.append(
374
  {
@@ -390,12 +410,8 @@ def run_app(
390
  # Toy backtest for first ticker (val split only)
391
  backtest_img = None
392
  t0 = tickers[0]
393
- d0 = df_std[(df_std["ticker"] == t0) & (df_std["split"] == "val")].sort_values("date").copy()
394
- if len(d0) >= 30:
395
- X0_std = d0[feature_cols].values.astype(np.float32)
396
-
397
- # Use ONNX for backtest probability (feed RAW features to ONNX wrapper)
398
- d0_raw = df[(df["ticker"] == t0) & (df["split"] == "val")].sort_values("date").copy()
399
  X0_raw = d0_raw[feature_cols].values.astype(np.float32)
400
  p = onnx_predict_probs(onnx_path, X0_raw)
401
 
@@ -417,15 +433,16 @@ def run_app(
417
  # Data preview + download
418
  export_df = df.copy()
419
  export_df["date"] = export_df["date"].dt.date.astype(str)
420
- export_df = export_df[["date", "ticker", "split", "close", "ret_1", "ret_5", "sma_ratio", "rsi", "vol", "ret_next", "target"]]
 
 
421
  preview_df = export_df.head(25).round(6)
422
  csv_path = save_df_to_temp_csv(export_df.round(8), prefix="signals_dataset_")
423
 
424
- # ONNX download + simple inference snippet for students
425
  inference_snippet = """import numpy as np
426
  import onnxruntime as ort
427
 
428
- onnx_path = "signals_model.onnx" # downloaded file
429
  sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
430
  inp = sess.get_inputs()[0].name
431
 
@@ -434,7 +451,7 @@ inp = sess.get_inputs()[0].name
434
  x = np.array([[0.001, 0.01, 0.02, 55.0, 0.012]], dtype=np.float32)
435
 
436
  p_up = sess.run(None, {inp: x})[0]
437
- print("p_up:", float(p_up[0]))
438
  """
439
  snippet_path = save_bytes_to_temp_file(inference_snippet.encode("utf-8"), suffix=".py", prefix="onnx_inference_example_")
440
 
@@ -443,7 +460,7 @@ print("p_up:", float(p_up[0]))
443
  f"Tickers requested (max 10): {', '.join(tickers)}",
444
  f"Rows: {len(export_df)} | train={int((export_df['split']=='train').sum())} | val={int((export_df['split']=='val').sum())}",
445
  f"BUY if p_up >= {buy_threshold:.2f} | SELL if p_up <= {sell_threshold:.2f}",
446
- "ONNX export: wrapper includes preprocessing + sigmoid, so ONNX input is RAW features.",
447
  ]
448
  if failed:
449
  summary_lines.append(f"Tickers with no data / error: {', '.join(failed)}")
@@ -481,18 +498,18 @@ with gr.Blocks(title="Educational Stock Signals (Lightning + ONNX)") as demo:
481
  run_btn = gr.Button("Train + Export ONNX + Infer", variant="primary")
482
 
483
  with gr.Tab("Signals (Torch vs ONNX)"):
484
- signals_out = gr.Dataframe(label="Signals (educational) + Torch/ONNX comparison", wrap=True)
485
 
486
  with gr.Tab("Backtest (toy)"):
487
  backtest_out = gr.Image(label="Toy equity curve (val only; first ticker) using ONNX probs", type="numpy")
488
 
489
  with gr.Tab("Data"):
490
  preview_out = gr.Dataframe(label="Feature dataset preview", wrap=True)
491
- download_out = gr.File(label="Download full dataset CSV (features + target + split)")
492
  summary_out = gr.Textbox(label="Run summary", lines=10)
493
 
494
  with gr.Tab("ONNX Export"):
495
- onnx_file = gr.File(label="Download ONNX model (.onnx)")
496
  onnx_example = gr.File(label="Download ONNX inference example (.py)")
497
 
498
  run_btn.click(
 
1
+ import os
2
  import io
3
  import math
4
  import tempfile
 
21
  import lightning.pytorch as pl
22
  from torch.utils.data import DataLoader, TensorDataset
23
 
24
+ import onnx
25
+ from onnx import external_data_helper
26
  import onnxruntime as ort
27
 
28
 
 
233
  return torch.sigmoid(logits)
234
 
235
 
236
+ def export_onnx_model(trained_model, mu: np.ndarray, sd: np.ndarray, n_features: int) -> str:
237
+ """
238
+ Exports a SINGLE-FILE ONNX.
239
+ If PyTorch writes external data (onnx_path + '.data'), we merge it into the .onnx
240
+ so you do NOT need a separate weights file for inference.
241
+ """
242
  wrapper = OnnxWrapper(trained_model.net.cpu().eval(), mu=mu, sd=sd).eval()
243
 
244
  dummy = torch.zeros(1, n_features, dtype=torch.float32)
 
253
  output_names=["p_up"],
254
  dynamic_axes={"features": {0: "batch"}, "p_up": {0: "batch"}},
255
  opset_version=17,
256
+ do_constant_folding=True,
257
  )
258
+
259
+ # Merge external data into the ONNX (if created)
260
+ data_path = onnx_path + ".data"
261
+ if os.path.exists(data_path):
262
+ m = onnx.load_model(onnx_path, load_external_data=True)
263
+ external_data_helper.convert_model_from_external_data(m)
264
+ onnx.save_model(m, onnx_path)
265
+ try:
266
+ os.remove(data_path)
267
+ except OSError:
268
+ pass
269
+
270
  return onnx_path
271
 
272
 
273
  def onnx_predict_probs(onnx_path: str, X: np.ndarray) -> np.ndarray:
 
274
  sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
275
  input_name = sess.get_inputs()[0].name
276
  out = sess.run(None, {input_name: X.astype(np.float32)})
 
345
  )
346
  trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
347
 
348
+ # ---- Export ONNX (single-file; includes preprocessing + sigmoid)
349
  onnx_path = export_onnx_model(model, mu=mu, sd=sd, n_features=n_features)
350
 
351
  # ---- Inference: latest row per ticker (compare Torch vs ONNX)
 
354
  torch_probs_for_onnx_compare = []
355
  onnx_inputs = []
356
 
 
357
  for t in tickers:
358
  dft_raw = df[df["ticker"] == t].sort_values("date")
359
  if dft_raw.empty:
360
  continue
361
  last_raw = dft_raw.iloc[-1]
362
+ x_raw = last_raw[feature_cols].values.astype(np.float32) # raw features (ONNX expects raw)
363
  onnx_inputs.append(x_raw)
364
 
 
365
  x_std = (x_raw - mu) / sd
366
  x_t = torch.tensor(x_std, dtype=torch.float32).unsqueeze(0)
367
  with torch.no_grad():
 
371
 
372
  onnx_probs = np.array([])
373
  if len(onnx_inputs) > 0:
374
+ X_onnx = np.stack(onnx_inputs, axis=0)
375
  onnx_probs = onnx_predict_probs(onnx_path, X_onnx)
376
 
 
377
  idx = 0
378
  for t in tickers:
379
  dft_raw = df[df["ticker"] == t].sort_values("date")
 
383
 
384
  p_torch = float(torch_probs_for_onnx_compare[idx])
385
  p_onnx = float(onnx_probs[idx]) if len(onnx_probs) else float("nan")
386
+
387
+ sig = signal_from_prob(
388
+ p_onnx if not math.isnan(p_onnx) else p_torch,
389
+ float(buy_threshold),
390
+ float(sell_threshold),
391
+ )
392
 
393
  out_rows.append(
394
  {
 
410
  # Toy backtest for first ticker (val split only)
411
  backtest_img = None
412
  t0 = tickers[0]
413
+ d0_raw = df[(df["ticker"] == t0) & (df["split"] == "val")].sort_values("date").copy()
414
+ if len(d0_raw) >= 30:
 
 
 
 
415
  X0_raw = d0_raw[feature_cols].values.astype(np.float32)
416
  p = onnx_predict_probs(onnx_path, X0_raw)
417
 
 
433
  # Data preview + download
434
  export_df = df.copy()
435
  export_df["date"] = export_df["date"].dt.date.astype(str)
436
+ export_df = export_df[
437
+ ["date", "ticker", "split", "close", "ret_1", "ret_5", "sma_ratio", "rsi", "vol", "ret_next", "target"]
438
+ ]
439
  preview_df = export_df.head(25).round(6)
440
  csv_path = save_df_to_temp_csv(export_df.round(8), prefix="signals_dataset_")
441
 
 
442
  inference_snippet = """import numpy as np
443
  import onnxruntime as ort
444
 
445
+ onnx_path = "model.onnx"
446
  sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
447
  inp = sess.get_inputs()[0].name
448
 
 
451
  x = np.array([[0.001, 0.01, 0.02, 55.0, 0.012]], dtype=np.float32)
452
 
453
  p_up = sess.run(None, {inp: x})[0]
454
+ print("p_up:", float(np.array(p_up).reshape(-1)[0]))
455
  """
456
  snippet_path = save_bytes_to_temp_file(inference_snippet.encode("utf-8"), suffix=".py", prefix="onnx_inference_example_")
457
 
 
460
  f"Tickers requested (max 10): {', '.join(tickers)}",
461
  f"Rows: {len(export_df)} | train={int((export_df['split']=='train').sum())} | val={int((export_df['split']=='val').sum())}",
462
  f"BUY if p_up >= {buy_threshold:.2f} | SELL if p_up <= {sell_threshold:.2f}",
463
+ "ONNX export: wrapper includes preprocessing + sigmoid; exported ONNX is SINGLE-FILE (no .onnx.data).",
464
  ]
465
  if failed:
466
  summary_lines.append(f"Tickers with no data / error: {', '.join(failed)}")
 
498
  run_btn = gr.Button("Train + Export ONNX + Infer", variant="primary")
499
 
500
  with gr.Tab("Signals (Torch vs ONNX)"):
501
+ signals_out = gr.Dataframe(label="Signals + Torch/ONNX comparison", wrap=True)
502
 
503
  with gr.Tab("Backtest (toy)"):
504
  backtest_out = gr.Image(label="Toy equity curve (val only; first ticker) using ONNX probs", type="numpy")
505
 
506
  with gr.Tab("Data"):
507
  preview_out = gr.Dataframe(label="Feature dataset preview", wrap=True)
508
+ download_out = gr.File(label="Download dataset CSV (features + target + split)")
509
  summary_out = gr.Textbox(label="Run summary", lines=10)
510
 
511
  with gr.Tab("ONNX Export"):
512
+ onnx_file = gr.File(label="Download ONNX model (.onnx) — single-file")
513
  onnx_example = gr.File(label="Download ONNX inference example (.py)")
514
 
515
  run_btn.click(