amitke commited on
Commit
514c4c0
·
1 Parent(s): c5635dd

v1.0 check

Browse files
Files changed (9) hide show
  1. .gitignore +2 -0
  2. README.md +1 -1
  3. app.py +58 -0
  4. apptest.py +58 -0
  5. inference.py +64 -0
  6. models.py +24 -0
  7. requirements.txt +6 -0
  8. testing.py +49 -0
  9. train.py +140 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ artifacts/
2
+ __pycache__/
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: StockSense
3
  emoji: 🐠
4
  colorFrom: gray
5
  colorTo: pink
 
1
  ---
2
+ title: StockSenseSpace
3
  emoji: 🐠
4
  colorFrom: gray
5
  colorTo: pink
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio front-end exposing train / test / predict endpoints for the LSTM stock model."""
import gradio as gr

from train import train as train_fn
from testing import evaluate as eval_fn
from inference import predict_next as predict_fn


def train_api(symbol, seq_len=60, epochs=5, batch_size=32, start="", end=""):
    """Launch a training run; empty date strings mean 'use the default range'."""
    options = {
        "seq_len": int(seq_len),
        "epochs": int(epochs),
        "batch_size": int(batch_size),
        "start": start or None,
        "end": end or None,
    }
    return train_fn(symbol, **options)


def test_api(symbol):
    """Evaluate the saved model for *symbol* on held-out data."""
    return eval_fn(symbol)


def predict_api(symbol, days=1):
    """Forecast the next *days* closing prices for *symbol*."""
    return predict_fn(symbol, n_days=int(days))


def hello_api(name="world"):
    """Trivial liveness endpoint."""
    return {"message": f"hello {name}"}


with gr.Blocks() as demo:
    gr.Markdown("## LSTM Stock Predictor (PyTorch • Train / Test / Predict)")

    with gr.Tab("Train"):
        train_symbol = gr.Textbox(label="Symbol", value="AAPL")
        train_seq = gr.Number(label="Seq length", value=60, precision=0)
        train_epochs = gr.Number(label="Epochs", value=5, precision=0)
        train_batch = gr.Number(label="Batch size", value=32, precision=0)
        start = gr.Textbox(label="Start (YYYY-MM-DD)", placeholder="optional")
        end = gr.Textbox(label="End (YYYY-MM-DD)", placeholder="optional")
        train_btn = gr.Button("Train")
        train_out = gr.JSON()
        train_btn.click(
            train_api,
            [train_symbol, train_seq, train_epochs, train_batch, start, end],
            train_out,
            api_name="train",
        )

    with gr.Tab("Test"):
        test_symbol = gr.Textbox(label="Symbol", value="AAPL")
        test_btn = gr.Button("Run Test")
        test_out = gr.JSON()
        test_btn.click(test_api, [test_symbol], test_out, api_name="test")

    with gr.Tab("Predict"):
        pred_symbol = gr.Textbox(label="Symbol", value="AAPL")
        pred_days = gr.Number(label="Days to predict", value=1, precision=0)
        pred_btn = gr.Button("Predict")
        pred_out = gr.JSON()
        pred_btn.click(predict_api, [pred_symbol, pred_days], pred_out, api_name="predict")

    with gr.Tab("Hello"):
        hello_name = gr.Textbox(label="Name", value="world")
        hello_btn = gr.Button("Say Hello")
        hello_out = gr.JSON()
        hello_btn.click(hello_api, [hello_name], hello_out, api_name="hello")

if __name__ == "__main__":
    demo.launch()
apptest.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio UI for the LSTM stock model.

NOTE(review): this file is byte-for-byte the same app as app.py — consider
deleting one of the two or having this one import from app.py.
"""
import gradio as gr

from train import train as train_fn
from testing import evaluate as eval_fn
from inference import predict_next as predict_fn


def train_api(symbol, seq_len=60, epochs=5, batch_size=32, start="", end=""):
    """Run training for *symbol*; blank start/end fall back to the defaults."""
    return train_fn(
        symbol,
        seq_len=int(seq_len),
        epochs=int(epochs),
        batch_size=int(batch_size),
        start=start if start else None,
        end=end if end else None,
    )


def test_api(symbol):
    """Score the saved model for *symbol*."""
    return eval_fn(symbol)


def predict_api(symbol, days=1):
    """Predict the next *days* closes for *symbol*."""
    return predict_fn(symbol, n_days=int(days))


def hello_api(name="world"):
    """Simple health-check endpoint."""
    return {"message": f"hello {name}"}


with gr.Blocks() as demo:
    gr.Markdown("## LSTM Stock Predictor (PyTorch • Train / Test / Predict)")

    with gr.Tab("Train"):
        s_train = gr.Textbox(label="Symbol", value="AAPL")
        n_seq = gr.Number(label="Seq length", value=60, precision=0)
        n_epochs = gr.Number(label="Epochs", value=5, precision=0)
        n_batch = gr.Number(label="Batch size", value=32, precision=0)
        start = gr.Textbox(label="Start (YYYY-MM-DD)", placeholder="optional")
        end = gr.Textbox(label="End (YYYY-MM-DD)", placeholder="optional")
        b_train = gr.Button("Train")
        j_train = gr.JSON()
        b_train.click(train_api, [s_train, n_seq, n_epochs, n_batch, start, end],
                      j_train, api_name="train")

    with gr.Tab("Test"):
        s_test = gr.Textbox(label="Symbol", value="AAPL")
        b_test = gr.Button("Run Test")
        j_test = gr.JSON()
        b_test.click(test_api, [s_test], j_test, api_name="test")

    with gr.Tab("Predict"):
        s_pred = gr.Textbox(label="Symbol", value="AAPL")
        n_days = gr.Number(label="Days to predict", value=1, precision=0)
        b_pred = gr.Button("Predict")
        j_pred = gr.JSON()
        b_pred.click(predict_api, [s_pred, n_days], j_pred, api_name="predict")

    with gr.Tab("Hello"):
        s_name = gr.Textbox(label="Name", value="world")
        b_hello = gr.Button("Say Hello")
        j_hello = gr.JSON()
        b_hello.click(hello_api, [s_name], j_hello, api_name="hello")

if __name__ == "__main__":
    demo.launch()
inference.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Inference utilities: load trained artifacts and autoregressively predict closes."""
import os, json, pickle
from datetime import datetime, timedelta, timezone
import numpy as np
import yfinance as yf
import torch

from models import StockLSTM

ARTIFACTS_DIR = "artifacts"


def _paths(symbol: str) -> dict:
    """Return artifact file paths for *symbol* (stored under artifacts/<SYMBOL>/)."""
    base = os.path.join(ARTIFACTS_DIR, symbol.upper())
    return {
        "model": os.path.join(base, "model.pt"),
        "scaler": os.path.join(base, "scaler.pkl"),
        "meta": os.path.join(base, "meta.json"),
    }


def _load_artifacts(symbol: str):
    """Load model weights, fitted scaler and training metadata for *symbol*.

    Raises FileNotFoundError when any artifact is missing (i.e. the symbol
    has not been trained yet).
    """
    p = _paths(symbol)
    # Check ALL three artifacts up front; the original only checked
    # model+scaler and then crashed with a raw error opening meta.json.
    if not all(os.path.exists(path) for path in p.values()):
        raise FileNotFoundError(f"Model/scaler not found for {symbol}. Train first.")
    with open(p["meta"], "r") as f:
        meta = json.load(f)
    # pickle is acceptable here only because the scaler was written by our own
    # training run — never unpickle untrusted files.
    with open(p["scaler"], "rb") as f:
        scaler = pickle.load(f)

    # Hyperparameters are hard-coded and must stay in sync with train.py.
    model = StockLSTM(input_dim=1, hidden_dim=64, num_layers=2, dropout=0.2)
    model.load_state_dict(torch.load(p["model"], map_location="cpu"))
    model.eval()
    return model, scaler, meta


def _last_close_series(symbol: str, days: int = 400) -> np.ndarray:
    """Download the last *days* calendar days of adjusted closes as an [N, 1] array.

    Raises ValueError when yfinance returns no rows for *symbol*.
    """
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC now.
    end = datetime.now(timezone.utc).date()
    start = end - timedelta(days=days)
    df = yf.download(symbol, start=start.isoformat(), end=end.isoformat(),
                     progress=False, auto_adjust=True)
    if df.empty:
        raise ValueError(f"No data for {symbol}")
    return df["Close"].values.reshape(-1, 1)


@torch.no_grad()
def predict_next(symbol: str, n_days: int = 1):
    """Autoregressively predict the next *n_days* closing prices for *symbol*.

    Returns a dict with the unscaled predictions plus the training metadata.
    Raises ValueError for n_days < 1 (the original silently returned an
    empty prediction list).
    """
    if n_days < 1:
        raise ValueError("n_days must be >= 1")
    model, scaler, meta = _load_artifacts(symbol)
    seq_len = meta["seq_len"]

    # Fetch generously more history than seq_len so the window is always full.
    closes = _last_close_series(symbol, days=max(400, seq_len * 5))
    scaled = scaler.transform(closes)

    # Seed the rolling window with the latest seq_len observations.
    window = scaled[-seq_len:].reshape(1, seq_len, 1).astype(np.float32)
    window_t = torch.from_numpy(window)

    preds_scaled = []
    for _ in range(n_days):
        yhat = model(window_t).numpy()  # [1, 1] in scaled space
        preds_scaled.append(yhat[0, 0])
        # Roll the window forward, feeding the prediction back in.
        window = np.concatenate([window[:, 1:, :], yhat.reshape(1, 1, 1)], axis=1)
        window_t = torch.from_numpy(window.astype(np.float32))

    preds_scaled = np.array(preds_scaled, dtype=np.float32).reshape(-1, 1)
    preds_unscaled = scaler.inverse_transform(preds_scaled).flatten().tolist()
    return {"symbol": symbol.upper(), "days": n_days, "predictions": preds_unscaled,
            "seq_len": seq_len, "meta": meta}
models.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
import torch.nn as nn


class StockLSTM(nn.Module):
    """Stacked LSTM that maps a [B, T, 1] price window to one next-step value [B, 1]."""

    def __init__(self, input_dim=1, hidden_dim=64, num_layers=2, dropout=0.2):
        super().__init__()
        # Inter-layer dropout only applies between stacked LSTM layers
        # (PyTorch warns if set with a single layer), so zero it out then.
        recurrent_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=recurrent_dropout,
            batch_first=True,
        )
        # Keep the Sequential(Dropout, Linear) layout so previously saved
        # state_dicts (keys head.0.* / head.1.*) remain loadable.
        self.head = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, x):
        """x: [B, T, 1] -> [B, 1], regressing from the final timestep's hidden state."""
        sequence_out, _ = self.lstm(x)       # [B, T, H]
        final_state = sequence_out[:, -1, :]  # [B, H]
        return self.head(final_state)         # [B, 1]
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
1
+ numpy==2.3.5
2
+ pandas==2.3.3
3
+ scikit_learn==1.8.0
4
+ tensorflow==2.20.0
5
+ yfinance==0.2.65
6
+ torch==2.9.1
7
+ gradio
testing.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Evaluation: score the saved model for a symbol on the last 20% of ~5y of closes."""
import os, json, math, pickle
from datetime import datetime, timedelta, timezone
import numpy as np
import yfinance as yf
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch

from models import StockLSTM

ARTIFACTS_DIR = "artifacts"


@torch.no_grad()
def evaluate(symbol: str):
    """Evaluate the stored model for *symbol* against recent market data.

    Returns {"symbol", "rmse", "mae", "n"} with errors in the original price
    scale. Raises FileNotFoundError when the symbol has not been trained and
    ValueError when not enough price data is available.
    """
    base = os.path.join(ARTIFACTS_DIR, symbol.upper())
    model_path = os.path.join(base, "model.pt")
    scaler_path = os.path.join(base, "scaler.pkl")
    meta_path = os.path.join(base, "meta.json")
    # Fail with a clear message instead of an opaque torch/pickle error;
    # mirrors the artifact check done in inference.py.
    if not all(os.path.exists(p) for p in (model_path, scaler_path, meta_path)):
        raise FileNotFoundError(f"Model/scaler not found for {symbol}. Train first.")

    # Hyperparameters are hard-coded and must stay in sync with train.py.
    model = StockLSTM(input_dim=1, hidden_dim=64, num_layers=2, dropout=0.2)
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.eval()
    with open(scaler_path, "rb") as f:
        scaler = pickle.load(f)
    with open(meta_path, "r") as f:
        meta = json.load(f)

    seq_len = meta["seq_len"]

    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC now.
    end = datetime.now(timezone.utc).date()
    start = end - timedelta(days=5 * 365)
    df = yf.download(symbol, start=start.isoformat(), end=end.isoformat(),
                     progress=False, auto_adjust=True)
    data = df[["Close"]].dropna().values
    # Guard: with <= seq_len rows the sequence loop below would yield empty
    # arrays and model(X_t) would fail obscurely.
    if len(data) <= seq_len:
        raise ValueError(f"Not enough data for {symbol} (got {len(data)} rows)")

    scaled = scaler.transform(data)
    split_idx = int(len(scaled) * 0.8)
    # Include the tail of the training range so the first test window is full.
    test_scaled = scaled[split_idx - seq_len:]

    # Build overlapping (window, next value) pairs.
    X, y = [], []
    for i in range(seq_len, len(test_scaled)):
        X.append(test_scaled[i - seq_len:i])
        y.append(test_scaled[i])
    X = np.array(X, dtype=np.float32)
    y = np.array(y, dtype=np.float32)

    X_t = torch.from_numpy(X)  # [N, T, 1]
    pred_scaled = model(X_t).numpy()
    pred = scaler.inverse_transform(pred_scaled)
    y_true = scaler.inverse_transform(y)

    rmse = math.sqrt(mean_squared_error(y_true, pred))
    mae = mean_absolute_error(y_true, pred)
    return {"symbol": symbol.upper(), "rmse": rmse, "mae": mae, "n": len(y_true)}
train.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Training: fit StockLSTM on daily closes and persist model/scaler/meta artifacts."""
import os, json, math, pickle
from datetime import datetime, timedelta, timezone
import numpy as np
import pandas as pd
import yfinance as yf

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim

from models import StockLSTM

ARTIFACTS_DIR = "artifacts"
os.makedirs(ARTIFACTS_DIR, exist_ok=True)


def fetch_data(symbol: str, start: str = None, end: str = None) -> pd.DataFrame:
    """Download adjusted daily closes for *symbol*; defaults to the last ~5 years.

    Raises ValueError when yfinance returns no rows.
    """
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC now.
    if end is None:
        end = datetime.now(timezone.utc).date().isoformat()
    if start is None:
        start = (datetime.now(timezone.utc).date() - timedelta(days=5*365)).isoformat()
    df = yf.download(symbol, start=start, end=end, progress=False, auto_adjust=True)
    if df.empty:
        raise ValueError(f"No data for symbol {symbol}")
    return df[['Close']].dropna()


def make_sequences(values: np.ndarray, seq_len: int):
    """Slide a length-*seq_len* window over *values* -> (X [N, T, 1], y [N, 1])."""
    X, y = [], []
    for i in range(seq_len, len(values)):
        X.append(values[i-seq_len:i])
        y.append(values[i])
    return np.array(X), np.array(y)


def to_tensor_loader(X, y, batch_size=32):
    """Wrap numpy arrays in a float32 TensorDataset.

    Note: despite the name this returns a dataset, not a DataLoader;
    *batch_size* is kept for backward compatibility but is unused — batching
    happens in the DataLoaders built inside train().
    """
    return TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).float())


def train(symbol: str, seq_len: int = 60, epochs: int = 5, batch_size: int = 32,
          start: str = None, end: str = None, lr: float = 1e-3):
    """Train an LSTM on *symbol* closes; save model.pt / scaler.pkl / meta.json.

    Returns {"rmse", "mae", "rows", "symbol"} with errors in price units.
    Raises ValueError when the downloaded series is too short for *seq_len*.
    """
    device = torch.device("cpu")

    # --- data ---
    df = fetch_data(symbol, start, end)
    data = df['Close'].values.reshape(-1, 1)

    scaler = MinMaxScaler((0, 1))
    scaled = scaler.fit_transform(data)

    split_idx = int(len(scaled) * 0.8)
    # Guard: with too few training rows, make_sequences returns empty arrays
    # and random_split/DataLoader crash with an obscure error downstream.
    if split_idx <= seq_len + 1:
        raise ValueError(
            f"Not enough data for {symbol}: {len(scaled)} rows with seq_len={seq_len}"
        )
    train_scaled, test_scaled = scaled[:split_idx], scaled[split_idx:]

    X_train, y_train = make_sequences(train_scaled, seq_len)
    # Prepend the training tail so the first test window is complete.
    X_test_like_train, y_test_like_train = make_sequences(
        np.vstack([train_scaled[-seq_len:], test_scaled]), seq_len
    )

    # Train/val split on the training portion (val is at least one sample).
    full_train_ds = to_tensor_loader(X_train, y_train)
    val_size = max(1, int(0.1 * len(full_train_ds)))
    train_size = len(full_train_ds) - val_size
    train_ds, val_ds = random_split(full_train_ds, [train_size, val_size])

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

    # --- model ---
    model = StockLSTM(input_dim=1, hidden_dim=64, num_layers=2, dropout=0.2).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # --- training ---
    # The original computed per-epoch train/val losses and silently discarded
    # them; record a history so the run can be inspected afterwards.
    history = []
    model.train()
    for ep in range(epochs):
        train_loss = 0.0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * xb.size(0)
        train_loss /= len(train_loader.dataset)

        # quick validation pass
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                val_loss += criterion(model(xb), yb).item() * xb.size(0)
        val_loss /= len(val_loader.dataset)
        model.train()
        history.append({"epoch": ep + 1, "train_loss": train_loss, "val_loss": val_loss})

    # --- evaluation on the held-out tail, in the original price scale ---
    model.eval()
    with torch.no_grad():
        X_t = torch.from_numpy(X_test_like_train).float().to(device)
        preds_scaled = model(X_t).cpu().numpy()  # scaled space
    preds = scaler.inverse_transform(preds_scaled)
    y_true = scaler.inverse_transform(y_test_like_train)

    rmse = math.sqrt(mean_squared_error(y_true, preds))
    mae = mean_absolute_error(y_true, preds)

    # --- save artifacts ---
    base = os.path.join(ARTIFACTS_DIR, symbol.upper())
    os.makedirs(base, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(base, "model.pt"))
    with open(os.path.join(base, "scaler.pkl"), "wb") as f:
        pickle.dump(scaler, f)
    with open(os.path.join(base, "meta.json"), "w") as f:
        json.dump({
            "symbol": symbol.upper(),
            "seq_len": seq_len,
            "epochs": epochs,
            "batch_size": batch_size,
            "train_size": split_idx,
            "timestamps": {
                "start": df.index.min().strftime("%Y-%m-%d"),
                "end": df.index.max().strftime("%Y-%m-%d"),
                "trained_at_utc": datetime.now(timezone.utc).isoformat()
            },
            "metrics": {"rmse": rmse, "mae": mae},
            # New: per-epoch loss curve (previously computed but thrown away).
            "history": history
        }, f, indent=2)

    return {"rmse": rmse, "mae": mae, "rows": len(df), "symbol": symbol.upper()}