MIYASAJID19 committed on
Commit
17b47c7
·
verified ·
1 Parent(s): 7268a7d

Upload 29 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ static/models/class_balancing_comparison.png filter=lfs diff=lfs merge=lfs -text
37
+ static/models/final_results_visualization.png filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,927 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import math
4
+ from typing import List, Dict, Any, Optional
5
+ import warnings
6
+ warnings.filterwarnings('ignore', category=RuntimeWarning)
7
+
8
+ try:
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+ except ImportError:
12
+ pass
13
+
14
+ from flask import Flask, request, jsonify
15
+ from flask_cors import CORS
16
+
17
+ try:
18
+ import joblib
19
+ except Exception:
20
+ joblib = None
21
+ import pickle
22
+
23
+ try:
24
+ import numpy as np
25
+ import pandas as pd
26
+ except Exception as e:
27
+ print(f"Warning: NumPy/Pandas import issue: {e}")
28
+ import sys
29
+ sys.exit(1)
30
+
31
+ try:
32
+ import torch
33
+ import torch.nn as nn
34
+ TORCH_AVAILABLE = True
35
+ except ImportError:
36
+ TORCH_AVAILABLE = False
37
+ print("Warning: PyTorch not available. Deep learning models will not load.")
38
+
39
+ # Import PyTorch model architectures
40
+ if TORCH_AVAILABLE:
41
+ try:
42
+ from models import LSTMModel, GRUModel, FeedForwardNN, BiLSTMModel, CNN1DModel
43
+ except ImportError as e:
44
+ print(f"Warning: Could not import model architectures: {e}")
45
+ TORCH_AVAILABLE = False
46
+
47
+ from datetime import datetime, timedelta
48
+ import requests
49
+ from sklearn.base import BaseEstimator, TransformerMixin
50
+ from sklearn.pipeline import Pipeline
51
+
52
+
53
# API metadata reported by /health and the root route.
API_TITLE = "ELC-V Prediction API"
API_VERSION = "0.3.0"

# Raw OHLCV columns every prediction request must provide, in canonical order.
RAW_FEATURES = ["Open", "High", "Low", "Close", "Volume"]

# Marketstack credentials/endpoint. SECURITY: never print the raw key --
# secrets leak through captured logs. Report only whether it is configured.
MARKETSTACK_API_KEY = os.environ.get("MARKETSTACK_API_KEY")
print(f"Marketstack API key configured: {bool(MARKETSTACK_API_KEY)}")
MARKETSTACK_BASE_URL = "http://api.marketstack.com/v1"
61
+
62
+
63
def load_artifact(path: str):
    """Deserialize a persisted model artifact from *path*.

    ``.pth`` checkpoints go through ``torch.load`` (mapped onto the CPU);
    every other file is attempted with joblib first, falling back to raw
    pickle when joblib is missing or cannot read it.
    """
    if path.endswith('.pth'):
        if not TORCH_AVAILABLE:
            raise ImportError("PyTorch is required to load .pth files")
        # weights_only=False is acceptable here: only locally produced,
        # trusted checkpoint files are ever loaded from the models dir.
        return torch.load(path, map_location=torch.device('cpu'), weights_only=False)

    if joblib is not None:
        try:
            return joblib.load(path)
        except Exception:
            # joblib could not read the file -- fall through to plain pickle.
            pass
    with open(path, "rb") as handle:
        return pickle.load(handle)
79
+
80
+
81
class FeatureEngineer(BaseEstimator, TransformerMixin):
    """Sklearn-compatible feature engineering transformer.

    Expands raw OHLCV rows into the full engineered feature matrix
    (lags, returns, moving averages, volatility, momentum, RSI,
    Bollinger bands) in the exact column order the models were trained on.
    """
    def __init__(self, lookback: int = 7):
        # Number of lagged OHLCV rows generated per raw feature.
        self.lookback = lookback
        # Cached column order; populated on first transform().
        self.feature_names_ = None

    def fit(self, X, y=None):
        # Stateless transformer: nothing to learn from the data.
        return self

    def transform(self, X):
        """Return the engineered feature DataFrame for raw OHLCV input *X*.

        The output column order (`expected_order` below) is a contract with
        the trained models -- do not reorder or rename columns, including
        the odd 'bb_Lower' capitalization.
        """
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X, columns=RAW_FEATURES)
        else:
            X = X.copy()

        for col in RAW_FEATURES:
            if col not in X.columns:
                raise ValueError(f"Missing column: {col}")

        # Lag features: the leading NaNs produced by shift() are filled with
        # the column mean so that short histories still yield finite values.
        for i in range(1, self.lookback + 1):
            X[f'Close_lag_{i}'] = X['Close'].shift(i).fillna(X['Close'].mean())
            X[f'Volume_lag_{i}'] = X['Volume'].shift(i).fillna(X['Volume'].mean())
            X[f'Open_lag_{i}'] = X['Open'].shift(i).fillna(X['Open'].mean())
            X[f'High_lag_{i}'] = X['High'].shift(i).fillna(X['High'].mean())
            X[f'Low_lag_{i}'] = X['Low'].shift(i).fillna(X['Low'].mean())

        # Simple and log returns (first row is NaN; cleaned up below).
        X['returns'] = X['Close'].pct_change()
        X['log_returns'] = np.log(X['Close'] / X['Close'].shift(1))

        # Moving averages; min_periods=1 keeps early rows defined.
        X['ma_7'] = X['Close'].rolling(window=7, min_periods=1).mean()
        X['ma_14'] = X['Close'].rolling(window=14, min_periods=1).mean()
        X['ma_30'] = X['Close'].rolling(window=30, min_periods=1).mean()

        X['volatility_7'] = X['returns'].rolling(window=7, min_periods=1).std()
        X['volatility_14'] = X['returns'].rolling(window=14, min_periods=1).std()

        # Momentum: price change over 7/14 rows; the shift NaNs are filled
        # with the first close so early rows get momentum ~0 relative to it.
        X['momentum_7'] = X['Close'] - X['Close'].shift(7).fillna(X['Close'].iloc[0] if len(X) > 0 else X['Close'].mean())
        X['momentum_14'] = X['Close'] - X['Close'].shift(14).fillna(X['Close'].iloc[0] if len(X) > 0 else X['Close'].mean())

        # 14-period RSI. NOTE(review): when `loss` is 0, rs becomes inf and
        # rsi_14 evaluates to 100, which is the conventional saturated value.
        delta = X['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14, min_periods=1).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14, min_periods=1).mean()
        rs = gain / loss
        X['rsi_14'] = 100 - (100 / (1 + rs))

        # Bollinger bands around the 7-period MA (2 standard deviations).
        X['bb_upper'] = X['ma_7'] + (X['volatility_7'] * 2)
        X['bb_Lower'] = X['ma_7'] - (X['volatility_7'] * 2)
        X['bb_width'] = X['bb_upper'] - X['bb_Lower']

        # NaN cleanup: backward/forward fill, then per-column mean, then 0
        # as a last resort so the matrix is always fully finite-valued.
        X = X.bfill().ffill()
        for col in X.columns:
            if X[col].isna().any():
                X[col] = X[col].fillna(X[col].mean() if X[col].notna().any() else 0)
        X = X.fillna(0)

        # Training-time feature order -- must match the fitted models exactly.
        expected_order = [
            'Close', 'Volume', 'Open', 'High', 'Low',
            'Close_lag_1', 'Volume_lag_1', 'Open_lag_1', 'High_lag_1', 'Low_lag_1',
            'Close_lag_2', 'Volume_lag_2', 'Open_lag_2', 'High_lag_2', 'Low_lag_2',
            'Close_lag_3', 'Volume_lag_3', 'Open_lag_3', 'High_lag_3', 'Low_lag_3',
            'Close_lag_4', 'Volume_lag_4', 'Open_lag_4', 'High_lag_4', 'Low_lag_4',
            'Close_lag_5', 'Volume_lag_5', 'Open_lag_5', 'High_lag_5', 'Low_lag_5',
            'Close_lag_6', 'Volume_lag_6', 'Open_lag_6', 'High_lag_6', 'Low_lag_6',
            'Close_lag_7', 'Volume_lag_7', 'Open_lag_7', 'High_lag_7', 'Low_lag_7',
            'returns', 'log_returns', 'ma_7', 'ma_14', 'ma_30',
            'volatility_7', 'volatility_14', 'momentum_7', 'momentum_14',
            'rsi_14', 'bb_upper', 'bb_Lower', 'bb_width'
        ]

        for col in expected_order:
            if col not in X.columns:
                X[col] = 0

        X = X[expected_order]

        if self.feature_names_ is None:
            self.feature_names_ = list(X.columns)

        X.columns.name = None
        return X

    def get_feature_names_out(self, input_features=None):
        """Return the engineered column names (sklearn API).

        If no transform has run yet, one is performed on random dummy data
        purely to populate the cached name list.
        """
        if self.feature_names_ is None:
            dummy = pd.DataFrame(
                np.random.randn(30, 5),
                columns=RAW_FEATURES
            )
            self.transform(dummy)
        return np.array(self.feature_names_)
170
+
171
+
172
class PyTorchModelWrapper:
    """Adapt a PyTorch module to the sklearn ``predict(X)`` convention.

    The wrapped model is switched to eval mode at construction and queried
    under ``no_grad`` so it is safe to call from request handlers.
    """
    def __init__(self, model):
        self.model = model
        if TORCH_AVAILABLE:
            # Disable dropout / batch-norm updates for inference.
            self.model.eval()

    def predict(self, X):
        """Run a forward pass and return predictions as a numpy array."""
        if not TORCH_AVAILABLE:
            raise ImportError("PyTorch is required for this model")

        features = X.values if isinstance(X, pd.DataFrame) else X
        batch = torch.FloatTensor(features)

        with torch.no_grad():
            raw = self.model(batch)
            if not isinstance(raw, torch.Tensor):
                # Non-tensor output: hand it back unchanged.
                return raw
            predictions = raw.cpu().numpy()
            # Two logits per row -> binary classification: take the argmax.
            if predictions.shape[-1] == 2:
                predictions = predictions.argmax(axis=-1)
            # Single-column 2-D output -> collapse to a 1-D vector.
            if len(predictions.shape) > 1 and predictions.shape[-1] == 1:
                predictions = predictions.flatten()

        return predictions
206
+
207
+
208
# Resolve the models directory relative to this file so artifact loading
# works regardless of the process's current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODELS_DIR = os.path.join(BASE_DIR, "static", "models")
210
+
211
+
212
def create_app() -> Flask:
    """Application factory: build the Flask app, load model artifacts,
    and register all prediction/market-data routes.

    All route handlers are closures over the mutable ``ARTIFACTS`` dict in
    ``app.config``, which ``_load_artifacts`` fills in at startup.
    """
    app = Flask(__name__)
    # CORS is restricted to the local React dev server origins.
    CORS(app, resources={
        r"/*": {
            "origins": ["http://localhost:3000", "http://127.0.0.1:3000"],
            "methods": ["GET", "POST", "OPTIONS"],
            "allow_headers": ["Content-Type"],
        }
    })

    # Registry of every model slot this API knows about; _load_artifacts
    # replaces the Nones it can satisfy from disk.
    app.config["ARTIFACTS"] = {
        "pipeline": None,
        "models": {
            # Tree/linear/boosting models
            "random_forest": None,
            "adaboost": None,
            "extra_trees": None,
            "gradient_boosting": None,
            "xgboost": None,
            "lightgbm": None,
            "catboost": None,
            "svm": None,
            "logistic_regression": None,
            "knn": None,
            "best_ensemble": None,
            "best_individual": None,

            # Deep learning models (can remain unavailable if PyTorch not installed)
            "lstm": None,
            "bilstm": None,
            "gru": None,
            "1d_cnn": None,
            "feed_forward_nn": None,

            # Convenience entries for API defaults
            "ensemble": None,
            "individual": None,
        },
        "summary": None,
        "scaler": None,
        "available_models": [],
    }

    def _artifact_path(name: str) -> str:
        # Join a bare artifact filename onto the models directory.
        return os.path.join(MODELS_DIR, name)

    def _load_artifacts() -> Dict[str, Any]:
        """Populate app.config['ARTIFACTS'] from disk (best-effort).

        Any failure is recorded in artifacts['summary'] rather than raised,
        so the app still starts with whatever could be loaded.
        """
        artifacts = app.config["ARTIFACTS"]
        try:
            engineer = FeatureEngineer(lookback=7)

            # Load the pipeline with scaler. Preferred: a fully saved
            # pipeline; fallback: feature engineer + standalone scaler;
            # last resort: feature engineer alone.
            pipeline_path = _artifact_path("random_forest_pipeline.joblib")
            if os.path.exists(pipeline_path):
                saved_pipeline = load_artifact(pipeline_path)
                artifacts["pipeline"] = Pipeline([
                    ('features', engineer),
                    ('saved_pipeline', saved_pipeline),
                ])
            else:
                scaler_path = _artifact_path("random_forest_scaler.joblib")
                scaler = None
                if os.path.exists(scaler_path):
                    scaler = load_artifact(scaler_path)

                if scaler is not None:
                    artifacts["pipeline"] = Pipeline([
                        ('features', engineer),
                        ('scaler', scaler),
                    ])
                else:
                    artifacts["pipeline"] = Pipeline([
                        ('features', engineer),
                    ])

            # Load individual models (ML first, DL optional)
            model_mappings = {
                # Primary models
                "random_forest": "random_forest.pkl",
                "adaboost": "adaboost.pkl",
                "extra_trees": "extra_trees.pkl",
                "gradient_boosting": "gradient_boosting.pkl",
                "xgboost": "xgboost.pkl",
                "lightgbm": "lightgbm.pkl",
                "catboost": "catboost.pkl",
                "svm": "svm.pkl",
                "logistic_regression": "logistic_regression.pkl",
                "knn": "knn.pkl",
                "best_ensemble": "best_ensemble_package.pkl",  # Fixed filename
                "best_individual": "best_individual_model.pkl",

                # Deep learning models (load only if PyTorch available)
                "lstm": "lstm.pth",
                "bilstm": "bilstm.pth",
                "gru": "gru.pth",
                "1d_cnn": "1d-cnn.pth",
                "feed_forward_nn": "feed-forward_nn.pth",
            }

            loaded_models = []
            for model_name, model_file in model_mappings.items():
                model_path = _artifact_path(model_file)
                if os.path.exists(model_path):
                    try:
                        loaded = load_artifact(model_path)
                        # Skip if it's a dict (training metadata) or doesn't have predict method
                        if isinstance(loaded, dict):
                            print(f"Skipped {model_name}: loaded as dict (metadata)")
                            continue
                        if not hasattr(loaded, 'predict'):
                            print(f"Skipped {model_name}: no predict method")
                            continue
                        # Wrap PyTorch models
                        if model_file.endswith('.pth') and TORCH_AVAILABLE:
                            loaded = PyTorchModelWrapper(loaded)
                        artifacts["models"][model_name] = loaded
                        loaded_models.append(model_name)
                        print(f"Loaded model: {model_name}")
                    except Exception as e:
                        print(f"Failed to load {model_name}: {e}")
                else:
                    print(f"Model file not found: {model_file}")

            artifacts["available_models"] = loaded_models

            # Set defaults: prefer saved best ensemble/individual, then random forest, then first available
            ensemble_default = artifacts["models"].get("best_ensemble") or artifacts["models"].get("random_forest")
            individual_default = artifacts["models"].get("best_individual") or artifacts["models"].get("random_forest")

            if ensemble_default is None and loaded_models:
                ensemble_default = artifacts["models"].get(loaded_models[0])
            if individual_default is None and loaded_models:
                individual_default = artifacts["models"].get(loaded_models[0])

            artifacts["models"]["ensemble"] = ensemble_default
            artifacts["models"]["individual"] = individual_default

            # Optional training-results summary served by /metadata.
            summary_path = _artifact_path("results_summary.pkl")
            if os.path.exists(summary_path):
                artifacts["summary"] = load_artifact(summary_path)
        except Exception as e:
            artifacts["summary"] = {"error": f"Failed to load artifacts: {e}"}

        return artifacts

    def _validate_instance(instance: Dict[str, Any]) -> List[str]:
        # Return the list of required OHLCV keys missing from one instance.
        return [k for k in RAW_FEATURES if k not in instance]

    def _prepare_pipeline_input(instances: List[Dict[str, Any]]) -> pd.DataFrame:
        # Project request payload dicts down to the raw feature columns.
        data = []
        for inst in instances:
            row = {col: inst[col] for col in RAW_FEATURES}
            data.append(row)
        return pd.DataFrame(data)

    @app.route("/health", methods=["GET"])
    def health():
        """Report which artifacts are loaded; 503 when no model is usable."""
        artifacts = app.config["ARTIFACTS"]
        status = {
            "title": API_TITLE,
            "version": API_VERSION,
            "pipeline": artifacts["pipeline"] is not None,
            "available_models": artifacts["available_models"],
            "models": {k: artifacts["models"][k] is not None for k in artifacts["models"]},
            "model_count": len([m for m in artifacts["models"].values() if m is not None]),
        }
        has_models = any([m for m in artifacts["models"].values() if m is not None])
        http_code = 200 if has_models else 503
        return jsonify(status), http_code

    @app.route("/metadata", methods=["GET"])
    def metadata():
        """Serve the training results summary, coercing non-JSON values to str."""
        artifacts = app.config["ARTIFACTS"]
        summary = artifacts.get("summary")
        try:
            def _to_jsonable(obj):
                # Keep values that serialize natively; stringify the rest.
                try:
                    json.dumps(obj)
                    return obj
                except TypeError:
                    return str(obj)

            if isinstance(summary, dict):
                return jsonify({k: _to_jsonable(v) for k, v in summary.items()})
            return jsonify({"summary": _to_jsonable(summary)})
        except Exception:
            return jsonify({"summary": "unavailable"})

    @app.route("/predict", methods=["POST"])
    def predict():
        """Predict from caller-supplied OHLCV instances.

        Accepts either a single instance (top-level OHLCV keys) or a batch
        under 'instances'; only the LAST row's prediction is returned.
        """
        artifacts = app.config["ARTIFACTS"]
        pipeline = artifacts["pipeline"]
        if pipeline is None:
            return jsonify({"error": "Pipeline not loaded"}), 503

        models_loaded = any(artifacts["models"].values())
        pipeline_can_predict = hasattr(pipeline, "predict")
        if not models_loaded and not pipeline_can_predict:
            return jsonify({"error": "Models not loaded"}), 503

        payload = request.get_json(silent=True) or {}
        model_choice = (payload.get("model") or "random_forest").lower()
        return_all = bool(payload.get("all_models"))

        # Try to get the requested model
        model = artifacts["models"].get(model_choice)

        # Fallback to ensemble, then individual, then first available
        if model is None:
            if artifacts["models"].get("ensemble"):
                model = artifacts["models"]["ensemble"]
            elif artifacts["models"].get("individual"):
                model = artifacts["models"]["individual"]
            elif artifacts["available_models"]:
                model = artifacts["models"][artifacts["available_models"][0]]

        if model is None:
            return jsonify({
                "error": f"Requested model '{model_choice}' not available",
                "available_models": artifacts["available_models"]
            }), 400

        instances = []
        if isinstance(payload.get("instances"), list):
            instances = payload["instances"]
        elif all(k in payload for k in RAW_FEATURES):
            instances = [payload]

        if not instances:
            return jsonify({
                "error": "No instances provided",
                "expected_format": {
                    "single": {c: "<number>" for c in RAW_FEATURES},
                    "batch": {"instances": [{c: "<number>" for c in RAW_FEATURES}]},
                },
                "input_columns": RAW_FEATURES,
                "available_models": artifacts["available_models"],
                "note": "For best results, provide at least 7 rows of history for lagged features"
            }), 400

        for i, inst in enumerate(instances):
            missing = _validate_instance(inst)
            if missing:
                return jsonify({
                    "error": f"Instance {i} missing features: {missing}",
                    "input_columns": RAW_FEATURES
                }), 400

        try:
            X_input = _prepare_pipeline_input(instances)
            pipeline_can_transform = hasattr(pipeline, "transform")

            # Prepare transformed features once
            if pipeline_can_transform:
                X_transformed = pipeline.transform(X_input)
                # NOTE(review): `[-1:, :]` assumes X_transformed is a numpy
                # array; a DataFrame here would raise -- confirm the pipeline
                # output type for the feature-engineer-only configuration.
                X_to_predict = X_transformed[-1:, :] if len(X_transformed) > 1 else X_transformed
            else:
                # NOTE(review): X_input is a DataFrame, so `[-1:, :]` would
                # raise on this branch; it only runs when the pipeline lacks
                # transform(), which the current Pipeline always has.
                X_to_predict = X_input[-1:, :]

            # Main prediction using requested/fallback model
            if pipeline_can_predict:
                y_all = pipeline.predict(X_input)
                y_pred = y_all[-1:] if len(y_all) > 1 else y_all
            elif model is not None:
                y_pred = model.predict(X_to_predict)
            else:
                return jsonify({"error": "Pipeline cannot process data"}), 503

            # Optional per-model predictions
            predictions_by_model = None
            if return_all:
                predictions_by_model = {}
                for m_name in artifacts["available_models"]:
                    m = artifacts["models"].get(m_name)
                    if m is None:
                        continue
                    try:
                        y_m = m.predict(X_to_predict)
                        pred_val = float(y_m[0].item() if hasattr(y_m[0], "item") else y_m[0])
                        predictions_by_model[m_name] = pred_val
                    except Exception as e:
                        # Per-model failures are reported inline, not fatal.
                        predictions_by_model[m_name] = {"error": str(e)}

            response = {
                "model": model_choice,
                "input_columns": RAW_FEATURES,
                "predictions": [float(v.item() if hasattr(v, "item") else v) for v in y_pred],
                "count": len(y_pred),
                "available_models": artifacts["available_models"],
                "note": f"Processed {len(instances)} input row(s), predicted on last row"
            }

            if predictions_by_model is not None:
                response["predictions_by_model"] = predictions_by_model

            return jsonify(response)
        except Exception as e:
            return jsonify({"error": f"Pipeline failed: {str(e)}"}), 400

    @app.route("/predict/symbol", methods=["POST"])
    def predict_symbol():
        """Fetch Marketstack history for a symbol and predict on the last row."""
        artifacts = app.config["ARTIFACTS"]
        pipeline = artifacts["pipeline"]
        if pipeline is None:
            return jsonify({"error": "Pipeline not loaded"}), 503

        pipeline_can_transform = hasattr(pipeline, "transform")
        pipeline_can_predict = hasattr(pipeline, "predict")

        payload = request.get_json(silent=True) or {}
        symbol = (payload.get("symbol") or "").upper()
        days = int(payload.get("days", 30))
        model_choice = (payload.get("model") or "random_forest").lower()

        # Same fallback chain as /predict: requested -> ensemble ->
        # individual -> first available.
        model = artifacts["models"].get(model_choice)
        if model is None:
            if artifacts["models"].get("ensemble"):
                model = artifacts["models"]["ensemble"]
            elif artifacts["models"].get("individual"):
                model = artifacts["models"]["individual"]
            elif artifacts["available_models"]:
                model = artifacts["models"][artifacts["available_models"][0]]

        if not symbol:
            return jsonify({"error": "Symbol required"}), 400

        df = fetch_market_data(symbol, days)
        if df is None or len(df) == 0:
            return jsonify({"error": f"Could not fetch market data for symbol: {symbol}", "symbol": symbol}), 404

        try:
            if pipeline_can_predict:
                y_all = pipeline.predict(df)
                y_pred = y_all[-1:] if len(y_all) > 1 else y_all
            elif pipeline_can_transform and model is not None:
                # Create feature engineer to transform raw OHLCV data
                engineer = FeatureEngineer(lookback=7)
                print(f"[DEBUG] Input df shape: {df.shape}")
                X_engineered = engineer.fit_transform(df)
                print(f"[DEBUG] Engineered shape: {X_engineered.shape}, type: {type(X_engineered)}")

                # Get the last row for prediction
                if isinstance(X_engineered, pd.DataFrame):
                    X_last_row = X_engineered.iloc[-1:].values
                else:
                    # Ensure it's a numpy array and 2D
                    X_engineered = np.array(X_engineered)
                    if X_engineered.ndim == 1:
                        X_last_row = X_engineered.reshape(1, -1)
                    else:
                        X_last_row = X_engineered[-1:]

                # Apply scaler if available
                if hasattr(pipeline, "named_steps") and pipeline.named_steps.get("scaler"):
                    X_to_predict = pipeline.named_steps["scaler"].transform(X_last_row)
                else:
                    X_to_predict = X_last_row

                y_pred = model.predict(X_to_predict)
            else:
                return jsonify({"error": "Pipeline cannot process data"}), 503
        except Exception as e:
            import traceback
            return jsonify({
                "error": f"Prediction failed: {str(e)}",
                "traceback": traceback.format_exc()
            }), 500

        # Collapse the prediction to a plain Python float for JSON output.
        pred_scalar = y_pred[-1] if hasattr(y_pred, "__len__") else y_pred
        try:
            pred_scalar = float(getattr(pred_scalar, "item", lambda: pred_scalar)())
        except Exception:
            pred_scalar = float(pred_scalar)

        # Best-effort confidence from predict_proba (pipeline first, then model).
        proba_val = None
        if hasattr(pipeline, "predict_proba"):
            try:
                p_all = pipeline.predict_proba(df)
                p_last = p_all[-1] if len(p_all) > 1 else p_all[0]
                proba_val = float(max(p_last))
            except Exception:
                pass
        elif model is not None and hasattr(model, "predict_proba"):
            try:
                X_transformed = pipeline.transform(df) if pipeline_can_transform else df
                X_to_predict = X_transformed[-1:, :] if len(X_transformed) > 1 else X_transformed
                p = model.predict_proba(X_to_predict)
                proba_val = float(max(p[0]))
            except Exception:
                pass

        # Fallback confidence when no probability is available.
        if proba_val is None or (isinstance(proba_val, float) and math.isnan(proba_val)):
            proba_val = 1.0 if pred_scalar in (0.0, 1.0) else 0.5

        latest = df.iloc[-1].to_dict()
        return jsonify({
            "symbol": symbol,
            "data_points": len(df),
            "latest_data": latest,
            "predictions": [float(v.item() if hasattr(v, "item") else v) for v in y_pred],
            "count": len(y_pred),
            "predicted_price": pred_scalar,
            "confidence": proba_val,
            "model": model_choice,
            "available_models": artifacts["available_models"],
            "note": "Prediction based on Marketstack historical data"
        })

    @app.route("/predict/consensus", methods=["POST"])
    def predict_consensus():
        """Get consensus (mode) prediction from all models with individual model details for a symbol"""
        artifacts = app.config["ARTIFACTS"]
        pipeline = artifacts["pipeline"]
        if pipeline is None:
            return jsonify({"error": "Pipeline not loaded"}), 503

        payload = request.get_json(silent=True) or {}
        symbol = (payload.get("symbol") or "").upper()
        days = int(payload.get("days", 30))

        if not symbol:
            return jsonify({"error": "Symbol required"}), 400

        # Fetch market data for the symbol
        df = fetch_market_data(symbol, days)
        if df is None or len(df) == 0:
            return jsonify({"error": f"Could not fetch market data for symbol: {symbol}", "symbol": symbol}), 404

        try:
            # Create feature engineer to transform raw OHLCV data
            engineer = FeatureEngineer(lookback=7)

            # Transform raw market data to engineered features
            X_engineered = engineer.fit_transform(df)
            print(f"Engineered features shape (before conversion): {X_engineered.shape}")

            # Get the last row for prediction
            if isinstance(X_engineered, pd.DataFrame):
                X_last_row = X_engineered.iloc[-1:].values
            else:
                # Ensure it's a numpy array and 2D
                X_engineered = np.array(X_engineered)
                if X_engineered.ndim == 1:
                    X_last_row = X_engineered.reshape(1, -1)
                else:
                    X_last_row = X_engineered[-1:]

            print(f"Last row shape for prediction: {X_last_row.shape}")

            # Then apply scaler from pipeline if available
            pipeline_can_transform = hasattr(pipeline, "transform")
            if pipeline_can_transform:
                # Use the scaler/preprocessing from the pipeline
                try:
                    # Get the scaler step from pipeline
                    scaler = None
                    if hasattr(pipeline, "named_steps"):
                        scaler = pipeline.named_steps.get("scaler")

                    if scaler is not None:
                        X_to_predict = scaler.transform(X_last_row)
                    else:
                        X_to_predict = X_last_row
                except Exception as e:
                    print(f"Scaler error: {e}")
                    X_to_predict = X_last_row
            else:
                X_to_predict = X_last_row

            # Get predictions from all available models with confidence
            predictions_by_model = {}
            valid_predictions = []

            if not artifacts["available_models"]:
                return jsonify({
                    "error": "No models available for prediction",
                    "debug": "available_models is empty"
                }), 503

            for model_name in artifacts["available_models"]:
                model = artifacts["models"].get(model_name)
                if model is None:
                    predictions_by_model[model_name] = {"error": "Model not found"}
                    continue

                try:
                    y_pred = model.predict(X_to_predict)
                    pred_value = float(y_pred[0].item() if hasattr(y_pred[0], "item") else y_pred[0])
                    valid_predictions.append(pred_value)

                    # Try to get confidence/probability
                    confidence = None
                    if hasattr(model, "predict_proba"):
                        try:
                            proba = model.predict_proba(X_to_predict)
                            confidence = float(np.max(proba[0]))
                        except Exception as conf_err:
                            pass

                    predictions_by_model[model_name] = {
                        "prediction": pred_value,
                        "confidence": confidence
                    }
                except Exception as e:
                    predictions_by_model[model_name] = {"error": str(e)}
                    print(f"Error predicting with {model_name}: {e}")

            if not valid_predictions:
                return jsonify({
                    "error": "No models could make predictions",
                    "available_models": artifacts["available_models"],
                    "predictions_by_model": predictions_by_model,
                    "data_shape": str(X_to_predict.shape) if hasattr(X_to_predict, "shape") else "unknown"
                }), 503

            # Calculate mode (most common prediction)
            from collections import Counter
            # Round to nearest integer for mode calculation (for classification)
            rounded_preds = [round(p) for p in valid_predictions]
            pred_counts = Counter(rounded_preds)
            mode_prediction = pred_counts.most_common(1)[0][0]

            # Calculate confidence: how many models agree with the mode
            agreeing_models = sum(1 for p in rounded_preds if p == mode_prediction)
            confidence_score = agreeing_models / len(valid_predictions)

            # Map prediction to human-readable label if 0/1 classification
            pred_label = "HIGH" if mode_prediction == 1 else "LOW"

            return jsonify({
                "symbol": symbol,
                "mode_prediction": mode_prediction,
                "prediction_label": pred_label,
                "confidence": round(confidence_score, 2),
                "agreeing_models": agreeing_models,
                "total_models": len(valid_predictions),
                "predictions_by_model": predictions_by_model,
                "note": "Consensus prediction based on Marketstack historical data"
            })

        except Exception as e:
            import traceback
            return jsonify({
                "error": f"Consensus prediction failed: {str(e)}",
                "traceback": traceback.format_exc(),
                "symbol": symbol if 'symbol' in locals() else "unknown",
                "data_fetched": len(df) if 'df' in locals() and df is not None else 0
            }), 400

    @app.route("/predict/ensemble", methods=["POST"])
    def predict_ensemble():
        """Make predictions using all available models and return ensemble results"""
        artifacts = app.config["ARTIFACTS"]
        pipeline = artifacts["pipeline"]
        if pipeline is None:
            return jsonify({"error": "Pipeline not loaded"}), 503

        payload = request.get_json(silent=True) or {}

        instances = []
        if isinstance(payload.get("instances"), list):
            instances = payload["instances"]
        elif all(k in payload for k in RAW_FEATURES):
            instances = [payload]

        if not instances:
            return jsonify({
                "error": "No instances provided",
                "expected_format": {
                    "single": {c: "<number>" for c in RAW_FEATURES},
                    "batch": {"instances": [{c: "<number>" for c in RAW_FEATURES}]},
                },
                "input_columns": RAW_FEATURES,
                "available_models": artifacts["available_models"],
            }), 400

        for i, inst in enumerate(instances):
            missing = _validate_instance(inst)
            if missing:
                return jsonify({
                    "error": f"Instance {i} missing features: {missing}",
                    "input_columns": RAW_FEATURES
                }), 400

        try:
            X_input = _prepare_pipeline_input(instances)
            pipeline_can_transform = hasattr(pipeline, "transform")

            if pipeline_can_transform:
                X_transformed = pipeline.transform(X_input)
                X_to_predict = X_transformed[-1:, :] if len(X_transformed) > 1 else X_transformed
            else:
                # NOTE(review): same DataFrame `[-1:, :]` concern as /predict.
                X_to_predict = X_input[-1:, :]

            # Get predictions from all available models
            predictions_by_model = {}
            for model_name in artifacts["available_models"]:
                model = artifacts["models"].get(model_name)
                if model is not None:
                    try:
                        y_pred = model.predict(X_to_predict)
                        pred_value = float(y_pred[0].item() if hasattr(y_pred[0], "item") else y_pred[0])
                        predictions_by_model[model_name] = pred_value
                    except Exception as e:
                        predictions_by_model[model_name] = {"error": str(e)}

            if not predictions_by_model:
                return jsonify({"error": "No models available for prediction"}), 503

            # Calculate ensemble average (exclude errors)
            valid_predictions = [v for v in predictions_by_model.values() if isinstance(v, (int, float))]
            ensemble_prediction = float(np.mean(valid_predictions)) if valid_predictions else None

            return jsonify({
                "model": "ensemble",
                "input_columns": RAW_FEATURES,
                "ensemble_prediction": ensemble_prediction,
                "predictions_by_model": predictions_by_model,
                "model_count": len(artifacts["available_models"]),
                "successful_models": len([v for v in predictions_by_model.values() if isinstance(v, (int, float))]),
                "count": 1,
                "note": f"Ensemble prediction based on {len([v for v in predictions_by_model.values() if isinstance(v, (int, float))])} models"
            })

        except Exception as e:
            return jsonify({"error": f"Ensemble prediction failed: {str(e)}"}), 400

    @app.route("/market/data", methods=["GET"])
    def market_data():
        """Proxy raw Marketstack OHLCV history for a symbol (no prediction)."""
        symbol = (request.args.get("symbol") or "").upper()
        days = int(request.args.get("days", 30))

        if not symbol:
            return jsonify({"error": "Symbol required"}), 400

        df = fetch_market_data(symbol, days)
        if df is None or len(df) == 0:
            return jsonify({"error": f"Could not fetch market data for symbol: {symbol}", "symbol": symbol}), 404

        data = df.to_dict(orient="records")
        return jsonify({
            "symbol": symbol,
            "days": days,
            "data_points": len(data),
            "data": data,
        })

    def _startup():
        # Best-effort artifact loading; the app still serves /health on failure.
        try:
            _load_artifacts()
        except Exception as e:
            print(f"Warning: Artifact loading failed: {e}")

    # Load artifacts eagerly at factory time, inside an app context.
    with app.app_context():
        _startup()

    return app
869
+
870
+
871
def fetch_market_data(symbol: str, days: int = 30) -> Optional[pd.DataFrame]:
    """Fetch up to *days* end-of-day OHLCV rows for *symbol* from Marketstack.

    Returns a DataFrame containing exactly the RAW_FEATURES columns in
    ascending date order, or None on any API/network/parse failure
    (errors are printed, never raised to the caller).

    NOTE(review): the base URL is plain http -- presumably because the free
    Marketstack tier does not serve https; confirm before switching.
    """
    try:
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        url = f"{MARKETSTACK_BASE_URL}/eod"
        # NOTE(review): `limit` counts returned rows while the date window is
        # calendar days, so fewer than `days` rows may come back (weekends).
        params = {
            "access_key": MARKETSTACK_API_KEY,
            "symbols": symbol,
            "date_from": start_date.strftime("%Y-%m-%d"),
            "date_to": end_date.strftime("%Y-%m-%d"),
            "limit": days,
        }

        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            print(f"API Error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        if "data" not in data or not data["data"]:
            print(f"No data returned for symbol: {symbol}")
            return None

        df = pd.DataFrame(data["data"])
        # Normalize API field names to the capitalized names the models expect.
        df = df.rename(columns={
            "open": "Open",
            "high": "High",
            "low": "Low",
            "close": "Close",
            "volume": "Volume",
        })

        if "date" in df.columns:
            df["date"] = pd.to_datetime(df["date"])
            # Oldest row first: the lag/rolling features assume this order.
            df = df.sort_values("date")

        df = df[RAW_FEATURES]
        return df
    except Exception as exc:
        # Deliberate catch-all: callers treat None as "data unavailable".
        print(f"Error fetching market data: {exc}")
        return None
913
+
914
+
915
# Module-level app instance: WSGI servers (gunicorn, HF Spaces) import this.
app = create_app()


@app.route("/", methods=["GET"])
def root():
    """Liveness endpoint: report service identity and version."""
    return jsonify({
        "status": "ok",
        "service": API_TITLE,
        "version": API_VERSION
    }), 200


if __name__ == "__main__":
    # Local development entry point only; production imports `app` directly,
    # so this guard never runs under a WSGI server.
    port = int(os.environ.get("PORT", "5000"))
    app.run(host="0.0.0.0", port=port)
927
+
models.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PyTorch model architectures for stock prediction.
3
+ These classes must match the architectures used during training.
4
+ """
5
+ import torch
6
+ import torch.nn as nn
7
+
8
+
9
class LSTMModel(nn.Module):
    """LSTM classifier over a flat feature vector.

    The input is treated as a length-1 sequence, encoded by a stacked LSTM,
    and projected through a small MLP head to 2 logits.
    """

    def __init__(self, input_size, hidden_size=128, num_layers=2, dropout=0.2):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # PyTorch only applies inter-layer dropout when num_layers > 1.
        rnn_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, dropout=rnn_dropout,
        )
        self.fc1 = nn.Linear(hidden_size, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        # (batch, features) -> (batch, seq_len=1, features)
        seq = x.unsqueeze(1)
        state_shape = (self.num_layers, seq.size(0), self.hidden_size)
        h0 = torch.zeros(*state_shape, device=seq.device)
        c0 = torch.zeros(*state_shape, device=seq.device)
        encoded, _ = self.lstm(seq, (h0, c0))
        # Classify from the final timestep's hidden representation.
        hidden = self.dropout(self.relu(self.fc1(encoded[:, -1, :])))
        return self.fc2(hidden)
32
+
33
+
34
class GRUModel(nn.Module):
    """GRU classifier over a flat feature vector.

    Mirrors ``LSTMModel`` but uses a GRU encoder (single hidden state, no
    cell state); outputs 2 logits.
    """

    def __init__(self, input_size, hidden_size=128, num_layers=2, dropout=0.2):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Inter-layer dropout is meaningful only with stacked layers.
        rnn_dropout = dropout if num_layers > 1 else 0
        self.gru = nn.GRU(
            input_size, hidden_size, num_layers,
            batch_first=True, dropout=rnn_dropout,
        )
        self.fc1 = nn.Linear(hidden_size, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        # (batch, features) -> (batch, seq_len=1, features)
        seq = x.unsqueeze(1)
        h0 = torch.zeros(self.num_layers, seq.size(0), self.hidden_size, device=seq.device)
        encoded, _ = self.gru(seq, h0)
        # Head runs on the last timestep's output.
        hidden = self.dropout(self.relu(self.fc1(encoded[:, -1, :])))
        return self.fc2(hidden)
56
+
57
+
58
class FeedForwardNN(nn.Module):
    """Fully-connected classifier producing 2 logits.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout; a final
    Linear maps to the 2 output classes.

    Args:
        input_size: Number of input features.
        hidden_sizes: Widths of the hidden layers, applied in order.
            Defaults to (256, 128, 64). Accepts any sequence of ints.
        dropout: Dropout probability used after every hidden block.
    """

    # Fix: the original used a mutable list as the default for `hidden_sizes`;
    # an immutable tuple avoids shared-state surprises if a caller mutates it.
    def __init__(self, input_size, hidden_sizes=(256, 128, 64), dropout=0.3):
        super(FeedForwardNN, self).__init__()
        layers = []
        prev_size = input_size
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            prev_size = hidden_size
        # Output head: logits for the 2 classes.
        layers.append(nn.Linear(prev_size, 2))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run the stack on a (batch, input_size) tensor; returns (batch, 2)."""
        return self.network(x)
75
+
76
+
77
class BiLSTMModel(nn.Module):
    """Bidirectional LSTM classifier over a flat feature vector.

    The bidirectional encoder doubles the hidden width (forward + backward
    directions), which a three-layer MLP head reduces to 2 logits.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
        super(BiLSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Dropout between stacked layers only applies when num_layers > 1.
        rnn_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, dropout=rnn_dropout, bidirectional=True,
        )
        # Head input is hidden_size * 2: forward and backward outputs concatenated.
        self.fc1 = nn.Linear(hidden_size * 2, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        # (batch, features) -> (batch, seq_len=1, features)
        seq = x.unsqueeze(1)
        # Initial states need num_layers * 2 slots (one per direction per layer).
        state_shape = (self.num_layers * 2, seq.size(0), self.hidden_size)
        h0 = torch.zeros(*state_shape, device=seq.device)
        c0 = torch.zeros(*state_shape, device=seq.device)
        encoded, _ = self.lstm(seq, (h0, c0))
        hidden = self.dropout(self.relu(self.fc1(encoded[:, -1, :])))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)
105
+
106
+
107
class CNN1DModel(nn.Module):
    """1-D CNN classifier treating the feature vector as a 1-channel signal.

    Two conv/BN/ReLU/pool stages halve the feature axis twice, so the
    flattened size entering ``fc1`` is num_filters * 2 * (input_size // 4).
    """

    def __init__(self, input_size, num_filters=64, dropout=0.3):
        super(CNN1DModel, self).__init__()
        self.conv1 = nn.Conv1d(1, num_filters, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(num_filters, num_filters * 2, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.batch_norm1 = nn.BatchNorm1d(num_filters)
        self.batch_norm2 = nn.BatchNorm1d(num_filters * 2)
        self.flatten = nn.Flatten()
        # After two MaxPool1d(2) stages the length is input_size // 4
        # (floor(floor(n/2)/2) == floor(n/4)).
        self.fc1 = nn.Linear(num_filters * 2 * (input_size // 4), 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        # (batch, features) -> (batch, channels=1, features)
        signal = x.unsqueeze(1)

        # Stage 1: conv -> BN -> ReLU -> pool (length halved).
        signal = self.pool(self.relu(self.batch_norm1(self.conv1(signal))))
        # Stage 2: same pattern with doubled channel count.
        signal = self.pool(self.relu(self.batch_norm2(self.conv2(signal))))

        # Dense head on the flattened activations.
        flat = self.flatten(signal)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)
procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app:app --bind 0.0.0.0:$PORT --workers 1 --timeout 120 --preload
requirements.txt ADDED
Binary file (198 Bytes). View file
 
static/models/1d-cnn.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2676e9bba6d82c4f5094e719b97994b2a7c31f1bfb0e3a03a87c8e39d38c2d90
3
+ size 967509
static/models/README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AAPL Stock Prediction Models
2
+
3
+ ## Training Summary
4
+ - **Date**: 2025-12-29 03:26:07
5
+ - **Dataset**: AAPL_dataset.csv
6
+ - **Total Samples**: 2,614
7
+ - **Features**: 53
8
+ - **Train/Val/Test Split**: 1830/391/393
9
+
10
+ ## Best Models
11
+
12
+ ### Best Individual Model: SVM
13
+ - Validation F1-Score: 0.5696
14
+ - Test F1-Score: 0.6288
15
+ - Test Accuracy: 0.5344
16
+
17
+ ### Best Ensemble: Simple Voting
18
+ - Validation F1-Score: 0.5545
19
+ - Test F1-Score: 0.5330
20
+ - Test Accuracy: 0.4962
21
+
22
+ ## Top 5 Models
23
+ 1. SVM - F1: 0.5696
24
+ 2. Extra Trees - F1: 0.5509
25
+ 3. CatBoost - F1: 0.5445
26
+ 4. GRU - F1: 0.5306
27
+ 5. KNN - F1: 0.5175
28
+
29
+ ## Saved Files
30
+
31
+ ### Models
32
+ - Individual model files: `*.pkl` (sklearn) or `*.pth` (PyTorch)
33
+ - Best ensemble package: `best_ensemble_package.pkl`
34
+
35
+ ### Data & Configuration
36
+ - Feature scaler: `feature_scaler.pkl`
37
+ - Results summary: `results_summary.pkl`
38
+ - Configuration: `config_metadata.json`
39
+ - Results CSV: `model_results.csv`
40
+
41
+ ### Visualizations
42
+ - Model comparison: `model_comparison.png`
43
+ - Final results: `final_results_visualization.png`
44
+
45
+ ## Usage
46
+
47
+ ```python
48
+ import joblib
49
+ import pickle
50
+
51
+ # Load scaler
52
+ scaler = joblib.load('static/models/feature_scaler.pkl')
53
+
54
+ # Load best individual model
55
+ best_model = joblib.load('static/models/svm.pkl')
56
+
57
+ # Load ensemble package
58
+ with open('static/models/best_ensemble_package.pkl', 'rb') as f:
59
+ ensemble = pickle.load(f)
60
+ ```
61
+
62
+ ## Total Models Trained: 15
static/models/adaboost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ad9bba718f661773bf510eb1ac59795aa1347f4cfa8d44e95d5ee61c670cd2
3
+ size 129444
static/models/best_ensemble_package.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d08537839fcc509c07c67518aa44780bbc6c2d6c26a65fb75f9e6273267665
3
+ size 4444669
static/models/bilstm.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fe0dabaf7d923721eb5cf815fb4e0fb3061d05660f102d857f9244bf9fd41a6
3
+ size 752261
static/models/catboost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a70b5af14b166df9b021a009522c0be4dbee5d6fe2826af77a70b9510d3b25fd
3
+ size 77128
static/models/class_balancing_comparison.png ADDED

Git LFS Details

  • SHA256: 7f5f82b7548d9b81913b37765abc426e25b26d8e554631aad79f4a9d6228ad33
  • Pointer size: 131 Bytes
  • Size of remote file: 149 kB
static/models/config_metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "random_state": 42,
3
+ "batch_size": 64,
4
+ "device": "cpu",
5
+ "pytorch_version": "2.9.0+cpu",
6
+ "training_date": "2025-12-29 03:26:07",
7
+ "data_path": "AAPL_dataset.csv",
8
+ "lookback_days": 7
9
+ }
static/models/extra_trees.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57fd812796dec9b3de9dfc62f58ef85f77c9cefa4e119e61b13960700e7dfdc3
3
+ size 2075209
static/models/feature_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8841eafc764259d4e920358b6058a04a5011689840e2508f9efdf5b6b9d6d20
3
+ size 2767
static/models/feed-forward_nn.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38cbecc0bc99196484f3aa8bdf952b6680f4754e0c2a7886f23b38645aa829eb
3
+ size 242438
static/models/final_balance_comparison.png ADDED
static/models/final_results_visualization.png ADDED

Git LFS Details

  • SHA256: 5890c3ca10fb3ee9ad49dd6c9780e4b4eb8a093daaf4485920423dd2227b0ac7
  • Pointer size: 131 Bytes
  • Size of remote file: 152 kB
static/models/gradient_boosting.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3c968921605ce6858b57a233247496fb6a97a84888013d24b43615134038c1b
3
+ size 419831
static/models/gru.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0865ead03acf7d04444d22df0eaed2c27b4de7312c037a6d59d0f9e16830ff5
3
+ size 718647
static/models/knn.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ef7c8a6717b6f3fc903e5854d0ad8ee24f4a53c80e1cb526de23d65428a52c8
3
+ size 791364
static/models/lightgbm.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3054d00141b8d840c98aa33ab76486c72e131283d3d7bd4bf05cb4d865ad7a34
3
+ size 1053396
static/models/logistic_regression.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1850805b4faff53da83d7942a7fce2e8bc81ceb78822b4b84188c94071f839cc
3
+ size 1323
static/models/lstm.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ac90a779c073d8db6733766bb1c3e684949627532422d6c8f60264d220f1f00
3
+ size 944329
static/models/model_comparison.png ADDED
static/models/model_results.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Train Accuracy,Val Accuracy,Val Precision,Val Recall,Val F1-Score,Val AUC
2
+ SVM,0.521311475409836,0.4859335038363171,0.4907749077490775,0.6785714285714286,0.569593147751606,0.5096284667713239
3
+ Extra Trees,0.9109289617486339,0.5370843989769821,0.5362318840579711,0.5663265306122449,0.5508684863523573,0.552668759811617
4
+ CatBoost,0.8765027322404372,0.5549872122762148,0.5591397849462365,0.5306122448979592,0.5445026178010471,0.5470434327577185
5
+ GRU,0.5448087431693989,0.5294117647058824,0.5306122448979592,0.5306122448979592,0.5306122448979592,0.5184720041862899
6
+ KNN,1.0,0.5421994884910486,0.5485714285714286,0.4897959183673469,0.5175202156334232,0.5459445316588174
7
+ LSTM,0.5497267759562842,0.5140664961636828,0.5153061224489796,0.5153061224489796,0.5153061224489796,0.527498691784406
8
+ AdaBoost,0.587431693989071,0.4936061381074169,0.49528301886792453,0.5357142857142857,0.5147058823529411,0.4947278911564625
9
+ LightGBM,0.9322404371584699,0.5217391304347826,0.5243243243243243,0.49489795918367346,0.5091863517060368,0.5209314495028782
10
+ Gradient Boosting,0.9808743169398907,0.5345268542199488,0.5402298850574713,0.47959183673469385,0.5081081081081081,0.5361329147043433
11
+ 1D-CNN,0.5617486338797815,0.5294117647058824,0.5337078651685393,0.4846938775510204,0.5080213903743316,0.5179225536368394
12
+ Logistic Regression,0.546448087431694,0.47058823529411764,0.4751131221719457,0.5357142857142857,0.5035971223021583,0.4817634746206175
13
+ Random Forest,1.0,0.5115089514066496,0.5135135135135135,0.4846938775510204,0.49868766404199477,0.5381998953427525
14
+ BiLSTM,0.5502732240437158,0.5140664961636828,0.5172413793103449,0.45918367346938777,0.4864864864864865,0.5289638932496076
15
+ Feed-Forward NN,0.5819672131147541,0.5038363171355499,0.5055555555555555,0.4642857142857143,0.48404255319148937,0.5037414965986395
16
+ XGBoost,0.6710382513661202,0.4961636828644501,0.4971751412429379,0.4489795918367347,0.4718498659517426,0.5119178440607012
static/models/random_forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6da8873838ce35c832e45d9957e9b8ff6fd0226255980fca14634f329325642
3
+ size 3807849
static/models/results_summary.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a478fad3f72f73d454e60f281b70f0bdec6ee491970502ac7ee9d57ec62f45cd
3
+ size 12836673
static/models/svm.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8869b1dcd921170b5907fd9ef7629d7bfb764e89523fa8e7b84d0bd102821c06
3
+ size 791083
static/models/xgboost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca550283c6edeaad2fc5ba673d3f155b09888316d5877d95a42c1d04238575b9
3
+ size 116943