danielthatu12 committed on
Commit
a6ba8a1
·
verified ·
1 Parent(s): 148584d

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +552 -564
  2. model.py +22 -58
  3. requirements.txt +1 -0
app.py CHANGED
@@ -1,564 +1,552 @@
1
- """
2
- app.py – StockBuddy Flask API
3
- =================================
4
- LIGHTWEIGHT CHANGES vs original:
5
- [OPT-A] Removed the startup TF validation model (was creating & running a test
6
- LSTM on every cold start – wastes ~10 s and ~100 MB RAM on free tier).
7
- Replaced with a simple tf.constant() smoke-test.
8
- [OPT-B] PORT is now read from the PORT environment variable so the server
9
- works on Render (sets $PORT automatically) and Hugging Face Spaces
10
- (expects port 7860) without code changes.
11
- [OPT-C] time_step updated to 30 throughout (was 45) to match the lighter model.
12
- All REST API routes are unchanged from the original.
13
- """
14
-
15
- from flask import Flask, request, jsonify
16
- from flask_cors import CORS
17
- import numpy as np
18
- import pandas as pd
19
- import os
20
- import threading
21
- import time
22
- from datetime import datetime, timedelta
23
- import json
24
- import model as stock_model
25
- import sys
26
- import requests
27
- import traceback
28
- from sklearn.preprocessing import MinMaxScaler
29
- from tensorflow.keras.models import Sequential
30
- from tensorflow.keras.layers import LSTM, Dense, Dropout
31
- from tensorflow.keras.callbacks import Callback
32
- import tensorflow as tf
33
- import xgboost as xgb
34
-
35
- app = Flask(__name__)
36
- CORS(app)
37
-
38
-
39
- @app.route("/", methods=["GET"])
40
- def home():
41
- return jsonify({"status": "running", "message": "StockBuddy API is live!"})
42
-
43
- # [OPT-A] Lightweight TF smoke-test instead of building & running a full LSTM
44
- def validate_tensorflow():
45
- """Quick TensorFlow sanity-check (no model created, no GPU required)."""
46
- try:
47
- print("TensorFlow version:", tf.__version__)
48
- # A tiny constant operation is enough to confirm TF is importable and
49
- # the runtime works. Full model creation is deferred to the first
50
- # prediction request so the cold-start is fast on free-tier hosts.
51
- _ = tf.constant([1.0, 2.0, 3.0])
52
- gpus = tf.config.list_physical_devices("GPU")
53
- if gpus:
54
- msg = f"GPU available ({len(gpus)} device(s)) – running in GPU mode."
55
- else:
56
- msg = "No GPU detected – running in CPU mode (expected on free tier)."
57
- print(f"TensorFlow OK: {msg}")
58
- return True, msg
59
- except Exception as e:
60
- print(f"TensorFlow validation failed: {e}")
61
- return False, f"TensorFlow error: {e}"
62
-
63
-
64
- # Run smoke-test at startup
65
- tf_status, tf_message = validate_tensorflow()
66
- if not tf_status:
67
- print(f"WARNING: {tf_message}")
68
- else:
69
- print(f"TensorFlow validation: {tf_message}")
70
-
71
- # Dictionary to store running prediction tasks
72
- prediction_tasks = {}
73
-
74
-
75
- class PredictionTask:
76
- def __init__(self, user_id, symbol, days_ahead):
77
- self.user_id = user_id
78
- self.symbol = symbol
79
- self.days_ahead = days_ahead
80
- self.progress = 0
81
- self.status = "pending"
82
- self.result = None
83
- self.sentiment_result = None
84
- self.thread = None
85
- self.stop_requested = False
86
- self.stop_acknowledged = False
87
- # Unique task ID: millisecond timestamp + random hex suffix
88
- timestamp = int(time.time() * 1000)
89
- random_suffix = os.urandom(4).hex()
90
- self.task_id = f"{user_id}_{symbol}_{timestamp}_{random_suffix}"
91
-
92
- def run(self):
93
- self.thread = threading.Thread(target=self._run_prediction)
94
- self.thread.daemon = True
95
- self.thread.start()
96
- return self.task_id
97
-
98
- def is_stop_requested(self):
99
- """Callback for model training loops to poll stop flag."""
100
- if self.stop_requested and not self.stop_acknowledged:
101
- self.stop_acknowledged = True
102
- self.status = "stopped"
103
- return True
104
- return self.stop_requested
105
-
106
- def _run_prediction(self):
107
- try:
108
- print(f"Starting prediction for {self.symbol} (task: {self.task_id})")
109
- self.status = "running"
110
- self.progress = 10
111
-
112
- # ── Fetch historical data ────────────────────────────────────────
113
- print(f"Fetching historical data for {self.symbol}...")
114
- try:
115
- data = stock_model.fetch_stock_data(self.symbol, outputsize="compact")
116
- print(f"Fetched {len(data)} rows for {self.symbol}")
117
- except Exception as e:
118
- print(f"Data fetch error: {e}")
119
- self.status = "failed"
120
- self.result = {"error": f"Could not fetch data for {self.symbol}: {e}"}
121
- return
122
-
123
- if data is None:
124
- self.status = "failed"
125
- self.result = {"error": f"Could not fetch data for {self.symbol}"}
126
- return
127
-
128
- if self.stop_requested:
129
- self.status = "stopped"; return
130
-
131
- if len(data) < 60:
132
- self.status = "failed"
133
- self.result = {"error": f"Insufficient data for {self.symbol} "
134
- f"(got {len(data)}, need ≥60)"}
135
- return
136
-
137
- # ── Extract last actual close ────────────────────────────────────
138
- try:
139
- if isinstance(data, pd.DataFrame) and "Close" in data.columns:
140
- last_actual_close = float(data["Close"].iloc[-1])
141
- last_date = data.index[-1]
142
- else:
143
- last_actual_close = float(data.iloc[-1, 0])
144
- last_date = data.index[-1]
145
- print(f"Latest close for {self.symbol}: "
146
- f"${last_actual_close:.2f} on {last_date.strftime('%Y-%m-%d')}")
147
- except Exception as e:
148
- self.status = "failed"
149
- self.result = {"error": f"Error reading price data: {e}"}
150
- return
151
-
152
- self.progress = 20
153
- if self.stop_requested:
154
- self.status = "stopped"; return
155
-
156
- # ── Sentiment analysis ───────────────────────────────────────────
157
- try:
158
- print(f"Fetching news for {self.symbol}...")
159
- headlines = stock_model.fetch_finnhub_news(self.symbol)
160
- print(f"Got {len(headlines)} headlines")
161
- self.progress = 30
162
- if self.stop_requested:
163
- self.status = "stopped"; return
164
-
165
- sentiment_results, sentiment_totals = \
166
- stock_model.analyze_sentiment(headlines)
167
- sentiment_summary = stock_model.generate_sentiment_summary(
168
- sentiment_totals, headlines, self.symbol)
169
- self.sentiment_result = {
170
- "totals": sentiment_totals,
171
- "summary": sentiment_summary,
172
- }
173
- except Exception as e:
174
- print(f"Sentiment error (non-fatal): {e}")
175
- self.sentiment_result = {
176
- "totals": {"positive": 0, "negative": 0, "neutral": 0},
177
- "summary": f"Unable to analyse sentiment: {e}",
178
- }
179
-
180
- self.progress = 40
181
- if self.stop_requested:
182
- self.status = "stopped"; return
183
-
184
- # ── Preprocess data ──────────────────────────────────────────────
185
- try:
186
- print("Preprocessing data...")
187
- scaled_data, scaler = stock_model.preprocess_data(data)
188
-
189
- # [OPT-C] time_step 45 → 30
190
- time_step = 30
191
- X, y = stock_model.create_sequences(scaled_data, time_step)
192
- print(f"Sequences: X={X.shape}, y={y.shape}")
193
- except Exception as e:
194
- self.status = "failed"
195
- self.result = {"error": f"Preprocessing failed: {e}"}
196
- return
197
-
198
- if len(X) == 0:
199
- self.status = "failed"
200
- self.result = {"error": f"Could not create training sequences for {self.symbol}"}
201
- return
202
-
203
- self.progress = 50
204
- if self.stop_requested:
205
- self.status = "stopped"; return
206
-
207
- # ── Train LSTM ───────────────────────────────────────────────────
208
- try:
209
- train_size = int(len(X) * 0.8)
210
- if train_size == 0:
211
- self.status = "failed"
212
- self.result = {"error": "Not enough data to split for training"}
213
- return
214
-
215
- X_train, y_train = X[:train_size], y[:train_size]
216
- self.progress = 55
217
- print(f"Training LSTM with {len(X_train)} samples...")
218
- lstm_model = stock_model.train_lstm(
219
- X_train, y_train, time_step, self.is_stop_requested)
220
- except Exception as e:
221
- self.status = "failed"
222
- self.result = {"error": f"LSTM training failed: {e}"}
223
- return
224
-
225
- if self.stop_requested:
226
- self.status = "stopped"; return
227
-
228
- self.progress = 75
229
- if self.stop_requested:
230
- self.status = "stopped"; return
231
-
232
- # ── Train XGBoost on residuals ───────────────────────────────────
233
- try:
234
- print("Calculating residuals for XGBoost...")
235
- lstm_preds = lstm_model.predict(X_train, verbose=0).flatten()
236
- residuals = y_train - lstm_preds
237
- xgb_model = stock_model.train_xgboost(
238
- X_train.reshape(X_train.shape[0], -1),
239
- residuals,
240
- self.is_stop_requested,
241
- )
242
- if self.stop_requested or xgb_model is None:
243
- self.status = "stopped"; return
244
- except Exception as e:
245
- print(f"XGBoost training error (non-fatal): {e}")
246
- xgb_model = None
247
-
248
- self.progress = 90
249
- if self.stop_requested:
250
- self.status = "stopped"; return
251
-
252
- # ── Generate predictions ─────────────────────────────────────────
253
- try:
254
- print(f"Generating {self.days_ahead}-day predictions...")
255
- predictions = stock_model.predict_stock_price(
256
- lstm_model, xgb_model, scaled_data, scaler,
257
- time_step, self.days_ahead, self.is_stop_requested,
258
- )
259
- if self.stop_requested or predictions is None:
260
- self.status = "stopped"; return
261
- except Exception as e:
262
- self.status = "failed"
263
- self.result = {"error": f"Prediction generation failed: {e}"}
264
- return
265
-
266
- self.progress = 95
267
- if self.stop_requested:
268
- self.status = "stopped"; return
269
-
270
- # ── Build future trading-day dates ───────────────────────────────
271
- future_dates = []
272
- for i in range(1, self.days_ahead + 1):
273
- if self.stop_requested:
274
- break
275
- next_date = last_date + timedelta(days=i)
276
- while next_date.weekday() > 4:
277
- next_date += timedelta(days=1)
278
- future_dates.append(next_date)
279
-
280
- if self.stop_requested:
281
- self.status = "stopped"; return
282
-
283
- # Deduplicate dates
284
- unique_future_dates = []
285
- seen_dates = set()
286
- for date in future_dates:
287
- ds = date.strftime("%Y-%m-%d")
288
- if ds not in seen_dates:
289
- seen_dates.add(ds)
290
- unique_future_dates.append(date)
291
-
292
- # Pad if needed
293
- while (len(unique_future_dates) < len(predictions)
294
- and not self.stop_requested):
295
- next_date = unique_future_dates[-1] + timedelta(days=1)
296
- while next_date.weekday() > 4:
297
- next_date += timedelta(days=1)
298
- ds = next_date.strftime("%Y-%m-%d")
299
- if ds not in seen_dates:
300
- unique_future_dates.append(next_date)
301
- seen_dates.add(ds)
302
-
303
- if self.stop_requested:
304
- self.status = "stopped"; return
305
-
306
- unique_future_dates = unique_future_dates[: len(predictions)]
307
-
308
- # ── Assemble result payload ──────────────────────────────────────
309
- prediction_data = []
310
- for i in range(min(len(unique_future_dates), len(predictions))):
311
- predicted_price = float(predictions[i][0])
312
- percent_change = (
313
- (predicted_price - last_actual_close) / last_actual_close * 100
314
- )
315
- prediction_data.append({
316
- "date": unique_future_dates[i].strftime("%Y-%m-%d"),
317
- "price": round(predicted_price, 2),
318
- "change": round(percent_change, 2),
319
- })
320
-
321
- self.result = {
322
- "symbol": self.symbol,
323
- "lastActualClose": {
324
- "date": last_date.strftime("%Y-%m-%d"),
325
- "price": round(last_actual_close, 2),
326
- },
327
- "predictions": prediction_data,
328
- "sentiment": self.sentiment_result,
329
- "tableDisplay": True,
330
- }
331
- self.progress = 100
332
- self.status = "completed"
333
- print(f"Prediction complete for {self.symbol}")
334
-
335
- except Exception as e:
336
- self.status = "failed"
337
- self.result = {"error": str(e)}
338
- print(f"Prediction task error: {e}")
339
- traceback.print_exc()
340
-
341
-
342
- # =============================================================================
343
- # REST API ROUTES
344
- # (all routes are identical to the original – no frontend changes needed)
345
- # =============================================================================
346
-
347
- @app.route("/api/predict", methods=["POST"])
348
- def start_prediction():
349
- try:
350
- data = request.json
351
- print(f"POST /api/predict body={data}")
352
-
353
- if not data:
354
- return jsonify({"error": "Invalid or missing request body"}), 400
355
-
356
- user_id = data.get("userId")
357
- symbol = data.get("symbol")
358
- days_ahead = int(data.get("daysAhead", 5))
359
-
360
- if not user_id or not symbol:
361
- return jsonify({"error": "Missing required parameters (userId or symbol)"}), 400
362
-
363
- if not isinstance(symbol, str) or len(symbol) > 10:
364
- return jsonify({"error": f"Invalid symbol format: {symbol}"}), 400
365
-
366
- if not tf_status:
367
- return jsonify({
368
- "error": f"Prediction service unavailable: {tf_message}",
369
- "tf_status": tf_message,
370
- }), 503
371
-
372
- task = PredictionTask(user_id, symbol, days_ahead)
373
- task_id = task.run()
374
- prediction_tasks[task_id] = task
375
-
376
- return jsonify({
377
- "taskId": task_id,
378
- "status": "pending",
379
- "message": f"Prediction started for {symbol}",
380
- })
381
- except ValueError as e:
382
- return jsonify({"error": str(e)}), 400
383
- except Exception as e:
384
- print(f"Critical error starting prediction: {e}")
385
- traceback.print_exc()
386
- return jsonify({"error": "Failed to start prediction", "details": str(e)}), 500
387
-
388
-
389
- @app.route("/api/predict/status/<task_id>", methods=["GET"])
390
- def prediction_status(task_id):
391
- try:
392
- task = prediction_tasks.get(task_id)
393
- if not task:
394
- return jsonify({"error": "Task not found"}), 404
395
-
396
- try:
397
- if task.status == "completed" and task.result:
398
- if isinstance(task.result, dict):
399
- if "predictions" in task.result and isinstance(
400
- task.result["predictions"], list):
401
- for pred in task.result["predictions"]:
402
- if (not isinstance(pred, dict)
403
- or "date" not in pred
404
- or "price" not in pred):
405
- task.status = "failed"
406
- task.result = {"error": "Malformed prediction data"}
407
- break
408
- else:
409
- task.status = "failed"
410
- task.result = {"error": "Missing prediction data"}
411
- else:
412
- task.status = "failed"
413
- task.result = {"error": "Invalid result format"}
414
-
415
- return jsonify({
416
- "taskId": task_id,
417
- "status": task.status,
418
- "progress": task.progress,
419
- "result": task.result if task.status == "completed" else None,
420
- })
421
- except Exception as e:
422
- print(f"Error generating status response: {e}")
423
- return jsonify({
424
- "taskId": task_id,
425
- "status": "error",
426
- "progress": task.progress,
427
- "error": str(e),
428
- })
429
- except Exception as e:
430
- print(f"Critical error in prediction status: {e}")
431
- return jsonify({"taskId": task_id, "status": "error",
432
- "error": "Server error"}), 500
433
-
434
-
435
- @app.route("/api/predict/stop/<task_id>", methods=["POST"])
436
- def stop_prediction(task_id):
437
- task = prediction_tasks.get(task_id)
438
- if not task:
439
- return jsonify({"error": "Task not found"}), 404
440
-
441
- task.stop_requested = True
442
-
443
- if task.thread and task.thread.is_alive():
444
- task.status = "stopping"
445
- print(f"Stop requested for task {task_id} ({task.symbol})")
446
- stop_wait_start = time.time()
447
- while time.time() - stop_wait_start < 2:
448
- if task.stop_acknowledged:
449
- task.status = "stopped"
450
- break
451
- time.sleep(0.1)
452
- else:
453
- task.status = "stopped"
454
-
455
- return jsonify({
456
- "taskId": task_id,
457
- "status": task.status,
458
- "symbol": task.symbol,
459
- "progress": task.progress,
460
- "stopRequested": task.stop_requested,
461
- "stopAcknowledged": task.stop_acknowledged,
462
- })
463
-
464
-
465
- @app.route("/api/predict/sentiment/<symbol>", methods=["GET"])
466
- def get_sentiment(symbol):
467
- try:
468
- headlines = stock_model.fetch_finnhub_news(symbol)
469
- sentiment_results, sentiment_totals = \
470
- stock_model.analyze_sentiment(headlines)
471
- sentiment_summary = stock_model.generate_sentiment_summary(
472
- sentiment_totals, headlines, symbol)
473
- return jsonify({
474
- "symbol": symbol,
475
- "sentiment": {
476
- "totals": sentiment_totals,
477
- "summary": sentiment_summary,
478
- "period": 28,
479
- },
480
- })
481
- except Exception as e:
482
- return jsonify({"error": str(e)}), 500
483
-
484
-
485
- @app.route("/api/diagnose", methods=["GET"])
486
- def diagnose():
487
- """Diagnostic endpoint – checks environment, APIs and model primitives."""
488
- try:
489
- env_info = {
490
- "python_version": sys.version,
491
- "tensorflow_version": tf.__version__,
492
- "numpy_version": np.__version__,
493
- "pandas_version": pd.__version__,
494
- "xgboost_version": xgb.__version__,
495
- }
496
-
497
- api_status = {}
498
- try:
499
- url = "https://www.alphavantage.co/query"
500
- params = {
501
- "function": "TIME_SERIES_DAILY",
502
- "symbol": "AAPL",
503
- "apikey": stock_model.ALPHAVANTAGE_API_KEY,
504
- "outputsize": "compact",
505
- "datatype": "json",
506
- }
507
- resp = requests.get(url, params=params)
508
- rj = resp.json()
509
- api_status["alpha_vantage"] = {
510
- "status_code": resp.status_code,
511
- "has_data": "Time Series (Daily)" in rj,
512
- "error": rj.get("Error Message") or rj.get("Note")
513
- if "Time Series (Daily)" not in rj else None,
514
- }
515
- except Exception as e:
516
- api_status["alpha_vantage"] = {"error": str(e)}
517
-
518
- try:
519
- headers = {"X-Finnhub-Token": stock_model.FINNHUB_API_KEY}
520
- resp = requests.get(
521
- "https://finnhub.io/api/v1/news?category=general",
522
- headers=headers)
523
- api_status["finnhub"] = {
524
- "status_code": resp.status_code,
525
- "has_data": len(resp.json()) > 0,
526
- "error": None if resp.status_code == 200 else str(resp.text),
527
- }
528
- except Exception as e:
529
- api_status["finnhub"] = {"error": str(e)}
530
-
531
- model_status = {}
532
- try:
533
- test_data = np.random.rand(100, 6) # 6 features (OPT-2)
534
- test_scaler = MinMaxScaler()
535
- test_data[:, 0] = test_scaler.fit_transform(
536
- np.arange(100).reshape(-1, 1)).flatten()
537
- X, y = stock_model.create_sequences(test_data, time_step=30)
538
- model_status["sequence_creation"] = {
539
- "success": len(X) > 0,
540
- "X_shape": str(X.shape),
541
- "y_shape": str(y.shape),
542
- }
543
- except Exception as e:
544
- model_status["error"] = str(e)
545
-
546
- return jsonify({
547
- "timestamp": datetime.now().isoformat(),
548
- "status": "OK",
549
- "environment": env_info,
550
- "api_status": api_status,
551
- "model_status": model_status,
552
- })
553
- except Exception as e:
554
- return jsonify({"status": "ERROR", "error": str(e)}), 500
555
-
556
-
557
- if __name__ == "__main__":
558
- # [OPT-B] Read port from environment variable so the same binary works on:
559
- # • Render (sets $PORT automatically, usually 10000)
560
- # • Hugging Face (expects 7860)
561
- # • Local dev (falls back to 5001)
562
- port = int(os.environ.get("PORT", 5001))
563
- print(f"Starting StockBuddy API on port {port}")
564
- app.run(host="0.0.0.0", port=port)
 
1
+ """
2
+ app.py – StockBuddy Flask API
3
+ =================================
4
+ LIGHTWEIGHT CHANGES vs original:
5
+ [OPT-A] Removed the startup TF validation model (was creating & running a test
6
+ LSTM on every cold start – wastes ~10 s and ~100 MB RAM on free tier).
7
+ Replaced with a simple tf.constant() smoke-test.
8
+ [OPT-B] PORT is now read from the PORT environment variable so the server
9
+ works on Render (sets $PORT automatically) and Hugging Face Spaces
10
+ (expects port 7860) without code changes.
11
+ [OPT-C] time_step updated to 30 throughout (was 45) to match the lighter model.
12
+ All REST API routes are unchanged from the original.
13
+ """
14
+
15
+ from flask import Flask, request, jsonify
16
+ from flask_cors import CORS
17
+ import numpy as np
18
+ import pandas as pd
19
+ import os
20
+ import threading
21
+ import time
22
+ from datetime import datetime, timedelta
23
+ import json
24
+ import model as stock_model
25
+ import sys
26
+ import requests
27
+ import traceback
28
+ from sklearn.preprocessing import MinMaxScaler
29
+ from tensorflow.keras.models import Sequential
30
+ from tensorflow.keras.layers import LSTM, Dense, Dropout
31
+ from tensorflow.keras.callbacks import Callback
32
+ import tensorflow as tf
33
+ import xgboost as xgb
34
+
35
+ app = Flask(__name__)
36
+ CORS(app)
37
+
38
+
39
+ # [OPT-A] Lightweight TF smoke-test instead of building & running a full LSTM
40
+ def validate_tensorflow():
41
+ """Quick TensorFlow sanity-check (no model created, no GPU required)."""
42
+ try:
43
+ print("TensorFlow version:", tf.__version__)
44
+ # A tiny constant operation is enough to confirm TF is importable and
45
+ # the runtime works. Full model creation is deferred to the first
46
+ # prediction request so the cold-start is fast on free-tier hosts.
47
+ _ = tf.constant([1.0, 2.0, 3.0])
48
+ gpus = tf.config.list_physical_devices("GPU")
49
+ if gpus:
50
+ msg = f"GPU available ({len(gpus)} device(s)) running in GPU mode."
51
+ else:
52
+ msg = "No GPU detected – running in CPU mode (expected on free tier)."
53
+ print(f"TensorFlow OK: {msg}")
54
+ return True, msg
55
+ except Exception as e:
56
+ print(f"TensorFlow validation failed: {e}")
57
+ return False, f"TensorFlow error: {e}"
58
+
59
+
60
+ # Run smoke-test at startup
61
+ tf_status, tf_message = validate_tensorflow()
62
+ if not tf_status:
63
+ print(f"WARNING: {tf_message}")
64
+ else:
65
+ print(f"TensorFlow validation: {tf_message}")
66
+
67
+ # Dictionary to store running prediction tasks
68
+ prediction_tasks = {}
69
+
70
+
71
+ class PredictionTask:
72
+ def __init__(self, user_id, symbol, days_ahead):
73
+ self.user_id = user_id
74
+ self.symbol = symbol
75
+ self.days_ahead = days_ahead
76
+ self.progress = 0
77
+ self.status = "pending"
78
+ self.result = None
79
+ self.sentiment_result = None
80
+ self.thread = None
81
+ self.stop_requested = False
82
+ self.stop_acknowledged = False
83
+ # Unique task ID: millisecond timestamp + random hex suffix
84
+ timestamp = int(time.time() * 1000)
85
+ random_suffix = os.urandom(4).hex()
86
+ self.task_id = f"{user_id}_{symbol}_{timestamp}_{random_suffix}"
87
+
88
+ def run(self):
89
+ self.thread = threading.Thread(target=self._run_prediction)
90
+ self.thread.daemon = True
91
+ self.thread.start()
92
+ return self.task_id
93
+
94
+ def is_stop_requested(self):
95
+ """Callback for model training loops to poll stop flag."""
96
+ if self.stop_requested and not self.stop_acknowledged:
97
+ self.stop_acknowledged = True
98
+ self.status = "stopped"
99
+ return True
100
+ return self.stop_requested
101
+
102
+ def _run_prediction(self):
103
+ try:
104
+ print(f"Starting prediction for {self.symbol} (task: {self.task_id})")
105
+ self.status = "running"
106
+ self.progress = 10
107
+
108
+ # ── Fetch historical data ────────────────────────────────────────
109
+ print(f"Fetching historical data for {self.symbol}...")
110
+ try:
111
+ data = stock_model.fetch_stock_data(self.symbol, outputsize="compact")
112
+ print(f"Fetched {len(data)} rows for {self.symbol}")
113
+ except Exception as e:
114
+ print(f"Data fetch error: {e}")
115
+ self.status = "failed"
116
+ self.result = {"error": f"Could not fetch data for {self.symbol}: {e}"}
117
+ return
118
+
119
+ if data is None:
120
+ self.status = "failed"
121
+ self.result = {"error": f"Could not fetch data for {self.symbol}"}
122
+ return
123
+
124
+ if self.stop_requested:
125
+ self.status = "stopped"; return
126
+
127
+ if len(data) < 60:
128
+ self.status = "failed"
129
+ self.result = {"error": f"Insufficient data for {self.symbol} "
130
+ f"(got {len(data)}, need ≥60)"}
131
+ return
132
+
133
+ # ── Extract last actual close ────────────────────────────────────
134
+ try:
135
+ if isinstance(data, pd.DataFrame) and "Close" in data.columns:
136
+ last_actual_close = float(data["Close"].iloc[-1])
137
+ last_date = data.index[-1]
138
+ else:
139
+ last_actual_close = float(data.iloc[-1, 0])
140
+ last_date = data.index[-1]
141
+ print(f"Latest close for {self.symbol}: "
142
+ f"${last_actual_close:.2f} on {last_date.strftime('%Y-%m-%d')}")
143
+ except Exception as e:
144
+ self.status = "failed"
145
+ self.result = {"error": f"Error reading price data: {e}"}
146
+ return
147
+
148
+ self.progress = 20
149
+ if self.stop_requested:
150
+ self.status = "stopped"; return
151
+
152
+ # ── Sentiment analysis ───────────────────────────────────────────
153
+ try:
154
+ print(f"Fetching news for {self.symbol}...")
155
+ headlines = stock_model.fetch_finnhub_news(self.symbol)
156
+ print(f"Got {len(headlines)} headlines")
157
+ self.progress = 30
158
+ if self.stop_requested:
159
+ self.status = "stopped"; return
160
+
161
+ sentiment_results, sentiment_totals = \
162
+ stock_model.analyze_sentiment(headlines)
163
+ sentiment_summary = stock_model.generate_sentiment_summary(
164
+ sentiment_totals, headlines, self.symbol)
165
+ self.sentiment_result = {
166
+ "totals": sentiment_totals,
167
+ "summary": sentiment_summary,
168
+ }
169
+ except Exception as e:
170
+ print(f"Sentiment error (non-fatal): {e}")
171
+ self.sentiment_result = {
172
+ "totals": {"positive": 0, "negative": 0, "neutral": 0},
173
+ "summary": f"Unable to analyse sentiment: {e}",
174
+ }
175
+
176
+ self.progress = 40
177
+ if self.stop_requested:
178
+ self.status = "stopped"; return
179
+
180
+ # ── Preprocess data ──────────────────────────────────────────────
181
+ try:
182
+ print("Preprocessing data...")
183
+ scaled_data, scaler = stock_model.preprocess_data(data)
184
+
185
+ # [OPT-C] time_step 45 → 30
186
+ time_step = 30
187
+ X, y = stock_model.create_sequences(scaled_data, time_step)
188
+ print(f"Sequences: X={X.shape}, y={y.shape}")
189
+ except Exception as e:
190
+ self.status = "failed"
191
+ self.result = {"error": f"Preprocessing failed: {e}"}
192
+ return
193
+
194
+ if len(X) == 0:
195
+ self.status = "failed"
196
+ self.result = {"error": f"Could not create training sequences for {self.symbol}"}
197
+ return
198
+
199
+ self.progress = 50
200
+ if self.stop_requested:
201
+ self.status = "stopped"; return
202
+
203
+ # ── Train LSTM ───────────────────────────────────────────────────
204
+ try:
205
+ train_size = int(len(X) * 0.8)
206
+ if train_size == 0:
207
+ self.status = "failed"
208
+ self.result = {"error": "Not enough data to split for training"}
209
+ return
210
+
211
+ X_train, y_train = X[:train_size], y[:train_size]
212
+ self.progress = 55
213
+ print(f"Training LSTM with {len(X_train)} samples...")
214
+ lstm_model = stock_model.train_lstm(
215
+ X_train, y_train, time_step, self.is_stop_requested)
216
+ except Exception as e:
217
+ self.status = "failed"
218
+ self.result = {"error": f"LSTM training failed: {e}"}
219
+ return
220
+
221
+ if self.stop_requested:
222
+ self.status = "stopped"; return
223
+
224
+ self.progress = 75
225
+ if self.stop_requested:
226
+ self.status = "stopped"; return
227
+
228
+ # ── Train XGBoost on residuals ───────────────────────────────────
229
+ try:
230
+ print("Calculating residuals for XGBoost...")
231
+ lstm_preds = lstm_model.predict(X_train, verbose=0).flatten()
232
+ residuals = y_train - lstm_preds
233
+ xgb_model = stock_model.train_xgboost(
234
+ X_train.reshape(X_train.shape[0], -1),
235
+ residuals,
236
+ self.is_stop_requested,
237
+ )
238
+ if self.stop_requested or xgb_model is None:
239
+ self.status = "stopped"; return
240
+ except Exception as e:
241
+ print(f"XGBoost training error (non-fatal): {e}")
242
+ xgb_model = None
243
+
244
+ self.progress = 90
245
+ if self.stop_requested:
246
+ self.status = "stopped"; return
247
+
248
+ # ── Generate predictions ─────────────────────────────────────────
249
+ try:
250
+ print(f"Generating {self.days_ahead}-day predictions...")
251
+ predictions = stock_model.predict_stock_price(
252
+ lstm_model, xgb_model, scaled_data, scaler,
253
+ time_step, self.days_ahead, self.is_stop_requested,
254
+ )
255
+ if self.stop_requested or predictions is None:
256
+ self.status = "stopped"; return
257
+ except Exception as e:
258
+ self.status = "failed"
259
+ self.result = {"error": f"Prediction generation failed: {e}"}
260
+ return
261
+
262
+ self.progress = 95
263
+ if self.stop_requested:
264
+ self.status = "stopped"; return
265
+
266
+ # ── Build future trading-day dates ───────────────────────────────
267
+ future_dates = []
268
+ for i in range(1, self.days_ahead + 1):
269
+ if self.stop_requested:
270
+ break
271
+ next_date = last_date + timedelta(days=i)
272
+ while next_date.weekday() > 4:
273
+ next_date += timedelta(days=1)
274
+ future_dates.append(next_date)
275
+
276
+ if self.stop_requested:
277
+ self.status = "stopped"; return
278
+
279
+ # Deduplicate dates
280
+ unique_future_dates = []
281
+ seen_dates = set()
282
+ for date in future_dates:
283
+ ds = date.strftime("%Y-%m-%d")
284
+ if ds not in seen_dates:
285
+ seen_dates.add(ds)
286
+ unique_future_dates.append(date)
287
+
288
+ # Pad if needed
289
+ while (len(unique_future_dates) < len(predictions)
290
+ and not self.stop_requested):
291
+ next_date = unique_future_dates[-1] + timedelta(days=1)
292
+ while next_date.weekday() > 4:
293
+ next_date += timedelta(days=1)
294
+ ds = next_date.strftime("%Y-%m-%d")
295
+ if ds not in seen_dates:
296
+ unique_future_dates.append(next_date)
297
+ seen_dates.add(ds)
298
+
299
+ if self.stop_requested:
300
+ self.status = "stopped"; return
301
+
302
+ unique_future_dates = unique_future_dates[: len(predictions)]
303
+
304
+ # ── Assemble result payload ──────────────────────────────────────
305
+ prediction_data = []
306
+ for i in range(min(len(unique_future_dates), len(predictions))):
307
+ predicted_price = float(predictions[i][0])
308
+ percent_change = (
309
+ (predicted_price - last_actual_close) / last_actual_close * 100
310
+ )
311
+ prediction_data.append({
312
+ "date": unique_future_dates[i].strftime("%Y-%m-%d"),
313
+ "price": round(predicted_price, 2),
314
+ "change": round(percent_change, 2),
315
+ })
316
+
317
+ self.result = {
318
+ "symbol": self.symbol,
319
+ "lastActualClose": {
320
+ "date": last_date.strftime("%Y-%m-%d"),
321
+ "price": round(last_actual_close, 2),
322
+ },
323
+ "predictions": prediction_data,
324
+ "sentiment": self.sentiment_result,
325
+ "tableDisplay": True,
326
+ }
327
+ self.progress = 100
328
+ self.status = "completed"
329
+ print(f"Prediction complete for {self.symbol}")
330
+
331
+ except Exception as e:
332
+ self.status = "failed"
333
+ self.result = {"error": str(e)}
334
+ print(f"Prediction task error: {e}")
335
+ traceback.print_exc()
336
+
337
+
338
+ # =============================================================================
339
+ # REST API ROUTES
340
+ # (all routes are identical to the original – no frontend changes needed)
341
+ # =============================================================================
342
+
343
@app.route("/api/predict", methods=["POST"])
def start_prediction():
    """Validate a prediction request and start a background PredictionTask.

    Expects a JSON object body with ``userId`` and ``symbol`` (both required)
    and an optional ``daysAhead`` (defaults to 5).

    Returns:
        200 with ``taskId`` / ``status`` / ``message`` once the task has been
        started; 400 for a missing or malformed body or parameter; 503 when
        the TensorFlow start-up check (``tf_status``) did not pass; 500 for
        any unexpected error.
    """
    try:
        # silent=True returns None instead of raising when the body is absent,
        # is not valid JSON, or has the wrong Content-Type, so those requests
        # get the 400 below rather than falling into the generic 500 handler.
        data = request.get_json(silent=True)
        print(f"POST /api/predict body={data}")

        # A JSON array / string / number is valid JSON but has no .get().
        if not data or not isinstance(data, dict):
            return jsonify({"error": "Invalid or missing request body"}), 400

        user_id = data.get("userId")
        symbol = data.get("symbol")

        if not user_id or not symbol:
            return jsonify({"error": "Missing required parameters (userId or symbol)"}), 400

        if not isinstance(symbol, str) or len(symbol) > 10:
            return jsonify({"error": f"Invalid symbol format: {symbol}"}), 400

        # int() raises ValueError for "abc" and TypeError for None / lists;
        # both are client errors, not server errors.
        try:
            days_ahead = int(data.get("daysAhead", 5))
        except (TypeError, ValueError):
            return jsonify({"error": "daysAhead must be an integer"}), 400

        if days_ahead < 1:
            return jsonify({"error": "daysAhead must be at least 1"}), 400

        if not tf_status:
            return jsonify({
                "error": f"Prediction service unavailable: {tf_message}",
                "tf_status": tf_message,
            }), 503

        task = PredictionTask(user_id, symbol, days_ahead)
        task_id = task.run()
        # Register the task so the status / stop endpoints can look it up.
        prediction_tasks[task_id] = task

        return jsonify({
            "taskId": task_id,
            "status": "pending",
            "message": f"Prediction started for {symbol}",
        })
    except ValueError as e:
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        print(f"Critical error starting prediction: {e}")
        traceback.print_exc()
        return jsonify({"error": "Failed to start prediction", "details": str(e)}), 500
383
+
384
+
385
def _prediction_result_error(result):
    """Return an error message if *result* is not a well-formed prediction payload, else None."""
    if not isinstance(result, dict):
        return "Invalid result format"
    predictions = result.get("predictions")
    if not isinstance(predictions, list):
        return "Missing prediction data"
    for pred in predictions:
        if not isinstance(pred, dict) or "date" not in pred or "price" not in pred:
            return "Malformed prediction data"
    return None


@app.route("/api/predict/status/<task_id>", methods=["GET"])
def prediction_status(task_id):
    """Report the status, progress and (when completed) result of a task.

    A completed task whose result fails the shape check is downgraded to
    ``failed`` before responding. For failed tasks the stored error message
    is returned in an ``error`` field; ``result`` is only populated when the
    status is ``completed``.

    Returns:
        200 with ``taskId`` / ``status`` / ``progress`` / ``result`` (plus
        ``error`` for failed tasks); 404 when the task id is unknown; 500 on
        an unexpected server error.
    """
    try:
        task = prediction_tasks.get(task_id)
        if not task:
            return jsonify({"error": "Task not found"}), 404

        try:
            if task.status == "completed" and task.result:
                problem = _prediction_result_error(task.result)
                if problem is not None:
                    task.status = "failed"
                    task.result = {"error": problem}

            response = {
                "taskId": task_id,
                "status": task.status,
                "progress": task.progress,
                "result": task.result if task.status == "completed" else None,
            }
            # Failed tasks store {"error": "..."} as their result; without this
            # the failure reason would be dropped together with the result.
            if (task.status == "failed"
                    and isinstance(task.result, dict)
                    and task.result.get("error")):
                response["error"] = str(task.result["error"])

            return jsonify(response)
        except Exception as e:
            print(f"Error generating status response: {e}")
            return jsonify({
                "taskId": task_id,
                "status": "error",
                "progress": task.progress,
                "error": str(e),
            })
    except Exception as e:
        print(f"Critical error in prediction status: {e}")
        return jsonify({"taskId": task_id, "status": "error",
                        "error": "Server error"}), 500
429
+
430
+
431
@app.route("/api/predict/stop/<task_id>", methods=["POST"])
def stop_prediction(task_id):
    """Ask a prediction task to stop and report its resulting state.

    Sets ``stop_requested`` on the task. If the worker thread is still alive,
    the status becomes ``stopping`` and the handler polls for up to two
    seconds for ``stop_acknowledged``; once acknowledged the status becomes
    ``stopped``. If there is no live thread the status is set to ``stopped``
    immediately.

    Returns:
        200 with the task's id, status, symbol, progress and stop flags, or
        404 when the task id is unknown.
    """
    task = prediction_tasks.get(task_id)
    if not task:
        return jsonify({"error": "Task not found"}), 404

    task.stop_requested = True

    worker_running = task.thread and task.thread.is_alive()
    if not worker_running:
        # NOTE(review): this also overwrites the status of a task that has
        # already finished ("completed" / "failed") - confirm that is intended.
        task.status = "stopped"
    else:
        task.status = "stopping"
        print(f"Stop requested for task {task_id} ({task.symbol})")
        wait_began = time.time()
        # Poll every 100 ms, for at most 2 s, for the worker's acknowledgement.
        while True:
            if time.time() - wait_began >= 2:
                break
            if task.stop_acknowledged:
                task.status = "stopped"
                break
            time.sleep(0.1)

    payload = {
        "taskId": task_id,
        "status": task.status,
        "symbol": task.symbol,
        "progress": task.progress,
        "stopRequested": task.stop_requested,
        "stopAcknowledged": task.stop_acknowledged,
    }
    return jsonify(payload)
459
+
460
+
461
@app.route("/api/predict/sentiment/<symbol>", methods=["GET"])
def get_sentiment(symbol):
    """Return news-sentiment totals and a text summary for *symbol*.

    Fetches headlines via ``stock_model.fetch_finnhub_news``, scores them with
    ``stock_model.analyze_sentiment`` and summarises the totals with
    ``stock_model.generate_sentiment_summary``.

    Returns:
        200 with ``symbol`` and a ``sentiment`` object (``totals``,
        ``summary``, ``period``), or 500 with an ``error`` message when any
        step raises.
    """
    try:
        headlines = stock_model.fetch_finnhub_news(symbol)
        # Only the aggregate totals are used here; per-headline results are
        # discarded.
        _, sentiment_totals = stock_model.analyze_sentiment(headlines)
        sentiment_summary = stock_model.generate_sentiment_summary(
            sentiment_totals, headlines, symbol)
        return jsonify({
            "symbol": symbol,
            "sentiment": {
                "totals": sentiment_totals,
                "summary": sentiment_summary,
                # NOTE(review): presumably the news look-back window in days;
                # confirm against stock_model.fetch_finnhub_news.
                "period": 28,
            },
        })
    except Exception as e:
        # Log the failure like the other handlers do so it is visible in the
        # server output, not only in the client response.
        print(f"Sentiment analysis error for {symbol}: {e}")
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
479
+
480
+
481
@app.route("/api/diagnose", methods=["GET"])
def diagnose():
    """Diagnostic endpoint – checks environment, APIs and model primitives.

    Collects library versions, probes yfinance and Finnhub with one small
    request each, and runs ``stock_model.create_sequences`` on random data.
    Each probe records its own error instead of aborting the whole report.

    Returns:
        200 with ``timestamp``, ``status``, ``environment``, ``api_status``
        and ``model_status``; 500 with ``status: "ERROR"`` if building the
        report itself fails.
    """
    # Upper bound (seconds) for the outbound Finnhub probe so an unresponsive
    # upstream cannot block this worker indefinitely.
    probe_timeout = 10

    try:
        env_info = {
            "python_version": sys.version,
            "tensorflow_version": tf.__version__,
            "numpy_version": np.__version__,
            "pandas_version": pd.__version__,
            "xgboost_version": xgb.__version__,
        }

        api_status = {}
        try:
            import yfinance as yf
            test_df = yf.Ticker("AAPL").history(period="1d")
            has_data = not test_df.empty
            api_status["yfinance"] = {
                "status": "OK" if has_data else "No data",
                "has_data": has_data,
                "error": None,
            }
        except Exception as e:
            api_status["yfinance"] = {"error": str(e)}

        try:
            headers = {"X-Finnhub-Token": stock_model.FINNHUB_API_KEY}
            resp = requests.get(
                "https://finnhub.io/api/v1/news?category=general",
                headers=headers,
                timeout=probe_timeout)
            ok = resp.status_code == 200
            api_status["finnhub"] = {
                "status_code": resp.status_code,
                # Only a successful response counts as data; an error body
                # must not be reported as has_data=True.
                "has_data": ok and len(resp.json()) > 0,
                "error": None if ok else str(resp.text),
            }
        except Exception as e:
            api_status["finnhub"] = {"error": str(e)}

        model_status = {}
        try:
            test_data = np.random.rand(100, 6)  # 6 features (OPT-2)
            test_scaler = MinMaxScaler()
            # Replace column 0 with a scaled 0..99 ramp.
            test_data[:, 0] = test_scaler.fit_transform(
                np.arange(100).reshape(-1, 1)).flatten()
            X, y = stock_model.create_sequences(test_data, time_step=30)
            model_status["sequence_creation"] = {
                "success": len(X) > 0,
                "X_shape": str(X.shape),
                "y_shape": str(y.shape),
            }
        except Exception as e:
            model_status["error"] = str(e)

        return jsonify({
            "timestamp": datetime.now().isoformat(),
            "status": "OK",
            "environment": env_info,
            "api_status": api_status,
            "model_status": model_status,
        })
    except Exception as e:
        return jsonify({"status": "ERROR", "error": str(e)}), 500
543
+
544
+
545
if __name__ == "__main__":
    # [OPT-B] The listening port comes from the PORT environment variable so
    # the same code runs unchanged on:
    #   • Render        (sets $PORT automatically, usually 10000)
    #   • Hugging Face  (expects 7860)
    #   • Local dev     (falls back to 5001)
    port = int(os.getenv("PORT", 5001))
    print(f"Starting StockBuddy API on port {port}")
    app.run(port=port, host="0.0.0.0")
 
 
 
 
 
 
 
 
 
 
 
 
model.py CHANGED
@@ -44,65 +44,29 @@ FINNHUB_API_KEY = "cu5gvghr01qqj8u6iau0cu5gvghr01qqj8u6iaug"
44
  # STOCK PRICE PREDICTION FUNCTIONS
45
  # =============================================================================
46
 
47
- def fetch_stock_data(symbol, outputsize="full"):
48
- url = "https://www.alphavantage.co/query"
49
- params = {
50
- "function": "TIME_SERIES_DAILY",
51
- "symbol": symbol,
52
- "apikey": ALPHAVANTAGE_API_KEY,
53
- "outputsize": outputsize,
54
- "datatype": "json",
55
- }
56
- response = requests.get(url, params=params)
57
- data = response.json()
58
-
59
- if "Time Series (Daily)" not in data:
60
- if "Error Message" in data:
61
- raise ValueError(
62
- f"Symbol '{symbol}' not found. Please verify the stock symbol.")
63
- elif "Note" in data:
64
- raise ValueError("API request limit reached. Please try again in a minute.")
65
- else:
66
- raise ValueError(
67
- f"Unable to fetch data for symbol '{symbol}'. Please verify the symbol.")
68
-
69
- ts = data["Time Series (Daily)"]
70
-
71
- df = pd.DataFrame.from_dict(ts, orient="index")
72
- df.index = pd.to_datetime(df.index)
73
- df.sort_index(inplace=True)
74
-
75
- for col in ["1. open", "2. high", "3. low", "4. close", "5. volume"]:
76
- if col in df.columns:
77
- df[col] = df[col].astype(float)
78
 
79
- df = df.rename(columns={
80
- "1. open": "Open",
81
- "2. high": "High",
82
- "3. low": "Low",
83
- "4. close": "Close",
84
- "5. volume": "Volume",
85
- })
86
-
87
- latest_date = df.index[-1]
88
- today = pd.Timestamp.now().normalize()
89
- market_closed_days = 0
90
- if today.dayofweek >= 5:
91
- market_closed_days = today.dayofweek - 4
92
- elif today.hour < 16:
93
- market_closed_days = 1
94
- expected_latest = today - pd.Timedelta(days=market_closed_days)
95
- date_diff = (expected_latest - latest_date).days
96
- if date_diff > 5:
97
- print(f"WARNING: Latest data for {symbol} is from "
98
- f"{latest_date.strftime('%Y-%m-%d')} ({date_diff} days old).")
99
-
100
- print(f"\nLatest closing price for {symbol} "
101
- f"(as of {latest_date.strftime('%Y-%m-%d')}): ${df['Close'].iloc[-1]:.2f}")
102
-
103
- # Add lightweight technical indicators
104
- df = add_technical_indicators(df)
105
- return df
106
 
107
 
108
  # [OPT-2] Reduced feature set: 11 → 6 (Close, RSI, SMA5, MACD, Upper_Band, ROC)
 
44
  # STOCK PRICE PREDICTION FUNCTIONS
45
  # =============================================================================
46
 
47
+ import yfinance as yf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
def fetch_stock_data(symbol, outputsize="full"):
    """Download daily price history for *symbol* via yfinance and add indicators.

    Args:
        symbol: Ticker symbol passed to ``yf.Ticker``.
        outputsize: ``"full"`` requests five years of history; any other
            value requests one year.

    Returns:
        The history DataFrame with a timezone-naive DatetimeIndex, after
        being passed through ``add_technical_indicators``.

    Raises:
        ValueError: If no data is returned for the symbol, or if downloading
            or processing the data fails for any other reason.
    """
    # Use 'max' or '5y' for full, '1y' for compact
    period = "5y" if outputsize == "full" else "1y"

    try:
        df = yf.Ticker(symbol).history(period=period)

        if df.empty:
            raise ValueError(
                f"Unable to fetch data for symbol '{symbol}'. Please verify the symbol.")

        # Drop timezone information from the index.
        df.index = pd.to_datetime(df.index).tz_localize(None)

        latest_date = df.index[-1]
        print(f"\nLatest closing price for {symbol} "
              f"(as of {latest_date.strftime('%Y-%m-%d')}): ${df['Close'].iloc[-1]:.2f}")

        # Add lightweight technical indicators
        df = add_technical_indicators(df)
        return df
    except ValueError:
        # Already the documented error type with a user-facing message;
        # re-wrapping would only prepend a second "Error fetching data" prefix.
        raise
    except Exception as e:
        # Normalise everything else to ValueError, keeping the original
        # exception chained for debugging.
        raise ValueError(f"Error fetching data for {symbol}: {e}") from e
 
 
 
 
 
 
70
 
71
 
72
  # [OPT-2] Reduced feature set: 11 → 6 (Close, RSI, SMA5, MACD, Upper_Band, ROC)
requirements.txt CHANGED
@@ -11,3 +11,4 @@ transformers==4.33.2
11
  plotly==5.17.0
12
  gunicorn==21.2.0
13
  tf-keras==2.15.0
 
 
11
  plotly==5.17.0
12
  gunicorn==21.2.0
13
  tf-keras==2.15.0
14
+ yfinance==0.2.40