danielthatu12 committed on
Commit
dee7f76
·
1 Parent(s): e3e113f
Files changed (5) hide show
  1. Dockerfile +13 -0
  2. Procfile +5 -0
  3. app.py +565 -0
  4. model.py +676 -0
  5. requirements.txt +43 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
7
+
8
+ COPY . .
9
+
10
+ ENV PORT=7860
11
+ EXPOSE 7860
12
+
13
+ CMD ["gunicorn", "app:app", "--bind", "0.0.0.0:7860", "--timeout", "300", "--workers", "1"]
Procfile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Render reads this file to know how to start the web service.
2
+ # --workers 1 : single worker keeps RAM under free-tier limit (~512 MB)
3
+ # --timeout 300 : prediction tasks can run up to 5 minutes before Gunicorn kills them
4
+ # --bind : Render injects $PORT automatically
5
+ web: gunicorn app:app --bind 0.0.0.0:$PORT --timeout 300 --workers 1
app.py ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py – StockBuddy Flask API
3
+ =================================
4
+ LIGHTWEIGHT CHANGES vs original:
5
+ [OPT-A] Removed the startup TF validation model (was creating & running a test
6
+ LSTM on every cold start – wastes ~10 s and ~100 MB RAM on free tier).
7
+ Replaced with a simple tf.constant() smoke-test.
8
+ [OPT-B] PORT is now read from the PORT environment variable so the server
9
+ works on Render (sets $PORT automatically) and Hugging Face Spaces
10
+ (expects port 7860) without code changes.
11
+ [OPT-C] time_step updated to 30 throughout (was 45) to match the lighter model.
12
+ All REST API routes are unchanged from the original.
13
+ """
14
+
15
+ from flask import Flask, request, jsonify
16
+ from flask_cors import CORS
17
+ import numpy as np
18
+ import pandas as pd
19
+ import os
20
+ import threading
21
+ import time
22
+ from datetime import datetime, timedelta
23
+ import json
24
+ import model as stock_model
25
+ import sys
26
+ import requests
27
+ import traceback
28
+ from sklearn.preprocessing import MinMaxScaler
29
+ from tensorflow.keras.models import Sequential
30
+ from tensorflow.keras.layers import LSTM, Dense, Dropout
31
+ from tensorflow.keras.callbacks import Callback
32
+ import tensorflow as tf
33
+ import xgboost as xgb
34
+
35
+ app = Flask(__name__)
36
+ CORS(app)
37
+
38
+
39
@app.route("/", methods=["GET"])
def home():
    """Liveness endpoint: report that the API process is up and serving."""
    payload = {"status": "running", "message": "StockBuddy API is live!"}
    return jsonify(payload)
42
+
43
+
44
# [OPT-A] Cheap TensorFlow smoke test; no model is built at start-up.
def validate_tensorflow():
    """Check that the TensorFlow runtime can execute a trivial op.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is True when a constant tensor could
        be created and the device list could be queried; ``message`` says
        whether a GPU was found, or carries the error text on failure.
    """
    try:
        print("TensorFlow version:", tf.__version__)
        # Creating one constant is enough to prove the runtime is usable;
        # real model construction happens on the first prediction request.
        tf.constant([1.0, 2.0, 3.0])
        gpu_devices = tf.config.list_physical_devices("GPU")
        msg = (
            f"GPU available ({len(gpu_devices)} device(s)) – running in GPU mode."
            if gpu_devices
            else "No GPU detected – running in CPU mode (expected on free tier)."
        )
        print(f"TensorFlow OK: {msg}")
    except Exception as e:
        print(f"TensorFlow validation failed: {e}")
        return False, f"TensorFlow error: {e}"
    return True, msg
63
+
64
+
65
# Execute the smoke test once at import time; /api/predict consults the
# outcome before accepting work.
tf_status, tf_message = validate_tensorflow()
print(
    f"TensorFlow validation: {tf_message}"
    if tf_status
    else f"WARNING: {tf_message}"
)
71
+
72
+ # Dictionary to store running prediction tasks
73
+ prediction_tasks = {}
74
+
75
+
76
class PredictionTask:
    """One background forecasting job for a single user and stock symbol.

    The job runs on a daemon thread and walks through these phases: fetch
    price history, sentiment analysis, preprocessing, LSTM training, XGBoost
    training on the LSTM residuals, forecasting, and result assembly.
    Callers poll ``status`` / ``progress`` / ``result`` and may set
    ``stop_requested`` to ask the job to end early.

    ``status`` takes the values "pending", "running", "completed", "failed"
    and "stopped" inside this class (the stop route may also set "stopping").
    """

    def __init__(self, user_id, symbol, days_ahead):
        self.user_id = user_id
        self.symbol = symbol
        self.days_ahead = days_ahead
        self.progress = 0  # coarse percentage, 0-100
        self.status = "pending"
        self.result = None  # payload dict on success, {"error": ...} on failure
        self.sentiment_result = None
        self.thread = None
        self.stop_requested = False  # set by the stop route
        self.stop_acknowledged = False  # set once a training loop saw the flag
        # Unique task ID: millisecond timestamp + random hex suffix
        timestamp = int(time.time() * 1000)
        random_suffix = os.urandom(4).hex()
        self.task_id = f"{user_id}_{symbol}_{timestamp}_{random_suffix}"

    def run(self):
        """Start the job on a daemon thread and return its task id."""
        self.thread = threading.Thread(target=self._run_prediction, daemon=True)
        self.thread.start()
        return self.task_id

    def is_stop_requested(self):
        """Callback for model training loops to poll the stop flag.

        The first call that observes the flag also records the
        acknowledgement and switches the status to "stopped".
        """
        if self.stop_requested and not self.stop_acknowledged:
            self.stop_acknowledged = True
            self.status = "stopped"
            return True
        return self.stop_requested

    def _stopped(self):
        """Mark the task as stopped if a stop was requested; return the flag."""
        if self.stop_requested:
            self.status = "stopped"
        return self.stop_requested

    def _fail(self, message):
        """Record a terminal failure together with its error message."""
        self.status = "failed"
        self.result = {"error": message}

    @staticmethod
    def _next_business_days(start, count):
        """Return the first ``count`` Mon-Fri dates strictly after ``start``.

        Exchange holidays are not taken into account. A non-positive
        ``count`` yields an empty list.
        """
        days = []
        current = start
        while len(days) < count:
            current = current + timedelta(days=1)
            if current.weekday() < 5:
                days.append(current)
        return days

    def _run_prediction(self):
        """Thread target: run every phase and publish the outcome on ``self``."""
        try:
            print(f"Starting prediction for {self.symbol} (task: {self.task_id})")
            self.status = "running"
            self.progress = 10

            # -- Fetch historical data ------------------------------------
            print(f"Fetching historical data for {self.symbol}...")
            try:
                data = stock_model.fetch_stock_data(self.symbol, outputsize="compact")
            except Exception as e:
                print(f"Data fetch error: {e}")
                self._fail(f"Could not fetch data for {self.symbol}: {e}")
                return

            # Check for None before calling len() so this branch is reachable.
            if data is None:
                self._fail(f"Could not fetch data for {self.symbol}")
                return
            print(f"Fetched {len(data)} rows for {self.symbol}")

            if self._stopped():
                return

            if len(data) < 60:
                self._fail(f"Insufficient data for {self.symbol} "
                           f"(got {len(data)}, need ≥60)")
                return

            # -- Extract last actual close --------------------------------
            try:
                if isinstance(data, pd.DataFrame) and "Close" in data.columns:
                    last_actual_close = float(data["Close"].iloc[-1])
                else:
                    # Fall back to the first column when no "Close" is present.
                    last_actual_close = float(data.iloc[-1, 0])
                last_date = data.index[-1]
                print(f"Latest close for {self.symbol}: "
                      f"${last_actual_close:.2f} on {last_date.strftime('%Y-%m-%d')}")
            except Exception as e:
                self._fail(f"Error reading price data: {e}")
                return

            self.progress = 20
            if self._stopped():
                return

            # -- Sentiment analysis (best effort, never fatal) ------------
            try:
                print(f"Fetching news for {self.symbol}...")
                headlines = stock_model.fetch_finnhub_news(self.symbol)
                print(f"Got {len(headlines)} headlines")
                self.progress = 30
                if self._stopped():
                    return

                _, sentiment_totals = stock_model.analyze_sentiment(headlines)
                sentiment_summary = stock_model.generate_sentiment_summary(
                    sentiment_totals, headlines, self.symbol)
                self.sentiment_result = {
                    "totals": sentiment_totals,
                    "summary": sentiment_summary,
                }
            except Exception as e:
                print(f"Sentiment error (non-fatal): {e}")
                self.sentiment_result = {
                    "totals": {"positive": 0, "negative": 0, "neutral": 0},
                    "summary": f"Unable to analyse sentiment: {e}",
                }

            self.progress = 40
            if self._stopped():
                return

            # -- Preprocess data ------------------------------------------
            try:
                print("Preprocessing data...")
                scaled_data, scaler = stock_model.preprocess_data(data)

                # [OPT-C] look-back window lowered from 45 to 30
                time_step = 30
                X, y = stock_model.create_sequences(scaled_data, time_step)
                print(f"Sequences: X={X.shape}, y={y.shape}")
            except Exception as e:
                self._fail(f"Preprocessing failed: {e}")
                return

            if len(X) == 0:
                self._fail(f"Could not create training sequences for {self.symbol}")
                return

            self.progress = 50
            if self._stopped():
                return

            # -- Train LSTM on the first 80 % of the sequences ------------
            try:
                train_size = int(len(X) * 0.8)
                if train_size == 0:
                    self._fail("Not enough data to split for training")
                    return

                X_train, y_train = X[:train_size], y[:train_size]
                self.progress = 55
                print(f"Training LSTM with {len(X_train)} samples...")
                lstm_model = stock_model.train_lstm(
                    X_train, y_train, time_step, self.is_stop_requested)
            except Exception as e:
                self._fail(f"LSTM training failed: {e}")
                return

            if self._stopped():
                return
            self.progress = 75

            # -- Train XGBoost on residuals (failure is non-fatal) --------
            try:
                print("Calculating residuals for XGBoost...")
                lstm_preds = lstm_model.predict(X_train, verbose=0).flatten()
                residuals = y_train - lstm_preds
                xgb_model = stock_model.train_xgboost(
                    X_train.reshape(X_train.shape[0], -1),
                    residuals,
                    self.is_stop_requested,
                )
                if self.stop_requested or xgb_model is None:
                    self.status = "stopped"
                    return
            except Exception as e:
                print(f"XGBoost training error (non-fatal): {e}")
                xgb_model = None

            self.progress = 90
            if self._stopped():
                return

            # -- Generate predictions -------------------------------------
            try:
                print(f"Generating {self.days_ahead}-day predictions...")
                predictions = stock_model.predict_stock_price(
                    lstm_model, xgb_model, scaled_data, scaler,
                    time_step, self.days_ahead, self.is_stop_requested,
                )
                if self.stop_requested or predictions is None:
                    self.status = "stopped"
                    return
            except Exception as e:
                self._fail(f"Prediction generation failed: {e}")
                return

            self.progress = 95
            if self._stopped():
                return

            # -- Build future trading-day dates ---------------------------
            # One weekday per predicted value, starting after the last close.
            future_dates = self._next_business_days(last_date, len(predictions))

            if self._stopped():
                return

            # -- Assemble result payload ----------------------------------
            prediction_data = []
            for future_date, predicted in zip(future_dates, predictions):
                predicted_price = float(predicted[0])
                percent_change = (
                    (predicted_price - last_actual_close) / last_actual_close * 100
                )
                prediction_data.append({
                    "date": future_date.strftime("%Y-%m-%d"),
                    "price": round(predicted_price, 2),
                    "change": round(percent_change, 2),
                })

            self.result = {
                "symbol": self.symbol,
                "lastActualClose": {
                    "date": last_date.strftime("%Y-%m-%d"),
                    "price": round(last_actual_close, 2),
                },
                "predictions": prediction_data,
                "sentiment": self.sentiment_result,
                "tableDisplay": True,
            }
            self.progress = 100
            self.status = "completed"
            print(f"Prediction complete for {self.symbol}")

        except Exception as e:
            # Last-resort guard so the worker thread never dies silently.
            self._fail(str(e))
            print(f"Prediction task error: {e}")
            traceback.print_exc()
341
+
342
+
343
+ # =============================================================================
344
+ # REST API ROUTES
345
+ # (all routes are identical to the original – no frontend changes needed)
346
+ # =============================================================================
347
+
348
@app.route("/api/predict", methods=["POST"])
def start_prediction():
    """Validate the request body and launch a background PredictionTask.

    Expected JSON body: ``userId`` (required), ``symbol`` (required string of
    at most 10 characters) and ``daysAhead`` (optional positive integer,
    default 5).

    Returns:
        200 with ``taskId``/``status``/``message`` when the task was started,
        400 for an invalid body or parameters, 503 when the TensorFlow
        start-up check failed, 500 for unexpected errors.
    """
    try:
        # silent=True returns None for a missing or non-JSON body instead of
        # raising, so malformed requests get a 400 rather than a 500.
        data = request.get_json(silent=True)
        print(f"POST /api/predict body={data}")

        if not data or not isinstance(data, dict):
            return jsonify({"error": "Invalid or missing request body"}), 400

        user_id = data.get("userId")
        symbol = data.get("symbol")

        if not user_id or not symbol:
            return jsonify({"error": "Missing required parameters (userId or symbol)"}), 400

        if not isinstance(symbol, str) or len(symbol) > 10:
            return jsonify({"error": f"Invalid symbol format: {symbol}"}), 400

        try:
            days_ahead = int(data.get("daysAhead", 5))
        except (TypeError, ValueError):
            return jsonify({"error": "daysAhead must be an integer"}), 400
        if days_ahead < 1:
            return jsonify({"error": "daysAhead must be at least 1"}), 400

        if not tf_status:
            return jsonify({
                "error": f"Prediction service unavailable: {tf_message}",
                "tf_status": tf_message,
            }), 503

        task = PredictionTask(user_id, symbol, days_ahead)
        # Register before starting so the task is visible to the status and
        # stop routes from the moment its thread exists.
        prediction_tasks[task.task_id] = task
        task_id = task.run()

        return jsonify({
            "taskId": task_id,
            "status": "pending",
            "message": f"Prediction started for {symbol}",
        })
    except ValueError as e:
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        print(f"Critical error starting prediction: {e}")
        traceback.print_exc()
        return jsonify({"error": "Failed to start prediction", "details": str(e)}), 500
388
+
389
+
390
def _prediction_result_error(result):
    """Return a description of what is wrong with ``result``, or None if valid."""
    if not isinstance(result, dict):
        return "Invalid result format"
    if "predictions" not in result or not isinstance(result["predictions"], list):
        return "Missing prediction data"
    for pred in result["predictions"]:
        if not isinstance(pred, dict) or "date" not in pred or "price" not in pred:
            return "Malformed prediction data"
    return None


@app.route("/api/predict/status/<task_id>", methods=["GET"])
def prediction_status(task_id):
    """Report status and progress of a task, plus its result once completed.

    A completed result is sanity-checked first; a malformed one demotes the
    task to "failed". For failed tasks the stored error message is returned
    under ``error`` so clients can show why the prediction did not finish.

    Returns:
        200 with ``taskId``/``status``/``progress``/``result`` (and ``error``
        for failed tasks), 404 for an unknown task id, 500 on unexpected
        errors.
    """
    try:
        task = prediction_tasks.get(task_id)
        if not task:
            return jsonify({"error": "Task not found"}), 404

        try:
            if task.status == "completed" and task.result:
                problem = _prediction_result_error(task.result)
                if problem:
                    task.status = "failed"
                    task.result = {"error": problem}

            payload = {
                "taskId": task_id,
                "status": task.status,
                "progress": task.progress,
                "result": task.result if task.status == "completed" else None,
            }
            # Failure details live in task.result; expose just the message.
            if task.status == "failed" and isinstance(task.result, dict):
                payload["error"] = task.result.get("error")
            return jsonify(payload)
        except Exception as e:
            print(f"Error generating status response: {e}")
            return jsonify({
                "taskId": task_id,
                "status": "error",
                "progress": task.progress,
                "error": str(e),
            })
    except Exception as e:
        print(f"Critical error in prediction status: {e}")
        return jsonify({"taskId": task_id, "status": "error",
                        "error": "Server error"}), 500
434
+
435
+
436
@app.route("/api/predict/stop/<task_id>", methods=["POST"])
def stop_prediction(task_id):
    """Ask a running task to stop and report the resulting state.

    Sets the task's stop flag. If the worker thread is still alive, waits up
    to two seconds for a training loop to acknowledge the request. A task
    that had already finished ("completed" or "failed") keeps that status, so
    a late stop request does not hide an available result.

    Returns:
        200 with the task's status, progress and stop flags, or 404 for an
        unknown task id.
    """
    task = prediction_tasks.get(task_id)
    if not task:
        return jsonify({"error": "Task not found"}), 404

    task.stop_requested = True

    if task.thread and task.thread.is_alive():
        task.status = "stopping"
        print(f"Stop requested for task {task_id} ({task.symbol})")
        # Poll briefly for the acknowledgement set by is_stop_requested().
        stop_wait_start = time.time()
        while time.time() - stop_wait_start < 2:
            if task.stop_acknowledged:
                task.status = "stopped"
                break
            time.sleep(0.1)
    elif task.status not in ("completed", "failed"):
        # No live worker and no terminal outcome recorded: mark as stopped.
        task.status = "stopped"

    return jsonify({
        "taskId": task_id,
        "status": task.status,
        "symbol": task.symbol,
        "progress": task.progress,
        "stopRequested": task.stop_requested,
        "stopAcknowledged": task.stop_acknowledged,
    })
464
+
465
+
466
@app.route("/api/predict/sentiment/<symbol>", methods=["GET"])
def get_sentiment(symbol):
    """Return news-sentiment totals and a text summary for ``symbol``.

    Responds with 500 and the error text if any step of the pipeline raises.
    """
    try:
        news = stock_model.fetch_finnhub_news(symbol)
        _, totals = stock_model.analyze_sentiment(news)
        summary = stock_model.generate_sentiment_summary(totals, news, symbol)
        sentiment = {
            "totals": totals,
            "summary": summary,
            "period": 28,
        }
        return jsonify({"symbol": symbol, "sentiment": sentiment})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
484
+
485
+
486
@app.route("/api/diagnose", methods=["GET"])
def diagnose():
    """Diagnostic endpoint – checks environment, APIs and model primitives.

    Reports library versions, probes Alpha Vantage and Finnhub with one
    request each, and runs ``create_sequences`` on synthetic data. Each probe
    is isolated so that one failing check does not hide the others.

    Returns:
        200 with the collected report, or 500 if building the report fails.
    """
    # Bound each upstream call so a stalled provider cannot tie up the
    # worker serving this request indefinitely.
    upstream_timeout = 10

    try:
        env_info = {
            "python_version": sys.version,
            "tensorflow_version": tf.__version__,
            "numpy_version": np.__version__,
            "pandas_version": pd.__version__,
            "xgboost_version": xgb.__version__,
        }

        api_status = {}
        try:
            url = "https://www.alphavantage.co/query"
            params = {
                "function": "TIME_SERIES_DAILY",
                "symbol": "AAPL",
                "apikey": stock_model.ALPHAVANTAGE_API_KEY,
                "outputsize": "compact",
                "datatype": "json",
            }
            resp = requests.get(url, params=params, timeout=upstream_timeout)
            rj = resp.json()
            has_series = "Time Series (Daily)" in rj
            api_status["alpha_vantage"] = {
                "status_code": resp.status_code,
                "has_data": has_series,
                "error": None if has_series
                else (rj.get("Error Message") or rj.get("Note")),
            }
        except Exception as e:
            api_status["alpha_vantage"] = {"error": str(e)}

        try:
            headers = {"X-Finnhub-Token": stock_model.FINNHUB_API_KEY}
            resp = requests.get(
                "https://finnhub.io/api/v1/news?category=general",
                headers=headers,
                timeout=upstream_timeout)
            api_status["finnhub"] = {
                "status_code": resp.status_code,
                "has_data": len(resp.json()) > 0,
                "error": None if resp.status_code == 200 else str(resp.text),
            }
        except Exception as e:
            api_status["finnhub"] = {"error": str(e)}

        model_status = {}
        try:
            # Synthetic 100x6 matrix: six columns to match the reduced
            # feature set, column 0 replaced by a scaled ramp.
            test_data = np.random.rand(100, 6)
            test_scaler = MinMaxScaler()
            test_data[:, 0] = test_scaler.fit_transform(
                np.arange(100).reshape(-1, 1)).flatten()
            X, y = stock_model.create_sequences(test_data, time_step=30)
            model_status["sequence_creation"] = {
                "success": len(X) > 0,
                "X_shape": str(X.shape),
                "y_shape": str(y.shape),
            }
        except Exception as e:
            model_status["error"] = str(e)

        return jsonify({
            "timestamp": datetime.now().isoformat(),
            "status": "OK",
            "environment": env_info,
            "api_status": api_status,
            "model_status": model_status,
        })
    except Exception as e:
        return jsonify({"status": "ERROR", "error": str(e)}), 500
556
+
557
+
558
if __name__ == "__main__":
    # [OPT-B] The listen port comes from the PORT environment variable so the
    # same code runs unchanged on:
    #   - Render        (sets $PORT automatically, usually 10000)
    #   - Hugging Face  (expects 7860)
    #   - local dev     (falls back to 5001)
    listen_port = int(os.getenv("PORT", 5001))
    print(f"Starting StockBuddy API on port {listen_port}")
    app.run(host="0.0.0.0", port=listen_port)
model.py ADDED
@@ -0,0 +1,676 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ model.py – StockBuddy ML / NLP core
3
+ ========================================
4
+ LIGHTWEIGHT CHANGES vs original:
5
+ [OPT-1] Removed `transformers` pipeline (was downloading ~1.2 GB BART model at
6
+ runtime). Replaced with a fast NLTK-based extractive summariser.
7
+ [OPT-2] Reduced technical indicators: 11 → 6 features (kept only the ones with
8
+ highest predictive signal; fewer features = smaller tensors & faster fits).
9
+ [OPT-3] LSTM architecture: 4 layers (64/64/32/32 units) → 2 layers (32/16 units).
10
+ Still accurate enough for short-horizon forecasts, ~8× fewer parameters.
11
+ [OPT-4] time_step: 45 → 30 (shorter look-back window → smaller tensors).
12
+ [OPT-5] Epochs: 30 → 15, batch_size: 64 → 32 (free-tier CPU training time).
13
+ [OPT-6] XGBoost n_estimators: 300 → 100, max_depth 6 → 4.
14
+ [OPT-7] EarlyStopping patience reduced (5 instead of 10) so training exits fast
15
+ when the model has converged.
16
+ All public function signatures are identical to the original so app.py needs
17
+ only minimal changes.
18
+ """
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ import requests
23
+ from sklearn.preprocessing import MinMaxScaler
24
+ from tensorflow.keras.models import Sequential
25
+ from tensorflow.keras.layers import LSTM, Dense, Dropout
26
+ import xgboost as xgb
27
+ import plotly.graph_objects as go
28
+ from datetime import datetime, timedelta
29
+ import nltk
30
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
31
+ # [OPT-1] No longer importing transformers – see generate_sentiment_summary below
32
+ import time
33
+ import os
34
+
35
+ # Download VADER lexicon once (tiny file, safe on free tier)
36
+ nltk.download("vader_lexicon", quiet=True)
37
+
38
+ # =============================================================================
39
+ # API keys (read from the ALPHAVANTAGE_API_KEY and FINNHUB_API_KEY environment variables)
40
+ # =============================================================================
41
+ ALPHAVANTAGE_API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
42
+ FINNHUB_API_KEY = os.environ.get("FINNHUB_API_KEY")
43
+ # =============================================================================
44
+ # STOCK PRICE PREDICTION FUNCTIONS
45
+ # =============================================================================
46
+
47
def fetch_stock_data(symbol, outputsize="full"):
    """Download daily prices from Alpha Vantage and attach technical indicators.

    Args:
        symbol: Ticker symbol to query.
        outputsize: Passed through as Alpha Vantage's ``outputsize`` parameter
            (the callers in this project use "compact" or "full").

    Returns:
        The DataFrame produced by ``add_technical_indicators`` for the
        date-sorted price history.

    Raises:
        ValueError: If the response contains no daily time series (unknown
            symbol, throttled request, or any other refusal) or the series
            is empty.
    """
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "TIME_SERIES_DAILY",
        "symbol": symbol,
        "apikey": ALPHAVANTAGE_API_KEY,
        "outputsize": outputsize,
        "datatype": "json",
    }
    # Bound the wait so a stalled provider cannot block the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    if "Time Series (Daily)" not in data:
        if "Error Message" in data:
            raise ValueError(
                f"Symbol '{symbol}' not found. Please verify the stock symbol.")
        if "Note" in data:
            raise ValueError("API request limit reached. Please try again in a minute.")
        if "Information" in data:
            # NOTE(review): Alpha Vantage appears to report throttling and
            # API-key problems under this key as well - confirm against the
            # provider docs. The provider text is deliberately not echoed
            # because it may contain account details.
            raise ValueError(
                "Alpha Vantage rejected the request (rate limit or API key "
                "problem). Please try again later.")
        raise ValueError(
            f"Unable to fetch data for symbol '{symbol}'. Please verify the symbol.")

    ts = data["Time Series (Daily)"]

    df = pd.DataFrame.from_dict(ts, orient="index")
    if df.empty:
        raise ValueError(
            f"Unable to fetch data for symbol '{symbol}'. Please verify the symbol.")
    df.index = pd.to_datetime(df.index)
    df.sort_index(inplace=True)

    for col in ["1. open", "2. high", "3. low", "4. close", "5. volume"]:
        if col in df.columns:
            df[col] = df[col].astype(float)

    df = df.rename(columns={
        "1. open": "Open",
        "2. high": "High",
        "3. low": "Low",
        "4. close": "Close",
        "5. volume": "Volume",
    })

    # Staleness check (warning only). The hour must be read from the
    # un-normalised timestamp: normalize() resets the time to midnight.
    latest_date = df.index[-1]
    now = pd.Timestamp.now()  # server-local time; presumably close enough for a warning
    today = now.normalize()
    market_closed_days = 0
    if today.dayofweek >= 5:
        # Saturday -> 1 day back, Sunday -> 2 days back (i.e. Friday).
        market_closed_days = today.dayofweek - 4
    elif now.hour < 16:
        market_closed_days = 1
    expected_latest = today - pd.Timedelta(days=market_closed_days)
    date_diff = (expected_latest - latest_date).days
    if date_diff > 5:
        print(f"WARNING: Latest data for {symbol} is from "
              f"{latest_date.strftime('%Y-%m-%d')} ({date_diff} days old).")

    print(f"\nLatest closing price for {symbol} "
          f"(as of {latest_date.strftime('%Y-%m-%d')}): ${df['Close'].iloc[-1]:.2f}")

    # Add lightweight technical indicators
    df = add_technical_indicators(df)
    return df
106
+
107
+
108
# [OPT-2] Reduced feature set: 11 -> 6 (Close, RSI, SMA5, MACD, Upper_Band, ROC)
def add_technical_indicators(df):
    """Derive a compact set of technical indicators from daily prices.

    The input frame is not modified; all work happens on a copy.

    Args:
        df: Price DataFrame. It must contain "Close", "Open", "High" and
            "Low" columns for the indicators to be computed.

    Returns:
        A DataFrame with the columns Close, RSI, SMA5, MACD, Upper_Band and
        ROC, restricted to rows where every column is defined (the 20-period
        band consumes the first 19 rows). If a required column is missing or
        the computation fails, only the "Close" column is returned (or the
        input itself when it has no "Close" column).
    """
    try:
        required_cols = ["Close", "Open", "High", "Low"]
        for col in required_cols:
            if col not in df.columns:
                print(f"Warning: {col} missing – falling back to Close-only.")
                return df[["Close"]]

        # Work on a copy so the caller's frame does not gain extra columns.
        frame = df.copy()
        close = frame["Close"]

        # RSI (14-period) from simple rolling means of gains and losses
        delta = close.diff()
        gain = delta.where(delta > 0, 0).rolling(14).mean()
        loss = -delta.where(delta < 0, 0).rolling(14).mean()
        rs = gain / loss
        frame["RSI"] = 100 - (100 / (1 + rs))

        # Short moving average
        frame["SMA5"] = close.rolling(5).mean()

        # MACD line only (signal line dropped to save a feature)
        ema12 = close.ewm(span=12).mean()
        ema26 = close.ewm(span=26).mean()
        frame["MACD"] = ema12 - ema26

        # Upper Bollinger Band as a proxy for volatility
        ma20 = close.rolling(20).mean()
        frame["Upper_Band"] = ma20 + (close.rolling(20).std() * 2)

        # Rate-of-change (5-period), in percent
        frame["ROC"] = close.pct_change(periods=5) * 100

        # Drop the warm-up rows where rolling windows are still undefined.
        frame = frame.dropna()

        # [OPT-2] Only 6 features returned
        features = ["Close", "RSI", "SMA5", "MACD", "Upper_Band", "ROC"]
        return frame[features]

    except Exception as e:
        print(f"Error adding technical indicators: {e}")
        if "Close" in df.columns:
            return df[["Close"]]
        return df
151
+
152
+
153
def preprocess_data(data):
    """Min-max scale every column of ``data`` to the range [0, 1].

    Each column gets its own scaler. Only the scaler fitted on "Close" is
    handed back, for converting scaled prices back to real ones.

    Returns:
        ``(scaled, close_scaler)`` where ``scaled`` is a float array of shape
        ``(len(data), number_of_columns)`` in the column order of ``data``.
    """
    column_names = data.columns
    fitted = {}
    scaled = np.zeros((len(data), len(column_names)))

    for col_idx, name in enumerate(column_names):
        scaler = MinMaxScaler(feature_range=(0, 1))
        column = data[name].values.reshape(-1, 1)
        scaled[:, col_idx] = scaler.fit_transform(column).flatten()
        fitted[name] = scaler

    return scaled, fitted["Close"]
169
+
170
+
171
def create_sequences(data, time_step=30):
    """Create (X, y) sliding-window sequences for LSTM training.

    Args:
        data: 2-D array indexed as ``data[row, feature]``; column 0 is the
            prediction target.
        time_step: Number of consecutive rows in each input window.

    Returns:
        ``(X, y)`` as NumPy arrays. ``X[k]`` holds rows ``k .. k+time_step-1``
        with all features and ``y[k]`` is column 0 of row ``k+time_step``.
        Every row that has a full window before it is used as a target,
        including the final row. Both arrays are empty when ``data`` has no
        more than ``time_step`` rows.
    """
    X, y = [], []
    # The last valid start is len(data) - time_step - 1, whose target is the
    # final row, so the exclusive range bound is len(data) - time_step.
    for i in range(len(data) - time_step):
        X.append(data[i : i + time_step, :])  # all features
        y.append(data[i + time_step, 0])      # target column only
    return np.array(X), np.array(y)
178
+
179
+
180
# [OPT-3] Two LSTM layers (32 / 16 units) instead of four (64/64/32/32)
# [OPT-4] time_step default lowered to 30
# [OPT-5] epochs 30 -> 15, batch_size 64 -> 32, EarlyStopping patience 10 -> 5
def train_lstm(X_train, y_train, time_step=30, stop_requested_callback=None):
    """Fit the lightweight LSTM regressor and return the trained model.

    Architecture: LSTM(32) -> Dropout(0.2) -> LSTM(16) -> Dropout(0.2) ->
    Dense(1), compiled with Adam (learning rate 0.001) and mean squared error.

    Args:
        X_train: Input windows; indexed as (sample, step, feature) and
            reshaped to ``(samples, time_step, features)`` before fitting.
        y_train: One target value per sample.
        time_step: Window length the network expects.
        stop_requested_callback: Optional zero-argument callable polled at
            the end of every epoch; a truthy return value halts training.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, Callback

    n_features = X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], time_step, n_features)

    class StopCallback(Callback):
        """Ends training after the current epoch once a stop is requested."""

        def on_epoch_end(self, epoch, logs=None):
            if stop_requested_callback and stop_requested_callback():
                self.model.stop_training = True
                print("Training stopped early by user request.")

    # [OPT-3] Lightweight architecture
    model = Sequential()
    model.add(LSTM(32, return_sequences=True,
                   input_shape=(time_step, n_features)))
    model.add(Dropout(0.2))
    model.add(LSTM(16, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss="mean_squared_error")

    # [OPT-7] Early-stopping patience is 5 so converged runs exit quickly.
    callbacks = [
        ReduceLROnPlateau(monitor="val_loss", factor=0.3,
                          patience=3, min_lr=0.0001, verbose=0),
        EarlyStopping(monitor="val_loss", patience=5,
                      restore_best_weights=True, verbose=1),
    ]
    if stop_requested_callback:
        callbacks.append(StopCallback())

    print(f"Training lightweight LSTM: {X_train.shape[0]} samples, "
          f"{n_features} features, time_step={time_step}")

    # [OPT-5] 15 epochs with batches of 32; 20 % of the samples are held
    # out for the val_loss monitors above.
    model.fit(
        X_train, y_train,
        epochs=15,
        batch_size=32,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1,
    )
    return model
239
+
240
+
241
# [OPT-6] XGBoost: n_estimators 300 -> 100, max_depth 6 -> 4
def train_xgboost(X_train, residuals, stop_requested_callback=None):
    """Train a lean XGBoost regressor on the LSTM residuals.

    Args:
        X_train: 2-D feature matrix, one row per sample.
        residuals: Target values (actual minus LSTM prediction), one per row.
        stop_requested_callback: Optional zero-argument callable. It is
            checked once before training and again after every boosting
            round; a truthy return value cancels or halts training.

    Returns:
        The fitted ``xgb.XGBRegressor``, or None when a stop had already been
        requested before training started.
    """
    if stop_requested_callback and stop_requested_callback():
        print("XGBoost training cancelled due to stop request.")
        return None

    # [OPT-6] Reduced complexity for free-tier memory / speed
    params = {
        "objective": "reg:squarederror",
        "n_estimators": 100,   # was 300
        "learning_rate": 0.1,
        "max_depth": 4,        # was 6
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "min_child_weight": 3,
        "gamma": 0.1,
        "reg_alpha": 0.1,
        "reg_lambda": 1.0,
        "tree_method": "hist",
    }

    if stop_requested_callback:
        class StopCallbackHandler(xgb.callback.TrainingCallback):
            """Halts boosting as soon as the stop callback reports True."""

            def after_iteration(self, model, epoch, evals_log):
                if stop_requested_callback():
                    print("XGBoost training stopped by user request.")
                    return True
                return False

        xgb_model = xgb.XGBRegressor(**params)
        xgb_model.set_params(callbacks=[StopCallbackHandler()])
        xgb_model.fit(X_train, residuals)
    else:
        # eval_metric and early_stopping_rounds are configured on the
        # estimator, like callbacks above: XGBoost 2.x no longer accepts them
        # as fit() arguments. Early stopping here monitors the training set
        # itself, as the original code did.
        xgb_model = xgb.XGBRegressor(
            **params,
            eval_metric=["rmse"],
            early_stopping_rounds=10,  # was 20 [OPT-6]
        )
        xgb_model.fit(
            X_train, residuals,
            eval_set=[(X_train, residuals)],
            verbose=False,
        )

    return xgb_model
285
+
286
+
287
def _combined_scaled_prediction(lstm_model, xgb_model, window, time_step, n_features):
    """Return the LSTM output plus the XGBoost correction for one input window.

    Falls back to the bare LSTM output when no XGBoost model is supplied or
    when its ``predict`` call fails (the failure is logged, not raised).
    """
    window_arr = np.array(window[-time_step:])
    lstm_pred = lstm_model.predict(
        window_arr.reshape(1, time_step, n_features), verbose=0)[0][0]
    if xgb_model is None:
        return lstm_pred
    try:
        return lstm_pred + xgb_model.predict(window_arr.reshape(1, -1))[0]
    except Exception as e:
        print(f"XGBoost predict error: {e}")
        return lstm_pred


def predict_stock_price(
    lstm_model, xgb_model, data, scaler,
    time_step=30, days_ahead=5, stop_requested_callback=None
):
    """Forecast ``days_ahead`` prices with the LSTM + XGBoost pair.

    Each step feeds the last ``time_step`` rows to both models, adds a small
    random fluctuation, and appends the result as column 0 of a new row for
    the next step. All forecasts are finally multiplied by a calibration
    factor (last actual price / model output for the current window).

    Args:
        lstm_model: Object with ``predict(x, verbose=0)`` returning a 2-D array.
        xgb_model: Object with ``predict(x)`` returning a 1-D array, or ``None``.
        data: 2-D scaled array; column 0 is treated as the price column.
        scaler: Object with ``transform`` / ``inverse_transform`` that accept
            single-column arrays.
        time_step: Number of trailing rows fed to the models.
        days_ahead: Number of forecast steps.
        stop_requested_callback: Optional zero-argument callable; a truthy
            return value aborts the forecast.

    Returns:
        Array of shape ``(n, 1)`` with calibrated prices in original units, or
        ``None`` when no prediction was produced.

    Raises:
        ValueError: If ``data`` has fewer than ``time_step`` rows.
    """
    if stop_requested_callback and stop_requested_callback():
        return None

    # Fail with a clear message instead of an opaque reshape error later on.
    if len(data) < time_step:
        raise ValueError(
            f"Need at least {time_step} rows of data to predict, got {len(data)}.")

    n_features = data.shape[1]
    temp_input = data[-time_step:].tolist()

    last_actual_close = scaler.inverse_transform(
        np.array([[data[-1, 0]]]))[0][0]
    print(f"Base price: ${last_actual_close:.2f}")

    original_prices = scaler.inverse_transform(data[:, 0].reshape(-1, 1))
    daily_returns = np.diff(original_prices, axis=0) / original_prices[:-1]
    volatility = np.std(daily_returns)
    if not np.isfinite(volatility):
        # Zero prices or a single-row history give NaN/inf; that would turn
        # every forecast into NaN, so fall back to "no fluctuation".
        volatility = 0.0

    # Calibrate model against actual last price
    combined_cal = _combined_scaled_prediction(
        lstm_model, xgb_model, temp_input, time_step, n_features)
    model_current = scaler.inverse_transform(
        np.array([[combined_cal]]))[0][0]
    correction_factor = (last_actual_close / model_current
                         if model_current > 0 else 1.0)
    print(f"Calibration: model=${model_current:.2f}, "
          f"actual=${last_actual_close:.2f}, factor={correction_factor:.4f}")

    predictions = []
    prev_day_pred = combined_cal

    for day in range(days_ahead):
        if stop_requested_callback and stop_requested_callback():
            print(f"Prediction stopped at day {day}/{days_ahead}")
            break

        combined_pred = _combined_scaled_prediction(
            lstm_model, xgb_model, temp_input, time_step, n_features)

        prev_unscaled = scaler.inverse_transform(
            np.array([[prev_day_pred]]))[0][0]
        current_unscaled = scaler.inverse_transform(
            np.array([[combined_pred]]))[0][0]
        price_change = current_unscaled - prev_unscaled
        trend_direction = 1 if price_change >= 0 else -1

        # Noise grows 10 % per forecast day but is capped at 1.5 %.
        day_volatility = volatility * (1 + day * 0.1)
        adjusted_volatility = min(day_volatility, 0.015)
        random_factor = np.random.normal(0, adjusted_volatility)

        # Mostly nudge with the trend, occasionally against it.
        if trend_direction > 0:
            flux_factor = (abs(random_factor) * trend_direction * 0.15
                           if np.random.random() < 0.7
                           else -abs(random_factor) * trend_direction * 0.3)
        else:
            flux_factor = (abs(random_factor) * trend_direction * 0.25
                           if np.random.random() < 0.8
                           else -abs(random_factor) * trend_direction * 0.1)

        flux_amount = prev_unscaled * flux_factor
        adjusted_unscaled = current_unscaled + flux_amount
        adjusted_pred = scaler.transform(
            np.array([[adjusted_unscaled]]))[0][0]

        # Roll the window: copy the latest row, overwrite only column 0.
        next_row = temp_input[-1].copy()
        next_row[0] = adjusted_pred
        prev_day_pred = adjusted_pred

        predictions.append(adjusted_pred)
        temp_input.append(next_row)

    if not predictions:
        return None

    final_predictions = scaler.inverse_transform(
        np.array(predictions).reshape(-1, 1))
    corrected_predictions = final_predictions * correction_factor

    print("\nPredictions (original → corrected):")
    for i, (raw, corrected) in enumerate(
            zip(final_predictions, corrected_predictions), start=1):
        print(f"  Day {i}: ${raw[0]:.2f} "
              f"→ ${corrected[0]:.2f}")

    return corrected_predictions
387
+
388
+
389
def plot_prices(data, predictions, symbol, days_ahead):
    """Plot the last three months of prices plus the forecast (standalone main()).

    Args:
        data: Date-indexed frame; the ``"Close"`` column is used when present,
            otherwise the first column.
        predictions: Array of forecast prices, one row per forecast day.
        symbol: Ticker shown in the chart title.
        days_ahead: Number of forecast days to place on the time axis.
    """
    fig = go.Figure()
    last_date = data.index[-1]
    three_months_ago = last_date - pd.DateOffset(months=3)
    actual_data = data.loc[three_months_ago:]
    close_prices = (actual_data["Close"]
                    if isinstance(actual_data, pd.DataFrame) and "Close" in actual_data.columns
                    else actual_data.iloc[:, 0])

    # Next `days_ahead` weekdays after the last observation. The previous
    # per-offset weekend roll mapped Sat/Sun/Mon offsets all onto Monday, and
    # the following de-duplication then silently dropped forecast days.
    future_dates = []
    next_date = last_date
    while len(future_dates) < days_ahead:
        next_date = next_date + timedelta(days=1)
        if next_date.weekday() < 5:
            future_dates.append(next_date)
    prediction_data = predictions[: len(future_dates)].flatten()
    # Fewer predictions than requested days (e.g. stopped early): align x to y.
    future_dates = future_dates[: len(prediction_data)]

    fig.add_trace(go.Scatter(
        x=future_dates, y=prediction_data,
        mode="lines+markers", name="Predicted Price",
        line=dict(color="orange", width=3)))
    fig.add_trace(go.Scatter(
        x=close_prices.index, y=close_prices.values,
        mode="lines", name="Actual Price",
        line=dict(color="blue", width=2)))
    fig.add_trace(go.Scatter(
        x=[close_prices.index[-1]], y=[close_prices.values[-1]],
        mode="markers", name="Latest Price",
        marker=dict(color="green", size=10, symbol="circle")))

    fig.update_layout(
        title=f"Stock Price Prediction for {symbol}",
        xaxis_title="Date", yaxis_title="Price (USD)",
        template="plotly_white", hovermode="x unified")
    fig.show()
426
+
427
+
428
+ # =============================================================================
429
+ # NEWS SENTIMENT ANALYSIS FUNCTIONS
430
+ # =============================================================================
431
+
432
def fetch_finnhub_news(company_symbol):
    """Fetch the last 28 days of Finnhub company-news headlines.

    Args:
        company_symbol: Ticker passed as the ``symbol`` query parameter.

    Returns:
        List of headline strings; an empty list on any network, HTTP or
        parsing problem (the problem is printed, never raised).
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=28)
    # Let requests build the query string so the symbol is URL-encoded.
    query = {
        "symbol": company_symbol,
        "from": start_date.strftime("%Y-%m-%d"),
        "to": end_date.strftime("%Y-%m-%d"),
        "token": FINNHUB_API_KEY,
    }
    try:
        # Without a timeout a stalled connection would block the worker forever.
        response = requests.get(
            "https://finnhub.io/api/v1/company-news", params=query, timeout=10)
        if response.status_code != 200:
            print(f"Error fetching news: {response.status_code}")
            return []
        articles = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error parsing news response: {e}")
        return []

    # Error payloads may arrive as a JSON object rather than a list of articles.
    if not isinstance(articles, list):
        print(f"Error parsing news response: unexpected payload type "
              f"{type(articles).__name__}")
        return []
    return [a["headline"] for a in articles
            if isinstance(a, dict) and "headline" in a]
452
+
453
+
454
def analyze_sentiment(headlines):
    """Score each headline with VADER and tally positive/negative/neutral counts.

    Non-string and empty entries are skipped. A compound score above 0.05
    counts as positive, below -0.05 as negative, anything else as neutral.

    Returns:
        ``(results, totals)`` where ``results`` is a list of
        ``{"headline", "sentiment"}`` dicts and ``totals`` maps each label to
        its count. On any error both are returned empty/zeroed.
    """
    tallies = {"positive": 0, "negative": 0, "neutral": 0}
    scored_headlines = []
    try:
        analyzer = SentimentIntensityAnalyzer()
        for text in headlines:
            if not (text and isinstance(text, str)):
                continue
            scores = analyzer.polarity_scores(text)
            scored_headlines.append({"headline": text, "sentiment": scores})
            compound = scores["compound"]
            if compound > 0.05:
                label = "positive"
            elif compound < -0.05:
                label = "negative"
            else:
                label = "neutral"
            tallies[label] += 1
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return [], {"positive": 0, "negative": 0, "neutral": 0}
    return scored_headlines, tallies
476
+
477
+
478
def plot_sentiment_pie(sentiment_totals, company_symbol):
    """Show a pie chart of positive / negative / neutral headline counts.

    Args:
        sentiment_totals: Mapping with ``"positive"``, ``"negative"`` and
            ``"neutral"`` counts.
        company_symbol: Ticker shown in the chart title.
    """
    counts = [sentiment_totals[key]
              for key in ("positive", "negative", "neutral")]
    slice_style = dict(colors=["#2ecc71", "#e74c3c", "#95a5a6"],
                       line=dict(color="white", width=0))
    pie = go.Pie(
        labels=["Positive", "Negative", "Neutral"],
        values=counts,
        marker=slice_style,
        textinfo="percent+label", textfont_size=20)

    fig = go.Figure(data=[pie])
    fig.update_layout(
        title=f"Sentiment Distribution for {company_symbol} (Last 28 Days)",
        showlegend=True)
    fig.show()
491
+
492
+
493
+ # =============================================================================
494
+ # AI SUMMARY FUNCTIONS [OPT-1] Transformers removed
495
+ # =============================================================================
496
+
497
+ def _extractive_summary(headlines, n=3):
498
+ """
499
+ Lightweight extractive summariser – replaces the BART transformer pipeline.
500
+ [OPT-1] Picks the top-n headlines by absolute VADER compound score so the
501
+ most opinionated sentences surface first. No heavy model download needed.
502
+ """
503
+ if not headlines:
504
+ return ""
505
+ try:
506
+ sid = SentimentIntensityAnalyzer()
507
+ scored = [(h, abs(sid.polarity_scores(h)["compound"]))
508
+ for h in headlines if h and isinstance(h, str)]
509
+ scored.sort(key=lambda x: x[1], reverse=True)
510
+ top = [h for h, _ in scored[:n]]
511
+ return " | ".join(top)
512
+ except Exception as e:
513
+ print(f"Extractive summary error: {e}")
514
+ return headlines[0] if headlines else ""
515
+
516
+
517
def generate_sentiment_summary(sentiment_totals, headlines, company_symbol):
    """
    Build a one-paragraph, human-readable sentiment summary.
    [OPT-1] Key headlines come from the lightweight ``_extractive_summary``
    helper rather than a Transformers pipeline. Any failure yields a fixed
    "Unable to generate ..." message instead of raising.
    """
    try:
        # max(1, ...) avoids dividing by zero when nothing was counted.
        counted = max(1, sum(sentiment_totals.values()))
        n_pos = sentiment_totals["positive"]
        n_neg = sentiment_totals["negative"]
        pos_share = n_pos / counted * 100
        neg_share = n_neg / counted * 100

        pieces = [
            f"Over the past 28 days, {len(headlines)} news articles about ",
            f"{company_symbol} were analysed. ",
            f"{n_pos} positive ({pos_share:.0f}%), ",
            f"{n_neg} negative ({neg_share:.0f}%), ",
            f"and {sentiment_totals['neutral']} neutral articles found.",
        ]
        text = "".join(pieces)

        if not headlines:
            return text

        highlights = _extractive_summary(headlines, n=2)
        if highlights:
            text += f" Key headlines: {highlights}"
        return text
    except Exception as e:
        print(f"Error in generate_sentiment_summary: {e}")
        return f"Unable to generate sentiment summary for {company_symbol}."
545
+
546
+
547
def generate_prediction_summary(pred_df, company_symbol):
    """Return a one-sentence summary quoting the first and last predicted price.

    Args:
        pred_df: Frame with a ``"Predicted Price"`` column (at least one row).
        company_symbol: Ticker inserted into the sentence.
    """
    prices = pred_df["Predicted Price"]
    start_price, end_price = prices.iloc[0], prices.iloc[-1]
    sentence = (
        f"The predicted stock prices for {company_symbol} range from "
        f"${start_price:.2f} to ${end_price:.2f} over the forecast period."
    )
    return sentence
554
+
555
+
556
def display_price_table(data, predictions, symbol, days_ahead):
    """Print prediction results as a table (used in standalone main()).

    Args:
        data: Date-indexed frame; ``"Close"`` is used when present, otherwise
            the first column.
        predictions: Array of forecast prices, one row per forecast day.
        symbol: Ticker shown in the table heading.
        days_ahead: Number of forecast days to list.

    Returns:
        DataFrame with ``"Date"`` (``YYYY-MM-DD`` strings) and
        ``"Predicted Price"`` columns, one row per forecast day.
    """
    if isinstance(data, pd.DataFrame) and "Close" in data.columns:
        last_price = data["Close"].iloc[-1]
    else:
        last_price = data.iloc[-1, 0]
    last_date = data.index[-1]

    # Next `days_ahead` weekdays after the last observation. The previous
    # per-offset weekend roll mapped Sat/Sun/Mon offsets all onto Monday, and
    # the following de-duplication then silently dropped forecast days.
    future_dates = []
    next_date = last_date
    while len(future_dates) < days_ahead:
        next_date = next_date + timedelta(days=1)
        if next_date.weekday() < 5:
            future_dates.append(next_date)
    prediction_data = predictions[: len(future_dates)].flatten()
    # Fewer predictions than requested days: keep the two columns aligned.
    future_dates = future_dates[: len(prediction_data)]

    last_price_row = pd.DataFrame({
        "Date": [last_date.strftime("%Y-%m-%d")],
        "Price": [f"${last_price:.2f}"],
        "Change": ["0.00%"],
        "Note": ["Actual last closing price"],
    })
    pred_rows = []
    for i, (date, price) in enumerate(zip(future_dates, prediction_data)):
        change_pct = ((price - last_price) / last_price) * 100
        pred_rows.append({
            "Date": date.strftime("%Y-%m-%d"),
            "Price": f"${price:.2f}",
            "Change": f"{change_pct:.2f}%",
            "Note": f"Day {i+1} prediction",
        })

    combined_df = pd.concat([last_price_row, pd.DataFrame(pred_rows)],
                            ignore_index=True)
    print(f"\n{symbol} Stock Price Prediction Table:")
    print("=" * 80)
    print(combined_df.to_string(index=False))
    print("=" * 80)

    return pd.DataFrame({
        "Date": [d.strftime("%Y-%m-%d") for d in future_dates],
        "Predicted Price": prediction_data,
    })
601
+
602
+
603
+ # =============================================================================
604
+ # STANDALONE MAIN
605
+ # =============================================================================
606
+
607
def main():
    """Interactive command-line run: fetch, train, forecast, then show sentiment.

    Prompts for a ticker and a forecast horizon, trains the LSTM and the
    XGBoost residual model on the first 80 % of the sequences, prints the
    forecast table and summary, and finally reports news sentiment.
    """
    symbol = input("Enter the stock symbol (e.g., AAPL): ").strip().upper()
    try:
        days_ahead = int(input("Number of future days to predict (e.g., 5): "))
    except ValueError:
        print("Invalid input. Please enter an integer.")
        return
    if days_ahead < 1:
        # A non-positive horizon yields no predictions and would crash below.
        print("Invalid input. Number of days must be at least 1.")
        return

    print(f"\nFetching historical data for {symbol}...")
    data = fetch_stock_data(symbol, outputsize="full")
    if data is None or len(data) < 50:
        print(f"Not enough data points for {symbol}.")
        return

    print("Preprocessing data...")
    scaled_data, scaler = preprocess_data(data)

    # [OPT-4] time_step 60 → 30 in standalone mode too
    time_step = 30
    X, y = create_sequences(scaled_data, time_step)
    if len(X) == 0:
        print("Could not create sequences.")
        return

    train_size = int(len(X) * 0.8)
    X_train, y_train = X[:train_size], y[:train_size]

    print("Training LSTM model...")
    lstm_model = train_lstm(X_train, y_train, time_step)

    lstm_train_preds = lstm_model.predict(X_train, verbose=0).flatten()
    residuals = y_train - lstm_train_preds

    print("Training XGBoost model...")
    xgb_model = train_xgboost(X_train.reshape(X_train.shape[0], -1), residuals)

    print(f"Predicting {days_ahead} days ahead...")
    predictions = predict_stock_price(
        lstm_model, xgb_model, scaled_data, scaler, time_step, days_ahead)
    if predictions is None:
        print("No predictions were produced.")
        return

    # display_price_table already returns the Date / Predicted Price frame, so
    # reuse it instead of rebuilding the forecast dates a second time here.
    pred_df = display_price_table(data, predictions, symbol, days_ahead)

    print("\nPrediction summary:")
    print(generate_prediction_summary(pred_df, symbol))

    print("\nFetching news for sentiment analysis...")
    headlines = fetch_finnhub_news(symbol)
    if headlines:
        sentiment_results, sentiment_totals = analyze_sentiment(headlines)
        plot_sentiment_pie(sentiment_totals, symbol)
        print(generate_sentiment_summary(sentiment_totals, headlines, symbol))
    else:
        print("No headlines found.")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─────────────────────────────────────────────────────────────────────────────
2
+ # StockBuddy – lightweight requirements for free-tier deployment
3
+ # (Render / Hugging Face Spaces)
4
+ #
5
+ # KEY CHANGES vs original:
6
+ # [REQ-1] tensorflow → tensorflow-cpu
7
+ # CPU-only build: no CUDA libs, ~200 MB smaller install, same Python API.
8
+ # Free-tier hosts have no GPU anyway – the GPU variant just wastes space.
9
+ #
10
+ # [REQ-2] transformers==4.33.2 REMOVED
11
+ # The summarization pipeline downloaded a ~1.2 GB BART model at
12
+ # first run, instantly blowing past free-tier disk/RAM limits.
13
+ # Replaced by a lightweight NLTK-based extractive summariser in model.py.
14
+ #
15
+ # [REQ-3] tf-keras==2.15.0 REMOVED
16
+ # Caused dependency conflicts on Python 3.10+. tensorflow-cpu already
17
+ # bundles the correct Keras version.
18
+ #
19
+ # [REQ-4] torch NOT added – was never in the original requirements but was an
20
+ # implicit dependency of some transformers builds. No longer needed.
21
+ # ─────────────────────────────────────────────────────────────────────────────
22
+
23
+ flask==2.3.3
24
+ flask-cors==5.0.1
25
+ numpy==1.24.3
26
+ pandas==2.0.3
27
+ requests==2.31.0
28
+ scikit-learn==1.3.0
29
+
30
+ # [REQ-1] CPU-only TensorFlow – same API, ~200 MB smaller than the GPU build
31
+ tensorflow-cpu==2.13.0
32
+
33
+ xgboost==1.7.6
34
+ nltk==3.8.1
35
+
36
+ # [REQ-2] transformers removed – no heavy model download at startup
37
+ # transformers==4.33.2 ← DELETED
38
+
39
+ plotly==5.17.0
40
+ gunicorn==21.2.0
41
+
42
+ # [REQ-3] tf-keras removed – causes conflicts and is not needed with tf-cpu
43
+ # tf-keras==2.15.0 ← DELETED