Spaces:

danielthatu12
/

Stockbuddy_Prediction_Model

Running

App Files Community

danielthatu12 committed on Apr 18

Commit

a6ba8a1

verified ·

1 Parent(s): 148584d

Upload 3 files

Browse files

Files changed (3) hide show

app.py +552 -564
model.py +22 -58
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,564 +1,552 @@
-"""
-app.py  –  StockBuddy Flask API
-=================================
-LIGHTWEIGHT CHANGES vs original:
-  [OPT-A]  Removed the startup TF validation model (was creating & running a test
-           LSTM on every cold start – wastes ~10 s and ~100 MB RAM on free tier).
-           Replaced with a simple tf.constant() smoke-test.
-  [OPT-B]  PORT is now read from the PORT environment variable so the server
-           works on Render (sets $PORT automatically) and Hugging Face Spaces
-           (expects port 7860) without code changes.
-  [OPT-C]  time_step updated to 30 throughout (was 45) to match the lighter model.
-  All REST API routes are unchanged from the original.
-"""
-from flask import Flask, request, jsonify
-from flask_cors import CORS
-import numpy as np
-import pandas as pd
-import os
-import threading
-import time
-from datetime import datetime, timedelta
-import json
-import model as stock_model
-import sys
-import requests
-import traceback
-from sklearn.preprocessing import MinMaxScaler
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import LSTM, Dense, Dropout
-from tensorflow.keras.callbacks import Callback
-import tensorflow as tf
-import xgboost as xgb
-app = Flask(__name__)
-CORS(app)
-@app.route("/", methods=["GET"])
-def home():
-    return jsonify({"status": "running", "message": "StockBuddy API is live!"})
-# [OPT-A] Lightweight TF smoke-test instead of building & running a full LSTM
-def validate_tensorflow():
-    """Quick TensorFlow sanity-check (no model created, no GPU required)."""
-    try:
-        print("TensorFlow version:", tf.__version__)
-        # A tiny constant operation is enough to confirm TF is importable and
-        # the runtime works.  Full model creation is deferred to the first
-        # prediction request so the cold-start is fast on free-tier hosts.
-        _ = tf.constant([1.0, 2.0, 3.0])
-        gpus = tf.config.list_physical_devices("GPU")
-        if gpus:
-            msg = f"GPU available ({len(gpus)} device(s)) – running in GPU mode."
-        else:
-            msg = "No GPU detected – running in CPU mode (expected on free tier)."
-        print(f"TensorFlow OK: {msg}")
-        return True, msg
-    except Exception as e:
-        print(f"TensorFlow validation failed: {e}")
-        return False, f"TensorFlow error: {e}"
-# Run smoke-test at startup
-tf_status, tf_message = validate_tensorflow()
-if not tf_status:
-    print(f"WARNING: {tf_message}")
-else:
-    print(f"TensorFlow validation: {tf_message}")
-# Dictionary to store running prediction tasks
-prediction_tasks = {}
-class PredictionTask:
-    def __init__(self, user_id, symbol, days_ahead):
-        self.user_id       = user_id
-        self.symbol        = symbol
-        self.days_ahead    = days_ahead
-        self.progress      = 0
-        self.status        = "pending"
-        self.result        = None
-        self.sentiment_result = None
-        self.thread        = None
-        self.stop_requested    = False
-        self.stop_acknowledged = False
-        # Unique task ID: millisecond timestamp + random hex suffix
-        timestamp         = int(time.time() * 1000)
-        random_suffix     = os.urandom(4).hex()
-        self.task_id      = f"{user_id}_{symbol}_{timestamp}_{random_suffix}"
-    def run(self):
-        self.thread = threading.Thread(target=self._run_prediction)
-        self.thread.daemon = True
-        self.thread.start()
-        return self.task_id
-    def is_stop_requested(self):
-        """Callback for model training loops to poll stop flag."""
-        if self.stop_requested and not self.stop_acknowledged:
-            self.stop_acknowledged = True
-            self.status = "stopped"
-            return True
-        return self.stop_requested
-    def _run_prediction(self):
-        try:
-            print(f"Starting prediction for {self.symbol} (task: {self.task_id})")
-            self.status   = "running"
-            self.progress = 10
-            # ── Fetch historical data ────────────────────────────────────────
-            print(f"Fetching historical data for {self.symbol}...")
-            try:
-                data = stock_model.fetch_stock_data(self.symbol, outputsize="compact")
-                print(f"Fetched {len(data)} rows for {self.symbol}")
-            except Exception as e:
-                print(f"Data fetch error: {e}")
-                self.status = "failed"
-                self.result = {"error": f"Could not fetch data for {self.symbol}: {e}"}
-                return
-            if data is None:
-                self.status = "failed"
-                self.result = {"error": f"Could not fetch data for {self.symbol}"}
-                return
-            if self.stop_requested:
-                self.status = "stopped"; return
-            if len(data) < 60:
-                self.status = "failed"
-                self.result = {"error": f"Insufficient data for {self.symbol} "
-                                         f"(got {len(data)}, need ≥60)"}
-                return
-            # ── Extract last actual close ────────────────────────────────────
-            try:
-                if isinstance(data, pd.DataFrame) and "Close" in data.columns:
-                    last_actual_close = float(data["Close"].iloc[-1])
-                    last_date         = data.index[-1]
-                else:
-                    last_actual_close = float(data.iloc[-1, 0])
-                    last_date         = data.index[-1]
-                print(f"Latest close for {self.symbol}: "
-                      f"${last_actual_close:.2f} on {last_date.strftime('%Y-%m-%d')}")
-            except Exception as e:
-                self.status = "failed"
-                self.result = {"error": f"Error reading price data: {e}"}
-                return
-            self.progress = 20
-            if self.stop_requested:
-                self.status = "stopped"; return
-            # ── Sentiment analysis ───────────────────────────────────────────
-            try:
-                print(f"Fetching news for {self.symbol}...")
-                headlines = stock_model.fetch_finnhub_news(self.symbol)
-                print(f"Got {len(headlines)} headlines")
-                self.progress = 30
-                if self.stop_requested:
-                    self.status = "stopped"; return
-                sentiment_results, sentiment_totals = \
-                    stock_model.analyze_sentiment(headlines)
-                sentiment_summary = stock_model.generate_sentiment_summary(
-                    sentiment_totals, headlines, self.symbol)
-                self.sentiment_result = {
-                    "totals":  sentiment_totals,
-                    "summary": sentiment_summary,
-                }
-            except Exception as e:
-                print(f"Sentiment error (non-fatal): {e}")
-                self.sentiment_result = {
-                    "totals":  {"positive": 0, "negative": 0, "neutral": 0},
-                    "summary": f"Unable to analyse sentiment: {e}",
-                }
-            self.progress = 40
-            if self.stop_requested:
-                self.status = "stopped"; return
-            # ── Preprocess data ──────────────────────────────────────────────
-            try:
-                print("Preprocessing data...")
-                scaled_data, scaler = stock_model.preprocess_data(data)
-                # [OPT-C] time_step 45 → 30
-                time_step = 30
-                X, y      = stock_model.create_sequences(scaled_data, time_step)
-                print(f"Sequences: X={X.shape}, y={y.shape}")
-            except Exception as e:
-                self.status = "failed"
-                self.result = {"error": f"Preprocessing failed: {e}"}
-                return
-            if len(X) == 0:
-                self.status = "failed"
-                self.result = {"error": f"Could not create training sequences for {self.symbol}"}
-                return
-            self.progress = 50
-            if self.stop_requested:
-                self.status = "stopped"; return
-            # ── Train LSTM ───────────────────────────────────────────────────
-            try:
-                train_size       = int(len(X) * 0.8)
-                if train_size == 0:
-                    self.status = "failed"
-                    self.result = {"error": "Not enough data to split for training"}
-                    return
-                X_train, y_train = X[:train_size], y[:train_size]
-                self.progress    = 55
-                print(f"Training LSTM with {len(X_train)} samples...")
-                lstm_model = stock_model.train_lstm(
-                    X_train, y_train, time_step, self.is_stop_requested)
-            except Exception as e:
-                self.status = "failed"
-                self.result = {"error": f"LSTM training failed: {e}"}
-                return
-            if self.stop_requested:
-                self.status = "stopped"; return
-            self.progress = 75
-            if self.stop_requested:
-                self.status = "stopped"; return
-            # ── Train XGBoost on residuals ───────────────────────────────────
-            try:
-                print("Calculating residuals for XGBoost...")
-                lstm_preds = lstm_model.predict(X_train, verbose=0).flatten()
-                residuals  = y_train - lstm_preds
-                xgb_model  = stock_model.train_xgboost(
-                    X_train.reshape(X_train.shape[0], -1),
-                    residuals,
-                    self.is_stop_requested,
-                )
-                if self.stop_requested or xgb_model is None:
-                    self.status = "stopped"; return
-            except Exception as e:
-                print(f"XGBoost training error (non-fatal): {e}")
-                xgb_model = None
-            self.progress = 90
-            if self.stop_requested:
-                self.status = "stopped"; return
-            # ── Generate predictions ─────────────────────────────────────────
-            try:
-                print(f"Generating {self.days_ahead}-day predictions...")
-                predictions = stock_model.predict_stock_price(
-                    lstm_model, xgb_model, scaled_data, scaler,
-                    time_step, self.days_ahead, self.is_stop_requested,
-                )
-                if self.stop_requested or predictions is None:
-                    self.status = "stopped"; return
-            except Exception as e:
-                self.status = "failed"
-                self.result = {"error": f"Prediction generation failed: {e}"}
-                return
-            self.progress = 95
-            if self.stop_requested:
-                self.status = "stopped"; return
-            # ── Build future trading-day dates ───────────────────────────────
-            future_dates = []
-            for i in range(1, self.days_ahead + 1):
-                if self.stop_requested:
-                    break
-                next_date = last_date + timedelta(days=i)
-                while next_date.weekday() > 4:
-                    next_date += timedelta(days=1)
-                future_dates.append(next_date)
-            if self.stop_requested:
-                self.status = "stopped"; return
-            # Deduplicate dates
-            unique_future_dates = []
-            seen_dates          = set()
-            for date in future_dates:
-                ds = date.strftime("%Y-%m-%d")
-                if ds not in seen_dates:
-                    seen_dates.add(ds)
-                    unique_future_dates.append(date)
-            # Pad if needed
-            while (len(unique_future_dates) < len(predictions)
-                   and not self.stop_requested):
-                next_date = unique_future_dates[-1] + timedelta(days=1)
-                while next_date.weekday() > 4:
-                    next_date += timedelta(days=1)
-                ds = next_date.strftime("%Y-%m-%d")
-                if ds not in seen_dates:
-                    unique_future_dates.append(next_date)
-                    seen_dates.add(ds)
-            if self.stop_requested:
-                self.status = "stopped"; return
-            unique_future_dates = unique_future_dates[: len(predictions)]
-            # ── Assemble result payload ──────────────────────────────────────
-            prediction_data = []
-            for i in range(min(len(unique_future_dates), len(predictions))):
-                predicted_price = float(predictions[i][0])
-                percent_change  = (
-                    (predicted_price - last_actual_close) / last_actual_close * 100
-                )
-                prediction_data.append({
-                    "date":   unique_future_dates[i].strftime("%Y-%m-%d"),
-                    "price":  round(predicted_price, 2),
-                    "change": round(percent_change, 2),
-                })
-            self.result = {
-                "symbol": self.symbol,
-                "lastActualClose": {
-                    "date":  last_date.strftime("%Y-%m-%d"),
-                    "price": round(last_actual_close, 2),
-                },
-                "predictions": prediction_data,
-                "sentiment":   self.sentiment_result,
-                "tableDisplay": True,
-            }
-            self.progress = 100
-            self.status   = "completed"
-            print(f"Prediction complete for {self.symbol}")
-        except Exception as e:
-            self.status = "failed"
-            self.result = {"error": str(e)}
-            print(f"Prediction task error: {e}")
-            traceback.print_exc()
-# =============================================================================
-#                              REST API ROUTES
-#  (all routes are identical to the original – no frontend changes needed)
-# =============================================================================
-@app.route("/api/predict", methods=["POST"])
-def start_prediction():
-    try:
-        data = request.json
-        print(f"POST /api/predict  body={data}")
-        if not data:
-            return jsonify({"error": "Invalid or missing request body"}), 400
-        user_id    = data.get("userId")
-        symbol     = data.get("symbol")
-        days_ahead = int(data.get("daysAhead", 5))
-        if not user_id or not symbol:
-            return jsonify({"error": "Missing required parameters (userId or symbol)"}), 400
-        if not isinstance(symbol, str) or len(symbol) > 10:
-            return jsonify({"error": f"Invalid symbol format: {symbol}"}), 400
-        if not tf_status:
-            return jsonify({
-                "error": f"Prediction service unavailable: {tf_message}",
-                "tf_status": tf_message,
-            }), 503
-        task    = PredictionTask(user_id, symbol, days_ahead)
-        task_id = task.run()
-        prediction_tasks[task_id] = task
-        return jsonify({
-            "taskId":  task_id,
-            "status":  "pending",
-            "message": f"Prediction started for {symbol}",
-        })
-    except ValueError as e:
-        return jsonify({"error": str(e)}), 400
-    except Exception as e:
-        print(f"Critical error starting prediction: {e}")
-        traceback.print_exc()
-        return jsonify({"error": "Failed to start prediction", "details": str(e)}), 500
-@app.route("/api/predict/status/<task_id>", methods=["GET"])
-def prediction_status(task_id):
-    try:
-        task = prediction_tasks.get(task_id)
-        if not task:
-            return jsonify({"error": "Task not found"}), 404
-        try:
-            if task.status == "completed" and task.result:
-                if isinstance(task.result, dict):
-                    if "predictions" in task.result and isinstance(
-                            task.result["predictions"], list):
-                        for pred in task.result["predictions"]:
-                            if (not isinstance(pred, dict)
-                                    or "date" not in pred
-                                    or "price" not in pred):
-                                task.status = "failed"
-                                task.result = {"error": "Malformed prediction data"}
-                                break
-                    else:
-                        task.status = "failed"
-                        task.result = {"error": "Missing prediction data"}
-                else:
-                    task.status = "failed"
-                    task.result = {"error": "Invalid result format"}
-            return jsonify({
-                "taskId":   task_id,
-                "status":   task.status,
-                "progress": task.progress,
-                "result":   task.result if task.status == "completed" else None,
-            })
-        except Exception as e:
-            print(f"Error generating status response: {e}")
-            return jsonify({
-                "taskId":   task_id,
-                "status":   "error",
-                "progress": task.progress,
-                "error":    str(e),
-            })
-    except Exception as e:
-        print(f"Critical error in prediction status: {e}")
-        return jsonify({"taskId": task_id, "status": "error",
-                        "error": "Server error"}), 500
-@app.route("/api/predict/stop/<task_id>", methods=["POST"])
-def stop_prediction(task_id):
-    task = prediction_tasks.get(task_id)
-    if not task:
-        return jsonify({"error": "Task not found"}), 404
-    task.stop_requested = True
-    if task.thread and task.thread.is_alive():
-        task.status     = "stopping"
-        print(f"Stop requested for task {task_id} ({task.symbol})")
-        stop_wait_start = time.time()
-        while time.time() - stop_wait_start < 2:
-            if task.stop_acknowledged:
-                task.status = "stopped"
-                break
-            time.sleep(0.1)
-    else:
-        task.status = "stopped"
-    return jsonify({
-        "taskId":          task_id,
-        "status":          task.status,
-        "symbol":          task.symbol,
-        "progress":        task.progress,
-        "stopRequested":   task.stop_requested,
-        "stopAcknowledged": task.stop_acknowledged,
-    })
-@app.route("/api/predict/sentiment/<symbol>", methods=["GET"])
-def get_sentiment(symbol):
-    try:
-        headlines = stock_model.fetch_finnhub_news(symbol)
-        sentiment_results, sentiment_totals = \
-            stock_model.analyze_sentiment(headlines)
-        sentiment_summary = stock_model.generate_sentiment_summary(
-            sentiment_totals, headlines, symbol)
-        return jsonify({
-            "symbol": symbol,
-            "sentiment": {
-                "totals":  sentiment_totals,
-                "summary": sentiment_summary,
-                "period":  28,
-            },
-        })
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-@app.route("/api/diagnose", methods=["GET"])
-def diagnose():
-    """Diagnostic endpoint – checks environment, APIs and model primitives."""
-    try:
-        env_info = {
-            "python_version":    sys.version,
-            "tensorflow_version": tf.__version__,
-            "numpy_version":     np.__version__,
-            "pandas_version":    pd.__version__,
-            "xgboost_version":   xgb.__version__,
-        }
-        api_status = {}
-        try:
-            url    = "https://www.alphavantage.co/query"
-            params = {
-                "function":   "TIME_SERIES_DAILY",
-                "symbol":     "AAPL",
-                "apikey":     stock_model.ALPHAVANTAGE_API_KEY,
-                "outputsize": "compact",
-                "datatype":   "json",
-            }
-            resp = requests.get(url, params=params)
-            rj   = resp.json()
-            api_status["alpha_vantage"] = {
-                "status_code": resp.status_code,
-                "has_data":    "Time Series (Daily)" in rj,
-                "error":       rj.get("Error Message") or rj.get("Note")
-                               if "Time Series (Daily)" not in rj else None,
-            }
-        except Exception as e:
-            api_status["alpha_vantage"] = {"error": str(e)}
-        try:
-            headers = {"X-Finnhub-Token": stock_model.FINNHUB_API_KEY}
-            resp    = requests.get(
-                "https://finnhub.io/api/v1/news?category=general",
-                headers=headers)
-            api_status["finnhub"] = {
-                "status_code": resp.status_code,
-                "has_data":    len(resp.json()) > 0,
-                "error":       None if resp.status_code == 200 else str(resp.text),
-            }
-        except Exception as e:
-            api_status["finnhub"] = {"error": str(e)}
-        model_status = {}
-        try:
-            test_data    = np.random.rand(100, 6)   # 6 features (OPT-2)
-            test_scaler  = MinMaxScaler()
-            test_data[:, 0] = test_scaler.fit_transform(
-                np.arange(100).reshape(-1, 1)).flatten()
-            X, y = stock_model.create_sequences(test_data, time_step=30)
-            model_status["sequence_creation"] = {
-                "success":  len(X) > 0,
-                "X_shape":  str(X.shape),
-                "y_shape":  str(y.shape),
-            }
-        except Exception as e:
-            model_status["error"] = str(e)
-        return jsonify({
-            "timestamp":    datetime.now().isoformat(),
-            "status":       "OK",
-            "environment":  env_info,
-            "api_status":   api_status,
-            "model_status": model_status,
-        })
-    except Exception as e:
-        return jsonify({"status": "ERROR", "error": str(e)}), 500
-if __name__ == "__main__":
-    # [OPT-B] Read port from environment variable so the same binary works on:
-    #   • Render          (sets $PORT automatically, usually 10000)
-    #   • Hugging Face    (expects 7860)
-    #   • Local dev       (falls back to 5001)
-    port = int(os.environ.get("PORT", 5001))
-    print(f"Starting StockBuddy API on port {port}")
-    app.run(host="0.0.0.0", port=port)

+"""
+app.py  –  StockBuddy Flask API
+=================================
+LIGHTWEIGHT CHANGES vs original:
+  [OPT-A]  Removed the startup TF validation model (was creating & running a test
+           LSTM on every cold start – wastes ~10 s and ~100 MB RAM on free tier).
+           Replaced with a simple tf.constant() smoke-test.
+  [OPT-B]  PORT is now read from the PORT environment variable so the server
+           works on Render (sets $PORT automatically) and Hugging Face Spaces
+           (expects port 7860) without code changes.
+  [OPT-C]  time_step updated to 30 throughout (was 45) to match the lighter model.
+  All REST API routes are unchanged from the original.
+"""
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import numpy as np
+import pandas as pd
+import os
+import threading
+import time
+from datetime import datetime, timedelta
+import json
+import model as stock_model
+import sys
+import requests
+import traceback
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+from tensorflow.keras.callbacks import Callback
+import tensorflow as tf
+import xgboost as xgb
+app = Flask(__name__)
+CORS(app)
+# [OPT-A] Lightweight TF smoke-test instead of building & running a full LSTM
+def validate_tensorflow():
+    """Quick TensorFlow sanity-check (no model created, no GPU required)."""
+    try:
+        print("TensorFlow version:", tf.__version__)
+        # A tiny constant operation is enough to confirm TF is importable and
+        # the runtime works.  Full model creation is deferred to the first
+        # prediction request so the cold-start is fast on free-tier hosts.
+        _ = tf.constant([1.0, 2.0, 3.0])
+        gpus = tf.config.list_physical_devices("GPU")
+        if gpus:
+            msg = f"GPU available ({len(gpus)} device(s)) – running in GPU mode."
+        else:
+            msg = "No GPU detected – running in CPU mode (expected on free tier)."
+        print(f"TensorFlow OK: {msg}")
+        return True, msg
+    except Exception as e:
+        print(f"TensorFlow validation failed: {e}")
+        return False, f"TensorFlow error: {e}"
+# Run smoke-test at startup
+tf_status, tf_message = validate_tensorflow()
+if not tf_status:
+    print(f"WARNING: {tf_message}")
+else:
+    print(f"TensorFlow validation: {tf_message}")
+# Dictionary to store running prediction tasks
+prediction_tasks = {}
+class PredictionTask:
+    def __init__(self, user_id, symbol, days_ahead):
+        self.user_id       = user_id
+        self.symbol        = symbol
+        self.days_ahead    = days_ahead
+        self.progress      = 0
+        self.status        = "pending"
+        self.result        = None
+        self.sentiment_result = None
+        self.thread        = None
+        self.stop_requested    = False
+        self.stop_acknowledged = False
+        # Unique task ID: millisecond timestamp + random hex suffix
+        timestamp         = int(time.time() * 1000)
+        random_suffix     = os.urandom(4).hex()
+        self.task_id      = f"{user_id}_{symbol}_{timestamp}_{random_suffix}"
+    def run(self):
+        self.thread = threading.Thread(target=self._run_prediction)
+        self.thread.daemon = True
+        self.thread.start()
+        return self.task_id
+    def is_stop_requested(self):
+        """Callback for model training loops to poll stop flag."""
+        if self.stop_requested and not self.stop_acknowledged:
+            self.stop_acknowledged = True
+            self.status = "stopped"
+            return True
+        return self.stop_requested
+    def _run_prediction(self):
+        try:
+            print(f"Starting prediction for {self.symbol} (task: {self.task_id})")
+            self.status   = "running"
+            self.progress = 10
+            # ── Fetch historical data ────────────────────────────────────────
+            print(f"Fetching historical data for {self.symbol}...")
+            try:
+                data = stock_model.fetch_stock_data(self.symbol, outputsize="compact")
+                print(f"Fetched {len(data)} rows for {self.symbol}")
+            except Exception as e:
+                print(f"Data fetch error: {e}")
+                self.status = "failed"
+                self.result = {"error": f"Could not fetch data for {self.symbol}: {e}"}
+                return
+            if data is None:
+                self.status = "failed"
+                self.result = {"error": f"Could not fetch data for {self.symbol}"}
+                return
+            if self.stop_requested:
+                self.status = "stopped"; return
+            if len(data) < 60:
+                self.status = "failed"
+                self.result = {"error": f"Insufficient data for {self.symbol} "
+                                         f"(got {len(data)}, need ≥60)"}
+                return
+            # ── Extract last actual close ────────────────────────────────────
+            try:
+                if isinstance(data, pd.DataFrame) and "Close" in data.columns:
+                    last_actual_close = float(data["Close"].iloc[-1])
+                    last_date         = data.index[-1]
+                else:
+                    last_actual_close = float(data.iloc[-1, 0])
+                    last_date         = data.index[-1]
+                print(f"Latest close for {self.symbol}: "
+                      f"${last_actual_close:.2f} on {last_date.strftime('%Y-%m-%d')}")
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"Error reading price data: {e}"}
+                return
+            self.progress = 20
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Sentiment analysis ───────────────────────────────────────────
+            try:
+                print(f"Fetching news for {self.symbol}...")
+                headlines = stock_model.fetch_finnhub_news(self.symbol)
+                print(f"Got {len(headlines)} headlines")
+                self.progress = 30
+                if self.stop_requested:
+                    self.status = "stopped"; return
+                sentiment_results, sentiment_totals = \
+                    stock_model.analyze_sentiment(headlines)
+                sentiment_summary = stock_model.generate_sentiment_summary(
+                    sentiment_totals, headlines, self.symbol)
+                self.sentiment_result = {
+                    "totals":  sentiment_totals,
+                    "summary": sentiment_summary,
+                }
+            except Exception as e:
+                print(f"Sentiment error (non-fatal): {e}")
+                self.sentiment_result = {
+                    "totals":  {"positive": 0, "negative": 0, "neutral": 0},
+                    "summary": f"Unable to analyse sentiment: {e}",
+                }
+            self.progress = 40
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Preprocess data ──────────────────────────────────────────────
+            try:
+                print("Preprocessing data...")
+                scaled_data, scaler = stock_model.preprocess_data(data)
+                # [OPT-C] time_step 45 → 30
+                time_step = 30
+                X, y      = stock_model.create_sequences(scaled_data, time_step)
+                print(f"Sequences: X={X.shape}, y={y.shape}")
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"Preprocessing failed: {e}"}
+                return
+            if len(X) == 0:
+                self.status = "failed"
+                self.result = {"error": f"Could not create training sequences for {self.symbol}"}
+                return
+            self.progress = 50
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Train LSTM ───────────────────────────────────────────────────
+            try:
+                train_size       = int(len(X) * 0.8)
+                if train_size == 0:
+                    self.status = "failed"
+                    self.result = {"error": "Not enough data to split for training"}
+                    return
+                X_train, y_train = X[:train_size], y[:train_size]
+                self.progress    = 55
+                print(f"Training LSTM with {len(X_train)} samples...")
+                lstm_model = stock_model.train_lstm(
+                    X_train, y_train, time_step, self.is_stop_requested)
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"LSTM training failed: {e}"}
+                return
+            if self.stop_requested:
+                self.status = "stopped"; return
+            self.progress = 75
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Train XGBoost on residuals ───────────────────────────────────
+            try:
+                print("Calculating residuals for XGBoost...")
+                lstm_preds = lstm_model.predict(X_train, verbose=0).flatten()
+                residuals  = y_train - lstm_preds
+                xgb_model  = stock_model.train_xgboost(
+                    X_train.reshape(X_train.shape[0], -1),
+                    residuals,
+                    self.is_stop_requested,
+                )
+                if self.stop_requested or xgb_model is None:
+                    self.status = "stopped"; return
+            except Exception as e:
+                print(f"XGBoost training error (non-fatal): {e}")
+                xgb_model = None
+            self.progress = 90
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Generate predictions ─────────────────────────────────────────
+            try:
+                print(f"Generating {self.days_ahead}-day predictions...")
+                predictions = stock_model.predict_stock_price(
+                    lstm_model, xgb_model, scaled_data, scaler,
+                    time_step, self.days_ahead, self.is_stop_requested,
+                )
+                if self.stop_requested or predictions is None:
+                    self.status = "stopped"; return
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"Prediction generation failed: {e}"}
+                return
+            self.progress = 95
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Build future trading-day dates ───────────────────────────────
+            future_dates = []
+            for i in range(1, self.days_ahead + 1):
+                if self.stop_requested:
+                    break
+                next_date = last_date + timedelta(days=i)
+                while next_date.weekday() > 4:
+                    next_date += timedelta(days=1)
+                future_dates.append(next_date)
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # Deduplicate dates
+            unique_future_dates = []
+            seen_dates          = set()
+            for date in future_dates:
+                ds = date.strftime("%Y-%m-%d")
+                if ds not in seen_dates:
+                    seen_dates.add(ds)
+                    unique_future_dates.append(date)
+            # Pad if needed
+            while (len(unique_future_dates) < len(predictions)
+                   and not self.stop_requested):
+                next_date = unique_future_dates[-1] + timedelta(days=1)
+                while next_date.weekday() > 4:
+                    next_date += timedelta(days=1)
+                ds = next_date.strftime("%Y-%m-%d")
+                if ds not in seen_dates:
+                    unique_future_dates.append(next_date)
+                    seen_dates.add(ds)
+            if self.stop_requested:
+                self.status = "stopped"; return
+            unique_future_dates = unique_future_dates[: len(predictions)]
+            # ── Assemble result payload ──────────────────────────────────────
+            prediction_data = []
+            for i in range(min(len(unique_future_dates), len(predictions))):
+                predicted_price = float(predictions[i][0])
+                percent_change  = (
+                    (predicted_price - last_actual_close) / last_actual_close * 100
+                )
+                prediction_data.append({
+                    "date":   unique_future_dates[i].strftime("%Y-%m-%d"),
+                    "price":  round(predicted_price, 2),
+                    "change": round(percent_change, 2),
+                })
+            self.result = {
+                "symbol": self.symbol,
+                "lastActualClose": {
+                    "date":  last_date.strftime("%Y-%m-%d"),
+                    "price": round(last_actual_close, 2),
+                },
+                "predictions": prediction_data,
+                "sentiment":   self.sentiment_result,
+                "tableDisplay": True,
+            }
+            self.progress = 100
+            self.status   = "completed"
+            print(f"Prediction complete for {self.symbol}")
+        except Exception as e:
+            self.status = "failed"
+            self.result = {"error": str(e)}
+            print(f"Prediction task error: {e}")
+            traceback.print_exc()
+# =============================================================================
+#                              REST API ROUTES
+#  (all routes are identical to the original – no frontend changes needed)
+# =============================================================================
+@app.route("/api/predict", methods=["POST"])
+def start_prediction():
+    try:
+        data = request.json
+        print(f"POST /api/predict  body={data}")
+        if not data:
+            return jsonify({"error": "Invalid or missing request body"}), 400
+        user_id    = data.get("userId")
+        symbol     = data.get("symbol")
+        days_ahead = int(data.get("daysAhead", 5))
+        if not user_id or not symbol:
+            return jsonify({"error": "Missing required parameters (userId or symbol)"}), 400
+        if not isinstance(symbol, str) or len(symbol) > 10:
+            return jsonify({"error": f"Invalid symbol format: {symbol}"}), 400
+        if not tf_status:
+            return jsonify({
+                "error": f"Prediction service unavailable: {tf_message}",
+                "tf_status": tf_message,
+            }), 503
+        task    = PredictionTask(user_id, symbol, days_ahead)
+        task_id = task.run()
+        prediction_tasks[task_id] = task
+        return jsonify({
+            "taskId":  task_id,
+            "status":  "pending",
+            "message": f"Prediction started for {symbol}",
+        })
+    except ValueError as e:
+        return jsonify({"error": str(e)}), 400
+    except Exception as e:
+        print(f"Critical error starting prediction: {e}")
+        traceback.print_exc()
+        return jsonify({"error": "Failed to start prediction", "details": str(e)}), 500
+@app.route("/api/predict/status/<task_id>", methods=["GET"])
+def prediction_status(task_id):
+    try:
+        task = prediction_tasks.get(task_id)
+        if not task:
+            return jsonify({"error": "Task not found"}), 404
+        try:
+            if task.status == "completed" and task.result:
+                if isinstance(task.result, dict):
+                    if "predictions" in task.result and isinstance(
+                            task.result["predictions"], list):
+                        for pred in task.result["predictions"]:
+                            if (not isinstance(pred, dict)
+                                    or "date" not in pred
+                                    or "price" not in pred):
+                                task.status = "failed"
+                                task.result = {"error": "Malformed prediction data"}
+                                break
+                    else:
+                        task.status = "failed"
+                        task.result = {"error": "Missing prediction data"}
+                else:
+                    task.status = "failed"
+                    task.result = {"error": "Invalid result format"}
+            return jsonify({
+                "taskId":   task_id,
+                "status":   task.status,
+                "progress": task.progress,
+                "result":   task.result if task.status == "completed" else None,
+            })
+        except Exception as e:
+            print(f"Error generating status response: {e}")
+            return jsonify({
+                "taskId":   task_id,
+                "status":   "error",
+                "progress": task.progress,
+                "error":    str(e),
+            })
+    except Exception as e:
+        print(f"Critical error in prediction status: {e}")
+        return jsonify({"taskId": task_id, "status": "error",
+                        "error": "Server error"}), 500
+@app.route("/api/predict/stop/<task_id>", methods=["POST"])
+def stop_prediction(task_id):
+    task = prediction_tasks.get(task_id)
+    if not task:
+        return jsonify({"error": "Task not found"}), 404
+    task.stop_requested = True
+    if task.thread and task.thread.is_alive():
+        task.status     = "stopping"
+        print(f"Stop requested for task {task_id} ({task.symbol})")
+        stop_wait_start = time.time()
+        while time.time() - stop_wait_start < 2:
+            if task.stop_acknowledged:
+                task.status = "stopped"
+                break
+            time.sleep(0.1)
+    else:
+        task.status = "stopped"
+    return jsonify({
+        "taskId":          task_id,
+        "status":          task.status,
+        "symbol":          task.symbol,
+        "progress":        task.progress,
+        "stopRequested":   task.stop_requested,
+        "stopAcknowledged": task.stop_acknowledged,
+    })
+@app.route("/api/predict/sentiment/<symbol>", methods=["GET"])
+def get_sentiment(symbol):
+    try:
+        headlines = stock_model.fetch_finnhub_news(symbol)
+        sentiment_results, sentiment_totals = \
+            stock_model.analyze_sentiment(headlines)
+        sentiment_summary = stock_model.generate_sentiment_summary(
+            sentiment_totals, headlines, symbol)
+        return jsonify({
+            "symbol": symbol,
+            "sentiment": {
+                "totals":  sentiment_totals,
+                "summary": sentiment_summary,
+                "period":  28,
+            },
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route("/api/diagnose", methods=["GET"])
+def diagnose():
+    """Diagnostic endpoint – checks environment, APIs and model primitives."""
+    try:
+        env_info = {
+            "python_version":    sys.version,
+            "tensorflow_version": tf.__version__,
+            "numpy_version":     np.__version__,
+            "pandas_version":    pd.__version__,
+            "xgboost_version":   xgb.__version__,
+        }
+        api_status = {}
+        try:
+            import yfinance as yf
+            test_stock = yf.Ticker("AAPL")
+            test_df = test_stock.history(period="1d")
+            api_status["yfinance"] = {
+                "status": "OK" if not test_df.empty else "No data",
+                "has_data": not test_df.empty,
+                "error": None
+            }
+        except Exception as e:
+            api_status["yfinance"] = {"error": str(e)}
+        try:
+            headers = {"X-Finnhub-Token": stock_model.FINNHUB_API_KEY}
+            resp    = requests.get(
+                "https://finnhub.io/api/v1/news?category=general",
+                headers=headers)
+            api_status["finnhub"] = {
+                "status_code": resp.status_code,
+                "has_data":    len(resp.json()) > 0,
+                "error":       None if resp.status_code == 200 else str(resp.text),
+            }
+        except Exception as e:
+            api_status["finnhub"] = {"error": str(e)}
+        model_status = {}
+        try:
+            test_data    = np.random.rand(100, 6)   # 6 features (OPT-2)
+            test_scaler  = MinMaxScaler()
+            test_data[:, 0] = test_scaler.fit_transform(
+                np.arange(100).reshape(-1, 1)).flatten()
+            X, y = stock_model.create_sequences(test_data, time_step=30)
+            model_status["sequence_creation"] = {
+                "success":  len(X) > 0,
+                "X_shape":  str(X.shape),
+                "y_shape":  str(y.shape),
+            }
+        except Exception as e:
+            model_status["error"] = str(e)
+        return jsonify({
+            "timestamp":    datetime.now().isoformat(),
+            "status":       "OK",
+            "environment":  env_info,
+            "api_status":   api_status,
+            "model_status": model_status,
+        })
+    except Exception as e:
+        return jsonify({"status": "ERROR", "error": str(e)}), 500
+if __name__ == "__main__":
+    # [OPT-B] Read port from environment variable so the same binary works on:
+    #   • Render          (sets $PORT automatically, usually 10000)
+    #   • Hugging Face    (expects 7860)
+    #   • Local dev       (falls back to 5001)
+    port = int(os.environ.get("PORT", 5001))
+    print(f"Starting StockBuddy API on port {port}")
+    app.run(host="0.0.0.0", port=port)

model.py CHANGED Viewed

@@ -44,65 +44,29 @@ FINNHUB_API_KEY     = "cu5gvghr01qqj8u6iau0cu5gvghr01qqj8u6iaug"
 #                     STOCK PRICE PREDICTION FUNCTIONS
 # =============================================================================
-def fetch_stock_data(symbol, outputsize="full"):
-    url = "https://www.alphavantage.co/query"
-    params = {
-        "function":   "TIME_SERIES_DAILY",
-        "symbol":     symbol,
-        "apikey":     ALPHAVANTAGE_API_KEY,
-        "outputsize": outputsize,
-        "datatype":   "json",
-    }
-    response = requests.get(url, params=params)
-    data = response.json()
-    if "Time Series (Daily)" not in data:
-        if "Error Message" in data:
-            raise ValueError(
-                f"Symbol '{symbol}' not found. Please verify the stock symbol.")
-        elif "Note" in data:
-            raise ValueError("API request limit reached. Please try again in a minute.")
-        else:
-            raise ValueError(
-                f"Unable to fetch data for symbol '{symbol}'. Please verify the symbol.")
-    ts = data["Time Series (Daily)"]
-    df = pd.DataFrame.from_dict(ts, orient="index")
-    df.index = pd.to_datetime(df.index)
-    df.sort_index(inplace=True)
-    for col in ["1. open", "2. high", "3. low", "4. close", "5. volume"]:
-        if col in df.columns:
-            df[col] = df[col].astype(float)
-    df = df.rename(columns={
-        "1. open":   "Open",
-        "2. high":   "High",
-        "3. low":    "Low",
-        "4. close":  "Close",
-        "5. volume": "Volume",
-    })
-    latest_date    = df.index[-1]
-    today          = pd.Timestamp.now().normalize()
-    market_closed_days = 0
-    if today.dayofweek >= 5:
-        market_closed_days = today.dayofweek - 4
-    elif today.hour < 16:
-        market_closed_days = 1
-    expected_latest = today - pd.Timedelta(days=market_closed_days)
-    date_diff = (expected_latest - latest_date).days
-    if date_diff > 5:
-        print(f"WARNING: Latest data for {symbol} is from "
-              f"{latest_date.strftime('%Y-%m-%d')} ({date_diff} days old).")
-    print(f"\nLatest closing price for {symbol} "
-          f"(as of {latest_date.strftime('%Y-%m-%d')}): ${df['Close'].iloc[-1]:.2f}")
-    # Add lightweight technical indicators
-    df = add_technical_indicators(df)
-    return df
 # [OPT-2] Reduced feature set: 11 → 6  (Close, RSI, SMA5, MACD, Upper_Band, ROC)

 #                     STOCK PRICE PREDICTION FUNCTIONS
 # =============================================================================
+import yfinance as yf
+def fetch_stock_data(symbol, outputsize="full"):
+    try:
+        stock = yf.Ticker(symbol)
+        # Use 'max' or '5y' for full, '1y' for compact
+        period = "5y" if outputsize == "full" else "1y"
+        df = stock.history(period=period)
+        if df.empty:
+            raise ValueError(f"Unable to fetch data for symbol '{symbol}'. Please verify the symbol.")
+        df.index = pd.to_datetime(df.index).tz_localize(None)
+        latest_date = df.index[-1]
+        print(f"\nLatest closing price for {symbol} "
+              f"(as of {latest_date.strftime('%Y-%m-%d')}): ${df['Close'].iloc[-1]:.2f}")
+        # Add lightweight technical indicators
+        df = add_technical_indicators(df)
+        return df
+    except Exception as e:
+        raise ValueError(f"Error fetching data for {symbol}: {e}")
 # [OPT-2] Reduced feature set: 11 → 6  (Close, RSI, SMA5, MACD, Upper_Band, ROC)

requirements.txt CHANGED Viewed

@@ -11,3 +11,4 @@ transformers==4.33.2
 plotly==5.17.0
 gunicorn==21.2.0
 tf-keras==2.15.0

 plotly==5.17.0
 gunicorn==21.2.0
 tf-keras==2.15.0
+yfinance==0.2.40