Spaces:

danielthatu12
/

StockBuddy_Model

Running

App Files Files Community

danielthatu12 committed on Apr 17

Commit

dee7f76

1 Parent(s): e3e113f

Updated

Browse files

Files changed (5) hide show

Dockerfile +13 -0
Procfile +5 -0
app.py +565 -0
model.py +676 -0
requirements.txt +43 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+# Container image for the StockBuddy API, based on the slim Python 3.10 image.
+FROM python:3.10-slim
+WORKDIR /app
+# requirements.txt is copied and installed before the rest of the source.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Port 7860 is exported as PORT, exposed, and bound by Gunicorn below.
+ENV PORT=7860
+EXPOSE 7860
+# One Gunicorn worker serves the `app` object from app.py with a 300-second timeout.
+CMD ["gunicorn", "app:app", "--bind", "0.0.0.0:7860", "--timeout", "300", "--workers", "1"]

Procfile ADDED Viewed

	@@ -0,0 +1,5 @@

+# Render reads this file to know how to start the web service.
+# --workers 1   : single worker keeps RAM under free-tier limit (~512 MB)
+# --timeout 300 : prediction tasks can run up to 5 minutes before Gunicorn kills them
+# --bind        : Render injects $PORT automatically
+web: gunicorn app:app --bind 0.0.0.0:$PORT --timeout 300 --workers 1

app.py ADDED Viewed

	@@ -0,0 +1,565 @@

+"""
+app.py  –  StockBuddy Flask API
+=================================
+LIGHTWEIGHT CHANGES vs original:
+  [OPT-A]  Removed the startup TF validation model (was creating & running a test
+           LSTM on every cold start – wastes ~10 s and ~100 MB RAM on free tier).
+           Replaced with a simple tf.constant() smoke-test.
+  [OPT-B]  PORT is now read from the PORT environment variable so the server
+           works on Render (sets $PORT automatically) and Hugging Face Spaces
+           (expects port 7860) without code changes.
+  [OPT-C]  time_step updated to 30 throughout (was 45) to match the lighter model.
+  All REST API routes are unchanged from the original.
+"""
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import numpy as np
+import pandas as pd
+import os
+import threading
+import time
+from datetime import datetime, timedelta
+import json
+import model as stock_model
+import sys
+import requests
+import traceback
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+from tensorflow.keras.callbacks import Callback
+import tensorflow as tf
+import xgboost as xgb
+# Flask application object; the route decorators below register on it.
+app = Flask(__name__)
+# Wrap the app with flask_cors using its default settings.
+CORS(app)
+@app.route("/", methods=["GET"])
+def home():
+    return jsonify({"status": "running", "message": "StockBuddy API is live!"})
+# [OPT-A] Lightweight TF smoke-test instead of building & running a full LSTM
+def validate_tensorflow():
+    """Quick TensorFlow sanity-check (no model created, no GPU required)."""
+    try:
+        print("TensorFlow version:", tf.__version__)
+        # A tiny constant operation is enough to confirm TF is importable and
+        # the runtime works.  Full model creation is deferred to the first
+        # prediction request so the cold-start is fast on free-tier hosts.
+        _ = tf.constant([1.0, 2.0, 3.0])
+        gpus = tf.config.list_physical_devices("GPU")
+        if gpus:
+            msg = f"GPU available ({len(gpus)} device(s)) – running in GPU mode."
+        else:
+            msg = "No GPU detected – running in CPU mode (expected on free tier)."
+        print(f"TensorFlow OK: {msg}")
+        return True, msg
+    except Exception as e:
+        print(f"TensorFlow validation failed: {e}")
+        return False, f"TensorFlow error: {e}"
+# Run smoke-test at startup
+tf_status, tf_message = validate_tensorflow()
+if not tf_status:
+    print(f"WARNING: {tf_message}")
+else:
+    print(f"TensorFlow validation: {tf_message}")
+# Dictionary to store running prediction tasks
+prediction_tasks = {}
+class PredictionTask:
+    def __init__(self, user_id, symbol, days_ahead):
+        self.user_id       = user_id
+        self.symbol        = symbol
+        self.days_ahead    = days_ahead
+        self.progress      = 0
+        self.status        = "pending"
+        self.result        = None
+        self.sentiment_result = None
+        self.thread        = None
+        self.stop_requested    = False
+        self.stop_acknowledged = False
+        # Unique task ID: millisecond timestamp + random hex suffix
+        timestamp         = int(time.time() * 1000)
+        random_suffix     = os.urandom(4).hex()
+        self.task_id      = f"{user_id}_{symbol}_{timestamp}_{random_suffix}"
+    def run(self):
+        self.thread = threading.Thread(target=self._run_prediction)
+        self.thread.daemon = True
+        self.thread.start()
+        return self.task_id
+    def is_stop_requested(self):
+        """Callback for model training loops to poll stop flag."""
+        if self.stop_requested and not self.stop_acknowledged:
+            self.stop_acknowledged = True
+            self.status = "stopped"
+            return True
+        return self.stop_requested
+    def _run_prediction(self):
+        try:
+            print(f"Starting prediction for {self.symbol} (task: {self.task_id})")
+            self.status   = "running"
+            self.progress = 10
+            # ── Fetch historical data ────────────────────────────────────────
+            print(f"Fetching historical data for {self.symbol}...")
+            try:
+                data = stock_model.fetch_stock_data(self.symbol, outputsize="compact")
+                print(f"Fetched {len(data)} rows for {self.symbol}")
+            except Exception as e:
+                print(f"Data fetch error: {e}")
+                self.status = "failed"
+                self.result = {"error": f"Could not fetch data for {self.symbol}: {e}"}
+                return
+            if data is None:
+                self.status = "failed"
+                self.result = {"error": f"Could not fetch data for {self.symbol}"}
+                return
+            if self.stop_requested:
+                self.status = "stopped"; return
+            if len(data) < 60:
+                self.status = "failed"
+                self.result = {"error": f"Insufficient data for {self.symbol} "
+                                         f"(got {len(data)}, need ≥60)"}
+                return
+            # ── Extract last actual close ────────────────────────────────────
+            try:
+                if isinstance(data, pd.DataFrame) and "Close" in data.columns:
+                    last_actual_close = float(data["Close"].iloc[-1])
+                    last_date         = data.index[-1]
+                else:
+                    last_actual_close = float(data.iloc[-1, 0])
+                    last_date         = data.index[-1]
+                print(f"Latest close for {self.symbol}: "
+                      f"${last_actual_close:.2f} on {last_date.strftime('%Y-%m-%d')}")
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"Error reading price data: {e}"}
+                return
+            self.progress = 20
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Sentiment analysis ───────────────────────────────────────────
+            try:
+                print(f"Fetching news for {self.symbol}...")
+                headlines = stock_model.fetch_finnhub_news(self.symbol)
+                print(f"Got {len(headlines)} headlines")
+                self.progress = 30
+                if self.stop_requested:
+                    self.status = "stopped"; return
+                sentiment_results, sentiment_totals = \
+                    stock_model.analyze_sentiment(headlines)
+                sentiment_summary = stock_model.generate_sentiment_summary(
+                    sentiment_totals, headlines, self.symbol)
+                self.sentiment_result = {
+                    "totals":  sentiment_totals,
+                    "summary": sentiment_summary,
+                }
+            except Exception as e:
+                print(f"Sentiment error (non-fatal): {e}")
+                self.sentiment_result = {
+                    "totals":  {"positive": 0, "negative": 0, "neutral": 0},
+                    "summary": f"Unable to analyse sentiment: {e}",
+                }
+            self.progress = 40
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Preprocess data ──────────────────────────────────────────────
+            try:
+                print("Preprocessing data...")
+                scaled_data, scaler = stock_model.preprocess_data(data)
+                # [OPT-C] time_step 45 → 30
+                time_step = 30
+                X, y      = stock_model.create_sequences(scaled_data, time_step)
+                print(f"Sequences: X={X.shape}, y={y.shape}")
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"Preprocessing failed: {e}"}
+                return
+            if len(X) == 0:
+                self.status = "failed"
+                self.result = {"error": f"Could not create training sequences for {self.symbol}"}
+                return
+            self.progress = 50
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Train LSTM ───────────────────────────────────────────────────
+            try:
+                train_size       = int(len(X) * 0.8)
+                if train_size == 0:
+                    self.status = "failed"
+                    self.result = {"error": "Not enough data to split for training"}
+                    return
+                X_train, y_train = X[:train_size], y[:train_size]
+                self.progress    = 55
+                print(f"Training LSTM with {len(X_train)} samples...")
+                lstm_model = stock_model.train_lstm(
+                    X_train, y_train, time_step, self.is_stop_requested)
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"LSTM training failed: {e}"}
+                return
+            if self.stop_requested:
+                self.status = "stopped"; return
+            self.progress = 75
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Train XGBoost on residuals ───────────────────────────────────
+            try:
+                print("Calculating residuals for XGBoost...")
+                lstm_preds = lstm_model.predict(X_train, verbose=0).flatten()
+                residuals  = y_train - lstm_preds
+                xgb_model  = stock_model.train_xgboost(
+                    X_train.reshape(X_train.shape[0], -1),
+                    residuals,
+                    self.is_stop_requested,
+                )
+                if self.stop_requested or xgb_model is None:
+                    self.status = "stopped"; return
+            except Exception as e:
+                print(f"XGBoost training error (non-fatal): {e}")
+                xgb_model = None
+            self.progress = 90
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Generate predictions ─────────────────────────────────────────
+            try:
+                print(f"Generating {self.days_ahead}-day predictions...")
+                predictions = stock_model.predict_stock_price(
+                    lstm_model, xgb_model, scaled_data, scaler,
+                    time_step, self.days_ahead, self.is_stop_requested,
+                )
+                if self.stop_requested or predictions is None:
+                    self.status = "stopped"; return
+            except Exception as e:
+                self.status = "failed"
+                self.result = {"error": f"Prediction generation failed: {e}"}
+                return
+            self.progress = 95
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # ── Build future trading-day dates ───────────────────────────────
+            future_dates = []
+            for i in range(1, self.days_ahead + 1):
+                if self.stop_requested:
+                    break
+                next_date = last_date + timedelta(days=i)
+                while next_date.weekday() > 4:
+                    next_date += timedelta(days=1)
+                future_dates.append(next_date)
+            if self.stop_requested:
+                self.status = "stopped"; return
+            # Deduplicate dates
+            unique_future_dates = []
+            seen_dates          = set()
+            for date in future_dates:
+                ds = date.strftime("%Y-%m-%d")
+                if ds not in seen_dates:
+                    seen_dates.add(ds)
+                    unique_future_dates.append(date)
+            # Pad if needed
+            while (len(unique_future_dates) < len(predictions)
+                   and not self.stop_requested):
+                next_date = unique_future_dates[-1] + timedelta(days=1)
+                while next_date.weekday() > 4:
+                    next_date += timedelta(days=1)
+                ds = next_date.strftime("%Y-%m-%d")
+                if ds not in seen_dates:
+                    unique_future_dates.append(next_date)
+                    seen_dates.add(ds)
+            if self.stop_requested:
+                self.status = "stopped"; return
+            unique_future_dates = unique_future_dates[: len(predictions)]
+            # ── Assemble result payload ──────────────────────────────────────
+            prediction_data = []
+            for i in range(min(len(unique_future_dates), len(predictions))):
+                predicted_price = float(predictions[i][0])
+                percent_change  = (
+                    (predicted_price - last_actual_close) / last_actual_close * 100
+                )
+                prediction_data.append({
+                    "date":   unique_future_dates[i].strftime("%Y-%m-%d"),
+                    "price":  round(predicted_price, 2),
+                    "change": round(percent_change, 2),
+                })
+            self.result = {
+                "symbol": self.symbol,
+                "lastActualClose": {
+                    "date":  last_date.strftime("%Y-%m-%d"),
+                    "price": round(last_actual_close, 2),
+                },
+                "predictions": prediction_data,
+                "sentiment":   self.sentiment_result,
+                "tableDisplay": True,
+            }
+            self.progress = 100
+            self.status   = "completed"
+            print(f"Prediction complete for {self.symbol}")
+        except Exception as e:
+            self.status = "failed"
+            self.result = {"error": str(e)}
+            print(f"Prediction task error: {e}")
+            traceback.print_exc()
+# =============================================================================
+#                              REST API ROUTES
+#  (all routes are identical to the original – no frontend changes needed)
+# =============================================================================
+@app.route("/api/predict", methods=["POST"])
+def start_prediction():
+    try:
+        data = request.json
+        print(f"POST /api/predict  body={data}")
+        if not data:
+            return jsonify({"error": "Invalid or missing request body"}), 400
+        user_id    = data.get("userId")
+        symbol     = data.get("symbol")
+        days_ahead = int(data.get("daysAhead", 5))
+        if not user_id or not symbol:
+            return jsonify({"error": "Missing required parameters (userId or symbol)"}), 400
+        if not isinstance(symbol, str) or len(symbol) > 10:
+            return jsonify({"error": f"Invalid symbol format: {symbol}"}), 400
+        if not tf_status:
+            return jsonify({
+                "error": f"Prediction service unavailable: {tf_message}",
+                "tf_status": tf_message,
+            }), 503
+        task    = PredictionTask(user_id, symbol, days_ahead)
+        task_id = task.run()
+        prediction_tasks[task_id] = task
+        return jsonify({
+            "taskId":  task_id,
+            "status":  "pending",
+            "message": f"Prediction started for {symbol}",
+        })
+    except ValueError as e:
+        return jsonify({"error": str(e)}), 400
+    except Exception as e:
+        print(f"Critical error starting prediction: {e}")
+        traceback.print_exc()
+        return jsonify({"error": "Failed to start prediction", "details": str(e)}), 500
+@app.route("/api/predict/status/<task_id>", methods=["GET"])
+def prediction_status(task_id):
+    try:
+        task = prediction_tasks.get(task_id)
+        if not task:
+            return jsonify({"error": "Task not found"}), 404
+        try:
+            if task.status == "completed" and task.result:
+                if isinstance(task.result, dict):
+                    if "predictions" in task.result and isinstance(
+                            task.result["predictions"], list):
+                        for pred in task.result["predictions"]:
+                            if (not isinstance(pred, dict)
+                                    or "date" not in pred
+                                    or "price" not in pred):
+                                task.status = "failed"
+                                task.result = {"error": "Malformed prediction data"}
+                                break
+                    else:
+                        task.status = "failed"
+                        task.result = {"error": "Missing prediction data"}
+                else:
+                    task.status = "failed"
+                    task.result = {"error": "Invalid result format"}
+            return jsonify({
+                "taskId":   task_id,
+                "status":   task.status,
+                "progress": task.progress,
+                "result":   task.result if task.status == "completed" else None,
+            })
+        except Exception as e:
+            print(f"Error generating status response: {e}")
+            return jsonify({
+                "taskId":   task_id,
+                "status":   "error",
+                "progress": task.progress,
+                "error":    str(e),
+            })
+    except Exception as e:
+        print(f"Critical error in prediction status: {e}")
+        return jsonify({"taskId": task_id, "status": "error",
+                        "error": "Server error"}), 500
+@app.route("/api/predict/stop/<task_id>", methods=["POST"])
+def stop_prediction(task_id):
+    task = prediction_tasks.get(task_id)
+    if not task:
+        return jsonify({"error": "Task not found"}), 404
+    task.stop_requested = True
+    if task.thread and task.thread.is_alive():
+        task.status     = "stopping"
+        print(f"Stop requested for task {task_id} ({task.symbol})")
+        stop_wait_start = time.time()
+        while time.time() - stop_wait_start < 2:
+            if task.stop_acknowledged:
+                task.status = "stopped"
+                break
+            time.sleep(0.1)
+    else:
+        task.status = "stopped"
+    return jsonify({
+        "taskId":          task_id,
+        "status":          task.status,
+        "symbol":          task.symbol,
+        "progress":        task.progress,
+        "stopRequested":   task.stop_requested,
+        "stopAcknowledged": task.stop_acknowledged,
+    })
+@app.route("/api/predict/sentiment/<symbol>", methods=["GET"])
+def get_sentiment(symbol):
+    try:
+        headlines = stock_model.fetch_finnhub_news(symbol)
+        sentiment_results, sentiment_totals = \
+            stock_model.analyze_sentiment(headlines)
+        sentiment_summary = stock_model.generate_sentiment_summary(
+            sentiment_totals, headlines, symbol)
+        return jsonify({
+            "symbol": symbol,
+            "sentiment": {
+                "totals":  sentiment_totals,
+                "summary": sentiment_summary,
+                "period":  28,
+            },
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route("/api/diagnose", methods=["GET"])
+def diagnose():
+    """Diagnostic endpoint – checks environment, APIs and model primitives."""
+    try:
+        env_info = {
+            "python_version":    sys.version,
+            "tensorflow_version": tf.__version__,
+            "numpy_version":     np.__version__,
+            "pandas_version":    pd.__version__,
+            "xgboost_version":   xgb.__version__,
+        }
+        api_status = {}
+        try:
+            url    = "https://www.alphavantage.co/query"
+            params = {
+                "function":   "TIME_SERIES_DAILY",
+                "symbol":     "AAPL",
+                "apikey":     stock_model.ALPHAVANTAGE_API_KEY,
+                "outputsize": "compact",
+                "datatype":   "json",
+            }
+            resp = requests.get(url, params=params)
+            rj   = resp.json()
+            api_status["alpha_vantage"] = {
+                "status_code": resp.status_code,
+                "has_data":    "Time Series (Daily)" in rj,
+                "error":       rj.get("Error Message") or rj.get("Note")
+                               if "Time Series (Daily)" not in rj else None,
+            }
+        except Exception as e:
+            api_status["alpha_vantage"] = {"error": str(e)}
+        try:
+            headers = {"X-Finnhub-Token": stock_model.FINNHUB_API_KEY}
+            resp    = requests.get(
+                "https://finnhub.io/api/v1/news?category=general",
+                headers=headers)
+            api_status["finnhub"] = {
+                "status_code": resp.status_code,
+                "has_data":    len(resp.json()) > 0,
+                "error":       None if resp.status_code == 200 else str(resp.text),
+            }
+        except Exception as e:
+            api_status["finnhub"] = {"error": str(e)}
+        model_status = {}
+        try:
+            test_data    = np.random.rand(100, 6)   # 6 features (OPT-2)
+            test_scaler  = MinMaxScaler()
+            test_data[:, 0] = test_scaler.fit_transform(
+                np.arange(100).reshape(-1, 1)).flatten()
+            X, y = stock_model.create_sequences(test_data, time_step=30)
+            model_status["sequence_creation"] = {
+                "success":  len(X) > 0,
+                "X_shape":  str(X.shape),
+                "y_shape":  str(y.shape),
+            }
+        except Exception as e:
+            model_status["error"] = str(e)
+        return jsonify({
+            "timestamp":    datetime.now().isoformat(),
+            "status":       "OK",
+            "environment":  env_info,
+            "api_status":   api_status,
+            "model_status": model_status,
+        })
+    except Exception as e:
+        return jsonify({"status": "ERROR", "error": str(e)}), 500
+if __name__ == "__main__":
+    # [OPT-B] Read port from environment variable so the same binary works on:
+    #   • Render          (sets $PORT automatically, usually 10000)
+    #   • Hugging Face    (expects 7860)
+    #   • Local dev       (falls back to 5001)
+    port = int(os.environ.get("PORT", 5001))
+    print(f"Starting StockBuddy API on port {port}")
+    app.run(host="0.0.0.0", port=port)

model.py ADDED Viewed

	@@ -0,0 +1,676 @@

+"""
+model.py  –  StockBuddy ML / NLP core
+========================================
+LIGHTWEIGHT CHANGES vs original:
+  [OPT-1]  Removed `transformers` pipeline (was downloading ~1.2 GB BART model at
+           runtime).  Replaced with a fast NLTK-based extractive summariser.
+  [OPT-2]  Reduced technical indicators: 11 → 6 features (kept only the ones with
+           highest predictive signal; fewer features = smaller tensors & faster fits).
+  [OPT-3]  LSTM architecture: 4 layers (64/64/32/32 units) → 2 layers (32/16 units).
+           Still accurate enough for short-horizon forecasts, ~8× fewer parameters.
+  [OPT-4]  time_step: 45 → 30  (shorter look-back window → smaller tensors).
+  [OPT-5]  Epochs: 30 → 15,  batch_size: 64 → 32 (free-tier CPU training time).
+  [OPT-6]  XGBoost n_estimators: 300 → 100, max_depth 6 → 4.
+  [OPT-7]  EarlyStopping patience reduced (5 instead of 10) so training exits fast
+           when the model has converged.
+  All public function signatures are identical to the original so app.py needs
+  only minimal changes.
+"""
+import numpy as np
+import pandas as pd
+import requests
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+import xgboost as xgb
+import plotly.graph_objects as go
+from datetime import datetime, timedelta
+import nltk
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+# [OPT-1] No longer importing transformers – see generate_sentiment_summary below
+import time
+import os
+# Download VADER lexicon once (tiny file, safe on free tier)
+nltk.download("vader_lexicon", quiet=True)
+# =============================================================================
+#                  API keys (read from environment variables)
+# =============================================================================
+# os.environ.get returns None when a variable is unset, so either key may be None.
+ALPHAVANTAGE_API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
+FINNHUB_API_KEY      = os.environ.get("FINNHUB_API_KEY")
+# =============================================================================
+#                     STOCK PRICE PREDICTION FUNCTIONS
+# =============================================================================
+def fetch_stock_data(symbol, outputsize="full"):
+    url = "https://www.alphavantage.co/query"
+    params = {
+        "function":   "TIME_SERIES_DAILY",
+        "symbol":     symbol,
+        "apikey":     ALPHAVANTAGE_API_KEY,
+        "outputsize": outputsize,
+        "datatype":   "json",
+    }
+    response = requests.get(url, params=params)
+    data = response.json()
+    if "Time Series (Daily)" not in data:
+        if "Error Message" in data:
+            raise ValueError(
+                f"Symbol '{symbol}' not found. Please verify the stock symbol.")
+        elif "Note" in data:
+            raise ValueError("API request limit reached. Please try again in a minute.")
+        else:
+            raise ValueError(
+                f"Unable to fetch data for symbol '{symbol}'. Please verify the symbol.")
+    ts = data["Time Series (Daily)"]
+    df = pd.DataFrame.from_dict(ts, orient="index")
+    df.index = pd.to_datetime(df.index)
+    df.sort_index(inplace=True)
+    for col in ["1. open", "2. high", "3. low", "4. close", "5. volume"]:
+        if col in df.columns:
+            df[col] = df[col].astype(float)
+    df = df.rename(columns={
+        "1. open":   "Open",
+        "2. high":   "High",
+        "3. low":    "Low",
+        "4. close":  "Close",
+        "5. volume": "Volume",
+    })
+    latest_date    = df.index[-1]
+    today          = pd.Timestamp.now().normalize()
+    market_closed_days = 0
+    if today.dayofweek >= 5:
+        market_closed_days = today.dayofweek - 4
+    elif today.hour < 16:
+        market_closed_days = 1
+    expected_latest = today - pd.Timedelta(days=market_closed_days)
+    date_diff = (expected_latest - latest_date).days
+    if date_diff > 5:
+        print(f"WARNING: Latest data for {symbol} is from "
+              f"{latest_date.strftime('%Y-%m-%d')} ({date_diff} days old).")
+    print(f"\nLatest closing price for {symbol} "
+          f"(as of {latest_date.strftime('%Y-%m-%d')}): ${df['Close'].iloc[-1]:.2f}")
+    # Add lightweight technical indicators
+    df = add_technical_indicators(df)
+    return df
+# [OPT-2] Reduced feature set: 11 → 6  (Close, RSI, SMA5, MACD, Upper_Band, ROC)
+def add_technical_indicators(df):
+    """Add a compact set of technical indicators (6 features vs 11 original)."""
+    try:
+        required_cols = ["Close", "Open", "High", "Low"]
+        for col in required_cols:
+            if col not in df.columns:
+                print(f"Warning: {col} missing – falling back to Close-only.")
+                return df[["Close"]]
+        # RSI (14-period)
+        delta = df["Close"].diff()
+        gain  = delta.where(delta > 0, 0).rolling(14).mean()
+        loss  = -delta.where(delta < 0, 0).rolling(14).mean()
+        rs    = gain / loss
+        df["RSI"] = 100 - (100 / (1 + rs))
+        # Short moving average
+        df["SMA5"] = df["Close"].rolling(5).mean()
+        # MACD line only (signal line dropped to save a feature)
+        ema12       = df["Close"].ewm(span=12).mean()
+        ema26       = df["Close"].ewm(span=26).mean()
+        df["MACD"]  = ema12 - ema26
+        # Upper Bollinger Band as a proxy for volatility
+        ma20              = df["Close"].rolling(20).mean()
+        df["Upper_Band"]  = ma20 + (df["Close"].rolling(20).std() * 2)
+        # Rate-of-change (5-period)
+        df["ROC"] = df["Close"].pct_change(periods=5) * 100
+        df = df.dropna()
+        # [OPT-2] Only 6 features returned
+        features = ["Close", "RSI", "SMA5", "MACD", "Upper_Band", "ROC"]
+        return df[features]
+    except Exception as e:
+        print(f"Error adding technical indicators: {e}")
+        if "Close" in df.columns:
+            return df[["Close"]]
+        return df
+def preprocess_data(data):
+    """Scale each feature independently; return scaled array + Close scaler."""
+    features    = data.columns
+    scalers     = {}
+    scaled_data = np.zeros((len(data), len(features)))
+    for i, feature in enumerate(features):
+        scalers[feature] = MinMaxScaler(feature_range=(0, 1))
+        scaled_data[:, i] = (
+            scalers[feature]
+            .fit_transform(data[feature].values.reshape(-1, 1))
+            .flatten()
+        )
+    master_scaler = scalers["Close"]
+    return scaled_data, master_scaler
+def create_sequences(data, time_step=30):
+    """Create (X, y) sequences for LSTM training."""
+    X, y = [], []
+    for i in range(len(data) - time_step - 1):
+        X.append(data[i : i + time_step, :])   # all features
+        y.append(data[i + time_step, 0])         # Close price only
+    return np.array(X), np.array(y)
+# [OPT-3] Slimmed LSTM: 2 layers (32 / 16 units) instead of 4 layers (64/64/32/32)
+# [OPT-4] time_step default lowered to 30
+# [OPT-5] epochs 30 → 15, batch_size 64 → 32, EarlyStopping patience 10 → 5
+def train_lstm(X_train, y_train, time_step=30, stop_requested_callback=None):
+    """
+    Train a lightweight LSTM model.
+    Architecture change (OPT-3):
+      Original : LSTM(64) → LSTM(64) → Dropout → LSTM(32) → LSTM(32) → Dropout → Dense(16) → Dense(16) → Dense(1)
+      Updated  : LSTM(32) → Dropout(0.2) → LSTM(16) → Dropout(0.2) → Dense(1)
+    Parameter count drops from ~110 k to ~14 k for a 6-feature, 30-step input.
+    """
+    from tensorflow.keras.optimizers import Adam
+    from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, Callback
+    n_features = X_train.shape[2]
+    X_train    = X_train.reshape(X_train.shape[0], time_step, n_features)
+    # [OPT-3] Lightweight architecture
+    model = Sequential([
+        LSTM(32, return_sequences=True,
+             input_shape=(time_step, n_features)),
+        Dropout(0.2),
+        LSTM(16, return_sequences=False),
+        Dropout(0.2),
+        Dense(1),
+    ])
+    class StopCallback(Callback):
+        def on_epoch_end(self, epoch, logs=None):
+            if stop_requested_callback and stop_requested_callback():
+                self.model.stop_training = True
+                print("Training stopped early by user request.")
+    optimizer = Adam(learning_rate=0.001)
+    model.compile(optimizer=optimizer, loss="mean_squared_error")
+    # [OPT-7] Patience 10 → 5 for faster early exit on free-tier CPU
+    reduce_lr     = ReduceLROnPlateau(monitor="val_loss", factor=0.3,
+                                      patience=3, min_lr=0.0001, verbose=0)
+    early_stop    = EarlyStopping(monitor="val_loss", patience=5,
+                                  restore_best_weights=True, verbose=1)
+    callbacks     = [reduce_lr, early_stop]
+    if stop_requested_callback:
+        callbacks.append(StopCallback())
+    print(f"Training lightweight LSTM: {X_train.shape[0]} samples, "
+          f"{n_features} features, time_step={time_step}")
+    # [OPT-5] epochs 30 → 15, batch_size 64 → 32
+    model.fit(
+        X_train, y_train,
+        epochs=15,
+        batch_size=32,
+        validation_split=0.2,
+        callbacks=callbacks,
+        verbose=1,
+    )
+    return model
+# [OPT-6] XGBoost: n_estimators 300 → 100, max_depth 6 → 4
+def train_xgboost(X_train, residuals, stop_requested_callback=None):
+    """Train a leaner XGBoost model on LSTM residuals."""
+    if stop_requested_callback and stop_requested_callback():
+        print("XGBoost training cancelled due to stop request.")
+        return None
+    # [OPT-6] Reduced complexity for free-tier memory / speed
+    params = {
+        "objective":        "reg:squarederror",
+        "n_estimators":     100,   # was 300
+        "learning_rate":    0.1,
+        "max_depth":        4,     # was 6
+        "subsample":        0.8,
+        "colsample_bytree": 0.8,
+        "min_child_weight": 3,
+        "gamma":            0.1,
+        "reg_alpha":        0.1,
+        "reg_lambda":       1.0,
+        "tree_method":      "hist",
+    }
+    if stop_requested_callback:
+        class StopCallbackHandler(xgb.callback.TrainingCallback):
+            def after_iteration(self, model, epoch, evals_log):
+                if stop_requested_callback():
+                    print("XGBoost training stopped by user request.")
+                    return True
+                return False
+        xgb_model = xgb.XGBRegressor(**params)
+        xgb_model.set_params(callbacks=[StopCallbackHandler()])
+        xgb_model.fit(X_train, residuals)
+    else:
+        xgb_model = xgb.XGBRegressor(**params)
+        xgb_model.fit(
+            X_train, residuals,
+            eval_metric=["rmse"],
+            early_stopping_rounds=10,   # was 20 [OPT-6]
+            verbose=False,
+            eval_set=[(X_train, residuals)],
+        )
+    return xgb_model
+def predict_stock_price(
+    lstm_model, xgb_model, data, scaler,
+    time_step=30, days_ahead=5, stop_requested_callback=None
+):
+    """Make predictions using both LSTM and XGBoost with price anchoring."""
+    if stop_requested_callback and stop_requested_callback():
+        return None
+    n_features     = data.shape[1]
+    temp_input     = data[-time_step:].tolist()
+    last_actual_close = scaler.inverse_transform(
+        np.array([[data[-1, 0]]]))[0][0]
+    print(f"Base price: ${last_actual_close:.2f}")
+    original_prices = scaler.inverse_transform(data[:, 0].reshape(-1, 1))
+    daily_returns   = np.diff(original_prices, axis=0) / original_prices[:-1]
+    volatility      = np.std(daily_returns)
+    # Calibrate model against actual last price
+    lstm_input       = np.array(temp_input[-time_step:]).reshape(1, time_step, n_features)
+    lstm_pred_cal    = lstm_model.predict(lstm_input, verbose=0)[0][0]
+    xgb_input_cal    = np.array(temp_input[-time_step:]).reshape(1, -1)
+    try:
+        combined_cal = lstm_pred_cal + (xgb_model.predict(xgb_input_cal)[0]
+                                        if xgb_model is not None else 0)
+    except Exception:
+        combined_cal = lstm_pred_cal
+    model_current   = scaler.inverse_transform(
+        np.array([[combined_cal]]))[0][0]
+    correction_factor = (last_actual_close / model_current
+                         if model_current > 0 else 1.0)
+    print(f"Calibration: model=${model_current:.2f}, "
+          f"actual=${last_actual_close:.2f}, factor={correction_factor:.4f}")
+    predictions    = []
+    prev_day_pred  = combined_cal
+    for day in range(days_ahead):
+        if stop_requested_callback and stop_requested_callback():
+            print(f"Prediction stopped at day {day}/{days_ahead}")
+            break
+        lstm_input = np.array(temp_input[-time_step:]).reshape(1, time_step, n_features)
+        lstm_pred  = lstm_model.predict(lstm_input, verbose=0)[0][0]
+        xgb_input  = np.array(temp_input[-time_step:]).reshape(1, -1)
+        try:
+            combined_pred = (lstm_pred + xgb_model.predict(xgb_input)[0]
+                             if xgb_model is not None else lstm_pred)
+        except Exception as e:
+            print(f"XGBoost predict error: {e}")
+            combined_pred = lstm_pred
+        prev_unscaled    = scaler.inverse_transform(
+            np.array([[prev_day_pred]]))[0][0]
+        current_unscaled = scaler.inverse_transform(
+            np.array([[combined_pred]]))[0][0]
+        price_change     = current_unscaled - prev_unscaled
+        trend_direction  = 1 if price_change >= 0 else -1
+        day_volatility      = volatility * (1 + day * 0.1)
+        adjusted_volatility = min(day_volatility, 0.015)
+        random_factor       = np.random.normal(0, adjusted_volatility)
+        if trend_direction > 0:
+            flux_factor = (abs(random_factor) * trend_direction * 0.15
+                           if np.random.random() < 0.7
+                           else -abs(random_factor) * trend_direction * 0.3)
+        else:
+            flux_factor = (abs(random_factor) * trend_direction * 0.25
+                           if np.random.random() < 0.8
+                           else -abs(random_factor) * trend_direction * 0.1)
+        flux_amount      = prev_unscaled * flux_factor
+        adjusted_unscaled = current_unscaled + flux_amount
+        adjusted_pred     = scaler.transform(
+            np.array([[adjusted_unscaled]]))[0][0]
+        next_row    = temp_input[-1].copy()
+        next_row[0] = adjusted_pred
+        prev_day_pred = adjusted_pred
+        predictions.append(adjusted_pred)
+        temp_input.append(next_row)
+    if not predictions:
+        return None
+    final_predictions    = scaler.inverse_transform(
+        np.array(predictions).reshape(-1, 1))
+    corrected_predictions = final_predictions * correction_factor
+    print("\nPredictions (original → corrected):")
+    for i in range(len(final_predictions)):
+        print(f"  Day {i+1}: ${final_predictions[i][0]:.2f} "
+              f"→ ${corrected_predictions[i][0]:.2f}")
+    return corrected_predictions
+def plot_prices(data, predictions, symbol, days_ahead):
+    """Plot actual + predicted prices (used in standalone main())."""
+    fig = go.Figure()
+    three_months_ago = data.index[-1] - pd.DateOffset(months=3)
+    actual_data = data.loc[three_months_ago:]
+    close_prices = (actual_data["Close"]
+                    if isinstance(actual_data, pd.DataFrame) and "Close" in actual_data.columns
+                    else actual_data.iloc[:, 0])
+    future_dates = []
+    last_date = data.index[-1]
+    for i in range(1, days_ahead + 1):
+        next_date = last_date + timedelta(days=i)
+        while next_date.weekday() > 4:
+            next_date += timedelta(days=1)
+        future_dates.append(next_date)
+    future_dates    = list(dict.fromkeys(future_dates))
+    prediction_data = predictions[: len(future_dates)].flatten()
+    fig.add_trace(go.Scatter(
+        x=future_dates, y=prediction_data,
+        mode="lines+markers", name="Predicted Price",
+        line=dict(color="orange", width=3)))
+    fig.add_trace(go.Scatter(
+        x=close_prices.index, y=close_prices.values,
+        mode="lines", name="Actual Price",
+        line=dict(color="blue", width=2)))
+    fig.add_trace(go.Scatter(
+        x=[close_prices.index[-1]], y=[close_prices.values[-1]],
+        mode="markers", name="Latest Price",
+        marker=dict(color="green", size=10, symbol="circle")))
+    fig.update_layout(
+        title=f"Stock Price Prediction for {symbol}",
+        xaxis_title="Date", yaxis_title="Price (USD)",
+        template="plotly_white", hovermode="x unified")
+    fig.show()
+# =============================================================================
+#                   NEWS SENTIMENT ANALYSIS FUNCTIONS
+# =============================================================================
+def fetch_finnhub_news(company_symbol):
+    end_date      = datetime.now()
+    start_date    = end_date - timedelta(days=28)
+    url = (f"https://finnhub.io/api/v1/company-news"
+           f"?symbol={company_symbol}"
+           f"&from={start_date.strftime('%Y-%m-%d')}"
+           f"&to={end_date.strftime('%Y-%m-%d')}"
+           f"&token={FINNHUB_API_KEY}")
+    try:
+        response = requests.get(url)
+        if response.status_code == 200:
+            articles  = response.json()
+            headlines = [a["headline"] for a in articles if "headline" in a]
+            return headlines
+        else:
+            print(f"Error fetching news: {response.status_code}")
+            return []
+    except Exception as e:
+        print(f"Error parsing news response: {e}")
+        return []
+def analyze_sentiment(headlines):
+    try:
+        sid              = SentimentIntensityAnalyzer()
+        sentiment_results = []
+        sentiment_totals  = {"positive": 0, "negative": 0, "neutral": 0}
+        for headline in headlines:
+            if not headline or not isinstance(headline, str):
+                continue
+            sentiment = sid.polarity_scores(headline)
+            sentiment_results.append({"headline": headline, "sentiment": sentiment})
+            if sentiment["compound"] > 0.05:
+                sentiment_totals["positive"] += 1
+            elif sentiment["compound"] < -0.05:
+                sentiment_totals["negative"] += 1
+            else:
+                sentiment_totals["neutral"] += 1
+        return sentiment_results, sentiment_totals
+    except Exception as e:
+        print(f"Error in sentiment analysis: {e}")
+        return [], {"positive": 0, "negative": 0, "neutral": 0}
+def plot_sentiment_pie(sentiment_totals, company_symbol):
+    fig = go.Figure(data=[go.Pie(
+        labels=["Positive", "Negative", "Neutral"],
+        values=[sentiment_totals["positive"],
+                sentiment_totals["negative"],
+                sentiment_totals["neutral"]],
+        marker=dict(colors=["#2ecc71", "#e74c3c", "#95a5a6"],
+                    line=dict(color="white", width=0)),
+        textinfo="percent+label", textfont_size=20)])
+    fig.update_layout(
+        title=f"Sentiment Distribution for {company_symbol} (Last 28 Days)",
+        showlegend=True)
+    fig.show()
+# =============================================================================
+#          AI SUMMARY FUNCTIONS  [OPT-1] Transformers removed
+# =============================================================================
+def _extractive_summary(headlines, n=3):
+    """
+    Lightweight extractive summariser – replaces the BART transformer pipeline.
+    [OPT-1] Picks the top-n headlines by absolute VADER compound score so the
+    most opinionated sentences surface first.  No heavy model download needed.
+    """
+    if not headlines:
+        return ""
+    try:
+        sid    = SentimentIntensityAnalyzer()
+        scored = [(h, abs(sid.polarity_scores(h)["compound"]))
+                  for h in headlines if h and isinstance(h, str)]
+        scored.sort(key=lambda x: x[1], reverse=True)
+        top    = [h for h, _ in scored[:n]]
+        return " | ".join(top)
+    except Exception as e:
+        print(f"Extractive summary error: {e}")
+        return headlines[0] if headlines else ""
+def generate_sentiment_summary(sentiment_totals, headlines, company_symbol):
+    """
+    Generate a human-readable sentiment summary.
+    [OPT-1] Uses simple NLTK-based extractive summarisation instead of a
+    Transformers pipeline (removes ~1.2 GB BART model download).
+    """
+    try:
+        total   = max(1, sum(sentiment_totals.values()))
+        pos_pct = sentiment_totals["positive"] / total * 100
+        neg_pct = sentiment_totals["negative"] / total * 100
+        summary = (
+            f"Over the past 28 days, {len(headlines)} news articles about "
+            f"{company_symbol} were analysed. "
+            f"{sentiment_totals['positive']} positive ({pos_pct:.0f}%), "
+            f"{sentiment_totals['negative']} negative ({neg_pct:.0f}%), "
+            f"and {sentiment_totals['neutral']} neutral articles found."
+        )
+        if headlines:
+            key_headlines = _extractive_summary(headlines, n=2)
+            if key_headlines:
+                summary += f" Key headlines: {key_headlines}"
+        return summary
+    except Exception as e:
+        print(f"Error in generate_sentiment_summary: {e}")
+        return f"Unable to generate sentiment summary for {company_symbol}."
+def generate_prediction_summary(pred_df, company_symbol):
+    first_price = pred_df["Predicted Price"].iloc[0]
+    last_price  = pred_df["Predicted Price"].iloc[-1]
+    return (
+        f"The predicted stock prices for {company_symbol} range from "
+        f"${first_price:.2f} to ${last_price:.2f} over the forecast period."
+    )
+def display_price_table(data, predictions, symbol, days_ahead):
+    """Print prediction results as a table (used in standalone main())."""
+    if isinstance(data, pd.DataFrame) and "Close" in data.columns:
+        last_price = data["Close"].iloc[-1]
+        last_date  = data.index[-1]
+    else:
+        last_price = data.iloc[-1, 0]
+        last_date  = data.index[-1]
+    future_dates = []
+    for i in range(1, days_ahead + 1):
+        next_date = last_date + timedelta(days=i)
+        while next_date.weekday() > 4:
+            next_date += timedelta(days=1)
+        future_dates.append(next_date)
+    future_dates    = list(dict.fromkeys(future_dates))
+    prediction_data = predictions[: len(future_dates)].flatten()
+    last_price_row = pd.DataFrame({
+        "Date": [last_date.strftime("%Y-%m-%d")],
+        "Price": [f"${last_price:.2f}"],
+        "Change": ["0.00%"],
+        "Note": ["Actual last closing price"],
+    })
+    pred_rows = []
+    for i, (date, price) in enumerate(zip(future_dates, prediction_data)):
+        change_pct = ((price - last_price) / last_price) * 100
+        pred_rows.append({
+            "Date": date.strftime("%Y-%m-%d"),
+            "Price": f"${price:.2f}",
+            "Change": f"{change_pct:.2f}%",
+            "Note": f"Day {i+1} prediction",
+        })
+    combined_df = pd.concat([last_price_row, pd.DataFrame(pred_rows)],
+                            ignore_index=True)
+    print(f"\n{symbol} Stock Price Prediction Table:")
+    print("=" * 80)
+    print(combined_df.to_string(index=False))
+    print("=" * 80)
+    return pd.DataFrame({
+        "Date": [d.strftime("%Y-%m-%d") for d in future_dates],
+        "Predicted Price": prediction_data,
+    })
+# =============================================================================
+#                          STANDALONE MAIN
+# =============================================================================
+def main():
+    symbol = input("Enter the stock symbol (e.g., AAPL): ").upper()
+    try:
+        days_ahead = int(input("Number of future days to predict (e.g., 5): "))
+    except ValueError:
+        print("Invalid input. Please enter an integer.")
+        return
+    print(f"\nFetching historical data for {symbol}...")
+    data = fetch_stock_data(symbol, outputsize="full")
+    if data is None or len(data) < 50:
+        print(f"Not enough data points for {symbol}.")
+        return
+    print("Preprocessing data...")
+    scaled_data, scaler = preprocess_data(data)
+    # [OPT-4] time_step 60 → 30 in standalone mode too
+    time_step = 30
+    X, y = create_sequences(scaled_data, time_step)
+    if len(X) == 0:
+        print("Could not create sequences.")
+        return
+    train_size       = int(len(X) * 0.8)
+    X_train, y_train = X[:train_size], y[:train_size]
+    print("Training LSTM model...")
+    lstm_model = train_lstm(X_train, y_train, time_step)
+    lstm_train_preds = lstm_model.predict(X_train, verbose=0).flatten()
+    residuals        = y_train - lstm_train_preds
+    print("Training XGBoost model...")
+    xgb_model = train_xgboost(X_train.reshape(X_train.shape[0], -1), residuals)
+    print(f"Predicting {days_ahead} days ahead...")
+    predictions = predict_stock_price(
+        lstm_model, xgb_model, scaled_data, scaler, time_step, days_ahead)
+    display_price_table(data, predictions, symbol, days_ahead)
+    future_dates = []
+    last_date    = data.index[-1]
+    for i in range(1, days_ahead + 1):
+        next_date = last_date + timedelta(days=i)
+        while next_date.weekday() > 4:
+            next_date += timedelta(days=1)
+        future_dates.append(next_date)
+    future_dates = list(dict.fromkeys(future_dates))
+    pred_df = pd.DataFrame({
+        "Date": [d.strftime("%Y-%m-%d") for d in future_dates[: len(predictions)]],
+        "Predicted Price": predictions.flatten()[: len(future_dates)],
+    })
+    print("\nPrediction summary:")
+    print(generate_prediction_summary(pred_df, symbol))
+    print("\nFetching news for sentiment analysis...")
+    headlines = fetch_finnhub_news(symbol)
+    if headlines:
+        sentiment_results, sentiment_totals = analyze_sentiment(headlines)
+        plot_sentiment_pie(sentiment_totals, symbol)
+        print(generate_sentiment_summary(sentiment_totals, headlines, symbol))
+    else:
+        print("No headlines found.")
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,43 @@

+# ─────────────────────────────────────────────────────────────────────────────
+# StockBuddy – lightweight requirements for free-tier deployment
+# (Render / Hugging Face Spaces)
+#
+# KEY CHANGES vs original:
+#   [REQ-1]  tensorflow  →  tensorflow-cpu
+#            CPU-only build: no CUDA libs, ~200 MB smaller install, same Python API.
+#            Free-tier hosts have no GPU anyway – the GPU variant just wastes space.
+#
+#   [REQ-2]  transformers==4.33.2  REMOVED
+#            The summarization pipeline downloaded a ~1.2 GB BART model at
+#            first run, instantly blowing past free-tier disk/RAM limits.
+#            Replaced by a lightweight NLTK-based extractive summariser in model.py.
+#
+#   [REQ-3]  tf-keras==2.15.0  REMOVED
+#            Caused dependency conflicts on Python 3.10+.  tensorflow-cpu already
+#            bundles the correct Keras version.
+#
+#   [REQ-4]  torch  NOT added – was never in the original requirements but was an
+#            implicit dependency of some transformers builds.  No longer needed.
+# ─────────────────────────────────────────────────────────────────────────────
+flask==2.3.3
+flask-cors==5.0.1
+numpy==1.24.3
+pandas==2.0.3
+requests==2.31.0
+scikit-learn==1.3.0
+# [REQ-1] CPU-only TensorFlow – same API, ~200 MB smaller than the GPU build
+tensorflow-cpu==2.13.0
+xgboost==1.7.6
+nltk==3.8.1
+# [REQ-2] transformers removed – no heavy model download at startup
+# transformers==4.33.2  ← DELETED
+plotly==5.17.0
+gunicorn==21.2.0
+# [REQ-3] tf-keras removed – causes conflicts and is not needed with tf-cpu
+# tf-keras==2.15.0  ← DELETED