arithescientist committed on
Commit
e8ff7d9
Β·
verified Β·
1 Parent(s): 7a59e7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -203
app.py CHANGED
@@ -1,203 +1,203 @@
1
- # app.py
2
- import os
3
- import math
4
- from datetime import datetime
5
- import yfinance as yf
6
- import pandas as pd
7
- from flask import jsonify
8
- import numpy as np
9
- import matplotlib.pyplot as plt
10
- from flask import Flask, render_template, request
11
- from statsmodels.tsa.arima.model import ARIMA
12
- from sklearn.metrics import mean_squared_error
13
- import praw, re
14
- from pandas.tseries.offsets import BDay
15
- from textblob import TextBlob
16
- import simplejson
17
- import nltk
18
- nltk.download('punkt', quiet=True)
19
-
20
- app = Flask(__name__, static_folder='static', template_folder='templates')
21
-
22
- REDDIT_ID = os.getenv("REDDIT_ID")
23
- REDDIT_SECRET = os.getenv("REDDIT_SECRET")
24
-
25
- @app.route('/', methods=['GET'])
26
- def index():
27
- # Renders your form; rename your HTML to templates/index.html
28
- return render_template('forecaster.html')
29
-
30
- @app.route('/insertintotable', methods=['POST'])
31
- def insertintotable():
32
- quote = request.form['nm']
33
-
34
- # β€”β€” 1) Fetch exactly two years of raw + adjusted OHLCV via yfinance β€”β€”
35
- end = datetime.now()
36
- start = datetime(end.year - 2, end.month, end.day)
37
- df = yf.download(
38
- quote,
39
- start=start,
40
- end=end,
41
- auto_adjust=False, # ensures separate 'Adj Close'
42
- actions=False
43
- ).reset_index()
44
-
45
- if df.empty:
46
- return render_template('forecaster.html', not_found=True)
47
-
48
- # Guarantee 'Adj Close' column
49
- if 'Adj Close' not in df.columns:
50
- df['Adj Close'] = df['Close']
51
-
52
- # Take the very last row as a Series for display & recommendation
53
- latest = df.iloc[-1]
54
-
55
-
56
-
57
- def ARIMA_ALGO(df):
58
- # β€” 1) Ensure a DateTimeIndex β€”
59
- if 'Date' in df.columns:
60
- df = df.copy()
61
- df['Date'] = pd.to_datetime(df['Date'])
62
- dfi = df.set_index('Date')
63
- else:
64
- dfi = df.copy()
65
- dfi.index = pd.to_datetime(dfi.index)
66
-
67
- # β€” 2) Pick the first matching price column β€”
68
- for col in ('Adj Close','adjClose','Adj_close','Close','Price'):
69
- if col in dfi.columns:
70
- raw = dfi[col]
71
- break
72
- else:
73
- raise KeyError("No price column found.")
74
-
75
- # β€” 3) Flatten to 1-D, coerce to floats, preserve original dates β€”
76
- arr = pd.to_numeric(raw.values.flatten(), errors='coerce')
77
- series = pd.Series(arr, index=raw.index).bfill().astype(float)
78
-
79
- # β€” 4) Train/test split (65/35) β€”
80
- split = int(len(series) * 0.65)
81
- train, test = series.iloc[:split], series.iloc[split:]
82
-
83
- # β€” 5) Rolling ARIMA + 7-day ahead β€”
84
- history, preds_all = list(train), []
85
- for t in range(len(test) + 7):
86
- m = ARIMA(history, order=(6,1,0)).fit()
87
- yhat = float(m.forecast()[0])
88
- preds_all.append(yhat)
89
- history.append(test.iloc[t] if t < len(test) else yhat)
90
-
91
- # β€” 6) Compute RMSE on the test slice β€”
92
- rmse = math.sqrt(mean_squared_error(test, preds_all[:len(test)]))
93
- tomorrow= preds_all[-7]
94
-
95
- # β€” 7) Build DataFrames for plotting β€”
96
- # β€” history_df: entire past β€”
97
- history_df = pd.DataFrame({'Adj Close': series.values}, index=series.index)
98
-
99
- # β€” predict_df: only the 7 β€œfuture” days β€”
100
- future_idx = pd.date_range(series.index[-1], periods=8, freq='B')[1:]
101
- # here I use freq='B' to skip weekends
102
- last7 = preds_all[-7:]
103
- predict_df = pd.DataFrame({'ARIMA': last7}, index=future_idx)
104
-
105
- return preds_all, rmse, tomorrow, history_df, predict_df
106
-
107
-
108
- # Run ARIMA
109
- preds, rmse, tomorrow, hist_df, pred_df = ARIMA_ALGO(df)
110
-
111
- # Reset index so β€˜Date’ is a column
112
- hist_df = (
113
- hist_df
114
- .reset_index()
115
- .rename(columns={
116
- 'index': 'Date',
117
- 'History':'Adj Close' # rename to exactly match your latest['Adj Close']
118
- })
119
- )
120
- pred_df = pred_df.reset_index().rename(columns={'index':'Date'})
121
-
122
- # β€”β€” 3) Sentiment analysis β€”β€”
123
- def retrieving_tweets_polarity(symbol):
124
- reddit = praw.Reddit(
125
- client_id=REDDIT_ID,
126
- client_secret=REDDIT_SECRET,
127
- user_agent='SentimentAnalysis'
128
- )
129
- posts = reddit.subreddit('all').search(symbol, limit=300, sort='new')
130
- pos = neg = 0
131
- texts = []
132
- total_pol = 0
133
- for post in posts:
134
- txt = (post.title or post.selftext or "")
135
- txt = re.sub(r'&amp;|:', '', txt).encode('ascii','ignore').decode()
136
- blob = TextBlob(txt)
137
- pol = sum(s.sentiment.polarity for s in blob.sentences)
138
- total_pol += pol
139
- if pol>0: pos+=1
140
- if pol<0: neg+=1
141
- texts.append(txt)
142
- avg_pol = total_pol / len(texts) if texts else 0
143
- neu = max(0, len(texts) - pos - neg)
144
- label = "Overall Positive" if avg_pol>0 else "Overall Negative"
145
- return avg_pol, texts, label, pos, neg, neu
146
-
147
- global_pol, tweets, tw_pol, pos, neg, neu = retrieving_tweets_polarity(quote)
148
-
149
- # β€”β€” 4) Recommendation β€”β€”
150
- mean7 = hist_df['Adj Close'].tail(7).mean()
151
- adj_val = float(latest['Adj Close'])
152
- if adj_val < mean7 and global_pol > 0:
153
- idea, decision = "RISE", "BUY"
154
- else:
155
- idea, decision = "FALL", "SELL"
156
-
157
- # β€”β€” 5) Write out JSON for the dashboard β€”β€”
158
- trends = hist_df[['Date','Adj Close']].dropna().to_dict('records')
159
- past_preds = pred_df[['Date','ARIMA']].tail(7).to_dict('records')
160
- forecast = pred_df[['Date','ARIMA']].to_dict('records')
161
-
162
- # --- new: full series (history + all preds) ---
163
- full = trends + [
164
- {"Date": r["Date"], "Adj Close": r["ARIMA"]}
165
- for r in forecast
166
- ]
167
-
168
- with open("static/assets/js/dashboard/trends.json", "w", encoding='utf-8') as f:
169
- simplejson.dump(trends, f, default= str, ignore_nan=True, ensure_ascii=False, indent=4)
170
- with open("static/assets/js/dashboard/pastpreds.json","w", encoding='utf-8') as f:
171
- simplejson.dump(past_preds, f, default=str, ignore_nan=True, ensure_ascii=False, indent=4)
172
- with open("static/assets/js/dashboard/forecast.json","w", encoding='utf-8') as f:
173
- simplejson.dump(forecast, f, default=str, ignore_nan=True, ensure_ascii=False, indent=4)
174
- with open("static/assets/js/dashboard/full.json","w") as f:
175
- simplejson.dump(full, f, default=str, ignore_nan=True, ensure_ascii=False, indent=4)
176
-
177
- # β€”β€” 6) Render the final template β€”β€”
178
- dates_array = pred_df['Date'].dt.strftime('%Y-%m-%d').tail(7).to_numpy().reshape(-1,1)
179
- forecast_array_ar = np.round(pred_df['ARIMA'].tail(7).to_numpy(), 2).reshape(-1,1)
180
-
181
-
182
-
183
- return render_template(
184
- 'resultsf.html',
185
- quote=quote,
186
- arima_pred=round(tomorrow,2),
187
- open_s=latest['Open'],
188
- high_s=latest['High'],
189
- low_s=latest['Low'],
190
- close_s=latest['Close'],
191
- adj_close=latest['Adj Close'],
192
- vol=latest['Volume'],
193
- tw_list=tweets,
194
- tw_pol=tw_pol,
195
- idea=idea,
196
- decision=decision,
197
- dates=dates_array,
198
- forecast_set_ar=forecast_array_ar,
199
- error_arima=round(rmse,2)
200
- )
201
-
202
- if __name__ == '__main__':
203
- app.run(debug=True, host="0.0.0.0", port=7860)
 
1
+ # app.py
2
+ import os
3
+ import math
4
+ from datetime import datetime
5
+ import yfinance as yf
6
+ import pandas as pd
7
+ from flask import jsonify
8
+ import numpy as np
9
+ import matplotlib.pyplot as plt
10
+ from flask import Flask, render_template, request
11
+ from statsmodels.tsa.arima.model import ARIMA
12
+ from sklearn.metrics import mean_squared_error
13
+ import praw, re
14
+ from pandas.tseries.offsets import BDay
15
+ from textblob import TextBlob
16
+ import simplejson
17
+ import nltk
18
+ nltk.download('punkt', quiet=True)
19
+
20
+ app = Flask(__name__, static_folder='static', template_folder='templates')
21
+
22
+ REDDIT_ID = os.getenv("REDDIT_ID")
23
+ REDDIT_SECRET = os.getenv("REDDIT_SECRET")
24
+
25
@app.route('/', methods=['GET'])
def index():
    """Serve the ticker-entry form (templates/forecaster.html)."""
    form_template = 'forecaster.html'
    return render_template(form_template)
29
+
30
@app.route('/insertintotable', methods=['POST'])
def insertintotable():
    """Handle the forecast form POST.

    Downloads two years of prices for the requested ticker, fits a rolling
    ARIMA(6,1,0) model, scores Reddit sentiment, writes the dashboard JSON
    files under static/assets/js/, and renders resultsf.html.
    """
    # A missing/blank 'nm' field previously raised KeyError (HTTP 400/500);
    # treat it the same as an unknown symbol.
    quote = request.form.get('nm', '').strip()
    if not quote:
        return render_template('forecaster.html', not_found=True)

    # —— 1) Fetch two years of raw + adjusted OHLCV via yfinance ——
    end = datetime.now()
    # NOTE: datetime(end.year - 2, end.month, end.day) raises ValueError
    # when today is Feb 29; DateOffset rolls the date back safely.
    start = end - pd.DateOffset(years=2)
    df = yf.download(
        quote,
        start=start,
        end=end,
        auto_adjust=False,   # keep a separate 'Adj Close' column
        actions=False
    ).reset_index()

    # Unknown ticker, or too little history for a 65/35 split + ARIMA(6,1,0).
    if df.empty or len(df) < 30:
        return render_template('forecaster.html', not_found=True)

    # Guarantee an 'Adj Close' column (older yfinance / auto_adjust quirks).
    if 'Adj Close' not in df.columns:
        df['Adj Close'] = df['Close']

    # Last trading day's row, used for display and the recommendation.
    latest = df.iloc[-1]

    def ARIMA_ALGO(df):
        """Walk-forward one-step ARIMA over the test slice plus a
        7-business-day out-of-sample forecast.

        Returns (all predictions, test RMSE, next-day prediction,
        history DataFrame, 7-day forecast DataFrame).
        """
        # — 1) Ensure a DateTimeIndex —
        if 'Date' in df.columns:
            df = df.copy()
            df['Date'] = pd.to_datetime(df['Date'])
            dfi = df.set_index('Date')
        else:
            dfi = df.copy()
            dfi.index = pd.to_datetime(dfi.index)

        # — 2) Pick the first matching price column —
        for col in ('Adj Close', 'adjClose', 'Adj_close', 'Close', 'Price'):
            if col in dfi.columns:
                raw = dfi[col]
                break
        else:
            raise KeyError("No price column found.")

        # — 3) Flatten to 1-D floats, preserving the original dates.
        # yfinance can hand back an (n, 1) column, hence flatten().
        arr = pd.to_numeric(raw.values.flatten(), errors='coerce')
        series = pd.Series(arr, index=raw.index).bfill().astype(float)

        # — 4) Train/test split (65/35) —
        split = int(len(series) * 0.65)
        train, test = series.iloc[:split], series.iloc[split:]

        # — 5) Rolling ARIMA over the test slice, then 7 steps ahead,
        #      feeding each forecast back in as pseudo-history —
        history, preds_all = list(train), []
        for t in range(len(test) + 7):
            m = ARIMA(history, order=(6, 1, 0)).fit()
            yhat = float(m.forecast()[0])
            preds_all.append(yhat)
            history.append(test.iloc[t] if t < len(test) else yhat)

        # — 6) RMSE on the test slice only; 'tomorrow' is the first of the
        #      seven out-of-sample forecasts —
        rmse = math.sqrt(mean_squared_error(test, preds_all[:len(test)]))
        tomorrow = preds_all[-7]

        # — 7) DataFrames for plotting —
        history_df = pd.DataFrame({'Adj Close': series.values},
                                  index=series.index)
        # freq='B' skips weekends; drop element 0, which is the last
        # observed date itself.
        future_idx = pd.date_range(series.index[-1], periods=8, freq='B')[1:]
        predict_df = pd.DataFrame({'ARIMA': preds_all[-7:]}, index=future_idx)

        return preds_all, rmse, tomorrow, history_df, predict_df

    # —— 2) Run ARIMA ——
    preds, rmse, tomorrow, hist_df, pred_df = ARIMA_ALGO(df)

    # Reset index so 'Date' is a column.  hist_df's index is already named
    # 'Date' (from set_index above); only the unnamed forecast index needs
    # the 'index' -> 'Date' rename.  (The old 'History' rename targeted a
    # column that never existed and has been dropped.)
    hist_df = hist_df.reset_index().rename(columns={'index': 'Date'})
    pred_df = pred_df.reset_index().rename(columns={'index': 'Date'})

    # —— 3) Sentiment analysis ——
    def retrieving_tweets_polarity(symbol):
        """Score up to 300 recent Reddit posts mentioning *symbol* with
        TextBlob sentence polarity.

        Returns (mean polarity, post texts, label, #positive, #negative,
        #neutral).
        """
        reddit = praw.Reddit(
            client_id=REDDIT_ID,
            client_secret=REDDIT_SECRET,
            user_agent='SentimentAnalysis'
        )
        posts = reddit.subreddit('all').search(symbol, limit=300, sort='new')
        pos = neg = 0
        texts = []
        total_pol = 0
        for post in posts:
            txt = (post.title or post.selftext or "")
            # Strip HTML-escaped ampersands/colons, then non-ASCII noise.
            txt = re.sub(r'&amp;|:', '', txt).encode('ascii', 'ignore').decode()
            blob = TextBlob(txt)
            pol = sum(s.sentiment.polarity for s in blob.sentences)
            total_pol += pol
            if pol > 0: pos += 1
            if pol < 0: neg += 1
            texts.append(txt)
        avg_pol = total_pol / len(texts) if texts else 0
        neu = max(0, len(texts) - pos - neg)
        # A zero average (e.g. no posts at all) was previously mislabelled
        # "Overall Negative".
        if avg_pol > 0:
            label = "Overall Positive"
        elif avg_pol < 0:
            label = "Overall Negative"
        else:
            label = "Neutral"
        return avg_pol, texts, label, pos, neg, neu

    # Sentiment is best-effort: missing credentials or a Reddit outage
    # should degrade to neutral, not 500 the whole forecast page.
    try:
        global_pol, tweets, tw_pol, pos, neg, neu = retrieving_tweets_polarity(quote)
    except Exception:
        global_pol, tweets, tw_pol, pos, neg, neu = 0, [], "Neutral", 0, 0, 0

    # —— 4) Recommendation: price below its 7-day mean + positive chatter ——
    mean7 = hist_df['Adj Close'].tail(7).mean()
    adj_val = float(latest['Adj Close'])
    if adj_val < mean7 and global_pol > 0:
        idea, decision = "RISE", "BUY"
    else:
        idea, decision = "FALL", "SELL"

    # —— 5) Write out JSON for the dashboard ——
    trends = hist_df[['Date', 'Adj Close']].dropna().to_dict('records')
    past_preds = pred_df[['Date', 'ARIMA']].tail(7).to_dict('records')
    forecast = pred_df[['Date', 'ARIMA']].to_dict('records')

    # Full series: observed history followed by every forecast point.
    full = trends + [
        {"Date": r["Date"], "Adj Close": r["ARIMA"]}
        for r in forecast
    ]

    out_dir = "static/assets/js"
    # First run on a fresh container: the directory may not exist yet.
    os.makedirs(out_dir, exist_ok=True)
    for fname, payload in (
        ("trends.json", trends),
        ("pastpreds.json", past_preds),
        ("forecast.json", forecast),
        # full.json was previously opened without an explicit encoding,
        # which breaks with ensure_ascii=False on some locales.
        ("full.json", full),
    ):
        with open(os.path.join(out_dir, fname), "w", encoding='utf-8') as f:
            simplejson.dump(payload, f, default=str, ignore_nan=True,
                            ensure_ascii=False, indent=4)

    # —— 6) Render the final template ——
    dates_array = pred_df['Date'].dt.strftime('%Y-%m-%d').tail(7).to_numpy().reshape(-1, 1)
    forecast_array_ar = np.round(pred_df['ARIMA'].tail(7).to_numpy(), 2).reshape(-1, 1)

    return render_template(
        'resultsf.html',
        quote=quote,
        arima_pred=round(tomorrow, 2),
        open_s=latest['Open'],
        high_s=latest['High'],
        low_s=latest['Low'],
        close_s=latest['Close'],
        adj_close=latest['Adj Close'],
        vol=latest['Volume'],
        tw_list=tweets,
        tw_pol=tw_pol,
        idea=idea,
        decision=decision,
        dates=dates_array,
        forecast_set_ar=forecast_array_ar,
        error_arima=round(rmse, 2)
    )
201
+
202
if __name__ == '__main__':
    # Bind on all interfaces for containerized hosting (port 7860 is the
    # Hugging Face Spaces convention).  The Werkzeug debugger must never be
    # enabled on a publicly reachable host — it allows arbitrary code
    # execution — so debug mode is opt-in via the FLASK_DEBUG env var.
    app.run(debug=os.getenv("FLASK_DEBUG") == "1", host="0.0.0.0", port=7860)