# app.py
"""Flask app that forecasts a stock's adjusted close with ARIMA.

For a ticker submitted through the form, the app downloads two years of daily
prices with yfinance, runs a rolling ARIMA(6,1,0) forecast, scores recent
Reddit posts with TextBlob, derives a BUY/SELL hint, writes JSON files for the
dashboard charts and renders the results page.
"""
import math
import os
import re
from datetime import datetime

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import praw
import simplejson
import yfinance as yf
from flask import Flask, render_template, request
from flask import jsonify
from pandas.tseries.offsets import BDay
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from textblob import TextBlob

# nltk.download('punkt', quiet=True)

app = Flask(__name__, static_folder='static', template_folder='templates')

REDDIT_ID = os.getenv("REDDIT_ID")
REDDIT_SECRET = os.getenv("REDDIT_SECRET")


def _two_years_before(moment):
    """Return midnight of the calendar day two years before *moment*.

    Feb 29 has no counterpart in a non-leap year, so it maps to Feb 28
    instead of raising ``ValueError``.
    """
    try:
        return datetime(moment.year - 2, moment.month, moment.day)
    except ValueError:
        # Only reachable for Feb 29: fall back to the previous day.
        return datetime(moment.year - 2, moment.month, moment.day - 1)


def _dump_json(filename, payload):
    """Write *payload* as UTF-8 JSON to ``<static_folder>/assets/js/<filename>``.

    The path is anchored to the Flask static folder rather than the current
    working directory so the dashboard files land where Flask serves them
    from, regardless of where the process was started.
    """
    target_dir = os.path.join(app.static_folder, "assets", "js")
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, filename), "w", encoding='utf-8') as f:
        # default=str stringifies Timestamps; ignore_nan turns NaN into null.
        simplejson.dump(
            payload, f,
            default=str, ignore_nan=True, ensure_ascii=False, indent=4,
        )


def ARIMA_ALGO(df):
    """Run a rolling one-step ARIMA(6,1,0) forecast plus 7 future steps.

    Args:
        df: Price table with either a ``Date`` column or a date-like index,
            and one of the columns ``Adj Close``, ``adjClose``, ``Adj_close``,
            ``Close`` or ``Price``.

    Returns:
        A tuple ``(preds_all, rmse, tomorrow, history_df, predict_df)``:
        every prediction (test slice followed by 7 future steps), the RMSE
        over the test slice, the first future prediction, the full price
        history as a DataFrame with an ``Adj Close`` column, and the 7
        future predictions as a DataFrame with an ``ARIMA`` column indexed
        by business days.

    Raises:
        KeyError: If none of the known price columns is present.
    """
    # — 1) Ensure a DateTimeIndex —
    if 'Date' in df.columns:
        df = df.copy()
        df['Date'] = pd.to_datetime(df['Date'])
        dfi = df.set_index('Date')
    else:
        dfi = df.copy()
        dfi.index = pd.to_datetime(dfi.index)

    # — 2) Pick the first matching price column —
    for col in ('Adj Close', 'adjClose', 'Adj_close', 'Close', 'Price'):
        if col in dfi.columns:
            raw = dfi[col]
            break
    else:
        raise KeyError("No price column found.")

    # — 3) Flatten to 1-D, coerce to floats, preserve original dates —
    arr = pd.to_numeric(raw.values.flatten(), errors='coerce')
    series = pd.Series(arr, index=raw.index).bfill().astype(float)

    # — 4) Train/test split (65/35) —
    split = int(len(series) * 0.65)
    train, test = series.iloc[:split], series.iloc[split:]

    # — 5) Rolling ARIMA + 7-day ahead —
    # The parameters are fitted once on the training slice; each step then
    # re-applies them to the growing history instead of refitting.
    history, preds_all = list(train), []
    m = ARIMA(history, order=(6, 1, 0)).fit()
    for t in range(len(test) + 7):
        model = m.apply(history)
        yhat = float(model.forecast()[0])
        preds_all.append(yhat)
        # Inside the test slice feed back the observed value; beyond it,
        # feed back the model's own prediction.
        history.append(test.iloc[t] if t < len(test) else yhat)

    # — 6) Compute RMSE on the test slice —
    rmse = math.sqrt(mean_squared_error(test, preds_all[:len(test)]))
    tomorrow = preds_all[-7]

    # — 7) Build DataFrames for plotting —
    # history_df: entire past
    history_df = pd.DataFrame({'Adj Close': series.values}, index=series.index)
    # predict_df: only the 7 "future" days; freq='B' skips weekends and the
    # first generated date (the last observed day) is dropped.
    future_idx = pd.date_range(series.index[-1], periods=8, freq='B')[1:]
    last7 = preds_all[-7:]
    predict_df = pd.DataFrame({'ARIMA': last7}, index=future_idx)

    return preds_all, rmse, tomorrow, history_df, predict_df


def retrieving_tweets_polarity(symbol):
    """Score up to 300 recent Reddit posts that match *symbol*.

    Each post's text (title, else selftext) is cleaned and scored as the sum
    of TextBlob sentence polarities.

    Returns:
        A tuple ``(avg_pol, texts, label, pos, neg, neu)``: the mean polarity
        per post (0 when nothing was found), the cleaned texts, the overall
        label, and the counts of positive, negative and neutral posts.
    """
    reddit = praw.Reddit(
        client_id=REDDIT_ID,
        client_secret=REDDIT_SECRET,
        user_agent='SentimentAnalysis'
    )
    posts = reddit.subreddit('all').search(symbol, limit=300, sort='new')

    pos = neg = 0
    texts = []
    total_pol = 0
    for post in posts:
        txt = (post.title or post.selftext or "")
        # Strip '&' and ':' and drop every non-ASCII character.
        txt = re.sub(r'&|:', '', txt).encode('ascii', 'ignore').decode()
        blob = TextBlob(txt)
        pol = sum(s.sentiment.polarity for s in blob.sentences)
        total_pol += pol
        if pol > 0:
            pos += 1
        elif pol < 0:
            neg += 1
        texts.append(txt)

    avg_pol = total_pol / len(texts) if texts else 0
    neu = max(0, len(texts) - pos - neg)
    label = "Overall Positive" if avg_pol > 0 else "Overall Negative"
    return avg_pol, texts, label, pos, neg, neu


@app.route('/', methods=['GET'])
def index():
    """Render the ticker input form."""
    return render_template('forecaster.html')


@app.route('/insertintotable', methods=['POST'])
def insertintotable():
    """Forecast the submitted ticker and render the results page.

    Reads the ticker from form field ``nm``. Re-renders the form with
    ``not_found=True`` when yfinance returns no rows for it.
    """
    quote = request.form['nm']

    # —— 1) Fetch two years of raw + adjusted OHLCV via yfinance ——
    end = datetime.now()
    start = _two_years_before(end)
    df = yf.download(
        quote,
        start=start,
        end=end,
        auto_adjust=False,  # ensures separate 'Adj Close'
        actions=False
    )
    # Recent yfinance versions return (field, ticker) MultiIndex columns even
    # for a single ticker; keep only the field level so row lookups such as
    # latest['Open'] yield scalars instead of one-element Series.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df = df.reset_index()

    if df.empty:
        return render_template('forecaster.html', not_found=True)

    # Guarantee 'Adj Close' column
    if 'Adj Close' not in df.columns:
        df['Adj Close'] = df['Close']

    # Take the very last row as a Series for display & recommendation
    latest = df.iloc[-1]

    # —— 2) Run ARIMA ——
    _, rmse, tomorrow, hist_df, pred_df = ARIMA_ALGO(df)

    # Reset index so 'Date' is a column (an unnamed index comes back as
    # 'index' and is renamed).
    hist_df = hist_df.reset_index().rename(columns={'index': 'Date'})
    pred_df = pred_df.reset_index().rename(columns={'index': 'Date'})

    # —— 3) Sentiment analysis ——
    global_pol, tweets, tw_pol, pos, neg, neu = retrieving_tweets_polarity(quote)

    # —— 4) Recommendation ——
    # BUY only when the latest price is below its 7-day mean and the
    # sentiment is positive; everything else is SELL.
    mean7 = hist_df['Adj Close'].tail(7).mean()
    adj_val = float(latest['Adj Close'])
    if adj_val < mean7 and global_pol > 0:
        idea, decision = "RISE", "BUY"
    else:
        idea, decision = "FALL", "SELL"

    # —— 5) Write out JSON for the dashboard ——
    trends = hist_df[['Date', 'Adj Close']].dropna().to_dict('records')
    past_preds = pred_df[['Date', 'ARIMA']].tail(7).to_dict('records')
    forecast = pred_df[['Date', 'ARIMA']].to_dict('records')

    # Full series: history followed by the forecast, under one key.
    full = trends + [
        {"Date": r["Date"], "Adj Close": r["ARIMA"]}
        for r in forecast
    ]

    pie = [
        {"sizes": pos, "labels": "Positive"},
        {"sizes": neg, "labels": "Negative"},
        {"sizes": neu, "labels": "Neutral"},
    ]

    _dump_json("pie.json", pie)
    _dump_json("trends.json", trends)
    _dump_json("pastpreds.json", past_preds)
    _dump_json("forecast.json", forecast)
    _dump_json("full.json", full)

    # —— 6) Render the final template ——
    dates_array = (
        pred_df['Date'].dt.strftime('%Y-%m-%d').tail(7).to_numpy().reshape(-1, 1)
    )
    forecast_array_ar = np.round(
        pred_df['ARIMA'].tail(7).to_numpy(), 2
    ).reshape(-1, 1)

    return render_template(
        'resultsf.html',
        quote=quote,
        arima_pred=round(tomorrow, 2),
        open_s=latest['Open'],
        high_s=latest['High'],
        low_s=latest['Low'],
        close_s=latest['Close'],
        adj_close=latest['Adj Close'],
        vol=latest['Volume'],
        tw_list=tweets,
        tw_pol=tw_pol,
        idea=idea,
        decision=decision,
        dates=dates_array,
        forecast_set_ar=forecast_array_ar,
        error_arima=round(rmse, 2)
    )


if __name__ == "__main__":
    print("🚀 Starting Flask app...")
    # The Werkzeug debugger allows arbitrary code execution, so it must not
    # be on by default while listening on every interface. Opt in locally
    # with FLASK_DEBUG=1.
    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)