File size: 7,604 Bytes
e8ff7d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c37ab8
e8ff7d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acf8d04
e8ff7d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acf8d04
e8ff7d9
 
acf8d04
 
 
 
e8ff7d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acb0d79
 
 
 
 
 
 
e8ff7d9
acb0d79
 
e8ff7d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710aede
 
e8ff7d9
710aede
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# app.py
import os
import math
from datetime import datetime
import yfinance as yf
import pandas as pd
from flask import jsonify
import numpy as np
import matplotlib.pyplot as plt
from flask import Flask, render_template, request
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import praw, re
from pandas.tseries.offsets import BDay
from textblob import TextBlob
import simplejson 
import nltk
# nltk.download('punkt', quiet=True)

# Flask application object; static assets and Jinja templates are looked up
# in the folders named below.
app = Flask(
    __name__,
    static_folder='static',
    template_folder='templates',
)

# Reddit API credentials are read from the environment (None when unset).
REDDIT_ID = os.environ.get("REDDIT_ID")
REDDIT_SECRET = os.environ.get("REDDIT_SECRET")

@app.route('/', methods=['GET'])
def index():
    """Serve the landing page that holds the ticker entry form."""
    landing_page = render_template('forecaster.html')
    return landing_page

@app.route('/insertintotable', methods=['POST'])
def insertintotable():
    """Forecast the submitted ticker and render the results page.

    Reads the symbol from the ``nm`` form field, downloads about two years of
    daily prices with yfinance, runs a rolling ARIMA(6,1,0) forecast, scores
    recent Reddit posts with TextBlob, writes the dashboard JSON files under
    ``static/assets/js/`` and renders ``resultsf.html``.

    Returns:
        The rendered ``resultsf.html`` page, or ``forecaster.html`` with
        ``not_found=True`` when the symbol is blank or no price data is
        returned.
    """
    quote = request.form['nm'].strip()
    if not quote:
        # Nothing to look up; reuse the "not found" path instead of querying.
        return render_template('forecaster.html', not_found=True)

    # -- 1) Fetch two years of raw + adjusted OHLCV via yfinance --
    end = datetime.now()
    try:
        start = datetime(end.year - 2, end.month, end.day)
    except ValueError:
        # Only 29 February can fail here: the year two years before a leap
        # year is never a leap year, so fall back to 28 February.
        start = datetime(end.year - 2, end.month, 28)
    df = yf.download(
        quote,
        start=start,
        end=end,
        auto_adjust=False,  # ensures a separate 'Adj Close' column
        actions=False,
    ).reset_index()

    if df.empty:
        return render_template('forecaster.html', not_found=True)

    # yfinance may hand back (field, ticker) MultiIndex columns even for a
    # single symbol; keep only the field level so row lookups such as
    # latest['Open'] yield scalars rather than one-element Series.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    # Guarantee an 'Adj Close' column
    if 'Adj Close' not in df.columns:
        df['Adj Close'] = df['Close']

    # The very last row feeds the display values and the recommendation
    latest = df.iloc[-1]

    def ARIMA_ALGO(df):
        """Run a rolling one-step ARIMA(6,1,0) forecast over the price series.

        Args:
            df: Price frame with either a ``Date`` column or a date-like
                index, and one of the recognised price columns.

        Returns:
            ``(preds_all, rmse, tomorrow, history_df, predict_df)`` where
            ``preds_all`` holds the test-period predictions followed by 7
            future steps, ``rmse`` is the error on the test slice,
            ``tomorrow`` is the first future prediction, ``history_df`` is
            the cleaned price series and ``predict_df`` holds the 7 future
            predictions indexed by business day.

        Raises:
            KeyError: If none of the recognised price columns is present.
        """
        # 1) Ensure a DatetimeIndex
        if 'Date' in df.columns:
            df = df.copy()
            df['Date'] = pd.to_datetime(df['Date'])
            dfi = df.set_index('Date')
        else:
            dfi = df.copy()
            dfi.index = pd.to_datetime(dfi.index)

        # 2) Pick the first matching price column
        for col in ('Adj Close', 'adjClose', 'Adj_close', 'Close', 'Price'):
            if col in dfi.columns:
                raw = dfi[col]
                break
        else:
            raise KeyError("No price column found.")

        # 3) Flatten to 1-D, coerce to floats, preserve the original dates.
        #    bfill() covers leading/interior gaps; ffill() then covers any
        #    trailing gap that bfill() cannot reach.
        arr = pd.to_numeric(raw.values.flatten(), errors='coerce')
        series = pd.Series(arr, index=raw.index).bfill().ffill().astype(float)

        # 4) Train/test split (65/35)
        split = int(len(series) * 0.65)
        train, test = series.iloc[:split], series.iloc[split:]

        # 5) Rolling ARIMA + 7 steps ahead: parameters are fitted once on the
        #    training data and re-applied to the growing history each step.
        history, preds_all = list(train), []
        m = ARIMA(history, order=(6, 1, 0)).fit()
        for t in range(len(test) + 7):
            model = m.apply(history)
            yhat = float(model.forecast()[0])
            preds_all.append(yhat)
            # Inside the test window feed back the observed value; beyond it
            # feed back the model's own prediction.
            history.append(test.iloc[t] if t < len(test) else yhat)

        # 6) Compute RMSE on the test slice
        rmse = math.sqrt(mean_squared_error(test, preds_all[:len(test)]))
        tomorrow = preds_all[-7]

        # 7) Build DataFrames for plotting
        #    history_df: entire past
        history_df = pd.DataFrame({'Adj Close': series.values}, index=series.index)

        #    predict_df: only the 7 future days. Start at the first business
        #    day strictly after the last observation, so a last observation
        #    that falls on a weekend does not cause a business day to be
        #    skipped.
        future_idx = pd.date_range(series.index[-1] + BDay(1), periods=7, freq='B')
        predict_df = pd.DataFrame({'ARIMA': preds_all[-7:]}, index=future_idx)

        return preds_all, rmse, tomorrow, history_df, predict_df

    # -- 2) Run ARIMA (the full prediction list is not needed here) --
    _, rmse, tomorrow, hist_df, pred_df = ARIMA_ALGO(df)

    # Reset the index so the dates become a 'Date' column; an unnamed index
    # comes back from reset_index() as 'index'.
    hist_df = hist_df.reset_index().rename(columns={'index': 'Date'})
    pred_df = pred_df.reset_index().rename(columns={'index': 'Date'})

    # -- 3) Sentiment analysis --
    def retrieving_tweets_polarity(symbol):
        """Score recent Reddit posts that match ``symbol``.

        Despite its name, this searches Reddit (``r/all``, newest first, up
        to 300 posts) and scores each post's title, or its self-text when
        the title is empty, with TextBlob.

        Returns:
            ``(avg_pol, texts, label, pos, neg, neu)``: the mean per-post
            polarity (0 when nothing was found), the cleaned post texts, an
            overall label, and the counts of positive, negative and neutral
            posts.
        """
        reddit = praw.Reddit(
            client_id=REDDIT_ID,
            client_secret=REDDIT_SECRET,
            user_agent='SentimentAnalysis'
        )
        posts = reddit.subreddit('all').search(symbol, limit=300, sort='new')
        pos = neg = 0
        texts = []
        total_pol = 0
        for post in posts:
            txt = (post.title or post.selftext or "")
            # Drop '&amp;' and ':' and any non-ASCII characters before scoring
            txt = re.sub(r'&amp;|:', '', txt).encode('ascii', 'ignore').decode()
            blob = TextBlob(txt)
            pol = sum(s.sentiment.polarity for s in blob.sentences)
            total_pol += pol
            if pol > 0:
                pos += 1
            elif pol < 0:
                neg += 1
            texts.append(txt)
        avg_pol = total_pol / len(texts) if texts else 0
        neu = max(0, len(texts) - pos - neg)
        # Any non-positive average (including exactly 0) is labelled negative
        label = "Overall Positive" if avg_pol > 0 else "Overall Negative"
        return avg_pol, texts, label, pos, neg, neu

    global_pol, tweets, tw_pol, pos, neg, neu = retrieving_tweets_polarity(quote)

    # -- 4) Recommendation --
    # BUY only when the latest price sits below its 7-row mean AND the
    # average sentiment is positive; everything else is SELL.
    mean7 = hist_df['Adj Close'].tail(7).mean()
    adj_val = float(latest['Adj Close'])
    if adj_val < mean7 and global_pol > 0:
        idea, decision = "RISE", "BUY"
    else:
        idea, decision = "FALL", "SELL"

    # -- 5) Write out JSON for the dashboard --
    trends = hist_df[['Date', 'Adj Close']].dropna().to_dict('records')
    past_preds = pred_df[['Date', 'ARIMA']].tail(7).to_dict('records')
    forecast = pred_df[['Date', 'ARIMA']].to_dict('records')

    # Full series: history followed by the forecast, under one column name
    full = trends + [
        {"Date": r["Date"], "Adj Close": r["ARIMA"]}
        for r in forecast
    ]

    pie = [
        {"sizes": pos, "labels": "Positive"},
        {"sizes": neg, "labels": "Negative"},
        {"sizes": neu, "labels": "Neutral"},
    ]

    # Every file is written as UTF-8 because ensure_ascii=False is in effect.
    payloads = {
        "pie": pie,
        "trends": trends,
        "pastpreds": past_preds,
        "forecast": forecast,
        "full": full,
    }
    for name, payload in payloads.items():
        with open(f"static/assets/js/{name}.json", "w", encoding='utf-8') as f:
            simplejson.dump(
                payload, f,
                default=str, ignore_nan=True, ensure_ascii=False, indent=4,
            )

    # -- 6) Render the final template --
    dates_array = pred_df['Date'].dt.strftime('%Y-%m-%d').tail(7).to_numpy().reshape(-1, 1)
    forecast_array_ar = np.round(pred_df['ARIMA'].tail(7).to_numpy(), 2).reshape(-1, 1)

    return render_template(
        'resultsf.html',
        quote=quote,
        arima_pred=round(tomorrow, 2),
        open_s=latest['Open'],
        high_s=latest['High'],
        low_s=latest['Low'],
        close_s=latest['Close'],
        adj_close=latest['Adj Close'],
        vol=latest['Volume'],
        tw_list=tweets,
        tw_pol=tw_pol,
        idea=idea,
        decision=decision,
        dates=dates_array,
        forecast_set_ar=forecast_array_ar,
        error_arima=round(rmse, 2)
    )

if __name__ == "__main__":
    print("πŸš€ Starting Flask app...")
    # The server listens on every interface, and Flask's debug mode exposes an
    # interactive debugger that can execute arbitrary code from the browser.
    # Keep it off unless explicitly requested with FLASK_DEBUG=1 (or "true")
    # for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)