arithescientist committed on
Commit
e8ff7d9
Β·
verified Β·
1 Parent(s): 7a59e7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -203
app.py CHANGED
@@ -1,203 +1,203 @@
1
- # app.py
2
- import os
3
- import math
4
- from datetime import datetime
5
- import yfinance as yf
6
- import pandas as pd
7
- from flask import jsonify
8
- import numpy as np
9
- import matplotlib.pyplot as plt
10
- from flask import Flask, render_template, request
11
- from statsmodels.tsa.arima.model import ARIMA
12
- from sklearn.metrics import mean_squared_error
13
- import praw, re
14
- from pandas.tseries.offsets import BDay
15
- from textblob import TextBlob
16
- import simplejson
17
- import nltk
18
- nltk.download('punkt', quiet=True)
19
-
20
- app = Flask(__name__, static_folder='static', template_folder='templates')
21
-
22
- REDDIT_ID = os.getenv("REDDIT_ID")
23
- REDDIT_SECRET = os.getenv("REDDIT_SECRET")
24
-
25
- @app.route('/', methods=['GET'])
26
- def index():
27
- # Renders your form; rename your HTML to templates/index.html
28
- return render_template('forecaster.html')
29
-
30
- @app.route('/insertintotable', methods=['POST'])
31
- def insertintotable():
32
- quote = request.form['nm']
33
-
34
- # β€”β€” 1) Fetch exactly two years of raw + adjusted OHLCV via yfinance β€”β€”
35
- end = datetime.now()
36
- start = datetime(end.year - 2, end.month, end.day)
37
- df = yf.download(
38
- quote,
39
- start=start,
40
- end=end,
41
- auto_adjust=False, # ensures separate 'Adj Close'
42
- actions=False
43
- ).reset_index()
44
-
45
- if df.empty:
46
- return render_template('forecaster.html', not_found=True)
47
-
48
- # Guarantee 'Adj Close' column
49
- if 'Adj Close' not in df.columns:
50
- df['Adj Close'] = df['Close']
51
-
52
- # Take the very last row as a Series for display & recommendation
53
- latest = df.iloc[-1]
54
-
55
-
56
-
57
- def ARIMA_ALGO(df):
58
- # β€” 1) Ensure a DateTimeIndex β€”
59
- if 'Date' in df.columns:
60
- df = df.copy()
61
- df['Date'] = pd.to_datetime(df['Date'])
62
- dfi = df.set_index('Date')
63
- else:
64
- dfi = df.copy()
65
- dfi.index = pd.to_datetime(dfi.index)
66
-
67
- # β€” 2) Pick the first matching price column β€”
68
- for col in ('Adj Close','adjClose','Adj_close','Close','Price'):
69
- if col in dfi.columns:
70
- raw = dfi[col]
71
- break
72
- else:
73
- raise KeyError("No price column found.")
74
-
75
- # β€” 3) Flatten to 1-D, coerce to floats, preserve original dates β€”
76
- arr = pd.to_numeric(raw.values.flatten(), errors='coerce')
77
- series = pd.Series(arr, index=raw.index).bfill().astype(float)
78
-
79
- # β€” 4) Train/test split (65/35) β€”
80
- split = int(len(series) * 0.65)
81
- train, test = series.iloc[:split], series.iloc[split:]
82
-
83
- # β€” 5) Rolling ARIMA + 7-day ahead β€”
84
- history, preds_all = list(train), []
85
- for t in range(len(test) + 7):
86
- m = ARIMA(history, order=(6,1,0)).fit()
87
- yhat = float(m.forecast()[0])
88
- preds_all.append(yhat)
89
- history.append(test.iloc[t] if t < len(test) else yhat)
90
-
91
- # β€” 6) Compute RMSE on the test slice β€”
92
- rmse = math.sqrt(mean_squared_error(test, preds_all[:len(test)]))
93
- tomorrow= preds_all[-7]
94
-
95
- # β€” 7) Build DataFrames for plotting β€”
96
- # β€” history_df: entire past β€”
97
- history_df = pd.DataFrame({'Adj Close': series.values}, index=series.index)
98
-
99
- # β€” predict_df: only the 7 β€œfuture” days β€”
100
- future_idx = pd.date_range(series.index[-1], periods=8, freq='B')[1:]
101
- # here I use freq='B' to skip weekends
102
- last7 = preds_all[-7:]
103
- predict_df = pd.DataFrame({'ARIMA': last7}, index=future_idx)
104
-
105
- return preds_all, rmse, tomorrow, history_df, predict_df
106
-
107
-
108
- # Run ARIMA
109
- preds, rmse, tomorrow, hist_df, pred_df = ARIMA_ALGO(df)
110
-
111
- # Reset index so β€˜Date’ is a column
112
- hist_df = (
113
- hist_df
114
- .reset_index()
115
- .rename(columns={
116
- 'index': 'Date',
117
- 'History':'Adj Close' # rename to exactly match your latest['Adj Close']
118
- })
119
- )
120
- pred_df = pred_df.reset_index().rename(columns={'index':'Date'})
121
-
122
- # β€”β€” 3) Sentiment analysis β€”β€”
123
- def retrieving_tweets_polarity(symbol):
124
- reddit = praw.Reddit(
125
- client_id=REDDIT_ID,
126
- client_secret=REDDIT_SECRET,
127
- user_agent='SentimentAnalysis'
128
- )
129
- posts = reddit.subreddit('all').search(symbol, limit=300, sort='new')
130
- pos = neg = 0
131
- texts = []
132
- total_pol = 0
133
- for post in posts:
134
- txt = (post.title or post.selftext or "")
135
- txt = re.sub(r'&amp;|:', '', txt).encode('ascii','ignore').decode()
136
- blob = TextBlob(txt)
137
- pol = sum(s.sentiment.polarity for s in blob.sentences)
138
- total_pol += pol
139
- if pol>0: pos+=1
140
- if pol<0: neg+=1
141
- texts.append(txt)
142
- avg_pol = total_pol / len(texts) if texts else 0
143
- neu = max(0, len(texts) - pos - neg)
144
- label = "Overall Positive" if avg_pol>0 else "Overall Negative"
145
- return avg_pol, texts, label, pos, neg, neu
146
-
147
- global_pol, tweets, tw_pol, pos, neg, neu = retrieving_tweets_polarity(quote)
148
-
149
- # β€”β€” 4) Recommendation β€”β€”
150
- mean7 = hist_df['Adj Close'].tail(7).mean()
151
- adj_val = float(latest['Adj Close'])
152
- if adj_val < mean7 and global_pol > 0:
153
- idea, decision = "RISE", "BUY"
154
- else:
155
- idea, decision = "FALL", "SELL"
156
-
157
- # β€”β€” 5) Write out JSON for the dashboard β€”β€”
158
- trends = hist_df[['Date','Adj Close']].dropna().to_dict('records')
159
- past_preds = pred_df[['Date','ARIMA']].tail(7).to_dict('records')
160
- forecast = pred_df[['Date','ARIMA']].to_dict('records')
161
-
162
- # --- new: full series (history + all preds) ---
163
- full = trends + [
164
- {"Date": r["Date"], "Adj Close": r["ARIMA"]}
165
- for r in forecast
166
- ]
167
-
168
- with open("static/assets/js/dashboard/trends.json", "w", encoding='utf-8') as f:
169
- simplejson.dump(trends, f, default= str, ignore_nan=True, ensure_ascii=False, indent=4)
170
- with open("static/assets/js/dashboard/pastpreds.json","w", encoding='utf-8') as f:
171
- simplejson.dump(past_preds, f, default=str, ignore_nan=True, ensure_ascii=False, indent=4)
172
- with open("static/assets/js/dashboard/forecast.json","w", encoding='utf-8') as f:
173
- simplejson.dump(forecast, f, default=str, ignore_nan=True, ensure_ascii=False, indent=4)
174
- with open("static/assets/js/dashboard/full.json","w") as f:
175
- simplejson.dump(full, f, default=str, ignore_nan=True, ensure_ascii=False, indent=4)
176
-
177
- # β€”β€” 6) Render the final template β€”β€”
178
- dates_array = pred_df['Date'].dt.strftime('%Y-%m-%d').tail(7).to_numpy().reshape(-1,1)
179
- forecast_array_ar = np.round(pred_df['ARIMA'].tail(7).to_numpy(), 2).reshape(-1,1)
180
-
181
-
182
-
183
- return render_template(
184
- 'resultsf.html',
185
- quote=quote,
186
- arima_pred=round(tomorrow,2),
187
- open_s=latest['Open'],
188
- high_s=latest['High'],
189
- low_s=latest['Low'],
190
- close_s=latest['Close'],
191
- adj_close=latest['Adj Close'],
192
- vol=latest['Volume'],
193
- tw_list=tweets,
194
- tw_pol=tw_pol,
195
- idea=idea,
196
- decision=decision,
197
- dates=dates_array,
198
- forecast_set_ar=forecast_array_ar,
199
- error_arima=round(rmse,2)
200
- )
201
-
202
- if __name__ == '__main__':
203
- app.run(debug=True, host="0.0.0.0", port=7860)
 
1
+ # app.py
2
+ import os
3
+ import math
4
+ from datetime import datetime
5
+ import yfinance as yf
6
+ import pandas as pd
7
+ from flask import jsonify
8
+ import numpy as np
9
+ import matplotlib.pyplot as plt
10
+ from flask import Flask, render_template, request
11
+ from statsmodels.tsa.arima.model import ARIMA
12
+ from sklearn.metrics import mean_squared_error
13
+ import praw, re
14
+ from pandas.tseries.offsets import BDay
15
+ from textblob import TextBlob
16
+ import simplejson
17
+ import nltk
18
+ nltk.download('punkt', quiet=True)
19
+
20
+ app = Flask(__name__, static_folder='static', template_folder='templates')
21
+
22
+ REDDIT_ID = os.getenv("REDDIT_ID")
23
+ REDDIT_SECRET = os.getenv("REDDIT_SECRET")
24
+
25
@app.route('/', methods=['GET'])
def index():
    """Serve the ticker-entry form (templates/forecaster.html)."""
    form_template = 'forecaster.html'
    return render_template(form_template)
29
+
30
@app.route('/insertintotable', methods=['POST'])
def insertintotable():
    """Handle the forecast form POST.

    Downloads two years of prices for the requested ticker, fits a rolling
    ARIMA(6,1,0) model, scores Reddit sentiment, writes the dashboard JSON
    files under static/assets/js/, and renders resultsf.html.
    """
    # A missing/blank 'nm' field previously raised KeyError (HTTP 400/500);
    # treat it the same as an unknown symbol.
    quote = request.form.get('nm', '').strip()
    if not quote:
        return render_template('forecaster.html', not_found=True)

    # —— 1) Fetch two years of raw + adjusted OHLCV via yfinance ——
    end = datetime.now()
    # NOTE: datetime(end.year - 2, end.month, end.day) raises ValueError
    # when today is Feb 29; DateOffset rolls the date back safely.
    start = end - pd.DateOffset(years=2)
    df = yf.download(
        quote,
        start=start,
        end=end,
        auto_adjust=False,   # keep a separate 'Adj Close' column
        actions=False
    ).reset_index()

    # Unknown ticker, or too little history for a 65/35 split + ARIMA(6,1,0).
    if df.empty or len(df) < 30:
        return render_template('forecaster.html', not_found=True)

    # Guarantee an 'Adj Close' column (older yfinance / auto_adjust quirks).
    if 'Adj Close' not in df.columns:
        df['Adj Close'] = df['Close']

    # Last trading day's row, used for display and the recommendation.
    latest = df.iloc[-1]

    def ARIMA_ALGO(df):
        """Walk-forward one-step ARIMA over the test slice plus a
        7-business-day out-of-sample forecast.

        Returns (all predictions, test RMSE, next-day prediction,
        history DataFrame, 7-day forecast DataFrame).
        """
        # — 1) Ensure a DateTimeIndex —
        if 'Date' in df.columns:
            df = df.copy()
            df['Date'] = pd.to_datetime(df['Date'])
            dfi = df.set_index('Date')
        else:
            dfi = df.copy()
            dfi.index = pd.to_datetime(dfi.index)

        # — 2) Pick the first matching price column —
        for col in ('Adj Close', 'adjClose', 'Adj_close', 'Close', 'Price'):
            if col in dfi.columns:
                raw = dfi[col]
                break
        else:
            raise KeyError("No price column found.")

        # — 3) Flatten to 1-D floats, preserving the original dates.
        # yfinance can hand back an (n, 1) column, hence flatten().
        arr = pd.to_numeric(raw.values.flatten(), errors='coerce')
        series = pd.Series(arr, index=raw.index).bfill().astype(float)

        # — 4) Train/test split (65/35) —
        split = int(len(series) * 0.65)
        train, test = series.iloc[:split], series.iloc[split:]

        # — 5) Rolling ARIMA over the test slice, then 7 steps ahead,
        #      feeding each forecast back in as pseudo-history —
        history, preds_all = list(train), []
        for t in range(len(test) + 7):
            m = ARIMA(history, order=(6, 1, 0)).fit()
            yhat = float(m.forecast()[0])
            preds_all.append(yhat)
            history.append(test.iloc[t] if t < len(test) else yhat)

        # — 6) RMSE on the test slice only; 'tomorrow' is the first of the
        #      seven out-of-sample forecasts —
        rmse = math.sqrt(mean_squared_error(test, preds_all[:len(test)]))
        tomorrow = preds_all[-7]

        # — 7) DataFrames for plotting —
        history_df = pd.DataFrame({'Adj Close': series.values},
                                  index=series.index)
        # freq='B' skips weekends; drop element 0, which is the last
        # observed date itself.
        future_idx = pd.date_range(series.index[-1], periods=8, freq='B')[1:]
        predict_df = pd.DataFrame({'ARIMA': preds_all[-7:]}, index=future_idx)

        return preds_all, rmse, tomorrow, history_df, predict_df

    # —— 2) Run ARIMA ——
    preds, rmse, tomorrow, hist_df, pred_df = ARIMA_ALGO(df)

    # Reset index so 'Date' is a column.  hist_df's index is already named
    # 'Date' (from set_index above); only the unnamed forecast index needs
    # the 'index' -> 'Date' rename.  (The old 'History' rename targeted a
    # column that never existed and has been dropped.)
    hist_df = hist_df.reset_index().rename(columns={'index': 'Date'})
    pred_df = pred_df.reset_index().rename(columns={'index': 'Date'})

    # —— 3) Sentiment analysis ——
    def retrieving_tweets_polarity(symbol):
        """Score up to 300 recent Reddit posts mentioning *symbol* with
        TextBlob sentence polarity.

        Returns (mean polarity, post texts, label, #positive, #negative,
        #neutral).
        """
        reddit = praw.Reddit(
            client_id=REDDIT_ID,
            client_secret=REDDIT_SECRET,
            user_agent='SentimentAnalysis'
        )
        posts = reddit.subreddit('all').search(symbol, limit=300, sort='new')
        pos = neg = 0
        texts = []
        total_pol = 0
        for post in posts:
            txt = (post.title or post.selftext or "")
            # Strip HTML-escaped ampersands/colons, then non-ASCII noise.
            txt = re.sub(r'&amp;|:', '', txt).encode('ascii', 'ignore').decode()
            blob = TextBlob(txt)
            pol = sum(s.sentiment.polarity for s in blob.sentences)
            total_pol += pol
            if pol > 0: pos += 1
            if pol < 0: neg += 1
            texts.append(txt)
        avg_pol = total_pol / len(texts) if texts else 0
        neu = max(0, len(texts) - pos - neg)
        # A zero average (e.g. no posts at all) was previously mislabelled
        # "Overall Negative".
        if avg_pol > 0:
            label = "Overall Positive"
        elif avg_pol < 0:
            label = "Overall Negative"
        else:
            label = "Neutral"
        return avg_pol, texts, label, pos, neg, neu

    # Sentiment is best-effort: missing credentials or a Reddit outage
    # should degrade to neutral, not 500 the whole forecast page.
    try:
        global_pol, tweets, tw_pol, pos, neg, neu = retrieving_tweets_polarity(quote)
    except Exception:
        global_pol, tweets, tw_pol, pos, neg, neu = 0, [], "Neutral", 0, 0, 0

    # —— 4) Recommendation: price below its 7-day mean + positive chatter ——
    mean7 = hist_df['Adj Close'].tail(7).mean()
    adj_val = float(latest['Adj Close'])
    if adj_val < mean7 and global_pol > 0:
        idea, decision = "RISE", "BUY"
    else:
        idea, decision = "FALL", "SELL"

    # —— 5) Write out JSON for the dashboard ——
    trends = hist_df[['Date', 'Adj Close']].dropna().to_dict('records')
    past_preds = pred_df[['Date', 'ARIMA']].tail(7).to_dict('records')
    forecast = pred_df[['Date', 'ARIMA']].to_dict('records')

    # Full series: observed history followed by every forecast point.
    full = trends + [
        {"Date": r["Date"], "Adj Close": r["ARIMA"]}
        for r in forecast
    ]

    out_dir = "static/assets/js"
    # First run on a fresh container: the directory may not exist yet.
    os.makedirs(out_dir, exist_ok=True)
    for fname, payload in (
        ("trends.json", trends),
        ("pastpreds.json", past_preds),
        ("forecast.json", forecast),
        # full.json was previously opened without an explicit encoding,
        # which breaks with ensure_ascii=False on some locales.
        ("full.json", full),
    ):
        with open(os.path.join(out_dir, fname), "w", encoding='utf-8') as f:
            simplejson.dump(payload, f, default=str, ignore_nan=True,
                            ensure_ascii=False, indent=4)

    # —— 6) Render the final template ——
    dates_array = pred_df['Date'].dt.strftime('%Y-%m-%d').tail(7).to_numpy().reshape(-1, 1)
    forecast_array_ar = np.round(pred_df['ARIMA'].tail(7).to_numpy(), 2).reshape(-1, 1)

    return render_template(
        'resultsf.html',
        quote=quote,
        arima_pred=round(tomorrow, 2),
        open_s=latest['Open'],
        high_s=latest['High'],
        low_s=latest['Low'],
        close_s=latest['Close'],
        adj_close=latest['Adj Close'],
        vol=latest['Volume'],
        tw_list=tweets,
        tw_pol=tw_pol,
        idea=idea,
        decision=decision,
        dates=dates_array,
        forecast_set_ar=forecast_array_ar,
        error_arima=round(rmse, 2)
    )
201
+
202
if __name__ == '__main__':
    # Bind on all interfaces for containerized hosting (port 7860 is the
    # Hugging Face Spaces convention).  The Werkzeug debugger must never be
    # enabled on a publicly reachable host — it allows arbitrary code
    # execution — so debug mode is opt-in via the FLASK_DEBUG env var.
    app.run(debug=os.getenv("FLASK_DEBUG") == "1", host="0.0.0.0", port=7860)