Spaces:

arithescientist
/

StocksForecasting

Sleeping

App Files Files Community

StocksForecasting / app.py

arithescientist

Update app.py

710aede verified 9 months ago

raw

history blame contribute delete

7.6 kB

	# app.py
	import os
	import math
	from datetime import datetime
	import yfinance as yf
	import pandas as pd
	from flask import jsonify
	import numpy as np
	import matplotlib.pyplot as plt
	from flask import Flask, render_template, request
	from statsmodels.tsa.arima.model import ARIMA
	from sklearn.metrics import mean_squared_error
	import praw, re
	from pandas.tseries.offsets import BDay
	from textblob import TextBlob
	import simplejson
	import nltk
	# nltk.download('punkt', quiet=True)

	# Flask application object; static assets and Jinja templates are looked up
	# in the 'static' and 'templates' folders respectively.
	app = Flask(
	    __name__,
	    template_folder='templates',
	    static_folder='static',
	)

	# Reddit API credentials are read from the environment; each is None when
	# the corresponding variable is not set.
	REDDIT_ID = os.environ.get("REDDIT_ID")
	REDDIT_SECRET = os.environ.get("REDDIT_SECRET")

	@app.route('/', methods=['GET'])
	def index():
	    """Serve the landing page by rendering the ``forecaster.html`` template."""
	    landing_page = render_template('forecaster.html')
	    return landing_page

	@app.route('/insertintotable', methods=['POST'])
	def insertintotable():
	    """Forecast the submitted ticker and render the results dashboard.

	    Reads the ticker symbol from the ``nm`` form field, downloads two years
	    of price history with yfinance, produces a 7-business-day ARIMA
	    forecast, scores recent Reddit posts with TextBlob, derives a BUY/SELL
	    call, writes the dashboard JSON files under the static folder, and
	    renders ``resultsf.html``.

	    Returns:
	        The rendered ``resultsf.html`` page, or ``forecaster.html`` with
	        ``not_found=True`` when yfinance returns no rows for the symbol.
	    """
	    # Surrounding whitespace from the form would otherwise be sent to yfinance.
	    quote = request.form['nm'].strip()

	    # —— 1) Fetch two years of raw + adjusted OHLCV via yfinance ——
	    end = datetime.now()
	    start = _same_day_years_earlier(end, years=2)
	    df = yf.download(
	        quote,
	        start=start,
	        end=end,
	        auto_adjust=False,  # keep a separate 'Adj Close' column
	        actions=False,
	    ).reset_index()

	    if df.empty:
	        return render_template('forecaster.html', not_found=True)

	    # yfinance may return (field, ticker) MultiIndex columns even for a single
	    # symbol; keep only the field level so row lookups below yield scalars
	    # instead of one-element Series.
	    if isinstance(df.columns, pd.MultiIndex):
	        df.columns = df.columns.get_level_values(0)

	    # Guarantee an 'Adj Close' column.
	    if 'Adj Close' not in df.columns:
	        df['Adj Close'] = df['Close']

	    # Last row, used for the displayed quote and for the recommendation.
	    latest = df.iloc[-1]

	    # —— 2) ARIMA forecast ——
	    _, rmse, tomorrow, hist_df, pred_df = _arima_forecast(df)

	    # Expose the datetime index as a 'Date' column regardless of its name.
	    hist_df = hist_df.rename_axis('Date').reset_index()
	    pred_df = pred_df.rename_axis('Date').reset_index()

	    # —— 3) Sentiment analysis ——
	    global_pol, tweets, tw_pol, pos, neg, neu = _reddit_sentiment(quote)

	    # —— 4) Recommendation ——
	    # NOTE(review): mean7 averages the last 7 *historical* closes, not the
	    # forecast values; confirm this is the intended signal.
	    mean7 = hist_df['Adj Close'].tail(7).mean()
	    adj_val = float(latest['Adj Close'])
	    if adj_val < mean7 and global_pol > 0:
	        idea, decision = "RISE", "BUY"
	    else:
	        idea, decision = "FALL", "SELL"

	    # —— 5) Write out JSON for the dashboard ——
	    # NOTE(review): these files are shared by all requests, so concurrent
	    # users overwrite each other's results.
	    trends = hist_df[['Date', 'Adj Close']].dropna().to_dict('records')
	    forecast = pred_df[['Date', 'ARIMA']].to_dict('records')
	    # NOTE(review): pred_df holds only the future rows, so this equals
	    # `forecast`; verify what pastpreds.json is expected to contain.
	    past_preds = pred_df[['Date', 'ARIMA']].tail(7).to_dict('records')

	    # Full series: history followed by the forecast, under one key.
	    full = trends + [
	        {"Date": r["Date"], "Adj Close": r["ARIMA"]}
	        for r in forecast
	    ]

	    pie = [
	        {"sizes": pos, "labels": "Positive"},
	        {"sizes": neg, "labels": "Negative"},
	        {"sizes": neu, "labels": "Neutral"},
	    ]

	    outputs = (
	        ("pie.json", pie),
	        ("trends.json", trends),
	        ("pastpreds.json", past_preds),
	        ("forecast.json", forecast),
	        ("full.json", full),
	    )
	    for filename, payload in outputs:
	        _write_dashboard_json(filename, payload)

	    # —— 6) Render the final template ——
	    dates_array = (
	        pred_df['Date'].dt.strftime('%Y-%m-%d').tail(7).to_numpy().reshape(-1, 1)
	    )
	    forecast_array_ar = np.round(
	        pred_df['ARIMA'].tail(7).to_numpy(), 2
	    ).reshape(-1, 1)

	    return render_template(
	        'resultsf.html',
	        quote=quote,
	        arima_pred=round(tomorrow, 2),
	        open_s=latest['Open'],
	        high_s=latest['High'],
	        low_s=latest['Low'],
	        close_s=latest['Close'],
	        adj_close=latest['Adj Close'],
	        vol=latest['Volume'],
	        tw_list=tweets,
	        tw_pol=tw_pol,
	        idea=idea,
	        decision=decision,
	        dates=dates_array,
	        forecast_set_ar=forecast_array_ar,
	        error_arima=round(rmse, 2),
	    )


	def _same_day_years_earlier(moment, years=2):
	    """Return midnight on the same calendar day ``years`` years before ``moment``.

	    Feb 29 has no counterpart in a non-leap year, so it maps to Feb 28
	    instead of raising ``ValueError``.
	    """
	    try:
	        return datetime(moment.year - years, moment.month, moment.day)
	    except ValueError:
	        # Only reachable for Feb 29 when the target year is not a leap year.
	        return datetime(moment.year - years, moment.month, moment.day - 1)


	def _arima_forecast(df, horizon=7, order=(6, 1, 0), train_fraction=0.65):
	    """Walk-forward ARIMA evaluation followed by a business-day forecast.

	    Args:
	        df: Price frame with either a 'Date' column or a date-like index and
	            one of the columns 'Adj Close', 'adjClose', 'Adj_close', 'Close'
	            or 'Price'.
	        horizon: Number of future business days to forecast.
	        order: ARIMA ``(p, d, q)`` order.
	        train_fraction: Leading share of the series used to fit the model.

	    Returns:
	        Tuple ``(preds_all, rmse, tomorrow, history_df, predict_df)``:
	        one-step predictions for the test slice followed by the ``horizon``
	        future values, the test-slice RMSE, the first future value, the full
	        price history ('Adj Close' column), and the future values ('ARIMA'
	        column) indexed by business day.

	    Raises:
	        KeyError: If none of the known price columns is present.
	    """
	    # — 1) Ensure a DateTimeIndex —
	    if 'Date' in df.columns:
	        df = df.copy()
	        df['Date'] = pd.to_datetime(df['Date'])
	        dfi = df.set_index('Date')
	    else:
	        dfi = df.copy()
	        dfi.index = pd.to_datetime(dfi.index)

	    # — 2) Pick the first matching price column —
	    candidates = ('Adj Close', 'adjClose', 'Adj_close', 'Close', 'Price')
	    price_col = next((c for c in candidates if c in dfi.columns), None)
	    if price_col is None:
	        raise KeyError("No price column found.")
	    raw = dfi[price_col]

	    # — 3) Flatten to 1-D, coerce to floats, preserve original dates —
	    arr = pd.to_numeric(raw.values.flatten(), errors='coerce')
	    # bfill covers leading/interior gaps; ffill covers trailing gaps that
	    # bfill cannot reach.
	    series = pd.Series(arr, index=raw.index).bfill().ffill().astype(float)

	    # — 4) Train/test split —
	    split = int(len(series) * train_fraction)
	    train, test = series.iloc[:split], series.iloc[split:]
	    n_test = len(test)

	    # — 5) Fit once, then roll forward one step at a time —
	    history, preds_all = list(train), []
	    fitted = ARIMA(history, order=order).fit()
	    for t in range(n_test + horizon):
	        # Re-use the fitted parameters on the extended history (no refit).
	        step = fitted.apply(history)
	        yhat = float(step.forecast()[0])
	        preds_all.append(yhat)
	        # Feed back the observed value while one exists, else the forecast.
	        history.append(test.iloc[t] if t < n_test else yhat)

	    # — 6) RMSE on the test slice; first future value is "tomorrow" —
	    rmse = math.sqrt(mean_squared_error(test, preds_all[:n_test]))
	    tomorrow = preds_all[n_test]

	    # — 7) Frames for plotting —
	    history_df = pd.DataFrame({'Adj Close': series.values}, index=series.index)

	    # Start strictly after the last observation. Anchoring date_range on the
	    # last date and dropping the first element skips a business day whenever
	    # the last observation falls on a weekend.
	    first_future_day = series.index[-1] + BDay(1)
	    future_idx = pd.date_range(first_future_day, periods=horizon, freq='B')
	    predict_df = pd.DataFrame({'ARIMA': preds_all[n_test:]}, index=future_idx)

	    return preds_all, rmse, tomorrow, history_df, predict_df


	def _reddit_sentiment(symbol):
	    """Score the newest Reddit posts that match ``symbol`` with TextBlob.

	    Returns:
	        Tuple ``(avg_pol, texts, label, pos, neg, neu)``: average polarity
	        per post (0 when nothing matched), the cleaned post texts, the label
	        "Overall Positive" when the average is above zero and "Overall
	        Negative" otherwise, and the counts of positive, negative and
	        neutral posts.
	    """
	    reddit = praw.Reddit(
	        client_id=REDDIT_ID,
	        client_secret=REDDIT_SECRET,
	        user_agent='SentimentAnalysis',
	    )
	    posts = reddit.subreddit('all').search(symbol, limit=300, sort='new')

	    pos = neg = 0
	    total_pol = 0
	    texts = []
	    for post in posts:
	        txt = (post.title or post.selftext or "")
	        # NOTE(review): this pattern removes only the literal sequence "&|:";
	        # if the intent was to strip each of those characters, it should be
	        # the character class [&|:].
	        txt = re.sub(r'&\|:', '', txt).encode('ascii', 'ignore').decode()
	        # A post's polarity is the sum over its sentences.
	        pol = sum(s.sentiment.polarity for s in TextBlob(txt).sentences)
	        total_pol += pol
	        if pol > 0:
	            pos += 1
	        elif pol < 0:
	            neg += 1
	        texts.append(txt)

	    avg_pol = total_pol / len(texts) if texts else 0
	    neu = max(0, len(texts) - pos - neg)
	    label = "Overall Positive" if avg_pol > 0 else "Overall Negative"
	    return avg_pol, texts, label, pos, neg, neu


	def _write_dashboard_json(filename, payload):
	    """Write ``payload`` as UTF-8 JSON to ``<static folder>/assets/js/<filename>``.

	    The path is resolved against Flask's static folder rather than the
	    process working directory, and the directory is created when missing.
	    """
	    target_dir = os.path.join(app.static_folder, "assets", "js")
	    os.makedirs(target_dir, exist_ok=True)
	    with open(os.path.join(target_dir, filename), "w", encoding='utf-8') as f:
	        simplejson.dump(
	            payload,
	            f,
	            default=str,  # Timestamps are serialized via str()
	            ignore_nan=True,
	            ensure_ascii=False,
	            indent=4,
	        )

	if __name__ == "__main__":
	    # The server listens on all interfaces, and the Werkzeug debugger lets
	    # anyone who can reach it execute arbitrary code. Debug mode is therefore
	    # off unless FLASK_DEBUG is explicitly set to a truthy value.
	    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {
	        "1", "true", "yes", "on",
	    }
	    print("🚀 Starting Flask app...")
	    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)