Method314 committed on
Commit
47c088f
·
verified ·
1 Parent(s): 872c55e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +618 -0
app.py ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import yfinance as yf
3
+ import numpy as np
4
+ import pandas as pd
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from plotly.subplots import make_subplots
8
+ from datetime import datetime, timedelta
9
+ import requests
10
+ from sklearn.model_selection import train_test_split
11
+ from sklearn.metrics import mean_squared_error, r2_score
12
+ from sklearn.preprocessing import StandardScaler
13
+ from catboost import CatBoostRegressor
14
+ import shap
15
+ import ta
16
+ import matplotlib.pyplot as plt
17
+ import warnings
18
+ import colorsys
19
+ import openai
20
+
21
# Suppress every warning category for the whole process.
warnings.filterwarnings('ignore')

# Initialize the OpenAI client: the key is read from Streamlit secrets and
# installed as the module-level default of the `openai` package.
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY

# Alpha Vantage API key (read from Streamlit secrets; used by the
# Alpha Vantage request helpers in this file).
ALPHA_VANTAGE_API_KEY = st.secrets["ALPHA_VANTAGE_API_KEY"]

# GPT Assistant ID (read from Streamlit secrets).
# NOTE(review): no use of ASSISTANT_ID is visible in this file, yet a missing
# secret fails at import time -- confirm it is still needed.
ASSISTANT_ID = st.secrets["ASSISTANT_ID"]
32
+
33
def adjust_color_intensity(base_color, percentage):
    """Darken a ``#rrggbb`` colour in proportion to the magnitude of *percentage*.

    The colour is converted to HLS, its lightness is lowered by
    ``abs(percentage) / 100 * 0.5`` (clamped to ``[0, 1]``) and the result is
    converted back to a lowercase ``#rrggbb`` string. Only the magnitude of
    *percentage* is used; its sign is ignored.

    Args:
        base_color: Colour as a 7-character ``#rrggbb`` hex string.
        percentage: Signed percentage controlling how strongly to darken.

    Returns:
        The adjusted colour as a lowercase ``#rrggbb`` string.

    Raises:
        ValueError: If *base_color* is not a ``#rrggbb`` hex string.
    """
    if not (isinstance(base_color, str) and len(base_color) == 7 and base_color.startswith("#")):
        raise ValueError(f"Expected a '#rrggbb' colour, got {base_color!r}")

    # int(..., 16) raises ValueError on its own for non-hex digits.
    red, green, blue = (int(base_color[i:i + 2], 16) / 255.0 for i in (1, 3, 5))

    hue, lightness, saturation = colorsys.rgb_to_hls(red, green, blue)
    lightness = max(0, min(1, lightness - (abs(percentage) / 100 * 0.5)))
    channels = colorsys.hls_to_rgb(hue, lightness, saturation)

    # Round instead of truncating: the RGB -> HLS -> RGB round trip can land a
    # hair below the exact channel value, and int() would then drop a full step
    # (so a 0% adjustment would not reproduce the input colour).
    return "#" + "".join(
        f"{min(255, max(0, round(channel * 255))):02x}" for channel in channels
    )
45
+
46
def create_color_box(text, background_color, percentage):
    """Build an HTML card with *text* on the left and the signed percentage on the right.

    The card background is *background_color* passed through
    ``adjust_color_intensity`` with *percentage*. The large figure is dark
    green with a leading ``+`` when *percentage* is non-negative and dark red
    otherwise. *text* is interpolated as-is (it is not HTML-escaped).

    Returns:
        The HTML snippet as a string.
    """
    shade = adjust_color_intensity(background_color, percentage)
    is_non_negative = percentage >= 0
    figure_color = '#006400' if is_non_negative else '#8B0000'
    sign = '+' if is_non_negative else ''

    markup_lines = (
        "",
        f'<div style="background-color: {shade}; padding: 20px; border-radius: 10px; margin-bottom: 20px; font-size: 16px; line-height: 1.6; display: flex; justify-content: space-between; align-items: center;">',
        '<div style="flex: 1;">',
        f"{text}",
        "</div>",
        '<div style="flex: 0 0 40%; display: flex; justify-content: center; align-items: center;">',
        f'<div style="font-size: 72px; font-weight: bold; color: {figure_color};">',
        f"{sign}{percentage:.2f}%",
        "</div>",
        "</div>",
        "</div>",
        "",
    )
    return "\n".join(markup_lines)
60
+
61
def create_gradient_box(text, start_color, end_color, start_percentage, end_percentage):
    """Build an HTML box whose background is a left-to-right gradient.

    Each end of the gradient is its base colour passed through
    ``adjust_color_intensity`` with the matching percentage. *text* is
    interpolated as-is (it is not HTML-escaped).

    Returns:
        The HTML snippet as a string.
    """
    gradient_from = adjust_color_intensity(start_color, start_percentage)
    gradient_to = adjust_color_intensity(end_color, end_percentage)
    container_style = (
        f"background: linear-gradient(to right, {gradient_from}, {gradient_to}); "
        "padding: 20px; border-radius: 10px; margin-bottom: 20px; "
        "font-size: 16px; line-height: 1.6;"
    )
    return "\n".join(("", f'<div style="{container_style}">', f"{text}", "</div>", ""))
69
+
70
def get_financial_data(ticker, end_date):
    """Fetch income statement, balance sheet and cash flow data from Alpha Vantage.

    Reports whose ``fiscalDateEnding`` is later than *end_date* are removed
    from both the ``quarterlyReports`` and ``annualReports`` lists.

    Args:
        ticker: Stock symbol sent as the ``symbol`` query parameter.
        end_date: Cut-off as a ``datetime.date`` (a ``datetime`` is accepted
            and reduced to its date).

    Returns:
        Dict keyed by ``'INCOME_STATEMENT'``, ``'BALANCE_SHEET'`` and
        ``'CASH_FLOW'`` holding the decoded JSON payload of each endpoint.

    Raises:
        Exception: If a request does not return HTTP 200, or the payload
            carries an API notice instead of report data.
    """
    base_url = "https://www.alphavantage.co/query"
    functions = ['INCOME_STATEMENT', 'BALANCE_SHEET', 'CASH_FLOW']
    data = {}

    # date <= datetime raises TypeError, so normalise the cut-off once.
    if isinstance(end_date, datetime):
        end_date = end_date.date()

    for function in functions:
        params = {
            "function": function,
            "symbol": ticker,
            "apikey": ALPHA_VANTAGE_API_KEY
        }
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(base_url, params=params, timeout=30)
        if response.status_code != 200:
            raise Exception(f"Failed to fetch {function} data: {response.status_code}")

        payload = response.json()
        # NOTE(review): Alpha Vantage is understood to report rate limits and
        # invalid calls in an HTTP 200 body under these keys -- confirm against
        # the current API documentation.
        if isinstance(payload, dict) and not (
            'quarterlyReports' in payload or 'annualReports' in payload
        ):
            for notice_key in ('Error Message', 'Note', 'Information'):
                if notice_key in payload:
                    raise Exception(
                        f"Alpha Vantage returned no {function} data: {payload[notice_key]}"
                    )
        data[function] = payload

    def on_or_before_cutoff(report):
        fiscal_end = datetime.strptime(report['fiscalDateEnding'], '%Y-%m-%d').date()
        return fiscal_end <= end_date

    for content in data.values():
        for report_list in ('quarterlyReports', 'annualReports'):
            if report_list in content:
                content[report_list] = [
                    report for report in content[report_list] if on_or_before_cutoff(report)
                ]

    return data
100
+
101
def get_earnings_dates(ticker):
    """Map each fiscal quarter end to the date its earnings were reported.

    Queries the Alpha Vantage ``EARNINGS`` function and reads the
    ``quarterlyEarnings`` list of the response.

    Args:
        ticker: Stock symbol sent as the ``symbol`` query parameter.

    Returns:
        Dict of ``fiscalDateEnding`` -> ``reportedDate`` (both as returned by
        the API). Entries missing either value are skipped; the dict is empty
        when the response has no ``quarterlyEarnings``.

    Raises:
        requests.HTTPError: If the response status indicates an error.
    """
    # Pass the symbol via `params` so it is URL-encoded instead of being
    # spliced into the query string verbatim.
    response = requests.get(
        "https://www.alphavantage.co/query",
        params={
            "function": "EARNINGS",
            "symbol": ticker,
            "apikey": ALPHA_VANTAGE_API_KEY,
        },
        timeout=30,
    )
    response.raise_for_status()
    data = response.json()

    earnings_dates = {}
    for report in data.get('quarterlyEarnings', []):
        fiscal_date = report.get('fiscalDateEnding')
        reported_date = report.get('reportedDate')
        if fiscal_date and reported_date:
            earnings_dates[fiscal_date] = reported_date

    return earnings_dates
113
+
114
def get_earnings_data(ticker):
    """Fetch quarterly earnings from Alpha Vantage as a DataFrame.

    Args:
        ticker: Stock symbol sent as the ``symbol`` query parameter.

    Returns:
        DataFrame indexed by ``reportedDate`` (datetime) with a datetime
        ``fiscalDateEnding`` column and numeric ``reportedEPS``,
        ``estimatedEPS``, ``surprise`` and ``surprisePercentage`` columns
        (unparseable numbers become NaN). When the API returns no
        ``quarterlyEarnings`` the frame is empty but still has these columns.

    Raises:
        requests.HTTPError: If the response status indicates an error.
    """
    response = requests.get(
        "https://www.alphavantage.co/query",
        params={
            "function": "EARNINGS",
            "symbol": ticker,
            "apikey": ALPHA_VANTAGE_API_KEY,
        },
        timeout=30,
    )
    response.raise_for_status()
    data = response.json()

    numeric_columns = ['reportedEPS', 'estimatedEPS', 'surprise', 'surprisePercentage']
    required_columns = ['fiscalDateEnding', 'reportedDate'] + numeric_columns

    df = pd.DataFrame(data.get('quarterlyEarnings', []))
    # An empty or partial payload would otherwise raise KeyError below; make
    # sure every column this function touches exists (missing ones are NaN).
    all_columns = list(dict.fromkeys(list(df.columns) + required_columns))
    df = df.reindex(columns=all_columns)

    df['fiscalDateEnding'] = pd.to_datetime(df['fiscalDateEnding'])
    df['reportedDate'] = pd.to_datetime(df['reportedDate'])
    df = df.set_index('reportedDate')

    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    return df
130
+
131
def process_financial_data(data, earnings_dates, earnings_data):
    """Combine quarterly statements into one numeric frame keyed by release date.

    Every quarterly report field is stored under ``"<statement>_<field>"``.
    Rows are keyed by the release date looked up in *earnings_dates* for the
    report's ``fiscalDateEnding`` (falling back to the fiscal date itself), so
    reports from different statements that share a release date end up in the
    same row. The result is sorted by date, left-joined with *earnings_data*
    and every column is coerced with ``pd.to_numeric(errors='coerce')``.

    Args:
        data: Mapping of statement name -> payload with an optional
            ``'quarterlyReports'`` list of dicts.
        earnings_dates: Mapping of fiscal date string -> release date string.
        earnings_data: DataFrame joined onto the result by index.

    Returns:
        The merged DataFrame with a datetime index.
    """
    rows_by_release = {}

    for statement_name, payload in data.items():
        if 'quarterlyReports' not in payload:
            continue
        for quarter in payload['quarterlyReports']:
            period_end = quarter['fiscalDateEnding']
            released = earnings_dates.get(period_end, period_end)
            prefixed = {f"{statement_name}_{field}": value for field, value in quarter.items()}
            rows_by_release.setdefault(released, {}).update(prefixed)

    frame = pd.DataFrame.from_dict(rows_by_release, orient='index')
    frame.index = pd.to_datetime(frame.index)
    frame = frame.sort_index().join(earnings_data, how='left')

    for column in frame.columns:
        frame[column] = pd.to_numeric(frame[column], errors='coerce')

    return frame
153
+
154
def get_stock_data(ticker, start_date, end_date):
    """Download daily prices with yfinance and append technical indicators.

    Adds ``Price_Pct_Change``, the ``ta`` indicators RSI, Williams %R,
    Bollinger bands, OBV, ATR, MACD, ADX and CCI, and a ``<indicator>_ROC``
    percentage-change column for each indicator.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Start of the download window.
        end_date: End of the download window.

    Returns:
        The downloaded DataFrame with the extra columns.

    Raises:
        ValueError: If no price data is returned.
    """
    df = yf.download(ticker, start=start_date, end=end_date)

    if df is None or df.empty:
        raise ValueError(f"No price data returned for {ticker} between {start_date} and {end_date}.")

    # NOTE(review): recent yfinance releases are understood to return
    # (field, ticker) MultiIndex columns even for one symbol; keep only the
    # field level so df['Close'] is a Series -- confirm against the installed
    # yfinance version.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df['Price_Pct_Change'] = df['Close'].pct_change()

    df['RSI'] = ta.momentum.RSIIndicator(df['Close']).rsi()
    df['WILLR'] = ta.momentum.WilliamsRIndicator(df['High'], df['Low'], df['Close']).williams_r()
    bb = ta.volatility.BollingerBands(df['Close'])
    df['BB_upper'] = bb.bollinger_hband()
    df['BB_middle'] = bb.bollinger_mavg()
    df['BB_lower'] = bb.bollinger_lband()
    df['OBV'] = ta.volume.OnBalanceVolumeIndicator(df['Close'], df['Volume']).on_balance_volume()
    df['ATR'] = ta.volatility.AverageTrueRange(df['High'], df['Low'], df['Close']).average_true_range()
    df['MACD'] = ta.trend.MACD(df['Close']).macd()
    df['ADX'] = ta.trend.ADXIndicator(df['High'], df['Low'], df['Close']).adx()
    df['CCI'] = ta.trend.CCIIndicator(df['High'], df['Low'], df['Close']).cci()

    indicator_columns = ['RSI', 'WILLR', 'BB_upper', 'BB_middle', 'BB_lower', 'OBV', 'ATR', 'MACD', 'ADX', 'CCI']
    for column in indicator_columns:
        # pct_change divides by the previous value; a previous value of 0
        # yields +/-inf, which is recorded as missing instead.
        df[f'{column}_ROC'] = df[column].pct_change().replace([np.inf, -np.inf], np.nan)

    return df
176
+
177
def add_financial_ratios(X):
    """Add five ratio columns to *X* in place and return it.

    Each ratio is numerator / denominator, with NaN wherever the denominator
    equals zero:

    * ``PE_Ratio``       = total shareholder equity / net income
    * ``PB_Ratio``       = total assets / total shareholder equity
    * ``Debt_to_Equity`` = total liabilities / total shareholder equity
    * ``ROE``            = net income / total shareholder equity
    * ``ROA``            = net income / total assets

    NOTE(review): ``PE_Ratio`` and ``PB_Ratio`` are built from balance-sheet
    and income values only (no share price) -- confirm the names are intended.

    Args:
        X: DataFrame holding the ``BALANCE_SHEET_*`` / ``INCOME_STATEMENT_*``
            columns referenced above.

    Returns:
        The same DataFrame object, with the ratio columns added.
    """
    print("Adding financial ratios...")

    equity = 'BALANCE_SHEET_totalShareholderEquity'
    assets = 'BALANCE_SHEET_totalAssets'
    liabilities = 'BALANCE_SHEET_totalLiabilities'
    net_income = 'INCOME_STATEMENT_netIncome'

    # (new column, numerator column, denominator column), in insertion order.
    ratio_specs = (
        ('PE_Ratio', equity, net_income),
        ('PB_Ratio', assets, equity),
        ('Debt_to_Equity', liabilities, equity),
        ('ROE', net_income, equity),
        ('ROA', net_income, assets),
    )

    for ratio_name, numerator_col, denominator_col in ratio_specs:
        numerator = X[numerator_col]
        denominator = X[denominator_col]
        X[ratio_name] = np.where(denominator != 0, numerator / denominator, np.nan)

    print("Financial ratios added.")
    return X
191
+
192
def prepare_data(quarterly_df, stock_df, end_date):
    """Align quarterly fundamentals with daily closes and scale both.

    Both frames are re-indexed to plain dates, cut off at *end_date*,
    expanded to a daily calendar and forward-filled. The close price becomes
    the target and the fundamentals (plus EPS-surprise and ratio columns) the
    features; both are standardised with separate ``StandardScaler`` objects.

    The input frames are not modified.

    NOTE(review): the scalers are fitted on the full data set before any
    train/test split done by the caller; and the ratio columns are added
    after the mean-fill, so they may still contain NaN.

    Args:
        quarterly_df: Fundamentals indexed by release date.
        stock_df: Price data with a ``Close`` column, indexed by date.
        end_date: Cut-off as a ``datetime.date`` (a ``datetime`` is accepted
            and reduced to its date).

    Returns:
        Tuple ``(X_scaled, y_scaled, dates, scaler_X, scaler_y)``.

    Raises:
        ValueError: If either input has no rows on or before *end_date*, or
            the merged frame has no rows with a close price.
    """
    print("Starting data preparation...")
    print(f"Initial quarterly_df shape: {quarterly_df.shape}")
    print(f"Initial stock_df shape: {stock_df.shape}")

    if isinstance(end_date, datetime):
        end_date = end_date.date()

    # Work on copies: re-indexing below must not leak into the caller's frames.
    quarterly_df = quarterly_df.copy()
    stock_df = stock_df.copy()
    quarterly_df.index = pd.to_datetime(quarterly_df.index).date
    stock_df.index = pd.to_datetime(stock_df.index).date

    quarterly_df = quarterly_df[quarterly_df.index <= end_date]
    stock_df = stock_df[stock_df.index <= end_date]

    # min() of an empty index is NaN, and min(NaN, date) raises an opaque
    # TypeError -- fail with a clear message instead.
    if quarterly_df.empty or stock_df.empty:
        raise ValueError("No overlapping data between stock prices and financial statements.")

    # reindex() refuses duplicate labels; keep the last row for each date.
    quarterly_df = quarterly_df[~quarterly_df.index.duplicated(keep='last')]
    stock_df = stock_df[~stock_df.index.duplicated(keep='last')]

    start_date = min(quarterly_df.index.min(), stock_df.index.min())
    all_dates = pd.date_range(start=start_date, end=end_date, freq='D').date

    quarterly_df_reindexed = quarterly_df.reindex(all_dates).ffill()
    stock_df_reindexed = stock_df.reindex(all_dates).ffill()

    merged_df = pd.concat([stock_df_reindexed['Close'], quarterly_df_reindexed], axis=1)

    merged_df = merged_df.dropna(subset=['Close'])

    print(f"Merged dataframe shape: {merged_df.shape}")

    if merged_df.empty:
        raise ValueError("No overlapping data between stock prices and financial statements.")

    X = merged_df.drop('Close', axis=1)
    y = merged_df['Close']

    X = X.fillna(X.mean())

    X['EPS_Surprise'] = X['reportedEPS'] - X['estimatedEPS']
    X['EPS_Surprise_Percentage'] = X['surprisePercentage']

    X = add_financial_ratios(X)

    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_scaled = pd.DataFrame(scaler_X.fit_transform(X), columns=X.columns, index=X.index)
    y_scaled = pd.Series(scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten(), index=y.index)

    print(f"Final data shape: X: {X_scaled.shape}, y: {y_scaled.shape}")
    print(f"Date range: {X_scaled.index.min()} to {X_scaled.index.max()}")

    return X_scaled, y_scaled, merged_df.index, scaler_X, scaler_y
238
+
239
def train_catboost_model(X_train, X_test, y_train, y_test):
    """Fit a ``CatBoostRegressor`` on the training split and return it.

    The test split is passed as ``eval_set`` with ``early_stopping_rounds=50``.

    NOTE(review): because the test split is the early-stopping set, metrics
    computed later on that same split are not fully out-of-sample.
    """
    hyperparameters = dict(
        iterations=1000,
        learning_rate=0.1,
        depth=6,
        loss_function='RMSE',
        random_state=42,
        verbose=100,
    )
    regressor = CatBoostRegressor(**hyperparameters)
    regressor.fit(
        X_train,
        y_train,
        eval_set=(X_test, y_test),
        early_stopping_rounds=50,
    )
    return regressor
250
+
251
def evaluate_model(model, X_test, y_test, scaler_y):
    """Print MSE and R-squared on the test split (in unscaled units) and return R-squared.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Scaled test features.
        y_test: Scaled test targets (an object with a ``.values`` array).
        scaler_y: Scaler whose ``inverse_transform`` undoes the target scaling.

    Returns:
        The R-squared score.
    """
    def to_original_units(scaled_values):
        # The scaler works on 2-D column vectors; flatten back afterwards.
        return scaler_y.inverse_transform(scaled_values.reshape(-1, 1)).flatten()

    predicted = to_original_units(model.predict(X_test))
    actual = to_original_units(y_test.values)

    mse = mean_squared_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared Score: {r2}")
    return r2
261
+
262
def conformal_prediction(model, X_train, y_train, X_test, scaler_y, alpha=0.1):
    """Predict on *X_test* with a symmetric relative-error band.

    The model is (re)fitted on the training data, the relative absolute
    errors of its training predictions are computed in unscaled units, and
    their ``(1 - alpha)`` percentile is used as a relative half-width around
    each test prediction.

    NOTE(review): the band is calibrated on in-sample (training) errors and
    *model* is refitted here without an eval set, so the caller's model object
    changes -- confirm both are intended.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_train: Scaled training features.
        y_train: Scaled training targets.
        X_test: Scaled features to predict for.
        scaler_y: Scaler whose ``inverse_transform`` undoes the target scaling.
        alpha: Miscoverage level; the band uses the ``(1 - alpha)`` percentile.

    Returns:
        Tuple ``(predictions, lower_bound, upper_bound)`` of 1-D arrays in
        unscaled units, with ``lower_bound <= predictions <= upper_bound``.

    Raises:
        ValueError: If no finite relative error is available for calibration.
    """
    def to_original_units(scaled_values):
        column = np.asarray(scaled_values, dtype=float).reshape(-1, 1)
        return np.asarray(scaler_y.inverse_transform(column)).flatten()

    model.fit(X_train, y_train)
    train_predicted = to_original_units(model.predict(X_train))
    train_actual = to_original_units(y_train)

    # A prediction of exactly 0 makes the relative error inf/NaN; such points
    # carry no usable calibration information and would blow up the percentile.
    with np.errstate(divide='ignore', invalid='ignore'):
        relative_errors = np.abs((train_actual - train_predicted) / train_predicted)
    relative_errors = relative_errors[np.isfinite(relative_errors)]
    if relative_errors.size == 0:
        raise ValueError("Cannot calibrate prediction interval: no finite relative errors.")

    error_threshold = np.percentile(relative_errors, (1 - alpha) * 100)

    test_predicted = to_original_units(model.predict(X_test))

    # Use the magnitude of the prediction so the lower bound stays below the
    # upper bound even when a prediction is negative.
    half_width = np.abs(test_predicted) * error_threshold
    lower_bound_unscaled = test_predicted - half_width
    upper_bound_unscaled = test_predicted + half_width

    return test_predicted, lower_bound_unscaled, upper_bound_unscaled
280
+
281
def plot_results(dates, y, fair_values, lower_bound, upper_bound, scaler_y):
    """Build a two-row Plotly figure: prices with the fair-value band, and percent error.

    Row 1 shows the actual price (inverse-scaled from *y*), the fair value
    and the filled band between the upper and lower bounds. Row 2 shows
    ``(fair_values - actual) / actual * 100``.

    Returns:
        The Plotly figure.
    """
    actual_prices = scaler_y.inverse_transform(y.values.reshape(-1, 1)).flatten()

    figure = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.02,
        row_heights=[0.7, 0.3],
    )

    # Order matters: 'tonexty' on the lower bound fills up to the trace added
    # just before it, i.e. the upper bound.
    price_traces = [
        go.Scatter(x=dates, y=actual_prices, mode='lines', name='Actual Price', line=dict(color='blue')),
        go.Scatter(x=dates, y=fair_values, mode='lines', name='Fair Value', line=dict(color='red')),
        go.Scatter(x=dates, y=upper_bound, mode='lines', name='Upper Bound', line=dict(color='gray', width=0)),
        go.Scatter(x=dates, y=lower_bound, mode='lines', name='Lower Bound', line=dict(color='gray', width=0), fill='tonexty'),
    ]
    for trace in price_traces:
        figure.add_trace(trace, row=1, col=1)

    percent_error = ((fair_values - actual_prices) / actual_prices) * 100
    error_trace = go.Scatter(x=dates, y=percent_error, mode='lines', name='Percent Error', line=dict(color='purple'))
    figure.add_trace(error_trace, row=2, col=1)

    figure.update_layout(height=800, title_text="Stock Price, Fair Value, and Percent Error")
    figure.update_xaxes(title_text="Date", row=2, col=1)
    figure.update_yaxes(title_text="Price", row=1, col=1)
    figure.update_yaxes(title_text="Percent Error", row=2, col=1)

    return figure
300
+
301
def get_monthly_seasonality(ticker, start_date, end_date):
    """Summarise month-over-month returns per calendar month.

    Downloads prices with yfinance, takes the last price of every month,
    computes month-over-month percentage changes and aggregates them by
    calendar month.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Start of the download window.
        end_date: End of the download window.

    Returns:
        DataFrame indexed by month number 1..12 with columns
        ``'Mean Change%'``, ``'Median Change%'``, ``'Count'`` and
        ``'Positive Periods'`` (share of months with a positive return).
        Months without any observation have NaN statistics and a count of 0.

    Raises:
        ValueError: If no price data is returned.
    """
    data = yf.download(ticker, start=start_date, end=end_date)
    if data is None or data.empty:
        raise ValueError(f"No price data returned for {ticker} between {start_date} and {end_date}.")

    # NOTE(review): recent yfinance releases are understood to return
    # (field, ticker) MultiIndex columns and to omit 'Adj Close' because prices
    # are auto-adjusted by default -- confirm against the installed version.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)
    price_column = 'Adj Close' if 'Adj Close' in data.columns else 'Close'
    prices = data[price_column].rename('Adj Close')

    try:
        monthly_data = prices.resample('ME').last()
    except ValueError:
        # Older pandas only knows the month-end alias as 'M'.
        monthly_data = prices.resample('M').last()

    monthly_returns = monthly_data.pct_change().to_frame()
    monthly_returns['Month'] = monthly_returns.index.month
    seasonality = monthly_returns.groupby('Month')['Adj Close'].agg(
        ['mean', 'median', 'count', lambda x: (x > 0).mean()]
    )
    seasonality.columns = ['Mean Change%', 'Median Change%', 'Count', 'Positive Periods']

    # Callers look months up by number 1..12; make every month present even
    # when the window is too short to contain it.
    seasonality = seasonality.reindex(range(1, 13))
    seasonality.index.name = 'Month'
    seasonality['Count'] = seasonality['Count'].fillna(0).astype(int)
    return seasonality
310
+
311
def plot_monthly_seasonality(seasonality, ticker, start_date, end_date):
    """Plot the share of positive months (bars) and the mean change (line) per month.

    Args:
        seasonality: DataFrame indexed by month number with
            ``'Positive Periods'`` and ``'Mean Change%'`` columns (fractions).
        ticker: Symbol shown in the title.
        start_date: Start of the analysed window, shown in the title.
        end_date: End of the analysed window, shown in the title.

    Returns:
        The Plotly figure.

    NOTE(review): the y-axis is fixed to 0..100, so negative mean changes are
    drawn outside the visible range -- confirm this is intended.
    """
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Align to the 12 axis labels so a month missing from the data leaves a
    # gap instead of raising KeyError or shifting later months.
    seasonality = seasonality.reindex(range(1, 13))
    positive_share = seasonality['Positive Periods']
    mean_change = seasonality['Mean Change%']

    bar_labels = [
        "" if pd.isna(share) else f"{share*100:.1f}%<br>{change*100:.2f}%"
        for share, change in zip(positive_share, mean_change)
    ]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=months,
        y=positive_share * 100,
        name='Positive Periods',
        marker_color=['green' if x > 0.5 else 'red' for x in positive_share],
        text=bar_labels,
        textposition='auto'
    ))
    fig.add_trace(go.Scatter(
        x=months,
        y=mean_change * 100,
        name='Mean Change%',
        mode='lines+markers',
        line=dict(color='yellow', width=2)
    ))
    fig.update_layout(
        title=f'Monthly Seasonality for {ticker}<br>{start_date} to {end_date}',
        xaxis_title='Month',
        yaxis_title='Percentage',
        template='plotly_dark',
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=600,
        margin=dict(l=50, r=50, t=100, b=50)
    )
    fig.add_hline(y=50, line_dash="dash", line_color="gray")
    fig.add_hline(y=0, line_dash="dash", line_color="gray")
    fig.update_yaxes(ticksuffix="%", range=[0, 100])
    return fig
343
+
344
def prepare_financial_data_for_gpt(financial_data):
    """Render the annual reports of each statement as plain text for a prompt.

    For each of ``INCOME_STATEMENT``, ``BALANCE_SHEET`` and ``CASH_FLOW`` a
    header line is emitted, followed by the first five entries of that
    statement's ``annualReports`` list: the fiscal date first, then every
    other field as ``key: value``, then a blank line. A statement missing
    from *financial_data* contributes only its header.

    Returns:
        The three sections joined by a newline.
    """
    def format_financial_data(data, report_type):
        pieces = [f"{report_type} (Last 5 Years):\n"]
        if report_type in data:
            for annual in data[report_type].get('annualReports', [])[:5]:
                pieces.append(f"Fiscal Date Ending: {annual.get('fiscalDateEnding', 'N/A')}\n")
                pieces.extend(
                    f"{field}: {amount}\n"
                    for field, amount in annual.items()
                    if field != 'fiscalDateEnding'
                )
                pieces.append("\n")
        return "".join(pieces)

    statement_names = ('INCOME_STATEMENT', 'BALANCE_SHEET', 'CASH_FLOW')
    return "\n".join(format_financial_data(financial_data, name) for name in statement_names)
362
+
363
def get_gpt_analysis(ticker, financial_data):
    """Ask the OpenAI chat API for a written analysis of the financial data.

    The prompt is built from ``prepare_financial_data_for_gpt``. Works with
    both generations of the ``openai`` package: the client-based interface
    (``openai.OpenAI``) when available, otherwise the legacy
    ``openai.ChatCompletion`` interface.

    Args:
        ticker: Symbol named in the prompt.
        financial_data: Statement payloads as returned by ``get_financial_data``.

    Returns:
        The model's reply with surrounding whitespace stripped, or a fixed
        failure message if the API call raises.
    """
    formatted_data = prepare_financial_data_for_gpt(financial_data)
    prompt = f"Analyze the following financial data for {ticker} and provide insights:\n\n{formatted_data}"

    request_options = dict(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a financial analyst."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=500,
        n=1,
        stop=None,
        temperature=0.5,
    )

    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed openai.ChatCompletion; use a client that
            # reuses the key configured on the module.
            client = openai.OpenAI(api_key=openai.api_key)
            response = client.chat.completions.create(**request_options)
            content = response.choices[0].message.content
        else:
            response = openai.ChatCompletion.create(**request_options)
            content = response.choices[0].message['content']
        return (content or "").strip()
    except Exception as e:
        # Best effort by design: the rest of the analysis is still shown.
        print(f"OpenAI API error: {e}")
        return "GPT Assistant analysis failed. Please check the API integration."
384
+
385
def plot_interactive_logarithmic_stock_chart(ticker, start_date, end_date, future_days=365 * 10):
    """Plot closes on a log axis with a fitted log-linear trend and x2 / x4 bands.

    A straight line is fitted to ``log(Close)`` against calendar days since
    the first observation and extended *future_days* calendar days past the
    last observation. Inner bands are the trend times/divided by 2, outer
    bands times/divided by 4.

    Args:
        ticker: Symbol passed to ``yf.Ticker``.
        start_date: Start of the history window.
        end_date: End of the history window.
        future_days: Calendar days to extend the trend beyond the last
            observation (default: ten years).

    Returns:
        The Plotly figure.

    Raises:
        ValueError: If fewer than two positive closing prices are available.
    """
    stock = yf.Ticker(ticker)
    data = stock.history(start=start_date, end=end_date)

    # log() needs strictly positive prices and the fit needs two points.
    data = data[data['Close'] > 0]
    if len(data) < 2:
        raise ValueError(f"Not enough price history for {ticker} to fit a trend.")

    x = (data.index - data.index[0]).days
    y = np.log(data['Close'])
    slope, intercept = np.polyfit(x, y, 1)

    # x counts calendar days, so the horizon must be measured from the last
    # calendar-day offset -- not from the number of trading rows, which is
    # smaller and would cut the projection short.
    all_days = np.arange(int(x[-1]) + 1 + future_days)
    log_trend = np.exp(intercept + slope * all_days)

    inner_upper_band = log_trend * 2
    inner_lower_band = log_trend / 2
    outer_upper_band = log_trend * 4
    outer_lower_band = log_trend / 4

    extended_dates = pd.date_range(start=data.index[0], periods=len(all_days), freq='D')

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=data.index, y=data['Close'], mode='lines', name='Close Price', line=dict(color='blue')))

    fig.add_trace(go.Scatter(x=extended_dates, y=log_trend, mode='lines', name='Log Trend', line=dict(color='red')))
    fig.add_trace(go.Scatter(x=extended_dates, y=inner_upper_band, mode='lines', name='Inner Upper Band', line=dict(color='green')))
    fig.add_trace(go.Scatter(x=extended_dates, y=inner_lower_band, mode='lines', name='Inner Lower Band', line=dict(color='green')))
    fig.add_trace(go.Scatter(x=extended_dates, y=outer_upper_band, mode='lines', name='Outer Upper Band', line=dict(color='orange')))
    fig.add_trace(go.Scatter(x=extended_dates, y=outer_lower_band, mode='lines', name='Outer Lower Band', line=dict(color='orange')))

    fig.update_layout(
        title=f'{ticker} Stock Price (Logarithmic Scale) with Extended Trend Lines and Outer Bands',
        xaxis_title='Date',
        yaxis_title='Price (Log Scale)',
        yaxis_type="log",
        legend=dict(x=0.01, y=0.99, bgcolor='rgba(255, 255, 255, 0.8)'),
        hovermode='x unified',
        height=800
    )

    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            buttons=list([
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all")
            ])
        )
    )

    return fig
438
+
439
def _analysis_failure(message):
    """Return the 7-item result tuple used to report a failed analysis."""
    return (message,) + (None,) * 6


def analyze_stock(ticker, start_date, end_date, use_ai_assistant):
    """Run the full analysis pipeline for one ticker.

    Fetches fundamentals and prices, trains and evaluates the CatBoost
    fair-value model, computes prediction bands, feature importances, SHAP
    values and monthly seasonality, and optionally requests a GPT write-up.

    NOTE(review): the train/test split is a random shuffle of forward-filled
    daily rows, so test rows are near-duplicates of training rows and the
    reported R-squared is likely optimistic -- confirm this is acceptable.

    Args:
        ticker: Stock symbol.
        start_date: Start of the analysis window.
        end_date: End of the analysis window.
        use_ai_assistant: Whether to request the GPT analysis.

    Returns:
        A 7-tuple ``(fair_price_html, fig, shap_fig, seasonality_fig,
        seasonality_html, gpt_analysis, log_chart)``. On failure the first
        item is an error message and the remaining six are ``None``.
    """
    try:
        print(f"Starting analysis for {ticker} from {start_date} to {end_date}")

        end_date_dt = end_date

        print("Fetching financial data...")
        financial_data = get_financial_data(ticker, end_date_dt)
        print("Fetching earnings dates...")
        earnings_dates = get_earnings_dates(ticker)
        print("Fetching earnings data...")
        earnings_data = get_earnings_data(ticker)
        print("Processing financial data...")
        quarterly_df = process_financial_data(financial_data, earnings_dates, earnings_data)
        print("Downloading stock data...")
        stock_df = get_stock_data(ticker, start_date, end_date)

        if quarterly_df.empty:
            return _analysis_failure("No financial data available for processing.")

        print(f"Quarterly data shape: {quarterly_df.shape}")
        print(f"Stock data shape: {stock_df.shape}")

        print("Preparing data for analysis...")
        X_scaled, y_scaled, dates, scaler_X, scaler_y = prepare_data(quarterly_df, stock_df, end_date_dt)

        if X_scaled is None or y_scaled is None:
            return _analysis_failure("Not enough data for model training.")

        print(f"Prepared data shape: X: {X_scaled.shape}, y: {y_scaled.shape}")
        print(f"X column names: {X_scaled.columns.tolist()}")

        print(f"Final number of features: {X_scaled.shape[1]}")
        print("Data prepared successfully. Starting model training...")

        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

        print("Training CatBoost model...")
        model = train_catboost_model(X_train, X_test, y_train, y_test)

        print("Evaluating model performance...")
        r2 = evaluate_model(model, X_test, y_test, scaler_y)

        if r2 < 0.5:
            return _analysis_failure("Model performance is poor. Re-evaluate features or model parameters.")

        print("Model trained successfully. Calculating fair values with conformal prediction...")
        fair_values, lower_bound, upper_bound = conformal_prediction(model, X_train, y_train, X_scaled, scaler_y)

        print("Plotting results...")
        fig = plot_results(dates, y_scaled, fair_values, lower_bound, upper_bound, scaler_y)

        print("Calculating feature importance...")
        feature_importance = model.feature_importances_
        feature_importance_df = pd.DataFrame({'feature': X_scaled.columns, 'importance': feature_importance})
        feature_importance_df = feature_importance_df.sort_values('importance', ascending=False)
        print("\nTop 10 most important features:")
        print(feature_importance_df.head(10))

        print("\nCalculating SHAP values for feature importance...")
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_scaled)

        shap_fig = plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values, X_scaled, plot_type="bar", show=False)
        plt.title("SHAP Feature Importance")
        plt.tight_layout()

        seasonality = get_monthly_seasonality(ticker, start_date, end_date)
        seasonality_fig = plot_monthly_seasonality(seasonality, ticker, start_date, end_date)

        # Read the clock once so the month numbers and labels cannot disagree.
        now = datetime.now()
        current_month = now.month
        next_month = (current_month % 12) + 1
        current_month_name = now.strftime('%B')
        # Derive the label from the month number: "now + 31 days" skips
        # February when run at the end of January.
        next_month_name = datetime(2000, next_month, 1).strftime('%B')

        current_month_return = seasonality.loc[current_month, 'Mean Change%'] * 100
        next_month_return = seasonality.loc[next_month, 'Mean Change%'] * 100
        current_month_win_rate = seasonality.loc[current_month, 'Positive Periods'] * 100
        next_month_win_rate = seasonality.loc[next_month, 'Positive Periods'] * 100

        seasonality_text = (
            "\n"
            f'<h2 style="margin-bottom: 15px;">Seasonality Analysis ({start_date} to {end_date})</h2>\n'
            f"<h3>Current month ({current_month_name}):</h3>\n"
            f"<p>Average return: {current_month_return:.2f}%</p>\n"
            f"<p>Probability of positive return: {current_month_win_rate:.1f}%</p>\n"
            f"<h3>Next month ({next_month_name}):</h3>\n"
            f"<p>Average return: {next_month_return:.2f}%</p>\n"
            f"<p>Probability of positive return: {next_month_win_rate:.1f}%</p>\n"
        )

        latest_close = stock_df['Close'].iloc[-1]
        latest_fair_value = fair_values[-1]
        latest_lower_bound = lower_bound[-1]
        latest_upper_bound = upper_bound[-1]
        top_features_table = feature_importance_df.head(10).to_string(index=False)

        fair_price_text = (
            "\n"
            '<h2 style="margin-bottom: 15px;">Fair Price Analysis</h2>\n'
            f"<p><strong>Current Price:</strong> ${latest_close:.2f}</p>\n"
            f"<p><strong>Estimated Fair Value:</strong> ${latest_fair_value:.2f}</p>\n"
            f"<p><strong>Price Prediction Range:</strong> ${latest_lower_bound:.2f} to ${latest_upper_bound:.2f}</p>\n"
            f"<p><strong>R-squared Score:</strong> {r2:.4f}</p>\n"
            '<h3 style="margin-top: 20px;">Top 10 most important features for fair value prediction:</h3>\n'
            f"<pre>{top_features_table}</pre>\n"
        )

        # Determine background color and percentage change
        percentage_change = ((latest_fair_value - latest_close) / latest_close) * 100
        background_color = "#d4edda" if percentage_change > 0 else "#f8d7da"
        fair_price_html = create_color_box(fair_price_text, background_color, percentage_change)

        # Format the seasonality analysis results
        current_month_color = "#d4edda" if current_month_return > 0 else "#f8d7da"
        next_month_color = "#d4edda" if next_month_return > 0 else "#f8d7da"
        seasonality_html = create_gradient_box(seasonality_text, current_month_color, next_month_color, current_month_return, next_month_return)

        # Generate logarithmic chart
        log_chart = plot_interactive_logarithmic_stock_chart(ticker, start_date, end_date)

        # Get GPT analysis if requested
        gpt_analysis = get_gpt_analysis(ticker, financial_data) if use_ai_assistant else "AI assistant analysis not requested."

        return fair_price_html, fig, shap_fig, seasonality_fig, seasonality_html, gpt_analysis, log_chart

    except Exception as e:
        # Top-level boundary: report any failure to the UI instead of crashing.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return _analysis_failure(error_message)
565
+
566
+ # Streamlit app
567
def main():
    """Render the input form and run the analysis when the form is submitted."""
    st.set_page_config(page_title="Advanced Stock Analysis", layout="wide")
    st.title("Advanced Stock Analysis App")
    st.markdown("Enter a stock ticker and date range to perform comprehensive stock analysis.")

    col1, col2, col3 = st.columns(3)
    with col1:
        ticker = st.text_input("Stock Ticker", value="MSFT")
    with col2:
        start_date = st.date_input("Start Date", value=datetime(2015, 1, 1))
    with col3:
        end_date = st.date_input("End Date", value=datetime.now())

    use_ai_assistant = st.checkbox("Use AI Assistant")

    if st.button("Submit", type="primary"):
        # Reject obviously unusable input up front rather than letting it
        # surface as an opaque downstream API or pandas error.
        ticker = ticker.strip()
        if not ticker:
            st.error("Please enter a stock ticker.")
            return
        if start_date >= end_date:
            st.error("Start date must be before end date.")
            return

        with st.spinner("Analyzing..."):
            results = analyze_stock(ticker, start_date, end_date, use_ai_assistant)
        display_results(results)
586
+
587
def display_results(results):
    """Render the analysis results, or an error message, in the Streamlit page.

    Args:
        results: Either an error string, or a tuple whose first item is the
            fair-price HTML (or an error message) followed by the figures and
            texts produced by ``analyze_stock``. A tuple in which every item
            after the first is ``None`` is treated as a failure report.
    """
    if isinstance(results, str):  # Error occurred
        st.error(results)
        return

    # analyze_stock reports failures as (message, None, None, ...); showing
    # them as an error avoids unpacking a wrong-length tuple or passing None
    # to the chart widgets.
    if not results or all(item is None for item in results[1:]):
        st.error(results[0] if results else "Analysis returned no results.")
        return

    fair_price_html, fig, shap_fig, seasonality_fig, seasonality_html, gpt_analysis, log_chart = results

    st.subheader("Fair Price Analysis")
    st.markdown(fair_price_html, unsafe_allow_html=True)

    st.subheader("Fair Price Prediction")
    st.plotly_chart(fig, use_container_width=True)

    col1, col2 = st.columns(2)
    with col1:
        st.subheader("SHAP Feature Importance")
        st.pyplot(shap_fig)
        # Release the matplotlib figure; otherwise one figure per run stays
        # registered with pyplot for the life of the process.
        plt.close(shap_fig)
    with col2:
        st.subheader("Monthly Seasonality")
        st.plotly_chart(seasonality_fig, use_container_width=True)

    st.markdown(seasonality_html, unsafe_allow_html=True)

    if gpt_analysis != "AI assistant analysis not requested.":
        st.subheader("GPT Assistant Analysis")
        st.text_area("Analysis", value=gpt_analysis, height=300)

    st.subheader("Logarithmic Stock Chart")
    st.plotly_chart(log_chart, use_container_width=True)
616
+
617
# Run the Streamlit app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()