Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,53 +15,35 @@ import shap
|
|
| 15 |
import ta
|
| 16 |
import matplotlib.pyplot as plt
|
| 17 |
import warnings
|
| 18 |
-
import colorsys
|
| 19 |
import openai
|
| 20 |
|
| 21 |
warnings.filterwarnings('ignore')
|
| 22 |
|
|
|
|
| 23 |
import os  # imported here so this configuration block is self-contained

# SECURITY: API credentials must never be committed to source control.
# They are read from the environment instead (on Hugging Face Spaces, add
# them as Space secrets, which are exposed to the app as environment
# variables). Any key that was previously committed in this file should be
# treated as leaked and revoked with the provider.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY
# Alpha Vantage API key (empty string when unset, so the f-string URLs built
# from it stay well-formed and the API answers with its own error payload).
ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY", "")
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
</div>
|
| 48 |
-
<div style="flex: 0 0 40%; display: flex; justify-content: center; align-items: center;">
|
| 49 |
-
<div style="font-size: 72px; font-weight: bold; color: {'#006400' if percentage >= 0 else '#8B0000'};">
|
| 50 |
-
{'+' if percentage >= 0 else ''}{percentage:.2f}%
|
| 51 |
-
</div>
|
| 52 |
-
</div>
|
| 53 |
-
</div>
|
| 54 |
-
"""
|
| 55 |
-
|
| 56 |
-
def create_gradient_box(text, start_color, end_color, start_percentage, end_percentage):
    """Wrap ``text`` in an HTML ``<div>`` with a left-to-right gradient background.

    Each colour is first passed through ``adjust_color_intensity`` together
    with its percentage (start colour first, then end colour); the two
    results become the gradient stops.

    Args:
        text: HTML/text placed inside the box as-is (it is not escaped).
        start_color: Colour handed to ``adjust_color_intensity`` for the left stop.
        end_color: Colour handed to ``adjust_color_intensity`` for the right stop.
        start_percentage: Value passed along with ``start_color``.
        end_percentage: Value passed along with ``end_color``.

    Returns:
        The HTML snippet as a string.
    """
    # NOTE(review): adjust_color_intensity is defined elsewhere in the file;
    # presumably it scales the colour by the percentage -- confirm there.
    left_stop, right_stop = (
        adjust_color_intensity(color, percentage)
        for color, percentage in (
            (start_color, start_percentage),
            (end_color, end_percentage),
        )
    )
    return f"""
<div style="background: linear-gradient(to right, {left_stop}, {right_stop}); padding: 20px; border-radius: 10px; margin-bottom: 20px; font-size: 16px; line-height: 1.6;">
{text}
</div>
"""
|
| 64 |
-
|
| 65 |
def get_financial_data(ticker, end_date):
|
| 66 |
base_url = "https://www.alphavantage.co/query"
|
| 67 |
functions = ['INCOME_STATEMENT', 'BALANCE_SHEET', 'CASH_FLOW']
|
|
@@ -93,6 +75,7 @@ def get_financial_data(ticker, end_date):
|
|
| 93 |
|
| 94 |
return data
|
| 95 |
|
|
|
|
| 96 |
def get_earnings_dates(ticker):
|
| 97 |
url = f"https://www.alphavantage.co/query?function=EARNINGS&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
| 98 |
response = requests.get(url)
|
|
@@ -106,6 +89,7 @@ def get_earnings_dates(ticker):
|
|
| 106 |
|
| 107 |
return earnings_dates
|
| 108 |
|
|
|
|
| 109 |
def get_earnings_data(ticker):
|
| 110 |
url = f"https://www.alphavantage.co/query?function=EARNINGS&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
| 111 |
response = requests.get(url)
|
|
@@ -123,6 +107,7 @@ def get_earnings_data(ticker):
|
|
| 123 |
|
| 124 |
return df
|
| 125 |
|
|
|
|
| 126 |
def process_financial_data(data, earnings_dates, earnings_data):
|
| 127 |
quarterly_data = {}
|
| 128 |
|
|
@@ -146,6 +131,7 @@ def process_financial_data(data, earnings_dates, earnings_data):
|
|
| 146 |
|
| 147 |
return df
|
| 148 |
|
|
|
|
| 149 |
def get_stock_data(ticker, start_date, end_date):
|
| 150 |
df = yf.download(ticker, start=start_date, end=end_date)
|
| 151 |
|
|
@@ -170,8 +156,6 @@ def get_stock_data(ticker, start_date, end_date):
|
|
| 170 |
return df
|
| 171 |
|
| 172 |
def add_financial_ratios(X):
|
| 173 |
-
print("Adding financial ratios...")
|
| 174 |
-
|
| 175 |
def safe_divide(a, b):
|
| 176 |
return np.where(b != 0, a / b, np.nan)
|
| 177 |
|
|
@@ -181,14 +165,9 @@ def add_financial_ratios(X):
|
|
| 181 |
X['ROE'] = safe_divide(X['INCOME_STATEMENT_netIncome'], X['BALANCE_SHEET_totalShareholderEquity'])
|
| 182 |
X['ROA'] = safe_divide(X['INCOME_STATEMENT_netIncome'], X['BALANCE_SHEET_totalAssets'])
|
| 183 |
|
| 184 |
-
print("Financial ratios added.")
|
| 185 |
return X
|
| 186 |
|
| 187 |
def prepare_data(quarterly_df, stock_df, end_date):
|
| 188 |
-
print("Starting data preparation...")
|
| 189 |
-
print(f"Initial quarterly_df shape: {quarterly_df.shape}")
|
| 190 |
-
print(f"Initial stock_df shape: {stock_df.shape}")
|
| 191 |
-
|
| 192 |
quarterly_df.index = pd.to_datetime(quarterly_df.index).date
|
| 193 |
stock_df.index = pd.to_datetime(stock_df.index).date
|
| 194 |
|
|
@@ -205,8 +184,6 @@ def prepare_data(quarterly_df, stock_df, end_date):
|
|
| 205 |
|
| 206 |
merged_df = merged_df.dropna(subset=['Close'])
|
| 207 |
|
| 208 |
-
print(f"Merged dataframe shape: {merged_df.shape}")
|
| 209 |
-
|
| 210 |
if merged_df.empty:
|
| 211 |
raise ValueError("No overlapping data between stock prices and financial statements.")
|
| 212 |
|
|
@@ -226,9 +203,6 @@ def prepare_data(quarterly_df, stock_df, end_date):
|
|
| 226 |
X_scaled = pd.DataFrame(scaler_X.fit_transform(X), columns=X.columns, index=X.index)
|
| 227 |
y_scaled = pd.Series(scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten(), index=y.index)
|
| 228 |
|
| 229 |
-
print(f"Final data shape: X: {X_scaled.shape}, y: {y_scaled.shape}")
|
| 230 |
-
print(f"Date range: {X_scaled.index.min()} to {X_scaled.index.max()}")
|
| 231 |
-
|
| 232 |
return X_scaled, y_scaled, merged_df.index, scaler_X, scaler_y
|
| 233 |
|
| 234 |
def train_catboost_model(X_train, X_test, y_train, y_test):
|
|
@@ -250,8 +224,6 @@ def evaluate_model(model, X_test, y_test, scaler_y):
|
|
| 250 |
|
| 251 |
mse = mean_squared_error(y_test_unscaled, y_pred)
|
| 252 |
r2 = r2_score(y_test_unscaled, y_pred)
|
| 253 |
-
print(f"Mean Squared Error: {mse}")
|
| 254 |
-
print(f"R-squared Score: {r2}")
|
| 255 |
return r2
|
| 256 |
|
| 257 |
def conformal_prediction(model, X_train, y_train, X_test, scaler_y, alpha=0.1):
|
|
@@ -293,6 +265,7 @@ def plot_results(dates, y, fair_values, lower_bound, upper_bound, scaler_y):
|
|
| 293 |
|
| 294 |
return fig
|
| 295 |
|
|
|
|
| 296 |
def get_monthly_seasonality(ticker, start_date, end_date):
|
| 297 |
data = yf.download(ticker, start=start_date, end=end_date)
|
| 298 |
monthly_data = data['Adj Close'].resample('M').last()
|
|
@@ -374,7 +347,7 @@ def get_gpt_analysis(ticker, financial_data):
|
|
| 374 |
analysis = response.choices[0].message['content'].strip()
|
| 375 |
return analysis
|
| 376 |
except Exception as e:
|
| 377 |
-
|
| 378 |
return "GPT Assistant analysis failed. Please check the API integration."
|
| 379 |
|
| 380 |
def plot_interactive_logarithmic_stock_chart(ticker, start_date, end_date):
|
|
@@ -432,135 +405,76 @@ def plot_interactive_logarithmic_stock_chart(ticker, start_date, end_date):
|
|
| 432 |
return fig
|
| 433 |
|
| 434 |
def analyze_stock(ticker, start_date, end_date, use_ai_assistant):
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
-
print(f"Final number of features: {X_scaled.shape[1]}")
|
| 467 |
-
print("Data prepared successfully. Starting model training...")
|
| 468 |
-
|
| 469 |
-
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
|
| 470 |
-
|
| 471 |
-
print("Training CatBoost model...")
|
| 472 |
-
model = train_catboost_model(X_train, X_test, y_train, y_test)
|
| 473 |
-
|
| 474 |
-
print("Evaluating model performance...")
|
| 475 |
-
r2 = evaluate_model(model, X_test, y_test, scaler_y)
|
| 476 |
-
|
| 477 |
-
if r2 < 0.5:
|
| 478 |
-
return "Model performance is poor. Re-evaluate features or model parameters.", None, None, None, None, None, None, None, None
|
| 479 |
-
|
| 480 |
-
print("Model trained successfully. Calculating fair values with conformal prediction...")
|
| 481 |
-
fair_values, lower_bound, upper_bound = conformal_prediction(model, X_train, y_train, X_scaled, scaler_y)
|
| 482 |
-
|
| 483 |
-
print("Plotting results...")
|
| 484 |
-
fig = plot_results(dates, y_scaled, fair_values, lower_bound, upper_bound, scaler_y)
|
| 485 |
-
|
| 486 |
-
print("Calculating feature importance...")
|
| 487 |
-
feature_importance = model.feature_importances_
|
| 488 |
-
feature_importance_df = pd.DataFrame({'feature': X_scaled.columns, 'importance': feature_importance})
|
| 489 |
-
feature_importance_df = feature_importance_df.sort_values('importance', ascending=False)
|
| 490 |
-
print("\nTop 10 most important features:")
|
| 491 |
-
print(feature_importance_df.head(10))
|
| 492 |
-
|
| 493 |
-
print("\nCalculating SHAP values for feature importance...")
|
| 494 |
-
explainer = shap.TreeExplainer(model)
|
| 495 |
-
shap_values = explainer.shap_values(X_scaled)
|
| 496 |
-
|
| 497 |
-
shap_fig = plt.figure(figsize=(10, 6))
|
| 498 |
-
shap.summary_plot(shap_values, X_scaled, plot_type="bar", show=False)
|
| 499 |
-
plt.title("SHAP Feature Importance")
|
| 500 |
-
plt.tight_layout()
|
| 501 |
-
|
| 502 |
-
seasonality = get_monthly_seasonality(ticker, start_date, end_date)
|
| 503 |
-
seasonality_fig = plot_monthly_seasonality(seasonality, ticker, start_date, end_date)
|
| 504 |
-
|
| 505 |
-
current_month = datetime.now().month
|
| 506 |
-
next_month = (current_month % 12) + 1
|
| 507 |
-
|
| 508 |
-
current_month_return = seasonality.loc[current_month, 'Mean Change%'] * 100
|
| 509 |
-
next_month_return = seasonality.loc[next_month, 'Mean Change%'] * 100
|
| 510 |
-
current_month_win_rate = seasonality.loc[current_month, 'Positive Periods'] * 100
|
| 511 |
-
next_month_win_rate = seasonality.loc[next_month, 'Positive Periods'] * 100
|
| 512 |
-
|
| 513 |
-
seasonality_text = f"""
|
| 514 |
-
<h2 style="margin-bottom: 15px;">Seasonality Analysis ({start_date} to {end_date})</h2>
|
| 515 |
-
<h3>Current month ({datetime.now().strftime('%B')}):</h3>
|
| 516 |
-
<p>Average return: {current_month_return:.2f}%</p>
|
| 517 |
-
<p>Probability of positive return: {current_month_win_rate:.1f}%</p>
|
| 518 |
-
<h3>Next month ({(datetime.now() + timedelta(days=31)).strftime('%B')}):</h3>
|
| 519 |
-
<p>Average return: {next_month_return:.2f}%</p>
|
| 520 |
-
<p>Probability of positive return: {next_month_win_rate:.1f}%</p>
|
| 521 |
-
"""
|
| 522 |
-
|
| 523 |
-
latest_close = stock_df['Close'].iloc[-1]
|
| 524 |
-
latest_fair_value = fair_values[-1]
|
| 525 |
-
latest_lower_bound = lower_bound[-1]
|
| 526 |
-
latest_upper_bound = upper_bound[-1]
|
| 527 |
-
|
| 528 |
-
fair_price_text = f"""
|
| 529 |
-
<h2 style="margin-bottom: 15px;">Fair Price Analysis</h2>
|
| 530 |
-
<p><strong>Current Price:</strong> ${latest_close:.2f}</p>
|
| 531 |
-
<p><strong>Estimated Fair Value:</strong> ${latest_fair_value:.2f}</p>
|
| 532 |
-
<p><strong>Price Prediction Range:</strong> ${latest_lower_bound:.2f} to ${latest_upper_bound:.2f}</p>
|
| 533 |
-
<p><strong>R-squared Score:</strong> {r2:.4f}</p>
|
| 534 |
-
<h3 style="margin-top: 20px;">Top 10 most important features for fair value prediction:</h3>
|
| 535 |
-
<pre>{feature_importance_df.head(10).to_string(index=False)}</pre>
|
| 536 |
-
"""
|
| 537 |
-
|
| 538 |
-
# Determine background color and percentage change
|
| 539 |
-
percentage_change = ((latest_fair_value - latest_close) / latest_close) * 100
|
| 540 |
-
background_color = "#d4edda" if percentage_change > 0 else "#f8d7da"
|
| 541 |
-
fair_price_html = create_color_box(fair_price_text, background_color, percentage_change)
|
| 542 |
-
|
| 543 |
-
# Format the seasonality analysis results
|
| 544 |
-
current_month_color = "#d4edda" if current_month_return > 0 else "#f8d7da"
|
| 545 |
-
next_month_color = "#d4edda" if next_month_return > 0 else "#f8d7da"
|
| 546 |
-
seasonality_html = create_gradient_box(seasonality_text, current_month_color, next_month_color, current_month_return, next_month_return)
|
| 547 |
-
|
| 548 |
-
# Generate logarithmic chart
|
| 549 |
-
log_chart = plot_interactive_logarithmic_stock_chart(ticker, start_date, end_date)
|
| 550 |
-
|
| 551 |
-
# Get GPT analysis if requested
|
| 552 |
-
gpt_analysis = get_gpt_analysis(ticker, financial_data) if use_ai_assistant else "AI assistant analysis not requested."
|
| 553 |
-
|
| 554 |
-
return fair_price_html, fig, shap_fig, seasonality_fig, seasonality_html, gpt_analysis, log_chart
|
| 555 |
-
|
| 556 |
-
except Exception as e:
|
| 557 |
-
error_message = f"An error occurred: {str(e)}"
|
| 558 |
-
print(error_message)
|
| 559 |
-
return error_message, None, None, None, None, None, None
|
| 560 |
-
|
| 561 |
-
# Streamlit app
|
| 562 |
def main():
|
| 563 |
-
st.set_page_config(page_title="Advanced Stock Analysis", layout="wide")
|
| 564 |
st.title("Advanced Stock Analysis App")
|
| 565 |
st.markdown("Enter a stock ticker and date range to perform comprehensive stock analysis.")
|
| 566 |
|
|
@@ -572,42 +486,57 @@ def main():
|
|
| 572 |
with col3:
|
| 573 |
end_date = st.date_input("End Date", value=datetime.now())
|
| 574 |
|
| 575 |
-
use_ai_assistant = st.checkbox("Use AI Assistant")
|
| 576 |
-
|
| 577 |
-
if st.button("Submit", type="primary"):
|
| 578 |
-
with st.spinner("Analyzing..."):
|
| 579 |
-
results = analyze_stock(ticker, start_date, end_date, use_ai_assistant)
|
| 580 |
-
display_results(results)
|
| 581 |
-
|
| 582 |
-
def display_results(results):
|
| 583 |
-
if isinstance(results, str): # Error occurred
|
| 584 |
-
st.error(results)
|
| 585 |
-
return
|
| 586 |
-
|
| 587 |
-
fair_price_html, fig, shap_fig, seasonality_fig, seasonality_html, gpt_analysis, log_chart = results
|
| 588 |
|
| 589 |
-
st.
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
|
| 612 |
if __name__ == "__main__":
|
| 613 |
main()
|
|
|
|
| 15 |
import ta
|
| 16 |
import matplotlib.pyplot as plt
|
| 17 |
import warnings
|
|
|
|
| 18 |
import openai
|
| 19 |
|
| 20 |
warnings.filterwarnings('ignore')
|
| 21 |
|
| 22 |
+
import os  # imported here so this configuration block is self-contained

# Initialize the OpenAI client
# SECURITY: API credentials must never be committed to source control.
# They are read from the environment instead (on Hugging Face Spaces, add
# them as Space secrets, which are exposed to the app as environment
# variables). Any key that was previously committed in this file should be
# treated as leaked and revoked with the provider.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY
# Alpha Vantage API key (empty string when unset, so the f-string URLs built
# from it stay well-formed and the API answers with its own error payload).
ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY", "")
|
| 27 |
|
| 28 |
+
# Custom CSS injected once at import time: caps the main content width,
# pads alert boxes vertically and stretches buttons to full width.
# NOTE(review): `.reportview-container` looks like a container class from
# older Streamlit releases -- confirm it still matches the deployed version.
_CUSTOM_CSS = """
<style>
.reportview-container .main .block-container {
max-width: 1200px;
padding-top: 2rem;
padding-bottom: 2rem;
}
.stAlert > div {
padding-top: 1rem;
padding-bottom: 1rem;
}
.stButton>button {
width: 100%;
}
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to be rendered.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
| 45 |
+
|
| 46 |
+
@st.cache_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
def get_financial_data(ticker, end_date):
|
| 48 |
base_url = "https://www.alphavantage.co/query"
|
| 49 |
functions = ['INCOME_STATEMENT', 'BALANCE_SHEET', 'CASH_FLOW']
|
|
|
|
| 75 |
|
| 76 |
return data
|
| 77 |
|
| 78 |
+
@st.cache_data
|
| 79 |
def get_earnings_dates(ticker):
|
| 80 |
url = f"https://www.alphavantage.co/query?function=EARNINGS&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
| 81 |
response = requests.get(url)
|
|
|
|
| 89 |
|
| 90 |
return earnings_dates
|
| 91 |
|
| 92 |
+
@st.cache_data
|
| 93 |
def get_earnings_data(ticker):
|
| 94 |
url = f"https://www.alphavantage.co/query?function=EARNINGS&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
| 95 |
response = requests.get(url)
|
|
|
|
| 107 |
|
| 108 |
return df
|
| 109 |
|
| 110 |
+
@st.cache_data
|
| 111 |
def process_financial_data(data, earnings_dates, earnings_data):
|
| 112 |
quarterly_data = {}
|
| 113 |
|
|
|
|
| 131 |
|
| 132 |
return df
|
| 133 |
|
| 134 |
+
@st.cache_data
|
| 135 |
def get_stock_data(ticker, start_date, end_date):
|
| 136 |
df = yf.download(ticker, start=start_date, end=end_date)
|
| 137 |
|
|
|
|
| 156 |
return df
|
| 157 |
|
| 158 |
def add_financial_ratios(X):
|
|
|
|
|
|
|
| 159 |
def safe_divide(a, b):
|
| 160 |
return np.where(b != 0, a / b, np.nan)
|
| 161 |
|
|
|
|
| 165 |
X['ROE'] = safe_divide(X['INCOME_STATEMENT_netIncome'], X['BALANCE_SHEET_totalShareholderEquity'])
|
| 166 |
X['ROA'] = safe_divide(X['INCOME_STATEMENT_netIncome'], X['BALANCE_SHEET_totalAssets'])
|
| 167 |
|
|
|
|
| 168 |
return X
|
| 169 |
|
| 170 |
def prepare_data(quarterly_df, stock_df, end_date):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
quarterly_df.index = pd.to_datetime(quarterly_df.index).date
|
| 172 |
stock_df.index = pd.to_datetime(stock_df.index).date
|
| 173 |
|
|
|
|
| 184 |
|
| 185 |
merged_df = merged_df.dropna(subset=['Close'])
|
| 186 |
|
|
|
|
|
|
|
| 187 |
if merged_df.empty:
|
| 188 |
raise ValueError("No overlapping data between stock prices and financial statements.")
|
| 189 |
|
|
|
|
| 203 |
X_scaled = pd.DataFrame(scaler_X.fit_transform(X), columns=X.columns, index=X.index)
|
| 204 |
y_scaled = pd.Series(scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten(), index=y.index)
|
| 205 |
|
|
|
|
|
|
|
|
|
|
| 206 |
return X_scaled, y_scaled, merged_df.index, scaler_X, scaler_y
|
| 207 |
|
| 208 |
def train_catboost_model(X_train, X_test, y_train, y_test):
|
|
|
|
| 224 |
|
| 225 |
mse = mean_squared_error(y_test_unscaled, y_pred)
|
| 226 |
r2 = r2_score(y_test_unscaled, y_pred)
|
|
|
|
|
|
|
| 227 |
return r2
|
| 228 |
|
| 229 |
def conformal_prediction(model, X_train, y_train, X_test, scaler_y, alpha=0.1):
|
|
|
|
| 265 |
|
| 266 |
return fig
|
| 267 |
|
| 268 |
+
@st.cache_data
|
| 269 |
def get_monthly_seasonality(ticker, start_date, end_date):
|
| 270 |
data = yf.download(ticker, start=start_date, end=end_date)
|
| 271 |
monthly_data = data['Adj Close'].resample('M').last()
|
|
|
|
| 347 |
analysis = response.choices[0].message['content'].strip()
|
| 348 |
return analysis
|
| 349 |
except Exception as e:
|
| 350 |
+
st.error(f"OpenAI API error: {e}")
|
| 351 |
return "GPT Assistant analysis failed. Please check the API integration."
|
| 352 |
|
| 353 |
def plot_interactive_logarithmic_stock_chart(ticker, start_date, end_date):
|
|
|
|
| 405 |
return fig
|
| 406 |
|
| 407 |
def _feature_importance_table(model, feature_names):
    """Return the model's feature importances as a DataFrame, largest first."""
    table = pd.DataFrame({'feature': feature_names, 'importance': model.feature_importances_})
    return table.sort_values('importance', ascending=False)


def _build_shap_figure(model, X_scaled):
    """Compute SHAP values for ``model`` on ``X_scaled`` and return the bar-summary figure."""
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_scaled)

    # The figure is created first and returned to the caller; show=False keeps
    # shap from displaying the plot itself.
    shap_fig, _ = plt.subplots(figsize=(10, 6))
    shap.summary_plot(shap_values, X_scaled, plot_type="bar", show=False)
    plt.title("SHAP Feature Importance")
    plt.tight_layout()
    return shap_fig


def analyze_stock(ticker, start_date, end_date, use_ai_assistant):
    """Run the full analysis pipeline for one ticker and collect the results.

    The pipeline fetches financial statements, earnings and price data, trains
    a CatBoost model, derives fair values with conformal prediction bounds,
    and builds the feature-importance, SHAP, seasonality and logarithmic
    charts. Progress and problems are reported through Streamlit
    (``st.spinner``, ``st.warning``, ``st.error``).

    Args:
        ticker: Stock symbol to analyse.
        start_date: First day of the price history window.
        end_date: Last day of the price history window.
        use_ai_assistant: When true, ``get_gpt_analysis`` is called; otherwise
            the result carries the text "AI assistant analysis not requested.".

    Returns:
        A dict with the keys ``ticker``, ``fair_values``, ``lower_bound``,
        ``upper_bound``, ``r2``, ``fig``, ``feature_importance_df``,
        ``shap_fig``, ``seasonality``, ``seasonality_fig``, ``gpt_analysis``,
        ``log_chart``, ``latest_close``, ``latest_fair_value``,
        ``latest_lower_bound`` and ``latest_upper_bound``; or ``None`` when
        the inputs are invalid, no data is available, or any step fails (an
        error message has already been shown in that case).
    """
    import logging  # function-scope import: the file's import block is not fully visible here

    with st.spinner('Analyzing stock data...'):
        try:
            # Reject obviously unusable input before spending API calls on it.
            if not ticker or not str(ticker).strip():
                st.error("Please enter a stock ticker.")
                return None
            if start_date >= end_date:
                st.error("Start date must be earlier than end date.")
                return None

            financial_data = get_financial_data(ticker, end_date)
            earnings_dates = get_earnings_dates(ticker)
            earnings_data = get_earnings_data(ticker)
            quarterly_df = process_financial_data(financial_data, earnings_dates, earnings_data)
            stock_df = get_stock_data(ticker, start_date, end_date)

            if quarterly_df.empty:
                st.error("No financial data available for processing.")
                return None
            # Without prices the later `stock_df['Close'].iloc[-1]` would fail
            # with an opaque IndexError, so report the real cause here.
            if stock_df.empty:
                st.error("No price data available for the selected ticker and date range.")
                return None

            X_scaled, y_scaled, dates, scaler_X, scaler_y = prepare_data(quarterly_df, stock_df, end_date)

            X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

            model = train_catboost_model(X_train, X_test, y_train, y_test)

            r2 = evaluate_model(model, X_test, y_test, scaler_y)

            # A weak fit is surfaced as a warning; the analysis still continues.
            if r2 < 0.5:
                st.warning("Model performance is poor. Results may not be reliable.")

            # Fair values are predicted for the whole data set, not just the test split.
            fair_values, lower_bound, upper_bound = conformal_prediction(model, X_train, y_train, X_scaled, scaler_y)

            fig = plot_results(dates, y_scaled, fair_values, lower_bound, upper_bound, scaler_y)

            feature_importance_df = _feature_importance_table(model, X_scaled.columns)
            shap_fig = _build_shap_figure(model, X_scaled)

            seasonality = get_monthly_seasonality(ticker, start_date, end_date)
            seasonality_fig = plot_monthly_seasonality(seasonality, ticker, start_date, end_date)

            log_chart = plot_interactive_logarithmic_stock_chart(ticker, start_date, end_date)

            gpt_analysis = get_gpt_analysis(ticker, financial_data) if use_ai_assistant else "AI assistant analysis not requested."

            return {
                'ticker': ticker,
                'fair_values': fair_values,
                'lower_bound': lower_bound,
                'upper_bound': upper_bound,
                'r2': r2,
                'fig': fig,
                'feature_importance_df': feature_importance_df,
                'shap_fig': shap_fig,
                'seasonality': seasonality,
                'seasonality_fig': seasonality_fig,
                'gpt_analysis': gpt_analysis,
                'log_chart': log_chart,
                'latest_close': stock_df['Close'].iloc[-1],
                'latest_fair_value': fair_values[-1],
                'latest_lower_bound': lower_bound[-1],
                'latest_upper_bound': upper_bound[-1],
            }

        except Exception as e:
            # Top-level UI boundary: keep the app alive, show the message to
            # the user, and keep the traceback in the logs for debugging.
            logging.getLogger(__name__).exception("analyze_stock failed for ticker %r", ticker)
            st.error(f"An error occurred: {str(e)}")
            return None
|
| 476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
def main():
|
|
|
|
| 478 |
st.title("Advanced Stock Analysis App")
|
| 479 |
st.markdown("Enter a stock ticker and date range to perform comprehensive stock analysis.")
|
| 480 |
|
|
|
|
| 486 |
with col3:
|
| 487 |
end_date = st.date_input("End Date", value=datetime.now())
|
| 488 |
|
| 489 |
+
use_ai_assistant = st.checkbox("Use AI Assistant for Analysis")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
|
| 491 |
+
if st.button("Analyze Stock"):
|
| 492 |
+
results = analyze_stock(ticker, start_date, end_date, use_ai_assistant)
|
| 493 |
+
|
| 494 |
+
if results:
|
| 495 |
+
st.subheader("Fair Price Analysis")
|
| 496 |
+
col1, col2, col3 = st.columns(3)
|
| 497 |
+
with col1:
|
| 498 |
+
st.metric("Current Price", f"${results['latest_close']:.2f}")
|
| 499 |
+
with col2:
|
| 500 |
+
st.metric("Estimated Fair Value", f"${results['latest_fair_value']:.2f}",
|
| 501 |
+
f"{((results['latest_fair_value'] - results['latest_close']) / results['latest_close'] * 100):.2f}%")
|
| 502 |
+
with col3:
|
| 503 |
+
st.metric("R-squared Score", f"{results['r2']:.4f}")
|
| 504 |
+
|
| 505 |
+
st.metric("Price Prediction Range",
|
| 506 |
+
f"${results['latest_lower_bound']:.2f} to ${results['latest_upper_bound']:.2f}")
|
| 507 |
+
|
| 508 |
+
st.subheader("Fair Price Prediction")
|
| 509 |
+
st.plotly_chart(results['fig'], use_container_width=True)
|
| 510 |
+
|
| 511 |
+
col1, col2 = st.columns(2)
|
| 512 |
+
with col1:
|
| 513 |
+
st.subheader("SHAP Feature Importance")
|
| 514 |
+
st.pyplot(results['shap_fig'])
|
| 515 |
+
with col2:
|
| 516 |
+
st.subheader("Top 10 Important Features")
|
| 517 |
+
st.dataframe(results['feature_importance_df'].head(10))
|
| 518 |
+
|
| 519 |
+
st.subheader("Monthly Seasonality")
|
| 520 |
+
st.plotly_chart(results['seasonality_fig'], use_container_width=True)
|
| 521 |
+
|
| 522 |
+
current_month = datetime.now().month
|
| 523 |
+
next_month = (current_month % 12) + 1
|
| 524 |
+
col1, col2 = st.columns(2)
|
| 525 |
+
with col1:
|
| 526 |
+
st.metric(f"Current month ({datetime.now().strftime('%B')})",
|
| 527 |
+
f"{results['seasonality'].loc[current_month, 'Mean Change%']*100:.2f}%",
|
| 528 |
+
f"{results['seasonality'].loc[current_month, 'Positive Periods']*100:.1f}% probability of positive return")
|
| 529 |
+
with col2:
|
| 530 |
+
st.metric(f"Next month ({(datetime.now() + timedelta(days=31)).strftime('%B')})",
|
| 531 |
+
f"{results['seasonality'].loc[next_month, 'Mean Change%']*100:.2f}%",
|
| 532 |
+
f"{results['seasonality'].loc[next_month, 'Positive Periods']*100:.1f}% probability of positive return")
|
| 533 |
+
|
| 534 |
+
if results['gpt_analysis'] != "AI assistant analysis not requested.":
|
| 535 |
+
st.subheader("AI Assistant Analysis")
|
| 536 |
+
st.text_area("Analysis", value=results['gpt_analysis'], height=300)
|
| 537 |
+
|
| 538 |
+
st.subheader("Logarithmic Stock Chart")
|
| 539 |
+
st.plotly_chart(results['log_chart'], use_container_width=True)
|
| 540 |
|
| 541 |
if __name__ == "__main__":
|
| 542 |
main()
|