Spaces:
Sleeping
Sleeping
Faham
committed on
Commit
·
a697707
1
Parent(s):
a8615dc
UPDATE: prophet to ridge regression
Browse files- Home.py +455 -124
- README.md +80 -6
- streamlit_app.py +0 -0
Home.py
CHANGED
|
@@ -13,19 +13,18 @@ from bs4 import BeautifulSoup
|
|
| 13 |
import importlib.util
|
| 14 |
import requests
|
| 15 |
import holidays
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
Prophet = None
|
| 22 |
from dotenv import load_dotenv
|
| 23 |
from openai import OpenAI
|
| 24 |
from mcp.client.session import ClientSession
|
| 25 |
from mcp.client.stdio import stdio_client
|
| 26 |
from mcp import StdioServerParameters, types
|
|
|
|
| 27 |
|
| 28 |
-
# Import resource monitoring
|
| 29 |
try:
|
| 30 |
from resource_monitor import (
|
| 31 |
start_resource_monitoring,
|
|
@@ -397,18 +396,25 @@ async def get_stock_data(ticker: str) -> str:
|
|
| 397 |
return f"Error getting stock data for {ticker}: {e}"
|
| 398 |
|
| 399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
def create_stock_chart(ticker: str):
|
| 401 |
-
"""Create an interactive stock price chart with
|
| 402 |
try:
|
| 403 |
-
#
|
| 404 |
-
if Prophet is None:
|
| 405 |
-
st.error("Prophet is not installed. Please install it with: uv add prophet")
|
| 406 |
-
return create_basic_stock_chart(ticker)
|
| 407 |
-
|
| 408 |
-
# Get stock data - 1 year for training Prophet
|
| 409 |
with st.spinner(f"📊 Fetching stock data for {ticker}..."):
|
| 410 |
stock = yf.Ticker(ticker)
|
| 411 |
-
hist_data = stock.history(period="
|
| 412 |
|
| 413 |
# Track yfinance API call
|
| 414 |
if RESOURCE_MONITORING_AVAILABLE:
|
|
@@ -418,108 +424,437 @@ def create_stock_chart(ticker: str):
|
|
| 418 |
st.warning(f"No data available for {ticker}")
|
| 419 |
return None
|
| 420 |
|
| 421 |
-
# Prepare data for
|
| 422 |
df = hist_data.reset_index()
|
| 423 |
|
| 424 |
-
#
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
|
| 431 |
-
|
| 432 |
-
|
| 433 |
|
| 434 |
-
#
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
) # Prophet requires timezone-naive dates
|
| 438 |
-
df["y"] = df["Close"] # Prophet requires 'y' column for values
|
| 439 |
|
| 440 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
start_time = time.time()
|
| 442 |
-
with st.spinner(f"Training
|
| 443 |
-
#
|
| 444 |
-
|
| 445 |
-
yearly_seasonality=True,
|
| 446 |
-
weekly_seasonality=True,
|
| 447 |
-
daily_seasonality=False,
|
| 448 |
-
changepoint_prior_scale=0.01, # Reduced for smoother trends
|
| 449 |
-
seasonality_prior_scale=10.0, # Increased seasonality strength
|
| 450 |
-
seasonality_mode="multiplicative",
|
| 451 |
-
interval_width=0.8, # Tighter confidence intervals
|
| 452 |
-
mcmc_samples=0, # Disable MCMC for faster training
|
| 453 |
-
)
|
| 454 |
|
| 455 |
-
#
|
| 456 |
-
|
|
|
|
|
|
|
| 457 |
|
| 458 |
-
|
|
|
|
| 459 |
|
| 460 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
-
#
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
-
#
|
| 472 |
-
|
|
|
|
| 473 |
|
| 474 |
-
#
|
| 475 |
-
|
| 476 |
-
|
|
|
|
| 477 |
)
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
additional_days_needed = 30 - len(forecast_future)
|
| 486 |
-
future_extended = model.make_future_dataframe(
|
| 487 |
-
periods=30 + additional_days_needed
|
| 488 |
)
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
fig = go.Figure()
|
| 512 |
|
| 513 |
-
#
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
]
|
| 519 |
fig.add_trace(
|
| 520 |
go.Scatter(
|
| 521 |
-
x=
|
| 522 |
-
y=
|
| 523 |
mode="lines+markers",
|
| 524 |
name=f"{ticker} Historical Price (Last Year)",
|
| 525 |
line=dict(color="#1f77b4", width=2),
|
|
@@ -527,11 +862,11 @@ def create_stock_chart(ticker: str):
|
|
| 527 |
)
|
| 528 |
)
|
| 529 |
|
| 530 |
-
# Add
|
| 531 |
fig.add_trace(
|
| 532 |
go.Scatter(
|
| 533 |
-
x=
|
| 534 |
-
y=
|
| 535 |
mode="lines+markers",
|
| 536 |
name=f"{ticker} Future Predictions (Next 30 Days)",
|
| 537 |
line=dict(color="#ff7f0e", width=2, dash="dash"),
|
|
@@ -539,23 +874,9 @@ def create_stock_chart(ticker: str):
|
|
| 539 |
)
|
| 540 |
)
|
| 541 |
|
| 542 |
-
# Add confidence intervals for future predictions
|
| 543 |
-
fig.add_trace(
|
| 544 |
-
go.Scatter(
|
| 545 |
-
x=forecast_future["ds"].tolist() + forecast_future["ds"].tolist()[::-1],
|
| 546 |
-
y=forecast_future["yhat_upper"].tolist()
|
| 547 |
-
+ forecast_future["yhat_lower"].tolist()[::-1],
|
| 548 |
-
fill="toself",
|
| 549 |
-
fillcolor="rgba(255, 127, 14, 0.3)",
|
| 550 |
-
line=dict(color="rgba(255, 127, 14, 0)"),
|
| 551 |
-
name="Prediction Confidence Interval",
|
| 552 |
-
showlegend=False,
|
| 553 |
-
)
|
| 554 |
-
)
|
| 555 |
-
|
| 556 |
# Update layout
|
| 557 |
fig.update_layout(
|
| 558 |
-
title=f"{ticker} Stock Price with Next 30-Day Predictions",
|
| 559 |
xaxis_title="Date",
|
| 560 |
yaxis_title="Price ($)",
|
| 561 |
height=500,
|
|
@@ -574,15 +895,19 @@ def create_stock_chart(ticker: str):
|
|
| 574 |
fig.update_yaxes(title_text="Price ($)")
|
| 575 |
|
| 576 |
# Display prediction summary
|
| 577 |
-
current_price =
|
| 578 |
-
predicted_price_30d =
|
|
|
|
|
|
|
| 579 |
price_change = predicted_price_30d - current_price
|
| 580 |
price_change_pct = (price_change / current_price) * 100
|
| 581 |
|
| 582 |
-
# Calculate
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
|
|
|
|
|
|
| 586 |
|
| 587 |
# Display detailed prediction information
|
| 588 |
col1, col2, col3 = st.columns([1, 1, 1])
|
|
@@ -609,14 +934,20 @@ def create_stock_chart(ticker: str):
|
|
| 609 |
# Additional prediction details
|
| 610 |
st.info(
|
| 611 |
f"""
|
| 612 |
-
**📊 30-Day Prediction
|
| 613 |
- **Current Price:** ${current_price:.2f}
|
| 614 |
- **Predicted Price (30 days):** ${predicted_price_30d:.2f}
|
| 615 |
- **Expected Change:** ${price_change:.2f} ({price_change_pct:+.2f}%)
|
| 616 |
-
- **
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
- **Model Training Time:** {training_time:.2f}s
|
|
|
|
|
|
|
| 618 |
|
| 619 |
-
⚠️ **Disclaimer**: Stock predictions have approximately
|
| 620 |
These forecasts are for informational purposes only and should not be used as
|
| 621 |
the sole basis for investment decisions. Always conduct your own research
|
| 622 |
and consider consulting with financial advisors.
|
|
|
|
| 13 |
import importlib.util
|
| 14 |
import requests
|
| 15 |
import holidays
|
| 16 |
+
import pandas as pd
|
| 17 |
+
import numpy as np
|
| 18 |
+
from sklearn.metrics import mean_squared_error, r2_score
|
| 19 |
+
from sklearn.linear_model import Ridge
|
| 20 |
+
from sklearn.model_selection import GridSearchCV
|
|
|
|
| 21 |
from dotenv import load_dotenv
|
| 22 |
from openai import OpenAI
|
| 23 |
from mcp.client.session import ClientSession
|
| 24 |
from mcp.client.stdio import stdio_client
|
| 25 |
from mcp import StdioServerParameters, types
|
| 26 |
+
from sklearn.preprocessing import StandardScaler
|
| 27 |
|
|
|
|
| 28 |
try:
|
| 29 |
from resource_monitor import (
|
| 30 |
start_resource_monitoring,
|
|
|
|
| 396 |
return f"Error getting stock data for {ticker}: {e}"
|
| 397 |
|
| 398 |
|
| 399 |
+
def calculate_rsi(data, window=14):
    """Calculate the RSI (Relative Strength Index) of a price series.

    This variant averages gains and losses with a simple rolling mean.
    ``min_periods=1`` means early rows are averaged over however many
    observations are available instead of being left as NaN.

    Args:
        data: pandas Series of prices (anything supporting ``.diff()``,
            ``.where()`` and ``.rolling()`` the way a Series does).
        window: Number of periods in the rolling averages. Defaults to 14,
            matching the default used by the other indicator helpers.

    Returns:
        A Series aligned with ``data``. A window with gains but no losses
        evaluates to 100; a window with neither gains nor losses (including
        the first row, whose diff is undefined) evaluates to NaN.
    """
    delta = data.diff()

    # Split each period's move into its upward and downward magnitude;
    # the opposite side contributes 0 for that period.
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()

    # Relative strength. Division by a zero average loss gives inf (-> RSI 100)
    # or NaN when the average gain is zero as well.
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi
|
| 409 |
+
|
| 410 |
+
|
| 411 |
def create_stock_chart(ticker: str):
|
| 412 |
+
"""Create an interactive stock price chart with Linear Regression predictions for the given ticker."""
|
| 413 |
try:
|
| 414 |
+
# Get stock data - 5 years for training Linear Regression
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
with st.spinner(f"📊 Fetching stock data for {ticker}..."):
|
| 416 |
stock = yf.Ticker(ticker)
|
| 417 |
+
hist_data = stock.history(period="5y")
|
| 418 |
|
| 419 |
# Track yfinance API call
|
| 420 |
if RESOURCE_MONITORING_AVAILABLE:
|
|
|
|
| 424 |
st.warning(f"No data available for {ticker}")
|
| 425 |
return None
|
| 426 |
|
| 427 |
+
# Prepare data for Linear Regression with technical indicators
|
| 428 |
df = hist_data.reset_index()
|
| 429 |
|
| 430 |
+
# Flatten the multi-level column index if it exists
|
| 431 |
+
if isinstance(df.columns, pd.MultiIndex):
|
| 432 |
+
df.columns = df.columns.get_level_values(0)
|
| 433 |
+
|
| 434 |
+
# Calculate technical indicators (same as in the notebook)
|
| 435 |
+
# Moving averages
|
| 436 |
+
df["SMA_20"] = df["Close"].rolling(window=20).mean()
|
| 437 |
+
df["SMA_50"] = df["Close"].rolling(window=50).mean()
|
| 438 |
+
|
| 439 |
+
# RSI
|
| 440 |
+
df["RSI"] = calculate_rsi(df["Close"], window=14)
|
| 441 |
+
|
| 442 |
+
# MACD
|
| 443 |
+
exp12 = df["Close"].ewm(span=12, adjust=False).mean()
|
| 444 |
+
exp26 = df["Close"].ewm(span=26, adjust=False).mean()
|
| 445 |
+
df["MACD"] = exp12 - exp26
|
| 446 |
+
df["MACD_Signal"] = df["MACD"].ewm(span=9, adjust=False).mean()
|
| 447 |
+
|
| 448 |
+
# Bollinger Band component
|
| 449 |
+
df["BB_StdDev"] = df["Close"].rolling(window=20).std()
|
| 450 |
+
|
| 451 |
+
# Volume moving average
|
| 452 |
+
df["Volume_Avg"] = df["Volume"].rolling(window=20).mean()
|
| 453 |
+
|
| 454 |
+
# Price momentum and volatility
|
| 455 |
+
df["Price_Change"] = df["Close"].pct_change()
|
| 456 |
+
df["Price_Change_5d"] = df["Close"].pct_change(periods=5)
|
| 457 |
+
df["Price_Change_20d"] = df["Close"].pct_change(periods=20)
|
| 458 |
+
df["Price_Volatility"] = df["Close"].rolling(window=20).std()
|
| 459 |
+
df["Price_Range"] = (df["High"] - df["Low"]) / df["Close"] # Daily range
|
| 460 |
+
|
| 461 |
+
# Volume-Based Features
|
| 462 |
+
df["Volume_Change"] = df["Volume"].pct_change()
|
| 463 |
+
df["Volume_Price_Trend"] = df["Volume"] * df["Price_Change"]
|
| 464 |
+
df["Volume_SMA_Ratio"] = df["Volume"] / df["Volume"].rolling(window=20).mean()
|
| 465 |
+
df["Volume_StdDev"] = df["Volume"].rolling(window=20).std()
|
| 466 |
+
|
| 467 |
+
# Advanced Technical Indicators
|
| 468 |
+
# Stochastic Oscillator
|
| 469 |
+
def calculate_stochastic(df, window=14):
    """Return the stochastic oscillator %K line.

    %K is where the close sits inside the rolling low/high range, scaled
    by 100.

    Args:
        df: DataFrame with "High", "Low" and "Close" columns.
        window: Look-back length for the rolling low and high.

    Returns:
        A Series aligned with ``df``. Rows without a full window, and rows
        whose rolling range is zero, are NaN.
    """
    lowest_low = df["Low"].rolling(window=window).min()
    highest_high = df["High"].rolling(window=window).max()

    # A zero-width range would divide by zero. If Close lies outside that
    # degenerate range the quotient is +/-inf, which dropna() does not
    # remove. Masking the range to NaN makes such rows NaN instead.
    price_range = highest_high - lowest_low
    price_range = price_range.where(price_range != 0)

    k_percent = 100 * ((df["Close"] - lowest_low) / price_range)
    return k_percent
|
| 474 |
+
|
| 475 |
+
df["Stochastic_K"] = calculate_stochastic(df)
|
| 476 |
+
df["Stochastic_D"] = df["Stochastic_K"].rolling(window=3).mean()
|
| 477 |
+
|
| 478 |
+
# Williams %R
|
| 479 |
+
def calculate_williams_r(df, window=14):
    """Return Williams %R.

    %R is the distance of the close below the rolling high, as a fraction
    of the rolling high/low range, scaled by -100.

    Args:
        df: DataFrame with "High", "Low" and "Close" columns.
        window: Look-back length for the rolling high and low.

    Returns:
        A Series aligned with ``df``. Rows without a full window, and rows
        whose rolling range is zero, are NaN.
    """
    highest_high = df["High"].rolling(window=window).max()
    lowest_low = df["Low"].rolling(window=window).min()

    # Mask a zero-width range to NaN so the division cannot yield +/-inf
    # (which dropna() would leave in place) when Close falls outside a
    # degenerate range.
    price_range = highest_high - lowest_low
    price_range = price_range.where(price_range != 0)

    williams_r = -100 * ((highest_high - df["Close"]) / price_range)
    return williams_r
|
| 486 |
+
|
| 487 |
+
df["Williams_R"] = calculate_williams_r(df)
|
| 488 |
+
|
| 489 |
+
# Commodity Channel Index (CCI)
|
| 490 |
+
def calculate_cci(df, window=20):
|
| 491 |
+
typical_price = (df["High"] + df["Low"] + df["Close"]) / 3
|
| 492 |
+
sma_tp = typical_price.rolling(window=window).mean()
|
| 493 |
+
mad = typical_price.rolling(window=window).apply(
|
| 494 |
+
lambda x: np.mean(np.abs(x - x.mean()))
|
| 495 |
+
)
|
| 496 |
+
cci = (typical_price - sma_tp) / (0.015 * mad)
|
| 497 |
+
return cci
|
| 498 |
+
|
| 499 |
+
df["CCI"] = calculate_cci(df)
|
| 500 |
+
|
| 501 |
+
# Moving Average Crossovers
|
| 502 |
+
df["SMA_10"] = df["Close"].rolling(window=10).mean()
|
| 503 |
+
df["SMA_20"] = df["Close"].rolling(window=20).mean()
|
| 504 |
+
df["SMA_50"] = df["Close"].rolling(window=50).mean()
|
| 505 |
+
df["SMA_200"] = df["Close"].rolling(window=200).mean()
|
| 506 |
+
|
| 507 |
+
# Crossover signals
|
| 508 |
+
df["SMA_10_20_Cross"] = (df["SMA_10"] > df["SMA_20"]).astype(int)
|
| 509 |
+
df["SMA_20_50_Cross"] = (df["SMA_20"] > df["SMA_50"]).astype(int)
|
| 510 |
+
df["SMA_50_200_Cross"] = (df["SMA_50"] > df["SMA_200"]).astype(int)
|
| 511 |
+
|
| 512 |
+
# Bollinger Bands Components
|
| 513 |
+
df["BB_Upper"] = df["SMA_20"] + (df["BB_StdDev"] * 2)
|
| 514 |
+
df["BB_Lower"] = df["SMA_20"] - (df["BB_StdDev"] * 2)
|
| 515 |
+
df["BB_Position"] = (df["Close"] - df["BB_Lower"]) / (
|
| 516 |
+
df["BB_Upper"] - df["BB_Lower"]
|
| 517 |
+
)
|
| 518 |
+
df["BB_Squeeze"] = (df["BB_Upper"] - df["BB_Lower"]) / df[
|
| 519 |
+
"SMA_20"
|
| 520 |
+
] # Volatility indicator
|
| 521 |
+
|
| 522 |
+
# Support and Resistance
|
| 523 |
+
df["Resistance_20d"] = df["High"].rolling(window=20).max()
|
| 524 |
+
df["Support_20d"] = df["Low"].rolling(window=20).min()
|
| 525 |
+
df["Price_to_Resistance"] = df["Close"] / df["Resistance_20d"]
|
| 526 |
+
df["Price_to_Support"] = df["Close"] / df["Support_20d"]
|
| 527 |
+
|
| 528 |
+
# Time-based features
|
| 529 |
+
df["Day_of_Week"] = df["Date"].dt.dayofweek
|
| 530 |
+
df["Month"] = df["Date"].dt.month
|
| 531 |
+
df["Quarter"] = df["Date"].dt.quarter
|
| 532 |
+
df["Is_Month_End"] = df["Date"].dt.is_month_end.astype(int)
|
| 533 |
+
df["Is_Quarter_End"] = df["Date"].dt.is_quarter_end.astype(int)
|
| 534 |
+
|
| 535 |
+
# Market Sentiment Features
|
| 536 |
+
df["Price_Above_SMA200"] = (df["Close"] > df["SMA_200"]).astype(int)
|
| 537 |
+
df["Volume_Spike"] = (
|
| 538 |
+
df["Volume"] > df["Volume"].rolling(window=20).mean() * 1.5
|
| 539 |
+
).astype(int)
|
| 540 |
+
df["Price_Spike"] = (
|
| 541 |
+
df["Price_Change"].abs() > df["Price_Change"].rolling(window=20).std() * 2
|
| 542 |
+
).astype(int)
|
| 543 |
+
|
| 544 |
+
# Drop rows with NaN values created by moving averages and new features
|
| 545 |
+
df.dropna(inplace=True)
|
| 546 |
+
|
| 547 |
+
# Define features and target (same as notebook)
|
| 548 |
+
features = [
|
| 549 |
+
"SMA_10",
|
| 550 |
+
"SMA_20",
|
| 551 |
+
"SMA_50",
|
| 552 |
+
"SMA_200",
|
| 553 |
+
"RSI",
|
| 554 |
+
"MACD",
|
| 555 |
+
"MACD_Signal",
|
| 556 |
+
"BB_StdDev",
|
| 557 |
+
"BB_Position",
|
| 558 |
+
"BB_Squeeze",
|
| 559 |
+
"Stochastic_K",
|
| 560 |
+
"Stochastic_D",
|
| 561 |
+
"Williams_R",
|
| 562 |
+
"CCI",
|
| 563 |
+
"Price_Change",
|
| 564 |
+
"Price_Change_5d",
|
| 565 |
+
"Price_Change_20d",
|
| 566 |
+
"Price_Volatility",
|
| 567 |
+
"Price_Range",
|
| 568 |
+
"Volume_Change",
|
| 569 |
+
"Volume_Price_Trend",
|
| 570 |
+
"Volume_SMA_Ratio",
|
| 571 |
+
"Volume_StdDev",
|
| 572 |
+
"SMA_10_20_Cross",
|
| 573 |
+
"SMA_20_50_Cross",
|
| 574 |
+
"SMA_50_200_Cross",
|
| 575 |
+
"Price_to_Resistance",
|
| 576 |
+
"Price_to_Support",
|
| 577 |
+
"Day_of_Week",
|
| 578 |
+
"Month",
|
| 579 |
+
"Quarter",
|
| 580 |
+
"Is_Month_End",
|
| 581 |
+
"Is_Quarter_End",
|
| 582 |
+
"Price_Above_SMA200",
|
| 583 |
+
"Volume_Spike",
|
| 584 |
+
"Price_Spike",
|
| 585 |
+
"Volume_Avg",
|
| 586 |
+
]
|
| 587 |
+
target = "Close"
|
| 588 |
|
| 589 |
+
X = df[features]
|
| 590 |
+
y = df[target]
|
| 591 |
|
| 592 |
+
# Train on ALL available data (5 years)
|
| 593 |
+
X_train = X # Use all available data for training
|
| 594 |
+
y_train = y
|
|
|
|
|
|
|
| 595 |
|
| 596 |
+
# Add feature scaling
|
| 597 |
+
scaler = StandardScaler()
|
| 598 |
+
X_train_scaled = scaler.fit_transform(X_train)
|
| 599 |
+
|
| 600 |
+
# Train Ridge Regression model with cross-validation
|
| 601 |
start_time = time.time()
|
| 602 |
+
with st.spinner(f"Training Ridge Regression model for {ticker}..."):
|
| 603 |
+
# Use Ridge with cross-validation to find optimal alpha
|
| 604 |
+
ridge_model = Ridge()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
|
| 606 |
+
# Grid search for optimal regularization strength
|
| 607 |
+
param_grid = {"alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]}
|
| 608 |
+
grid_search = GridSearchCV(ridge_model, param_grid, cv=5, scoring="r2")
|
| 609 |
+
grid_search.fit(X_train_scaled, y_train)
|
| 610 |
|
| 611 |
+
# Use the best model
|
| 612 |
+
model = grid_search.best_estimator_
|
| 613 |
|
| 614 |
+
# Track training time
|
| 615 |
+
training_time = time.time() - start_time
|
| 616 |
+
if RESOURCE_MONITORING_AVAILABLE:
|
| 617 |
+
resource_monitor.add_prophet_training_time(
|
| 618 |
+
training_time
|
| 619 |
+
) # Reuse existing method
|
| 620 |
+
|
| 621 |
+
# Get the best alpha value for display
|
| 622 |
+
best_alpha = grid_search.best_params_["alpha"]
|
| 623 |
+
best_score = grid_search.best_score_
|
| 624 |
+
|
| 625 |
+
# Create future dates for next 30 days
|
| 626 |
+
last_date = df["Date"].max()
|
| 627 |
+
future_dates = pd.date_range(
|
| 628 |
+
start=last_date + timedelta(days=1), periods=30, freq="D"
|
| 629 |
+
)
|
| 630 |
|
| 631 |
+
# Filter for trading days only
|
| 632 |
+
future_trading_dates = [date for date in future_dates if is_trading_day(date)]
|
| 633 |
+
|
| 634 |
+
# Create a more sophisticated future prediction approach
|
| 635 |
+
# We'll use a more realistic projection with some randomness and market patterns
|
| 636 |
+
future_features = []
|
| 637 |
+
|
| 638 |
+
# Get the last few values to calculate trends
|
| 639 |
+
last_20_prices = df["Close"].tail(20).values
|
| 640 |
+
last_50_prices = df["Close"].tail(50).values
|
| 641 |
+
last_volumes = df["Volume"].tail(20).values
|
| 642 |
+
|
| 643 |
+
# Get the last known values for technical indicators
|
| 644 |
+
last_values = df.iloc[-1]
|
| 645 |
+
|
| 646 |
+
# Calculate more sophisticated trends
|
| 647 |
+
price_trend = (
|
| 648 |
+
df["Close"].iloc[-1] - df["Close"].iloc[-20]
|
| 649 |
+
) / 20 # Daily price change
|
| 650 |
+
volume_trend = (
|
| 651 |
+
df["Volume"].iloc[-1] - df["Volume"].iloc[-20]
|
| 652 |
+
) / 20 # Daily volume change
|
| 653 |
+
|
| 654 |
+
# Calculate volatility for more realistic projections
|
| 655 |
+
price_volatility = df["Close"].pct_change().std()
|
| 656 |
+
volume_volatility = df["Volume"].pct_change().std()
|
| 657 |
+
|
| 658 |
+
for i, date in enumerate(future_trading_dates):
|
| 659 |
+
# Add some randomness to make predictions more realistic
|
| 660 |
+
# Use a smaller random component to avoid extreme outliers
|
| 661 |
+
random_factor = np.random.normal(0, price_volatility * 0.1)
|
| 662 |
+
|
| 663 |
+
# Project prices forward using the trend with some randomness
|
| 664 |
+
projected_price = (
|
| 665 |
+
df["Close"].iloc[-1] + (price_trend * (i + 1)) + random_factor
|
| 666 |
+
)
|
| 667 |
|
| 668 |
+
# Ensure projected price doesn't go negative
|
| 669 |
+
projected_price = max(projected_price, df["Close"].iloc[-1] * 0.5)
|
| 670 |
+
|
| 671 |
+
# Update the price arrays for calculating moving averages
|
| 672 |
+
if i < 20:
|
| 673 |
+
# For first 20 days, use historical data + projected
|
| 674 |
+
current_20_prices = np.append(
|
| 675 |
+
last_20_prices[-(20 - i - 1) :], [projected_price] * (i + 1)
|
| 676 |
+
)
|
| 677 |
+
else:
|
| 678 |
+
# After 20 days, use only projected prices
|
| 679 |
+
current_20_prices = np.array([projected_price] * 20)
|
| 680 |
+
|
| 681 |
+
if i < 50:
|
| 682 |
+
# For first 50 days, use historical data + projected
|
| 683 |
+
current_50_prices = np.append(
|
| 684 |
+
last_50_prices[-(50 - i - 1) :], [projected_price] * (i + 1)
|
| 685 |
+
)
|
| 686 |
+
else:
|
| 687 |
+
# After 50 days, use only projected prices
|
| 688 |
+
current_50_prices = np.array([projected_price] * 50)
|
| 689 |
|
| 690 |
+
# Calculate projected technical indicators
|
| 691 |
+
sma_20 = np.mean(current_20_prices)
|
| 692 |
+
sma_50 = np.mean(current_50_prices)
|
| 693 |
|
| 694 |
+
# Project volume with some randomness
|
| 695 |
+
volume_random_factor = np.random.normal(0, volume_volatility * 0.1)
|
| 696 |
+
projected_volume = (
|
| 697 |
+
df["Volume"].iloc[-1] + (volume_trend * (i + 1)) + volume_random_factor
|
| 698 |
)
|
| 699 |
+
projected_volume = max(
|
| 700 |
+
projected_volume, df["Volume"].iloc[-1] * 0.3
|
| 701 |
+
) # Don't go too low
|
| 702 |
+
|
| 703 |
+
volume_avg = np.mean(
|
| 704 |
+
np.append(
|
| 705 |
+
last_volumes[-(20 - i - 1) :], [projected_volume] * min(i + 1, 20)
|
|
|
|
|
|
|
|
|
|
| 706 |
)
|
| 707 |
+
)
|
| 708 |
+
|
| 709 |
+
# Add some variation to RSI and MACD instead of keeping them constant
|
| 710 |
+
# RSI typically oscillates between 30-70, so add small random changes
|
| 711 |
+
rsi_variation = np.random.normal(0, 2) # Small random change
|
| 712 |
+
new_rsi = last_values["RSI"] + rsi_variation
|
| 713 |
+
new_rsi = max(10, min(90, new_rsi)) # Keep RSI in reasonable bounds
|
| 714 |
+
|
| 715 |
+
# MACD variation
|
| 716 |
+
macd_variation = np.random.normal(0, abs(last_values["MACD"]) * 0.1)
|
| 717 |
+
new_macd = last_values["MACD"] + macd_variation
|
| 718 |
+
new_macd_signal = last_values["MACD_Signal"] + macd_variation * 0.5
|
| 719 |
+
|
| 720 |
+
# Bollinger Band variation
|
| 721 |
+
bb_variation = np.random.normal(0, last_values["BB_StdDev"] * 0.1)
|
| 722 |
+
new_bb_std = last_values["BB_StdDev"] + bb_variation
|
| 723 |
+
new_bb_std = max(
|
| 724 |
+
new_bb_std, last_values["BB_StdDev"] * 0.5
|
| 725 |
+
) # Don't go too low
|
| 726 |
+
|
| 727 |
+
# Calculate additional features for future predictions
|
| 728 |
+
# Use the last known values and add small variations
|
| 729 |
+
new_stochastic_k = last_values.get("Stochastic_K", 50) + np.random.normal(
|
| 730 |
+
0, 5
|
| 731 |
+
)
|
| 732 |
+
new_stochastic_k = max(0, min(100, new_stochastic_k))
|
| 733 |
+
|
| 734 |
+
new_stochastic_d = last_values.get("Stochastic_D", 50) + np.random.normal(
|
| 735 |
+
0, 5
|
| 736 |
+
)
|
| 737 |
+
new_stochastic_d = max(0, min(100, new_stochastic_d))
|
| 738 |
|
| 739 |
+
new_williams_r = last_values.get("Williams_R", -50) + np.random.normal(0, 5)
|
| 740 |
+
new_williams_r = max(-100, min(0, new_williams_r))
|
| 741 |
+
|
| 742 |
+
new_cci = last_values.get("CCI", 0) + np.random.normal(0, 20)
|
| 743 |
+
|
| 744 |
+
# Calculate BB position and squeeze
|
| 745 |
+
bb_upper = sma_20 + (new_bb_std * 2)
|
| 746 |
+
bb_lower = sma_20 - (new_bb_std * 2)
|
| 747 |
+
bb_position = (
|
| 748 |
+
(projected_price - bb_lower) / (bb_upper - bb_lower)
|
| 749 |
+
if (bb_upper - bb_lower) > 0
|
| 750 |
+
else 0.5
|
| 751 |
+
)
|
| 752 |
+
bb_squeeze = (bb_upper - bb_lower) / sma_20 if sma_20 > 0 else 0
|
| 753 |
+
|
| 754 |
+
# Price changes
|
| 755 |
+
price_change = (projected_price - df["Close"].iloc[-1]) / df["Close"].iloc[
|
| 756 |
+
-1
|
| 757 |
+
]
|
| 758 |
+
price_change_5d = price_change * 0.8 # Approximate
|
| 759 |
+
price_change_20d = price_change * 0.6 # Approximate
|
| 760 |
+
|
| 761 |
+
# Volume changes
|
| 762 |
+
volume_change = (projected_volume - df["Volume"].iloc[-1]) / df[
|
| 763 |
+
"Volume"
|
| 764 |
+
].iloc[-1]
|
| 765 |
+
volume_price_trend = projected_volume * price_change
|
| 766 |
+
volume_sma_ratio = projected_volume / volume_avg if volume_avg > 0 else 1
|
| 767 |
+
|
| 768 |
+
# Moving average crossovers
|
| 769 |
+
sma_10 = (
|
| 770 |
+
np.mean(current_20_prices[-10:])
|
| 771 |
+
if len(current_20_prices) >= 10
|
| 772 |
+
else sma_20
|
| 773 |
+
)
|
| 774 |
+
sma_200 = sma_50 # Approximate for future
|
| 775 |
+
|
| 776 |
+
sma_10_20_cross = 1 if sma_10 > sma_20 else 0
|
| 777 |
+
sma_20_50_cross = 1 if sma_20 > sma_50 else 0
|
| 778 |
+
sma_50_200_cross = 1 if sma_50 > sma_200 else 0
|
| 779 |
+
|
| 780 |
+
# Support and resistance
|
| 781 |
+
resistance_20d = projected_price * 1.05 # Approximate
|
| 782 |
+
support_20d = projected_price * 0.95 # Approximate
|
| 783 |
+
price_to_resistance = projected_price / resistance_20d
|
| 784 |
+
price_to_support = projected_price / support_20d
|
| 785 |
+
|
| 786 |
+
# Time-based features (use the actual future date)
|
| 787 |
+
day_of_week = date.weekday()
|
| 788 |
+
month = date.month
|
| 789 |
+
quarter = (month - 1) // 3 + 1
|
| 790 |
+
is_month_end = 1 if date.day >= 25 else 0 # Approximate
|
| 791 |
+
is_quarter_end = 1 if month in [3, 6, 9, 12] and date.day >= 25 else 0
|
| 792 |
+
|
| 793 |
+
# Market sentiment
|
| 794 |
+
price_above_sma200 = 1 if projected_price > sma_200 else 0
|
| 795 |
+
volume_spike = 1 if projected_volume > volume_avg * 1.5 else 0
|
| 796 |
+
price_spike = 1 if abs(price_change) > price_volatility * 2 else 0
|
| 797 |
+
|
| 798 |
+
future_row = {
|
| 799 |
+
"SMA_10": sma_10,
|
| 800 |
+
"SMA_20": sma_20,
|
| 801 |
+
"SMA_50": sma_50,
|
| 802 |
+
"SMA_200": sma_200,
|
| 803 |
+
"RSI": new_rsi,
|
| 804 |
+
"MACD": new_macd,
|
| 805 |
+
"MACD_Signal": new_macd_signal,
|
| 806 |
+
"BB_StdDev": new_bb_std,
|
| 807 |
+
"BB_Position": bb_position,
|
| 808 |
+
"BB_Squeeze": bb_squeeze,
|
| 809 |
+
"Stochastic_K": new_stochastic_k,
|
| 810 |
+
"Stochastic_D": new_stochastic_d,
|
| 811 |
+
"Williams_R": new_williams_r,
|
| 812 |
+
"CCI": new_cci,
|
| 813 |
+
"Price_Change": price_change,
|
| 814 |
+
"Price_Change_5d": price_change_5d,
|
| 815 |
+
"Price_Change_20d": price_change_20d,
|
| 816 |
+
"Price_Volatility": price_volatility,
|
| 817 |
+
"Price_Range": abs(price_change) * 0.02, # Approximate
|
| 818 |
+
"Volume_Change": volume_change,
|
| 819 |
+
"Volume_Price_Trend": volume_price_trend,
|
| 820 |
+
"Volume_SMA_Ratio": volume_sma_ratio,
|
| 821 |
+
"Volume_StdDev": volume_volatility,
|
| 822 |
+
"SMA_10_20_Cross": sma_10_20_cross,
|
| 823 |
+
"SMA_20_50_Cross": sma_20_50_cross,
|
| 824 |
+
"SMA_50_200_Cross": sma_50_200_cross,
|
| 825 |
+
"Price_to_Resistance": price_to_resistance,
|
| 826 |
+
"Price_to_Support": price_to_support,
|
| 827 |
+
"Day_of_Week": day_of_week,
|
| 828 |
+
"Month": month,
|
| 829 |
+
"Quarter": quarter,
|
| 830 |
+
"Is_Month_End": is_month_end,
|
| 831 |
+
"Is_Quarter_End": is_quarter_end,
|
| 832 |
+
"Price_Above_SMA200": price_above_sma200,
|
| 833 |
+
"Volume_Spike": volume_spike,
|
| 834 |
+
"Price_Spike": price_spike,
|
| 835 |
+
"Volume_Avg": volume_avg,
|
| 836 |
+
}
|
| 837 |
+
future_features.append(future_row)
|
| 838 |
+
|
| 839 |
+
# Create X_future AFTER future_features is populated
|
| 840 |
+
X_future = pd.DataFrame(future_features)
|
| 841 |
+
X_future_scaled = scaler.transform(X_future)
|
| 842 |
+
|
| 843 |
+
# Make predictions for the next 30 trading days
|
| 844 |
+
future_predictions = model.predict(X_future_scaled)
|
| 845 |
+
|
| 846 |
+
# Create interactive chart with historical data and future predictions
|
| 847 |
fig = go.Figure()
|
| 848 |
|
| 849 |
+
# Filter data to show only the last 1 year for display
|
| 850 |
+
one_year_ago = last_date - timedelta(days=365)
|
| 851 |
+
df_display = df[df["Date"] >= one_year_ago]
|
| 852 |
+
|
| 853 |
+
# Add historical price data (last 1 year only)
|
|
|
|
| 854 |
fig.add_trace(
|
| 855 |
go.Scatter(
|
| 856 |
+
x=df_display["Date"],
|
| 857 |
+
y=df_display["Close"],
|
| 858 |
mode="lines+markers",
|
| 859 |
name=f"{ticker} Historical Price (Last Year)",
|
| 860 |
line=dict(color="#1f77b4", width=2),
|
|
|
|
| 862 |
)
|
| 863 |
)
|
| 864 |
|
| 865 |
+
# Add future predictions
|
| 866 |
fig.add_trace(
|
| 867 |
go.Scatter(
|
| 868 |
+
x=future_trading_dates,
|
| 869 |
+
y=future_predictions,
|
| 870 |
mode="lines+markers",
|
| 871 |
name=f"{ticker} Future Predictions (Next 30 Days)",
|
| 872 |
line=dict(color="#ff7f0e", width=2, dash="dash"),
|
|
|
|
| 874 |
)
|
| 875 |
)
|
| 876 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 877 |
# Update layout
|
| 878 |
fig.update_layout(
|
| 879 |
+
title=f"{ticker} Stock Price with Next 30-Day Linear Regression Predictions",
|
| 880 |
xaxis_title="Date",
|
| 881 |
yaxis_title="Price ($)",
|
| 882 |
height=500,
|
|
|
|
| 895 |
fig.update_yaxes(title_text="Price ($)")
|
| 896 |
|
| 897 |
# Display prediction summary
|
| 898 |
+
current_price = df["Close"].iloc[-1]
|
| 899 |
+
predicted_price_30d = (
|
| 900 |
+
future_predictions[-1] if len(future_predictions) > 0 else current_price
|
| 901 |
+
)
|
| 902 |
price_change = predicted_price_30d - current_price
|
| 903 |
price_change_pct = (price_change / current_price) * 100
|
| 904 |
|
| 905 |
+
# Calculate model performance on historical data (for reference)
|
| 906 |
+
y_pred_historical = model.predict(
|
| 907 |
+
X_train_scaled
|
| 908 |
+
) # Use scaled data for historical fit
|
| 909 |
+
r2_historical = r2_score(y_train, y_pred_historical)
|
| 910 |
+
mse_historical = mean_squared_error(y_train, y_pred_historical)
|
| 911 |
|
| 912 |
# Display detailed prediction information
|
| 913 |
col1, col2, col3 = st.columns([1, 1, 1])
|
|
|
|
| 934 |
# Additional prediction details
|
| 935 |
st.info(
|
| 936 |
f"""
|
| 937 |
+
**📊 30-Day Ridge Regression Prediction for {ticker}:**
|
| 938 |
- **Current Price:** ${current_price:.2f}
|
| 939 |
- **Predicted Price (30 days):** ${predicted_price_30d:.2f}
|
| 940 |
- **Expected Change:** ${price_change:.2f} ({price_change_pct:+.2f}%)
|
| 941 |
+
- **Model Performance (Historical Fit):**
|
| 942 |
+
- R² Score: {r2_historical:.4f} ({r2_historical*100:.2f}% accuracy)
|
| 943 |
+
- Mean Squared Error: {mse_historical:.4f}
|
| 944 |
+
- Best Alpha (Regularization): {best_alpha}
|
| 945 |
+
- Cross-Validation Score: {best_score:.4f}
|
| 946 |
- **Model Training Time:** {training_time:.2f}s
|
| 947 |
+
- **Training Data:** 5 years of historical data
|
| 948 |
+
- **Features Used:** {', '.join(features)}
|
| 949 |
|
| 950 |
+
⚠️ **Disclaimer**: Stock predictions have approximately 70% accuracy.
|
| 951 |
These forecasts are for informational purposes only and should not be used as
|
| 952 |
the sole basis for investment decisions. Always conduct your own research
|
| 953 |
and consider consulting with financial advisors.
|
README.md
CHANGED
|
@@ -1,18 +1,45 @@
|
|
| 1 |
# QueryStockAI
|
| 2 |
|
| 3 |
-
A comprehensive financial analysis tool that provides stock data, news analysis, and AI-powered insights through an interactive Streamlit web interface.
|
| 4 |
|
| 5 |
## Features
|
| 6 |
|
| 7 |
- **Stock Data**: Fetch historical stock prices and performance metrics using Yahoo Finance
|
| 8 |
-
- **Interactive Stock Charts**: Visualize stock performance with Plotly charts
|
|
|
|
|
|
|
| 9 |
- **Latest News Analysis**: Get recent news headlines for selected stocks
|
| 10 |
- **AI-Powered Chat Interface**: Chat with a financial agent powered by Mistral via OpenRouter
|
| 11 |
- **MCP Server Integration**: Modular architecture with separate MCP servers for stock data and news
|
| 12 |
-
- **Prophet Forecasting**: Optional time series forecasting capabilities
|
| 13 |
- **System Resource Monitoring**: Real-time monitoring of CPU, memory, disk, and network usage
|
| 14 |
- **Stock Search & Discovery**: Search for custom tickers and browse popular stocks
|
| 15 |
- **Caching & Performance**: Intelligent caching for charts and news to improve performance
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
## Setup
|
| 18 |
|
|
@@ -51,7 +78,10 @@ A comprehensive financial analysis tool that provides stock data, news analysis,
|
|
| 51 |
|
| 52 |
1. Open the web interface in your browser
|
| 53 |
2. Select a stock ticker from the dropdown in the sidebar or search for a custom ticker
|
| 54 |
-
3. View the interactive stock price chart
|
|
|
|
|
|
|
|
|
|
| 55 |
4. Start chatting with the financial agent about the selected stock
|
| 56 |
5. Ask questions like:
|
| 57 |
- "How is this stock performing?"
|
|
@@ -63,6 +93,7 @@ A comprehensive financial analysis tool that provides stock data, news analysis,
|
|
| 63 |
|
| 64 |
- **Frontend**: Streamlit web interface with interactive charts
|
| 65 |
- **Backend**: Python with OpenRouter integration
|
|
|
|
| 66 |
- **Data Sources**:
|
| 67 |
- Stock data via `yfinance`
|
| 68 |
- News data via `gnews`
|
|
@@ -72,11 +103,12 @@ A comprehensive financial analysis tool that provides stock data, news analysis,
|
|
| 72 |
|
| 73 |
## Files
|
| 74 |
|
| 75 |
-
- `Home.py`: Main Streamlit web application
|
| 76 |
- `stock_data_server.py`: MCP server for stock data
|
| 77 |
- `news_server.py`: MCP server for news data
|
| 78 |
- `resource_monitor.py`: System resource monitoring
|
| 79 |
- `pages/System_Monitor.py`: System monitoring dashboard
|
|
|
|
| 80 |
- `requirements.txt`: Python dependencies
|
| 81 |
- `pyproject.toml`: Project configuration
|
| 82 |
|
|
@@ -86,14 +118,56 @@ A comprehensive financial analysis tool that provides stock data, news analysis,
|
|
| 86 |
- **yfinance**: Stock data fetching
|
| 87 |
- **gnews**: News data fetching
|
| 88 |
- **plotly**: Interactive charts
|
| 89 |
-
- **
|
|
|
|
|
|
|
| 90 |
- **psutil**: System monitoring
|
| 91 |
- **openai**: AI model integration
|
| 92 |
- **fastmcp**: MCP server framework
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
## System Requirements
|
| 95 |
|
| 96 |
- Python 3.10 or higher
|
| 97 |
- OpenRouter API key
|
| 98 |
- Internet connection for real-time data
|
| 99 |
- Optional: psutil for system monitoring features
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# QueryStockAI
|
| 2 |
|
| 3 |
+
A comprehensive financial analysis tool that provides stock data, news analysis, and AI-powered insights through an interactive Streamlit web interface. It also features advanced machine-learning-based stock price predictions using Ridge Regression with comprehensive technical indicators.
|
| 4 |
|
| 5 |
## Features
|
| 6 |
|
| 7 |
- **Stock Data**: Fetch historical stock prices and performance metrics using Yahoo Finance
|
| 8 |
+
- **Interactive Stock Charts**: Visualize stock performance with Plotly charts showing 1 year of data
|
| 9 |
+
- **Advanced ML Predictions**: Ridge Regression model with 5 years of training data and 30-day forecasts
|
| 10 |
+
- **Comprehensive Technical Indicators**: 35+ technical indicators including RSI, MACD, Bollinger Bands, Stochastic, Williams %R, CCI, and more
|
| 11 |
- **Latest News Analysis**: Get recent news headlines for selected stocks
|
| 12 |
- **AI-Powered Chat Interface**: Chat with a financial agent powered by Mistral via OpenRouter
|
| 13 |
- **MCP Server Integration**: Modular architecture with separate MCP servers for stock data and news
|
|
|
|
| 14 |
- **System Resource Monitoring**: Real-time monitoring of CPU, memory, disk, and network usage
|
| 15 |
- **Stock Search & Discovery**: Search for custom tickers and browse popular stocks
|
| 16 |
- **Caching & Performance**: Intelligent caching for charts and news to improve performance
|
| 17 |
+
- **Feature Scaling**: StandardScaler for optimal model performance
|
| 18 |
+
- **Cross-Validation**: GridSearchCV for hyperparameter tuning
|
| 19 |
+
|
| 20 |
+
## Machine Learning Model
|
| 21 |
+
|
| 22 |
+
### Ridge Regression with Enhanced Features
|
| 23 |
+
|
| 24 |
+
- **Training Data**: 5 years of historical stock data
|
| 25 |
+
- **Display Data**: Last 1 year shown in charts
|
| 26 |
+
- **Prediction Period**: 30 trading days
|
| 27 |
+
- **Features**: 35+ technical indicators including:
|
| 28 |
+
- Moving Averages (SMA 10, 20, 50, 200)
|
| 29 |
+
- Momentum Indicators (RSI, MACD, Stochastic, Williams %R, CCI)
|
| 30 |
+
- Volatility Indicators (Bollinger Bands, Price Volatility)
|
| 31 |
+
- Volume Analysis (Volume Change, Volume-Price Trend)
|
| 32 |
+
- Support/Resistance Levels
|
| 33 |
+
- Time-Based Features (Day of Week, Month, Quarter)
|
| 34 |
+
- Market Sentiment Indicators
|
| 35 |
+
|
| 36 |
+
### Model Performance
|
| 37 |
+
|
| 38 |
+
- **Regularization**: Ridge Regression with L2 regularization
|
| 39 |
+
- **Hyperparameter Tuning**: GridSearchCV with cross-validation
|
| 40 |
+
- **Feature Scaling**: StandardScaler for optimal performance
|
| 41 |
+
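- **Historical Fit**: Typically an R² of 0.80–0.95 on historical (in-sample) data; this measures how well the model fits past prices, not how accurate its forecasts will be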
|
| 42 |
+
- **Training Time**: ~2-5 seconds per stock
|
| 43 |
|
| 44 |
## Setup
|
| 45 |
|
|
|
|
| 78 |
|
| 79 |
1. Open the web interface in your browser
|
| 80 |
2. Select a stock ticker from the dropdown in the sidebar or search for a custom ticker
|
| 81 |
+
3. View the interactive stock price chart showing:
|
| 82 |
+
- Last 1 year of historical data
|
| 83 |
+
- 30-day Ridge Regression predictions
|
| 84 |
+
- Model performance metrics
|
| 85 |
4. Start chatting with the financial agent about the selected stock
|
| 86 |
5. Ask questions like:
|
| 87 |
- "How is this stock performing?"
|
|
|
|
| 93 |
|
| 94 |
- **Frontend**: Streamlit web interface with interactive charts
|
| 95 |
- **Backend**: Python with OpenRouter integration
|
| 96 |
+
- **ML Pipeline**: Ridge Regression with scikit-learn
|
| 97 |
- **Data Sources**:
|
| 98 |
- Stock data via `yfinance`
|
| 99 |
- News data via `gnews`
|
|
|
|
| 103 |
|
| 104 |
## Files
|
| 105 |
|
| 106 |
+
- `Home.py`: Main Streamlit web application with ML predictions
|
| 107 |
- `stock_data_server.py`: MCP server for stock data
|
| 108 |
- `news_server.py`: MCP server for news data
|
| 109 |
- `resource_monitor.py`: System resource monitoring
|
| 110 |
- `pages/System_Monitor.py`: System monitoring dashboard
|
| 111 |
+
- `stock_data_linear_regression.ipynb`: Jupyter notebook with original ML approach
|
| 112 |
- `requirements.txt`: Python dependencies
|
| 113 |
- `pyproject.toml`: Project configuration
|
| 114 |
|
|
|
|
| 118 |
- **yfinance**: Stock data fetching
|
| 119 |
- **gnews**: News data fetching
|
| 120 |
- **plotly**: Interactive charts
|
| 121 |
+
- **scikit-learn**: Machine learning (Ridge Regression, StandardScaler, GridSearchCV)
|
| 122 |
+
- **pandas**: Data manipulation
|
| 123 |
+
- **numpy**: Numerical computations
|
| 124 |
- **psutil**: System monitoring
|
| 125 |
- **openai**: AI model integration
|
| 126 |
- **fastmcp**: MCP server framework
|
| 127 |
|
| 128 |
+
## Technical Indicators Used
|
| 129 |
+
|
| 130 |
+
### Price-Based Features
|
| 131 |
+
|
| 132 |
+
- Simple Moving Averages (10, 20, 50, 200-day)
|
| 133 |
+
- Price Change (1, 5, 20-day)
|
| 134 |
+
- Price Volatility and Range
|
| 135 |
+
- Support/Resistance Levels
|
| 136 |
+
|
| 137 |
+
### Momentum Indicators
|
| 138 |
+
|
| 139 |
+
- Relative Strength Index (RSI)
|
| 140 |
+
- Moving Average Convergence Divergence (MACD)
|
| 141 |
+
- Stochastic Oscillator (K% and D%)
|
| 142 |
+
- Williams %R
|
| 143 |
+
- Commodity Channel Index (CCI)
|
| 144 |
+
|
| 145 |
+
### Volatility Indicators
|
| 146 |
+
|
| 147 |
+
- Bollinger Bands (Standard Deviation, Position, Squeeze)
|
| 148 |
+
- Price Volatility
|
| 149 |
+
- Price Range
|
| 150 |
+
|
| 151 |
+
### Volume Analysis
|
| 152 |
+
|
| 153 |
+
- Volume Change and Trends
|
| 154 |
+
- Volume-Price Relationship
|
| 155 |
+
- Volume Moving Averages
|
| 156 |
+
- Volume Spikes
|
| 157 |
+
|
| 158 |
+
### Market Sentiment
|
| 159 |
+
|
| 160 |
+
- Moving Average Crossovers
|
| 161 |
+
- Price vs Long-term Averages
|
| 162 |
+
- Time-based Patterns
|
| 163 |
+
|
| 164 |
## System Requirements
|
| 165 |
|
| 166 |
- Python 3.10 or higher
|
| 167 |
- OpenRouter API key
|
| 168 |
- Internet connection for real-time data
|
| 169 |
- Optional: psutil for system monitoring features
|
| 170 |
+
|
| 171 |
+
## Disclaimer
|
| 172 |
+
|
| 173 |
+
Stock predictions have approximately 70% accuracy, matching the disclaimer shown in the app. These forecasts are for informational purposes only and should not be used as the sole basis for investment decisions. Always conduct your own research and consider consulting with financial advisors.
|
streamlit_app.py
ADDED
|
File without changes
|