Spaces:
Running
Running
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import streamlit as st | |
| import yfinance as yf | |
| from plotly.subplots import make_subplots | |
| from scipy.stats import norm | |
| import numpy as np | |
def calculate_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add MACD and signal-line columns to a price DataFrame.

    The MACD line is the difference between a short-span and a long-span
    exponential moving average (EMA) of the 'Close' column; the signal line
    is an EMA of the MACD line itself. All EMAs use the recursive
    (``adjust=False``) form.

    Parameters:
    data (pd.DataFrame): Price data; must contain a 'Close' column.
    short_window (int): Span of the faster EMA. Default is 12.
    long_window (int): Span of the slower EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD line. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, with the
    columns 'MACD' and 'Signal_Line' added (or overwritten).
    """
    close = data["Close"]

    # Fast and slow EMAs of the closing price
    fast_ema = close.ewm(span=short_window, adjust=False).mean()
    slow_ema = close.ewm(span=long_window, adjust=False).mean()

    # MACD line, and its own EMA as the signal line
    macd_line = fast_ema - slow_ema
    data["MACD"] = macd_line
    data["Signal_Line"] = macd_line.ewm(span=signal_window, adjust=False).mean()
    return data
def calculate_normalized_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add z-score-normalized MACD and signal-line columns to a price DataFrame.

    The raw MACD line is the short-span EMA of 'Close' minus the long-span
    EMA of 'Close'; the raw signal line is an EMA of that MACD line. Each of
    the two series is then standardized independently by subtracting its own
    mean and dividing by its own standard deviation over the whole frame.

    Parameters:
    data (pd.DataFrame): Price data; must contain a 'Close' column.
    short_window (int): Span of the faster EMA. Default is 12.
    long_window (int): Span of the slower EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD line. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, with the
    columns 'MACD' and 'Signal_Line' holding the standardized values.
    """
    close = data["Close"]

    # Raw indicator series; the signal line is derived from the *raw* MACD,
    # before any normalization is applied.
    fast_ema = close.ewm(span=short_window, adjust=False).mean()
    slow_ema = close.ewm(span=long_window, adjust=False).mean()
    raw_macd = fast_ema - slow_ema
    raw_signal = raw_macd.ewm(span=signal_window, adjust=False).mean()

    # Standardize each series against its own mean / standard deviation
    for column, series in (("MACD", raw_macd), ("Signal_Line", raw_signal)):
        data[column] = (series - series.mean()) / series.std()
    return data
def calculate_percentile_macd(
    data: pd.DataFrame,
    short_window: int = 12,
    long_window: int = 26,
    signal_window: int = 9,
) -> pd.DataFrame:
    """
    Add percentile-scaled MACD and signal-line columns to a price DataFrame.

    Steps:
    1. Raw MACD = short-span EMA of 'Close' minus long-span EMA of 'Close'.
    2. Raw signal line = EMA of the raw MACD.
    3. Each series is z-scored against its own mean and standard deviation.
    4. Each z-score is mapped through the standard normal CDF and rescaled
       linearly from [0, 1] to [-100, +100].

    Parameters:
    data (pd.DataFrame): Price data; must contain a 'Close' column.
    short_window (int): Span of the faster EMA. Default is 12.
    long_window (int): Span of the slower EMA. Default is 26.
    signal_window (int): Span of the EMA applied to the MACD line. Default is 9.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, with the
    columns 'MACD' and 'Signal_Line' holding values between -100 and +100.
    """
    close = data["Close"]

    # Raw indicator series; the signal line is built from the raw MACD
    fast_ema = close.ewm(span=short_window, adjust=False).mean()
    slow_ema = close.ewm(span=long_window, adjust=False).mean()
    raw_macd = fast_ema - slow_ema
    raw_signal = raw_macd.ewm(span=signal_window, adjust=False).mean()

    for column, series in (("MACD", raw_macd), ("Signal_Line", raw_signal)):
        # z-score, then normal CDF (0..1) stretched to -100..+100 so that a
        # value sitting at the mean maps to 0
        z_scores = (series - series.mean()) / series.std()
        data[column] = norm.cdf(z_scores) * 200 - 100
    return data
def find_crossovers(
    df: pd.DataFrame, bullish_threshold: float, bearish_threshold: float
) -> pd.DataFrame:
    """
    Flag bullish and bearish crossovers between the MACD and its signal line.

    A row is a bullish crossover (+1) when MACD is above the signal line on
    that row, was below it on the previous row, and the signal line on that
    row is *below* ``bullish_threshold``.

    A row is a bearish crossover (-1) when MACD is below the signal line on
    that row, was above it on the previous row, and the signal line on that
    row is *above* ``bearish_threshold``.

    All other rows get 0. The first row can never be a crossover because it
    has no previous row to compare against.

    Parameters:
    df (pd.DataFrame): Must contain the columns 'MACD' and 'Signal_Line'.
    bullish_threshold (float): Upper bound on the signal line for an upward
        cross to count as bullish.
    bearish_threshold (float): Lower bound on the signal line for a downward
        cross to count as bearish.

    Returns:
    pd.DataFrame: The same DataFrame object that was passed in, with an
    integer 'Crossover' column holding +1, -1 or 0.
    """
    macd = df["MACD"]
    signal = df["Signal_Line"]
    prev_macd = macd.shift()
    prev_signal = signal.shift()

    # Comparisons against the NaN produced by shift() are False, so the
    # first row is never flagged.
    crossed_up = (macd > signal) & (prev_macd < prev_signal)
    crossed_down = (macd < signal) & (prev_macd > prev_signal)

    bullish = crossed_up & (signal < bullish_threshold)
    bearish = crossed_down & (signal > bearish_threshold)

    df["Crossover"] = 0
    # Assign through positional boolean masks rather than index labels:
    # label-based assignment would also mark every other row that happens to
    # share an index label with a crossover row.
    df.loc[bullish.to_numpy(), "Crossover"] = 1
    df.loc[bearish.to_numpy(), "Crossover"] = -1
    return df
def get_fundamentals(ticker: str):
    """
    Retrieve the three core financial statements for a stock via yfinance.

    Parameters:
    ticker (str): The stock symbol to look up.

    Returns:
    tuple: ``(income_statement, balance_sheet, cash_flow)`` as exposed by the
    ``income_stmt``, ``balance_sheet`` and ``cashflow`` attributes of
    ``yfinance.Ticker``, in that order.
    """
    company = yf.Ticker(ticker)

    # Pull each statement from the Ticker object, in the order it is returned
    income_statement = company.income_stmt
    balance_sheet = company.balance_sheet
    cash_flow = company.cashflow
    return income_statement, balance_sheet, cash_flow
def create_fig(data: pd.DataFrame, ticker: str) -> go.Figure:
    """
    Build a two-panel Plotly figure: price candlesticks on top, MACD below.

    The top panel shows the candlestick series, four simple moving averages
    of 'Close' (12, 26, 50 and 200 rows) and triangle markers at the closing
    price of every crossover row. The bottom panel shows the MACD line, the
    signal line and the same crossover markers placed at the MACD value.

    Parameters:
    data (pandas.DataFrame): Must contain 'Open', 'High', 'Low', 'Close',
        'MACD', 'Signal_Line' and 'Crossover' columns. The moving-average
        columns 'MA12', 'MA26', 'MA50' and 'MA200' are added to this frame
        in place.
    ticker (str): Stock symbol, used in the title of the top panel.

    Returns:
    plotly.graph_objs._figure.Figure: The assembled figure (height 800, range
    slider hidden).
    """
    # Simple moving averages of the close, stored back on the frame
    ma_styles = (
        (12, "magenta"),
        (26, "cyan"),
        (50, "yellow"),
        (200, "black"),
    )
    for window, _ in ma_styles:
        data[f"MA{window}"] = data["Close"].rolling(window=window).mean()

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.02,
        subplot_titles=(f"{ticker} Candlestick", "MACD"),
        row_width=[0.2, 0.7],
    )

    # Subplot coordinates of the two panels
    price_panel = {"row": 1, "col": 1}
    macd_panel = {"row": 2, "col": 1}

    # --- Price panel: candles + moving averages ---
    candles = go.Candlestick(
        x=data.index,
        open=data["Open"],
        high=data["High"],
        low=data["Low"],
        close=data["Close"],
        name="Candlestick",
    )
    fig.add_trace(candles, **price_panel)

    for window, color in ma_styles:
        ma_name = f"MA{window}"
        ma_trace = go.Scatter(
            x=data.index,
            y=data[ma_name],
            line={"color": color, "width": 1.5},
            name=f"{ma_name} days MA",
        )
        fig.add_trace(ma_trace, **price_panel)

    # --- MACD panel: MACD line + signal line ---
    indicator_lines = (
        ("MACD", "blue", "MACD"),
        ("Signal_Line", "orange", "Signal Line"),
    )
    for column, color, label in indicator_lines:
        line_trace = go.Scatter(
            x=data.index,
            y=data[column],
            line={"color": color, "width": 2},
            name=label,
        )
        fig.add_trace(line_trace, **macd_panel)

    # --- Crossover markers: first on the MACD panel, then on the price panel ---
    bullish_rows = data[data["Crossover"] == 1]
    bearish_rows = data[data["Crossover"] == -1]
    marker_layers = (
        # (y column, marker size, panel, bullish label, bearish label)
        (
            "MACD",
            20,
            macd_panel,
            "Bullish Crossover (MACD) ✅",
            "Bearish Crossover (MACD) 🈲",
        ),
        (
            "Close",
            25,
            price_panel,
            "Bullish Crossover (Close) ✅",
            "Bearish Crossover (Close) 🈲",
        ),
    )
    for column, size, panel, bullish_label, bearish_label in marker_layers:
        up_markers = go.Scatter(
            mode="markers",
            x=bullish_rows.index,
            y=bullish_rows[column],
            marker_symbol="triangle-up",
            marker_color="green",
            marker_size=size,
            name=bullish_label,
        )
        fig.add_trace(up_markers, **panel)

        down_markers = go.Scatter(
            mode="markers",
            x=bearish_rows.index,
            y=bearish_rows[column],
            marker_symbol="triangle-down",
            marker_color="red",
            marker_size=size,
            name=bearish_label,
        )
        fig.add_trace(down_markers, **panel)

    # Hide the default candlestick range slider and fix the overall height
    fig.update_layout(
        xaxis_rangeslider_visible=False,
        height=800,
    )
    return fig
def generate_simulated_data(data: pd.DataFrame, num_days: int) -> pd.DataFrame:
    """
    Extend a date-indexed DataFrame with simulated future rows.

    For every column, period-over-period simple returns are computed from the
    historical values, and their mean and standard deviation are used to draw
    ``num_days`` normally distributed random returns. The draws are compounded
    (cumulative product of ``1 + return``) and multiplied by the last observed
    value of the column, so the simulated path continues from where the
    history ends.

    NOTE(review): this treats ``data`` as a table of levels (e.g. prices).
    The previous implementation drew "returns" using the mean and standard
    deviation of the levels themselves and appended bare growth factors,
    which does not match the documented intent; confirm against callers.

    Parameters:
    data (pandas.DataFrame): Historical numeric data with a date-based index
        and at least two rows.
    num_days (int): Number of calendar days to simulate after the last
        historical date.

    Returns:
    pandas.DataFrame: The historical rows followed by ``num_days`` simulated
    rows, one per calendar day. The input frame is not modified.

    Raises:
    ValueError: If ``data`` has fewer than two rows, since no return can be
    estimated from it.
    """
    if len(data) < 2:
        raise ValueError(
            "generate_simulated_data needs at least two historical rows "
            "to estimate returns"
        )

    # Simple returns between consecutive rows; the first row has no
    # predecessor and is dropped. Infinite returns (previous value of 0) are
    # ignored when estimating the statistics.
    returns = (data / data.shift(1) - 1).iloc[1:]
    returns = returns.replace([np.inf, -np.inf], np.nan)
    means = returns.mean()
    # With a single usable return the sample std is NaN; treat as no volatility
    stds = returns.std().fillna(0.0)

    # One draw per (future day, column); loc/scale broadcast across columns
    random_returns = np.random.normal(
        loc=means.to_numpy(dtype=float),
        scale=stds.to_numpy(dtype=float),
        size=(num_days, data.shape[1]),
    )

    # Compound the returns and anchor the path at the last observed values
    growth_factors = np.cumprod(1.0 + random_returns, axis=0)
    last_values = data.iloc[-1].to_numpy(dtype=float)
    future_values = growth_factors * last_values

    # Simulated rows start the calendar day after the last historical date
    future_dates = pd.date_range(
        start=data.index[-1] + pd.DateOffset(days=1), periods=num_days
    )
    future_data = pd.DataFrame(
        future_values, index=future_dates, columns=data.columns
    )
    return pd.concat([data, future_data])