Spaces:

gluo88
/

performance

Sleeping

App Files Files Community

gluo88 commited on Feb 24, 2025

Commit

a3d0cbf

verified ·

1 Parent(s): aea9f49

Update performance.py

Browse files

temporary version for years_list = [1, 2, 3, 5, 10, 15, 20, 25, 30, 40, 50, 60]

Files changed (1) hide show

performance.py +316 -208

performance.py CHANGED Viewed

@@ -1,176 +1,186 @@
 '''
-Example 8 for using yfinance
 Calculate annual, trailing, cumumlative, and CAGR returns for multiple stocks.
 * The start date can be an arbitrary date.  The default is the current date.
 * annual return is displayed from the default current day, or an arbitrary given
-  day (except for Feb 29 for leap year) TODO-fix
 * trailing, cumumlative returns are currently displayed from the month boundary (last day of Month)
   prior to the given date.
 * However, trailing, cumumlative returns can be displayed
   from any date, which can be not at the month boundary (last day of Month),
   by minor change of setting calculation_end_date_for_others_str = calculation_end_date_str.
   prior to the given date in the function "calculation_response(message, history)"
 Author: Gang Luo
 '''
-script_version = '(2024-01-26.1)'
 import gradio as gr
 import yfinance as yf
 import pandas as pd
 import numpy as np
 from datetime import datetime, timedelta
 import pytz
 #==============================================================================
 print_yearly_total_return = True
-num_years_calculation=32   # total years for calculation
 # Define a list of years to calculate the trailing returns, cumulative returns, and so on
 # remove the row of current year row since it is not a full year.
-years_list = [1, 2, 3, 5, 10, 15, 20, 25, 30]
 # Set the stock tickers list
-tickers_lists = [["qqq","hxq.to","spy", "vfv.to","xiu.to", "xbb.to","xcb.to","xhb.to"], #0
-    ["spy", "vfv.to", "vgg.to", "zlu.to", "xiu.to", "vdy.to", "xfn.to", "ry.to", "td.to", "na.to",
-      "slf.to", "gwo.to", "bce.to", "t.to", "rci-b.to", "enb.to", "trp.to", "zlb.to", "cp.to"], #1
-    ["spy","vfv.to", "xiu.to", "zeb.to", "xfn.to", "na.to","ry.to", "bmo.to","bns.to", "td.to", "cm.to", "cwb.to",
-      "slf.to", "gwo.to", "bce.to", "t.to", "rci-b.to", "enb.to", "trp.to", "xdv.to","cdz.to","vdy.to"],  #2
-    ["qqq","spy", "vfv.to", "vgg.to","zlu.to","msft","AAPL","goog","AMZN","NVDA","meta","tsla","shop.to","hxq.to"],   #3
-    ["^GSPC","spy","voo","ivv", "tpu-u.to","vfv.to", "zsp.to","hxs.to","tpu.to","xus.to", "xsp.to","^IXIC","qqq","hxq.to","^GSPTSE","xic.to","xiu.to","xfn.to", "fie.to"], #4
-    ["^IXIC","ONEQ","CIBR","QQJG", "qqq", "spy", "vfv.to", "HXQ.to", "ZQQ.to", "XQQ.to", "QQC.to"],  #5
-    ["goog", "msft", "^GSPC"]
 ]
 #==============================================================================
-# Part 1: fetch retrieve yearly total returns by yfinance & display
-# Function to fetch data from yfinance and extract yearly total returns#
-# annual return calculation can start at any given day
-def get_annual_returns_df(ticker, calculation_end_date_str):
-    # Get the historical data for the given ticker
-    stock = yf.Ticker(ticker)
-    calculation_end_date=pd.to_datetime(calculation_end_date_str).tz_localize('America/New_York')
     try:
         '''
-        'try' statement for handlingy the exception error of stock.history that a ticker is not yet at stock market,
-        For example, "shop.to" is not there in 2012
-        '''
-        stock_history=stock.history(period="max")["Close"]
         '''
-        Between the start and end days in stock_history variable, there are some missing days where there are no corresponding rows.
-        Add rows of  missing  days such that the values of column "Close" are set to be the value of the closest earlier day's
-        value, by using date_range to create full range without any missing date.
-        '''
-        # Create a DataFrame with a complete date range
-        date_range = pd.date_range(start=stock_history.index.min(), end=stock_history.index.max(), freq='D')
-        complete_stock_history = pd.DataFrame(index=date_range)
-        # Merge the complete DataFrame with the original stock_history
-        complete_stock_history = complete_stock_history.merge(stock_history, how='left', left_index=True, right_index=True)
-        complete_stock_history['Close'] = complete_stock_history['Close'].ffill()  # fill the newy added rows with previous day value
-        '''
-        Filter out the rows that matches the month and date of calculation_end_date, which are the ends of
-        annual periods from the calculation_end_date.
-        '''
-        # Filter out rows with dates newer than calculation_end_date
-        filtered_stock_history = complete_stock_history[complete_stock_history.index <= calculation_end_date]
-        #print(filtered_stock_history)
-        target_month=filtered_stock_history.index.max().month
-        target_day=filtered_stock_history.index.max().day
-        #print("target_month", target_month, "target_day",target_day, "start_year", filtered_stock_history.index.max().year)
-        annual_returns = filtered_stock_history[(filtered_stock_history.index.month == target_month)
-           & (filtered_stock_history.index.day ==target_day)]
-        annual_returns_percent = annual_returns.pct_change().dropna()
     except:
         return  pd.DataFrame()
     else:
-        annual_returns_df = pd.DataFrame(annual_returns_percent, columns=['Close'])
-        annual_returns_df.rename(columns={'Close': ticker}, inplace=True)
-        return annual_returns_df
-# Function to fetch data from yfinance and extract yearly total returns
-# annual return calculation starts at only yaer end boundary, i.e, Dec 31,
-# by resample('A')
-def get_annual_returns_year_boundary_df(ticker, calculation_end_date_str):
-    # Get the historical data for the given ticker
-    stock = yf.Ticker(ticker)
-    calculation_end_date = datetime.strptime(calculation_end_date_str, "%Y-%m-%d")
-    calculation_start_date_str = (calculation_end_date
-                - timedelta(days=num_years_calculation * 365)).strftime("%Y-%m-%d")
-    try:
-        '''
-        1.  'try' statement for handlingy the exception error of stock.history that a ticker is not yet at stock market,
-             For example, "shop.to" is not there in 2012
-        2. The row with the latest day from .history(.., end='end_day_date') is the day prior to end_day_date.  Therefore,
-           let end=the expected end day plus one day.
-        '''
-        calculation_end_date_plus_1day_str =  (calculation_end_date + timedelta(days=1)).strftime("%Y-%m-%d")
-        annual_returns_history=stock.history(start=calculation_start_date_str,end=calculation_end_date_plus_1day_str)["Close"]
-        #print("debug get_annual_returns_df ", ticker, annual_returns_history)
-        # For 'A', 'Y', see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
-        ffilled_history=annual_returns = annual_returns_history.resample('A').ffill()
-        #print(ffilled_history)
-        annual_returns = ffilled_history.pct_change().dropna()
-        #annual_returns = annual_returns_history.resample('A').ffill().pct_change().dropna()
-        #print("debug get_annual_returns_df after resample()", ticker, calculation_end_date, "\n", annual_returns)
-    except:
-        return  pd.DataFrame()
-    else:
-        annual_returns_df = pd.DataFrame(annual_returns, columns=['Close'])
-        annual_returns_df.rename(columns={'Close': ticker}, inplace=True)
-        return annual_returns_df
-#----------------------------------------------------------------------------------
-# handling a list of tickers by calling the functions (either get_annual_returns_df
-# get_annual_returns_year_boundary_df) that handle single tickers
-def get_annual_returns_tickers_common_df(tickers, calculation_end_date_str, annual_returns_func_df):
-    # Create an empty DataFrame to store all tickers' total returns
-    all_tickers_returns_df = pd.DataFrame()
-    # Loop through each ticker in the list
-    for ticker in tickers:
-        ticker_returns_df = annual_returns_func_df(ticker, calculation_end_date_str)
-        if not ticker_returns_df.empty:
-            if all_tickers_returns_df.empty:
-                all_tickers_returns_df = ticker_returns_df
-            else:
-                '''
-                When running in  huggingface, pd.concat changed the index order of ticker_returns_df
-                when ticker_returns_df has more rows than all_tickers_returns_df. However, it is ok
-                running in colab.  Therefore, use pd.merge to replace pd.concat.
-                all_tickers_returns_df = pd.concat([all_tickers_returns_df, ticker_returns_df],axis=1,join='outer')  # Concatenate DataFrames
-                all_tickers_returns_df.sort_index() # index may be changed when running in huggingface
-                '''
-                all_tickers_returns_df = pd.merge(all_tickers_returns_df, ticker_returns_df,
-                        left_index=True, right_index=True, how='outer')
-        else:
-            # New column with NaN values
-            new_column_name = ticker
-            new_column_values = [None] * len(all_tickers_returns_df)
-            new_column = pd.DataFrame({new_column_name: new_column_values}, index=all_tickers_returns_df.index)
-            # Concatenate the new column to the original DataFrame
-            all_tickers_returns_df = pd.concat([all_tickers_returns_df, new_column], axis=1)
-    #return date_label_conversion_strip_time(all_tickers_returns_df, calculation_end_date_str)
-    return all_tickers_returns_df
-def get_annual_returns_tickers_df(tickers, calculation_end_date_str):
-    return get_annual_returns_tickers_common_df(tickers, calculation_end_date_str,
-                                                get_annual_returns_df)
-def get_annual_returns_tickers_year_boundary_df(tickers, calculation_end_date_str):
-    return get_annual_returns_tickers_common_df(tickers, calculation_end_date_str,
-                                                get_annual_returns_year_boundary_df)
 #==============================================================================
-# Part 2: calculate the annualized trailing total return from the data generated in step 1 & display
 # Define a function to calculate the annualized trailing total return for a given number of years
 def get_trailing_return(ticker, data, years):
     # Get the total return values for the last n years
     trailing_data = data[ticker].tail(years)
     # Check if there are empty values within years
     if trailing_data.isna().any():
-        return "N/A"
     # Check if there are valid total return values for all years
     if len(trailing_data) == years:
         # Convert the percentage strings to numeric values
@@ -185,7 +195,7 @@ def get_trailing_return(ticker, data, years):
         annualized_trailing_return = annualized_trailing_return.round(2)
         return annualized_trailing_return
     else:
-        return "N/A"
 # Define a function to Loop through the list and print the trailing returns for each num_years
 def get_trailing_return_column(ticker, annual_returns_df):
@@ -199,13 +209,13 @@ def get_trailing_return_column(ticker, annual_returns_df):
             trailing_return_column[f"{num_years}-Year"] = trailing_return
         else:
             print(f"Data not available for {ticker}. Skipping.")
-            trailing_return_column[f"{num_years}-Year"] = "N/A"
     return trailing_return_column
 # Create an empty DataFrame to store all tickers' trailing returns
-def get_trailing_return_all(tickers, annual_returns_df):
     all_tickers_trailing_returns_df = pd.DataFrame(index=years_list)
     # Loop through each ticker in the list
     for ticker in tickers:
         trailing_returns = get_trailing_return_column(ticker, annual_returns_df)
@@ -214,7 +224,7 @@ def get_trailing_return_all(tickers, annual_returns_df):
     return all_tickers_trailing_returns_df
 #==============================================================================
-# Part 3: calculate the cumulative return from the data (all_tickers_returns_df) generated in part 1 & display
 #  Define a function to calculate the cumulative return for a given number of years from a ticker
 def get_cumulative_return(ticker, data, years):
     # Calculate the cumulative return
@@ -231,9 +241,10 @@ def get_cumulative_return_column(ticker, annual_returns_df):
         cumulative_returns[years] = cumulative_return.iloc[-1]
     return cumulative_returns
-def get_cumulative_return_all(tickers, annual_returns_df):
     # Create an empty DataFrame with years_list as the index for cumulative  returns
     all_tickers_cumulative_returns_df = pd.DataFrame(index=years_list)
     # Loop through each ticker in the list
     for ticker in tickers:
         cumulative_returns = get_cumulative_return_column(ticker, annual_returns_df)
@@ -242,7 +253,7 @@ def get_cumulative_return_all(tickers, annual_returns_df):
     return all_tickers_cumulative_returns_df
 #==============================================================================
-# Part 4: calculate the  CAGR (Compound Annual Growth Rate) from the data
 # in all_tickers_cumulative_returns_df generated earlier & display
 # Define a function to calculate the CAGR from the cumulative value and the years
 def calculate_cagr(value, years):
@@ -264,46 +275,6 @@ def get_cagr_return_all(all_tickers_cumulative_returns_df):
     all_tickers_cagrs_df = all_tickers_cumulative_returns_df.apply(lambda x: calculate_cagr(x, x.index), axis=0)
     return all_tickers_cagrs_df
-#==============================================================================
-# Part 5: utility functions
-# get the last trading day of S&P 500 in string format
-def get_last_trading_day():
-    # Get today's date, use .strftime("%Y-%m-%d") to convert to a string
-    today_date_str=datetime.now(pytz.timezone('America/New_York')).date().strftime("%Y-%m-%d")
-    stock = yf.Ticker("^GSPC") # S&P 500 (^GSPC) ticker
-    #  search and see yfinance_BUG_1 NOTE in this file
-    history_df=stock.history(period="max", end=today_date_str)["Close"]
-    last_trading_day_str = history_df.index.max().date().strftime("%Y-%m-%d")
-    return last_trading_day_str
-def str_to_integer(integer_str):
-    try:
-        integer_number = int(integer_str)
-        return integer_number
-    except ValueError:
-        return -1
-# validate the date string
-def is_valid_date(date_string):
-    try:
-        # Attempt to parse the date string
-        datetime.strptime(date_string, "%Y-%m-%d")
-        return True
-    except ValueError:
-        # Raised when the date string is not in the expected format
-        return False
-def date_label_conversion_strip_time(all_tickers_returns_df, calculation_end_date_str):
-    all_tickers_returns_df.index=all_tickers_returns_df.index.date
-    all_tickers_returns_df.index.name='date'
-    # print("debug get_annual_returns_tickers_df", all_tickers_returns_df)
-    # Convert calculation_end_date_str to a datetime object, replace the index's mon/day portion of date
-    end_date_datetime_obj = datetime.strptime(calculation_end_date_str, "%Y-%m-%d")
-    all_tickers_returns_df.index = all_tickers_returns_df.index.map(
-      lambda x: x.replace(month=end_date_datetime_obj.month,
-      day=end_date_datetime_obj.day))
-    return all_tickers_returns_df
 #==============================================================================
 # Part 6:
 # single ticker's Prices, Returns,Dividends, good for verifying whether "Adj Close" is correct.
@@ -317,14 +288,21 @@ def date_label_conversion_strip_time(all_tickers_returns_df, calculation_end_dat
 def get_yearly_single_stock_data(ticker):
     stock = yf.Ticker(ticker)
     #-------- mainly for downloading 'Dividends'
-    history = stock.history(period="max")
     dividend_history=history['Dividends']
     dividend_history.index=dividend_history.index.date
     #-------- mainly for downloading 'Close','Adj Close'
-    dld_history=yf.download(ticker, period="max")
     dld_history=dld_history[['Close','Adj Close']]
     dld_history.rename(columns={'Adj Close': 'AdjClose'}, inplace=True)
     date_range = pd.date_range(start=dld_history.index.min(), end=dld_history.index.max(), freq='D')
     complete_history = pd.DataFrame(index=date_range)
@@ -368,18 +346,58 @@ def get_yearly_single_stock_data(ticker):
     return yearly_data
 #==============================================================================
-# Part 7: gradio handling - Input command handling and display in web page
 help_info_str="Input Formats:\n  \
             1. ticker list....................Example:  spy vfv.to xiu.to xic.to xfn.to ry.to \n \
-            2. One of default ticker list, a number between 1 and 5....Example:   0, or 1, ...,5 \n \
             3. CalculationEndDate as prefix.  Example:   2020-12-31 2 \n \
             .........................................2020-12-31 spy vfv.to xiu.to xic.to xfn.to ry.to \n \
             4. single ticker: Dividend/Close/AdjClose/Return/TotalReturn/CalReturn(by Close/Dividends).  @1 spy \n \
-            note: daily adjusted close data are from Yahoo Finance. "
-# Gradio Web interface
-def calculation_response(message, history):
     # if there is no input, display help information
     if message=="":
         return help_info_str
@@ -429,10 +447,17 @@ def calculation_response(message, history):
     # calculation_end_date_for_others are for trailing and cumulative returns
     calculation_end_date_for_others_str=calculation_end_date_month_boundary_date_str
-    ''' TODO handling Feb 29 of leap year.
-    Check if involved dates (in calculation_end_date_str and calculation_end_date_for_others_str),
-    Feb 28 will be used to replace Feb 29 for calculation
     '''
     #................End
     # Check whether numebr 0, 1, 2, .. is selected for using a default ticker list
@@ -443,47 +468,130 @@ def calculation_response(message, history):
     # if no tickers were set, display help information
     if len(tickers)==0:
         return help_info_str
     #*********************************************************************************
-    # Calculating Annual, Trailing, Cumulative, and CAGR & generating html for display
-    # annual_returns - at year end boundard, to be displayed
     output_string = f"\nAnnual Total Return (%) as {calculation_end_date_str}\n"
-    output_dataframe = get_annual_returns_tickers_year_boundary_df(tickers, calculation_end_date_str)
     output_dataframe = output_dataframe.round(4)*100
-    output_dataframe.index=output_dataframe.index.date
     # Assuming your DataFrame is named output_dataframe
     last_date = output_dataframe.index[-1]
     output_dataframe = output_dataframe.rename(index={last_date: calculation_end_date_str})
     # Convert the DataFrame to HTML, Combine the expected string outputs
-    output_html1 = output_string + output_dataframe.to_html()
     # annual_returns  - at any given day, for calculating trailing and cumulative returns, not to be displayed
-    annual_returns_dataframe=get_annual_returns_tickers_df(tickers, calculation_end_date_for_others_str)
     # Trailing Return
-    output_string2 = f"\nTrailing Total Return (%) as {calculation_end_date_for_others_str}\n"
-    output_dataframe2=get_trailing_return_all(tickers, annual_returns_dataframe)
     # Insert an empty to align the ticker symbols with annual return display
-    output_dataframe2.insert(0, "--------", "      ")
-    output_dataframe2.index.name="years"
-    output_html2=output_string2 + output_dataframe2.to_html()
-    # Cumulative Return
-    output_string3 = f"\nCumulative Return (%) as {calculation_end_date_for_others_str}\n"
-    cumulative_return_all_dataframe=get_cumulative_return_all(tickers, annual_returns_dataframe)
-    output_dataframe3=cumulative_return_all_dataframe.round(4)*100
-    output_dataframe3.index.name="years"
     output_html3=output_string3 + output_dataframe3.to_html()
-    # CAGR Return
-    output_string4 = f"\nCompound Annual Growth Rate (CAGR) (%) as {calculation_end_date_for_others_str}\n"
-    output_dataframe4=get_cagr_return_all (cumulative_return_all_dataframe)
-    output_dataframe4=output_dataframe4.round(4)*100
     output_html4=output_string4 + output_dataframe4.to_html()
-    #output_html = output_html1 + output_html2 + output_html3 + output_html4
-    output_html = output_html1 + output_html2 + output_html3
     return  output_html
-demo = gr.ChatInterface(calculation_response)
-demo.launch(debug=False, share=False)

 '''
+Example 9 for using yfinance
 Calculate annual, trailing, cumulative, and CAGR returns for multiple stocks.
 * The start date can be an arbitrary date.  The default is the current date.
 * Annual returns are displayed as of the current day (the default) or an arbitrary given
+  day (except for Feb 29 in a leap year).
+  For leap years, Feb 28 is used in place of Feb 29 as a simplification and approximation.
 * Trailing and cumulative returns are currently displayed as of the month boundary (last day of the month)
   prior to the given date.
 * However, trailing and cumulative returns can be displayed as of any date,
   not only a month boundary (last day of the month), by a minor change in the function
   "calculation_response(message, history)": set
   calculation_end_date_for_others_str = calculation_end_date_str.
 Author: Gang Luo
+yfinance References:
+  code:      https://github.com/ranaroussi/yfinance
+  project:   https://pypi.org/project/yfinance/
+  Guide:     https://algotrading101.com/learn/yfinance-guide/
+Revision history:
+2025-02.23.1444: Fixed the missing "Adj Close" column in the results of yf.download and yf.Ticker("AAPL"),
+   caused by https://github.com/ranaroussi/yfinance/issues/2283, which was introduced in
+   yfinance version 0.2.54 (released on Feb 18, 2025).
+2025-02.23.1655: Further fix for the issues from https://github.com/ranaroussi/yfinance/issues/2283.
+   The "Adj Close" column is missing from yf.download because the yf.download default changed
+   from auto_adjust=False to auto_adjust=True. When auto_adjust=True, the Close column is actually Adj Close and
+   the Adj Close column no longer exists.
+   The "Adj Close" column is also missing when using ticker = yf.Ticker("AAPL"); data = ticker.history(period="1y").
+   Fix 1: To fix the issue in the function stock_prices_df, auto_adjust=False is passed explicitly to the download function to get the Adj Close column back.
+   Fix 2: The function "get_yearly_single_stock_data" in part 6 is broken due to the missing "Adj Close" column
+     from ticker = yf.Ticker() and ticker.history().  auto_adjust=False was added to ticker.history(..., auto_adjust=False)
+     to fix the issue.  However, after that fix, the following line in part 6 raised an error:
+     complete_history = complete_history.merge(dld_history, how='left', left_index=True, right_index=True)
+     The root cause is that the columns of dld_history are a MultiIndex(, names=['Price', 'Ticker']). However, each price column
+     such as 'Close', 'AdjClose' has only a single level, with Ticker being the column index name.
+     Dropping the column MultiIndex level 'Ticker' fixed the issue (dld_history.columns = dld_history.columns.droplevel(1)).
+     print("\n===== DataFrame Structure Information for debug =====")
+     print("Index Levels:", dld_history.index.names)   # Shows the index levels
+     print("Index:", dld_history.index)                # Shows the actual index
+     print("Columns:", dld_history.columns)            # Shows column names
+     print("Data Types:\n", dld_history.dtypes)        # Shows data types of each column
+     print("Shape (Rows, Columns):", dld_history.shape)  # Shows the shape of the DataFrame
+2025-02.23.2000: Add the test cases for unit testing of part 1,2,3,4
+                 Comment out part 5 which is not used, for better performance.
+2025-02.23.2001: temporary version for years_list = [1, 2, 3, 5, 10, 15, 20, 25, 30, 40, 50, 60]
 '''
+script_version = 'version: (2025-02.23.2001)'
 import gradio as gr
 import yfinance as yf
 import pandas as pd
 import numpy as np
 from datetime import datetime, timedelta
 import pytz
+DEBUG_ENABLED = True
 #==============================================================================
 print_yearly_total_return = True
+num_years_calculation=52   # total years for calculation
 # Define a list of years to calculate the trailing returns, cumulative returns, and so on
 # remove the row of current year row since it is not a full year.
+#years_list = [1, 2, 3, 5, 10, 15, 20, 25, 30, 40, 50, 60]
+years_list = [1, 2, 3, 4, 5, 6, 7,8, 9,10, 11,12,13,14,15,16,17,18,19, 20, 25, 30, 40, 50, 60]
 # Set the stock tickers list
+tickers_lists = [["qqq","hxq.to","spy", "vfv.to","xiu.to", "xbb.to","xcb.to","xhb.to"], #0  checking ETF
+    ["qqq","spy", "vfv.to", "vgg.to", "zlu.to", "xiu.to","zlb.to","vdy.to", "xfn.to", "ry.to", "td.to", "na.to",
+      "slf.to", "gwo.to", "bce.to", "t.to", "rci-b.to", "enb.to", "trp.to","cp.to"], #1 main monitoring list
+    ["xiu.to", "xfn.to", "na.to","ry.to", "bmo.to","bns.to", "td.to", "cm.to", "cwb.to",
+      "slf.to", "gwo.to", "bce.to", "t.to", "rci-b.to", "enb.to", "trp.to", "vdy.to","xdv.to","cdz.to","xdiv.to", "zeb.to"],  #2   financial  ETF & stocks
+    ["spy","qqq","tqqq","mags","msft","AAPL","goog","AMZN","NVDA","meta","tsla","BRK-A","shop.to","hxq.to"],   #3  US mega stocks + risky shopfy
+    ["^DJI","dia","^GSPC","spy","voo","ivv", "tpu-u.to","vfv.to", "zsp.to","hxs.to","tpu.to","xus.to", "xsp.to",
+      "^IXIC","^ndx", "qqq","hxq.to","^GSPTSE","xic.to","xiu.to", "HXT.TO", "TTP.TO","ZCN.TO", "xfn.to", "xit.to"], #4   indexes and index ETFs
+    ["dia","^DJI","^GSPC","spy","vfv.to", "zsp.to","hxs.to","xus.to", "xsp.to",
+      "^IXIC","qqq","hxq.to","^GSPTSE","xic.to","xiu.to", "HXT.TO", "xfn.to"], #5   indexes and typical index ETFs
+    ["^IXIC","^ndx","ONEQ","CIBR","QQJG", "qqq", "tqqq", "spy", "vfv.to", "HXQ.to", "ZQQ.to", "XQQ.to", "QQC.to", "ZNQ.TO",
+         "xiu.to", "xit.to"],  #6   Nasdaq ETF and TSX IT ETF
+    ["qqq","tqqq","sqqq", "QLD", "spy", "spxu", "upro", "sso", "spxl","tecl"], #7 leveraged ETFs
+    ["^IXIC","^DJI","^GSPC","^GSPTSE"], #8 testing
+    ["vfv.to","spy"] #9 testing
 ]
 #==============================================================================
+# Part 1:
+#  retrieve daily adjusted close prices of a list of tickers from yahoo finance
+#  Generate the year-end adjusted close prices
+#  return year-end adjusted close prices, and  daily adjusted close prices
+def stock_prices_df(tickers_list, end_date_str):
+    tickers_list_upper = [ticker.upper() for ticker in tickers_list]
+    tickers_str = ", ".join(tickers_list_upper)
     try:
         '''
+        'try' statement for handling exceptions raised by yf.download
         '''
+        # Download the historical data,  see 2025-02.23.1655 revision note
+        data = yf.download(tickers_str, period="max", auto_adjust=False) # default changed to auto_adjust=True at yfinance version 0.2.54,
+                                                                         # when auto_adjust=True,  Close = Adj Close and Adj Close does not exist
     except:
         return  pd.DataFrame()
     else:
+        data_adj_close = data['Adj Close']
+    # Filter out rows with dates newer than end_date_str
+    data_adj_close = data_adj_close[data_adj_close.index <= end_date_str]
+    #print("\nDebug- stock_prices_df\n", data_adj_close)
+    # Rearrange columns based on the order in tickers_list_upper
+    if len(tickers_list)>1:
+        data_adj_close = data_adj_close.reindex(columns=tickers_list_upper)
+    # Needed when the ticker list contains only a single ticker
+    if len(tickers_list_upper)==1:
+        data_adj_close = pd.DataFrame(data_adj_close)
+        data_adj_close.rename(columns={'Adj Close': tickers_list_upper[0]}, inplace=True)
+    data_adj_close.columns = map(str.lower, data_adj_close.columns) # must after  pd.DataFrame(data_adj_close)
+    # data_adj_close_year_end = data_adj_close.resample('A').ffill().round(2) # must before index changed to date
+    data_adj_close_year_end = data_adj_close.resample('YE').ffill().round(2) # must before index changed to date
+    data_adj_close.index=data_adj_close.index.date
+    data_adj_close_year_end.index=data_adj_close_year_end.index.date
+    last_date = data_adj_close_year_end.index[-1]
+    data_adj_close_year_end = data_adj_close_year_end.rename(index={last_date: end_date_str})
+    #print("\nstock_prices_df\n", end_date_str, "\n", data_adj_close_year_end)
+    return data_adj_close_year_end, data_adj_close
+#==============================================================================
+# Part 2:  Calculate annual returns at year end, and at any given day (by calculation_end_date_str)
+#
+# annual return calculation can start at any given day
+def get_annual_returns_anyday_df(daily_adj_close_df, calculation_end_date_str):
+    calculation_end_date=pd.to_datetime(calculation_end_date_str).tz_localize('America/New_York')
+    # Create a DataFrame with a complete date range
+    date_range = pd.date_range(start=daily_adj_close_df.index.min(), end=daily_adj_close_df.index.max(), freq='D')
+    complete_stock_history = pd.DataFrame(index=date_range)
+    # Merge the complete DataFrame with the original stock_history
+    complete_stock_history = complete_stock_history.merge(daily_adj_close_df, how='left', left_index=True, right_index=True)
+    complete_stock_history = complete_stock_history.ffill()  # fill the newy added rows with previous day value
+    '''
+    Select the rows whose month and day match those of the last date in the history
+    (the calculation end date); these rows mark the ends of the annual periods counted back from that date.
+    '''
+    # Filter out rows with dates newer than calculation_end_date
+    #filtered_stock_history = complete_stock_history[complete_stock_history.index <= calculation_end_date]
+    # note: daily_adj_close_df satisfies daily_adj_close_df.index <= calculation_end_date
+    filtered_stock_history = complete_stock_history
+    #print(filtered_stock_history)
+    target_month=filtered_stock_history.index.max().month
+    target_day=filtered_stock_history.index.max().day
+    #print("target_month", target_month, "target_day",target_day, "start_year", filtered_stock_history.index.max().year)
+    annual_returns = filtered_stock_history[(filtered_stock_history.index.month == target_month)
+           & (filtered_stock_history.index.day ==target_day)]
+    annual_returns_percent = annual_returns.pct_change().dropna(how='all')
+    annual_returns_df = pd.DataFrame(annual_returns_percent)
+    #print("\ndebug-annual_returns_df\n", annual_returns_df)
+    return annual_returns_df
+# annual return calculation can start at year end
+def get_annual_returns_year_end_df(data_adj_close_df, calculation_end_date_str):
+    annual_returns_percent = data_adj_close_df.pct_change().dropna(how='all')
+    return annual_returns_percent
 #==============================================================================
+# Part 3: calculate the annualized trailing total return from the annual returns generated in part 2 & display
 # Define a function to calculate the annualized trailing total return for a given number of years
 def get_trailing_return(ticker, data, years):
     # Get the total return values for the last n years
     trailing_data = data[ticker].tail(years)
     # Check if there are empty values within years
     if trailing_data.isna().any():
+        return np.nan
     # Check if there are valid total return values for all years
     if len(trailing_data) == years:
         # Convert the percentage strings to numeric values
         annualized_trailing_return = annualized_trailing_return.round(2)
         return annualized_trailing_return
     else:
+        return np.nan
 # Define a function to Loop through the list and print the trailing returns for each num_years
 def get_trailing_return_column(ticker, annual_returns_df):
             trailing_return_column[f"{num_years}-Year"] = trailing_return
         else:
             print(f"Data not available for {ticker}. Skipping.")
+            trailing_return_column[f"{num_years}-Year"] = np.nan
     return trailing_return_column
 # Create an empty DataFrame to store all tickers' trailing returns
+def get_trailing_return_all(annual_returns_df):
     all_tickers_trailing_returns_df = pd.DataFrame(index=years_list)
+    tickers=annual_returns_df.columns.tolist()
     # Loop through each ticker in the list
     for ticker in tickers:
         trailing_returns = get_trailing_return_column(ticker, annual_returns_df)
     return all_tickers_trailing_returns_df
 #==============================================================================
+# Part 4: calculate the cumulative return from the data (all_tickers_returns_df) generated in part 1 & display
 #  Define a function to calculate the cumulative return for a given number of years from a ticker
 def get_cumulative_return(ticker, data, years):
     # Calculate the cumulative return
         cumulative_returns[years] = cumulative_return.iloc[-1]
     return cumulative_returns
+def get_cumulative_return_all(annual_returns_df):
     # Create an empty DataFrame with years_list as the index for cumulative  returns
     all_tickers_cumulative_returns_df = pd.DataFrame(index=years_list)
+    tickers=annual_returns_df.columns.tolist()
     # Loop through each ticker in the list
     for ticker in tickers:
         cumulative_returns = get_cumulative_return_column(ticker, annual_returns_df)
     return all_tickers_cumulative_returns_df
 #==============================================================================
+# Part 5: calculate the  CAGR (Compound Annual Growth Rate) from the data
 # in all_tickers_cumulative_returns_df generated earlier & display
 # Define a function to calculate the CAGR from the cumulative value and the years
 def calculate_cagr(value, years):
     all_tickers_cagrs_df = all_tickers_cumulative_returns_df.apply(lambda x: calculate_cagr(x, x.index), axis=0)
     return all_tickers_cagrs_df
 #==============================================================================
 # Part 6:
 # single ticker's Prices, Returns,Dividends, good for verifying whether "Adj Close" is correct.
 def get_yearly_single_stock_data(ticker):
     stock = yf.Ticker(ticker)
     #-------- mainly for downloading 'Dividends'
+    history = stock.history(period="max", auto_adjust=False) # see 2025-02.23.1655 revision note
     dividend_history=history['Dividends']
     dividend_history.index=dividend_history.index.date
     #-------- mainly for downloading 'Close','Adj Close'
+    dld_history=yf.download(ticker, period="max", auto_adjust=False) # see 2025-02.23.1655 revision note
     dld_history=dld_history[['Close','Adj Close']]
     dld_history.rename(columns={'Adj Close': 'AdjClose'}, inplace=True)
+    '''
+    note: see 2025-02.23.1655 revision note
+          Columns is a MultiIndex(, names=['Price', 'Ticker']).  Each price column such as 'Close','AdjClose'
+          has only a single sub-column, with the ticker as its column index name.
+          Drop the column MultiIndex level ('Ticker')
+    '''
+    dld_history.columns = dld_history.columns.droplevel(1) # see 2025-02.23.1655 revision note
     date_range = pd.date_range(start=dld_history.index.min(), end=dld_history.index.max(), freq='D')
     complete_history = pd.DataFrame(index=date_range)
     return yearly_data
 #==============================================================================
+# Part 7: utility functions
# Last S&P 500 trading day, formatted as a "YYYY-MM-DD" string
def get_last_trading_day():
    """Return the latest date found in the S&P 500 (^GSPC) price history.

    The history is requested with ``end`` set to today's date in the
    America/New_York timezone, and the maximum index value of the returned
    "Close" series is formatted as "YYYY-MM-DD".
    """
    new_york_today = datetime.now(pytz.timezone('America/New_York')).date()
    # search and see yfinance_BUG_1 NOTE in this file
    sp500_close = yf.Ticker("^GSPC").history(
        period="max", end=new_york_today.strftime("%Y-%m-%d")
    )["Close"]
    return sp500_close.index.max().date().strftime("%Y-%m-%d")
def str_to_integer(integer_str):
    """Convert integer_str to an int, returning -1 when it cannot be parsed.

    Args:
        integer_str: Text expected to hold a base-10 integer.

    Returns:
        The parsed integer, or -1 for anything ``int()`` rejects.  Note that
        the text "-1" is indistinguishable from the failure value.
    """
    try:
        return int(integer_str)
    except (ValueError, TypeError):
        # ValueError: text that is not an integer ("abc", "", "1.5").
        # TypeError: a non-string, non-numeric value such as None.
        return -1
# validate the date string
def is_valid_date(date_string):
    """Return True when date_string parses as a calendar date in %Y-%m-%d form.

    Args:
        date_string: Candidate text, e.g. "2020-12-31".

    Returns:
        True if ``datetime.strptime`` accepts the value, otherwise False.
        strptime treats the leading zero of month/day as optional, so
        "2020-1-5" is also accepted.
    """
    try:
        datetime.strptime(date_string, "%Y-%m-%d")
    except (ValueError, TypeError):
        # ValueError: wrong format or impossible date (e.g. 2023-02-29).
        # TypeError: the value is not a string at all (e.g. None).
        return False
    return True
def date_label_conversion_strip_time(all_tickers_returns_df, calculation_end_date_str):
    """Relabel each row with its own year plus the end date's month and day.

    The frame's index is replaced in place by plain ``datetime.date`` objects
    (time component stripped) and named 'date'.

    Args:
        all_tickers_returns_df: DataFrame whose index exposes ``.date``
            (a DatetimeIndex).  It is modified in place.
        calculation_end_date_str: End date in "YYYY-MM-DD" form; only its
            month and day are used.

    Returns:
        The same DataFrame object, with the relabelled index.

    Raises:
        ValueError: If calculation_end_date_str is not a valid date.
    """
    # Parse first so a bad date string cannot leave the frame half-modified.
    end_date = datetime.strptime(calculation_end_date_str, "%Y-%m-%d")
    target_month, target_day = end_date.month, end_date.day

    def _relabel(row_date):
        try:
            return row_date.replace(month=target_month, day=target_day)
        except ValueError:
            # Only Feb 29 can be invalid in another year; use Feb 28 for
            # non-leap years instead of crashing.
            return row_date.replace(month=2, day=28)

    relabelled = [_relabel(row_date) for row_date in all_tickers_returns_df.index.date]
    all_tickers_returns_df.index = pd.Index(relabelled, dtype=object, name='date')
    return all_tickers_returns_df
+#==============================================================================
+# Part 8: gradio handling - Input command handling and display in web page
 help_info_str="Input Formats:\n  \
             1. ticker list....................Example:  spy vfv.to xiu.to xic.to xfn.to ry.to \n \
+            2. One of default ticker list, a number between 1 and 7....Example:   0, or 1, ...,7 \n \
             3. CalculationEndDate as prefix.  Example:   2020-12-31 2 \n \
             .........................................2020-12-31 spy vfv.to xiu.to xic.to xfn.to ry.to \n \
             4. single ticker: Dividend/Close/AdjClose/Return/TotalReturn/CalReturn(by Close/Dividends).  @1 spy \n \
+            note: daily adjusted close data are from Yahoo Finance. \n" + script_version
+# Main Handling Process
+def calculation_response(message):
     # if there is no input, display help information
     if message=="":
         return help_info_str
     # calculation_end_date_for_others are for trailing and cumulative returns
     calculation_end_date_for_others_str=calculation_end_date_month_boundary_date_str
+    '''  Handling Feb 29 of leap years.
+    For leap years, to simplify the calculation, Feb 28 will be used to replace Feb 29
+    for calculating returns.
+    Therefore, if calculation_end_date_for_others_str is Feb 29, then replace 29 with 28 in calculation_end_date_for_others_str
     '''
+    leap_year=False
+    if (
+        calculation_end_date_for_others_str[-5:] == '02-29'
+    ):
+        calculation_end_date_for_others_str = calculation_end_date_for_others_str[:-2] + '28'
+        leap_year=True
     #................End
     # Check whether number 0, 1, 2, .. is selected for using a default ticker list
     # if no tickers were set, display help information
     if len(tickers)==0:
         return help_info_str
+    tmp_ticker_list=tickers
+    tickers = [ticker.lower() for ticker in tmp_ticker_list]
     #*********************************************************************************
+    # Calculating year-end prices, Annual, Trailing, Cumulative, and CAGR returns & generating html for display
+    #
+    # list of year-end prices of stocks
+    output_string1= f"\nAdj Close Prices ($) at year-end\n"
+    data_adj_close_year_end_df, data_adj_close_df = stock_prices_df(tickers, calculation_end_date_str)
+    output_dataframe= data_adj_close_year_end_df
+    output_html1=output_string1 + output_dataframe.to_html()
+    #print("\ndebug1  output_dataframe\n",  output_string1, output_dataframe)
+    #  Annual Total Return
     output_string = f"\nAnnual Total Return (%) as {calculation_end_date_str}\n"
+    #output_dataframe = get_annual_returns_tickers_year_boundary_df(tickers, calculation_end_date_str)
+    output_dataframe = get_annual_returns_year_end_df(data_adj_close_year_end_df, calculation_end_date_str)
+    output_dataframe = output_dataframe.dropna(how='all')
     output_dataframe = output_dataframe.round(4)*100
+    #output_dataframe.index=output_dataframe.index.date
     # Assuming your DataFrame is named output_dataframe
     last_date = output_dataframe.index[-1]
     output_dataframe = output_dataframe.rename(index={last_date: calculation_end_date_str})
     # Convert the DataFrame to HTML, Combine the expected string outputs
+    output_html2 = output_string + output_dataframe.to_html()
+    #print("\ndebug2  output_dataframe\n", output_dataframe)
     # annual_returns  - at any given day, for calculating trailing and cumulative returns, not to be displayed
+    #annual_returns_dataframe=get_annual_returns_tickers_df(tickers, calculation_end_date_for_others_str)
+    annual_returns_dataframe=get_annual_returns_anyday_df(data_adj_close_df, calculation_end_date_str)
+    #print("\ndebug2-T  annual_returns_dataframe\n", annual_returns_dataframe)
     # Trailing Return
+    if (leap_year):
+        output_string3 = f"\nTrailing Total Return (%) as {calculation_end_date_for_others_str} (leap year: Feb 29 replaced by Feb 28 for approximation)\n"
+    else:
+        output_string3 = f"\nTrailing Total Return (%) as {calculation_end_date_for_others_str}\n"
+    output_dataframe3=get_trailing_return_all(annual_returns_dataframe)
+    output_dataframe3 = output_dataframe3.dropna(how='all')
     # Insert an empty to align the ticker symbols with annual return display
+    output_dataframe3.insert(0, "-", "      ")
+    output_dataframe3.index.name="yrs"
     output_html3=output_string3 + output_dataframe3.to_html()
+    #print("\ndebug3\n", output_string3, output_dataframe3)
+    # Cumulative Return
+    output_string4 = f"\nCumulative Return (%) as {calculation_end_date_for_others_str}\n"
+    cumulative_return_all_dataframe=get_cumulative_return_all(annual_returns_dataframe)
+    cumulative_return_all_dataframe = cumulative_return_all_dataframe.dropna(how='all')
+    output_dataframe4=cumulative_return_all_dataframe.round(4)*100
+    output_dataframe4.index.name="yrs"
     output_html4=output_string4 + output_dataframe4.to_html()
+    # CAGR Return
+    '''
+    # following code is fine, but is not needed
+    output_string5 = f"\nCompound Annual Growth Rate (CAGR) (%) as {calculation_end_date_for_others_str}\n"
+    output_dataframe5=get_cagr_return_all (cumulative_return_all_dataframe)
+    output_dataframe5=output_dataframe5.round(4)*100
+    output_html5=output_string5 + output_dataframe5.to_html()
+    '''
+    # print total 1,2,3,4 (not 5)
+    output_html = output_html1 + output_html2 + output_html3 + output_html4
     return  output_html
# Gradio web UI: a chat window, a one-line input box and a clear button.
with gr.Blocks() as web_block:
    chatbot = gr.Chatbot(height="500px")

    def respond(message, chat_history):
        """Append (input, calculated HTML) to the chat and empty the textbox."""
        chat_history.append((message, calculation_response(message)))
        return "", chat_history

    # Textbox and Clear button share one row
    with gr.Row():
        # alternative with a visible label:
        # msg = gr.Textbox(label="stock tickers input", scale=2, min_width=380)
        msg = gr.Textbox(show_label=False, scale=2, min_width=380)
        clear = gr.ClearButton([msg, chatbot], scale=0, min_width=50)

    # Pressing Enter in the textbox feeds (msg, chatbot) to respond() and
    # writes its two return values back to the same components.
    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

web_block.launch()
# web_block.launch(debug=True)
# ----------- ad-hoc test cases (module level, executed after launch() returns) -----------
# -------- part 1: stock_prices_df
calculation_end_date_str = "2025-02-21"
data_adj_close_year_end_df, data_adj_close_df = stock_prices_df(
    ["SPY", "MSFT"], "2025-02-21"
)
# print("\ndebug  data_adj_close_df data_adj_close_year_end_df\n", data_adj_close_year_end_df, "\ndata_adj_close_df\n", data_adj_close_df)

# yfinance note: the default changed to auto_adjust=True at yfinance version 0.2.54.
# With auto_adjust=True, Close = Adj Close and the 'Adj Close' column does not exist.
# tickers = yf.download(["AAPL", "MSFT"], period="1y", auto_adjust=False)
# print("\ndebug  test2\n", tickers)
# tickers = yf.download(["AAPL", "MSFT"], period="1y")
# print("\ndebug  test\n", tickers)

# -------- part 2: get_annual_returns_year_end_df
output_dataframe = get_annual_returns_year_end_df(
    data_adj_close_year_end_df, calculation_end_date_str
)
# print("\ndebug  get_annual_returns_year_end_df\n", output_dataframe)

# annual returns ending on an arbitrary day, used for the trailing returns
annual_returns_dataframe = get_annual_returns_anyday_df(
    data_adj_close_df, calculation_end_date_str
)
# print("\ndebug  get_annual_returns_anyday_df\n", annual_returns_dataframe)

# -------- part 3: get_trailing_return_all
output_dataframe3 = get_trailing_return_all(annual_returns_dataframe)
# print("\ndebug  get_trailing_return_all\n", output_dataframe3)

# -------- part 4: get_cumulative_return_all
cumulative_return_all_dataframe = get_cumulative_return_all(annual_returns_dataframe)
# print("\ndebug  get_cumulative_return_all\n", cumulative_return_all_dataframe)

# -------- part 5: get_cagr_return_all
# output_dataframe5 = get_cagr_return_all(cumulative_return_all_dataframe)
# print("\ndebug  get_cagr_return_all\n", output_dataframe5)

# -------- part 6: get_yearly_single_stock_data
# output_dataframe0 = get_yearly_single_stock_data("SPY")
# print("\ndebug  part 6 test\n", output_dataframe0)

# -------- testing calculation_response
# calculation_response("8")
# bot_message = calculation_response("SPY")
# print(bot_message)