pilot-etf-tech / rolling_update.py
Artem Kalyta
prompt experimentation
80c35b6
import pandas as pd
import yfinance as yf
from finance_handler import get_financial_info
from config import Config
from common_functionality import convert_to_excel, get_financial_columns
def update_financial_info(data) -> str:
"""
Router method for updating financial info for a list of companies
Parameters:
----------
data: filename for the uploaded file
Returns:
-------
filename: filename for the updated file
"""
dataframes = pd.read_excel(data.name, sheet_name=None)
if len(dataframes.keys()) == 1:
updated_data = initial_update_flow(list(dataframes.values())[0])
filename = convert_to_excel(updated_data, filename="initial_update")
else:
updated_data = add_financial_history(dataframes)
filename = convert_to_excel(updated_data, filename="rolling_update", index=False)
return filename
def initial_update_flow(data: pd.DataFrame) -> dict:
"""
Accepts a regular one-tab Excel file and:
1. Updates the financial info for each company in the main table to the latest values
2. Adds a new tab with the historical financial info for each company
3. Returns a dictionary of dataframes with the updated info
Parameters:
----------
data: pandas dataframe with the main table of companies and their financial info
Returns:
-------
dataframes: dictionary of dataframes with the updated info
"""
tickers = data["ticker"].unique()
financial_columns = get_financial_columns(data)
processed_data = data.to_dict(orient="records")
current_data = [data[data["ticker"] == ticker][financial_columns + [Config.FINANCE_UPDATE_TIME]
].to_dict(orient="records")[0] for ticker in tickers
]
fresh_data = [get_financial_info(ticker, financial_columns) for ticker in tickers]
assert len(current_data) == len(fresh_data)
assert len(fresh_data) == len(processed_data)
[processed_data[c].update(fresh_data[c]) for c in range(len(processed_data))]
processed_data = pd.DataFrame(processed_data)
formatted_data = {Config.MAIN_SHEET_NAME: processed_data}
dataframes = {tickers[i]: pd.DataFrame([current_data[i], fresh_data[i]]).set_index(Config.FINANCE_UPDATE_TIME)
for i in range(len(current_data))}
dataframes = {**formatted_data, **dataframes}
return dataframes # {AAPL: {price: 100, marketCap: 1000, update: 01-01-2021}}
def add_financial_history(dataframes: dict[pd.DataFrame]) -> dict[pd.DataFrame]:
# TODO: Add time verification to make sure we're not updating too often
"""
Accepts a dictionary of dataframes and:
1. Updates each tab with the new financial info for each company
2. Updates the main tab with the most recent financial info for each company
"""
companies_tickers_filtered = [i for i in dataframes.keys() if i not in Config.__dict__.values()][0]
financial_columns = dataframes[companies_tickers_filtered].columns
for name, df in dataframes.items():
if name == Config.MAIN_SHEET_NAME:
df_dict = df.to_dict(orient="records")
[df_dict[c].update(get_financial_info(company["ticker"], financial_columns))
for c, company in enumerate(df_dict)]
dataframes[name] = pd.DataFrame(df_dict)
continue
if name == Config.SELECTED_BY_FINANCIALS:
continue
df_dict = df.to_dict(orient="records")
updated_info = dataframes[Config.MAIN_SHEET_NAME][dataframes[Config.MAIN_SHEET_NAME]["ticker"] == name][financial_columns].to_dict(orient="records")[0]
df_dict.append(updated_info)
dataframes[name] = pd.DataFrame(df_dict)
return dataframes