pilot-etf-tech / excel_merging /merge_excels.py
Artem Kalyta
added excel merging
66e4785
import pandas as pd
from gradio import File
from functools import reduce
from config import Config
def _concat_unique_by_ticker(frames: list[pd.DataFrame]) -> pd.DataFrame:
    """Stack *frames* vertically, keeping the first row seen for each ticker.

    The result gets a fresh 0..n-1 index, so whatever index the inputs carried
    is discarded.
    """
    return (
        pd.concat(frames)
        .drop_duplicates(subset=["ticker"], keep="first")
        .reset_index(drop=True)
    )


def merge_excels(files: list[File]) -> dict[str, pd.DataFrame]:
    """Merge several Excel workbooks into one mapping of sheet name to data.

    Merge rules:

    * ``Config.MAIN_SHEET_NAME`` -- rows from every workbook are concatenated
      and de-duplicated on the ``"ticker"`` column (first occurrence wins).
    * ``Config.SELECTED_BY_FINANCIALS`` -- merged the same way, using every
      workbook that contains the sheet; omitted if no workbook has it.
    * Any other sheet -- taken as-is from the first workbook that contains a
      sheet with that name.

    Args:
        files: Uploaded file objects; only their ``.name`` attribute (the path
            handed to ``pd.read_excel``) is used.

    Returns:
        Mapping of sheet name to DataFrame. Empty when ``files`` is empty.

    Raises:
        KeyError: If a workbook lacks ``Config.MAIN_SHEET_NAME``, or a merged
            sheet has no ``"ticker"`` column.
    """
    if not files:
        # Nothing to merge; avoids an opaque error from concatenating nothing.
        return {}

    # sheet_name=None loads every sheet of a workbook as {sheet name: DataFrame}.
    # index_col=1 turns the second column into the row index; that index is
    # dropped again when the merged sheets are re-indexed.
    workbooks = [
        pd.read_excel(file.name, sheet_name=None, index_col=1) for file in files
    ]

    merged: dict[str, pd.DataFrame] = {
        Config.MAIN_SHEET_NAME: _concat_unique_by_ticker(
            [book[Config.MAIN_SHEET_NAME] for book in workbooks]
        )
    }

    # The two specially merged sheets are handled outside this loop.
    special_sheets = (Config.MAIN_SHEET_NAME, Config.SELECTED_BY_FINANCIALS)
    for book in workbooks:
        for sheet_name, sheet_data in book.items():
            if sheet_name in special_sheets:
                continue
            # First workbook to provide a given sheet wins.
            merged.setdefault(sheet_name, sheet_data)

    selected = [
        book[Config.SELECTED_BY_FINANCIALS]
        for book in workbooks
        if Config.SELECTED_BY_FINANCIALS in book
    ]
    if selected:
        # Add to the result instead of replacing it, so the main sheet and the
        # extra sheets gathered above are preserved.
        merged[Config.SELECTED_BY_FINANCIALS] = _concat_unique_by_ticker(selected)

    return merged