Spaces:
Sleeping
Sleeping
| import openai | |
| import pandas as pd | |
| from config import Config | |
| from common_functionality import convert_to_excel, get_financial_columns, send_openai_request | |
| from prompts import Prompts | |
def select_by_financials(processed_file):
    """Add a "selected by financials" sheet to an already processed workbook.

    Every sheet of the workbook at ``processed_file.name`` is loaded, the
    sheet named ``Config.MAIN_SHEET_NAME`` is handed to
    ``get_selected_companies``, and the resulting sheet(s) are written out
    together with the original ones.

    Args:
        processed_file: Object whose ``name`` attribute is the path of the
            Excel workbook to read.

    Returns:
        Whatever ``convert_to_excel`` returns (presumably the name of the
        written file -- confirm against that helper).
    """
    # sheet_name=None makes pandas return a {sheet name: DataFrame} mapping.
    workbook = pd.read_excel(processed_file.name, sheet_name=None)
    main_sheet = workbook[Config.MAIN_SHEET_NAME]
    selection = get_selected_companies(main_sheet)

    # Original sheets first; a selection sheet with the same name wins.
    combined = dict(workbook)
    combined.update(selection)
    return convert_to_excel(combined, filename="selected_companies", index=False)
def _join_justification(value):
    """Return a justification as one string, joining a sequence with spaces."""
    if isinstance(value, str):
        # str.join on a plain string would put a space between every
        # character, so an already-flat justification is passed through.
        return value
    return " ".join(value)


def get_selected_companies(data: pd.DataFrame) -> dict:
    """Select companies by their financial figures via an OpenAI request.

    The financial columns of ``data`` (minus ``Config.FINANCE_UPDATE_TIME``
    and ``"country"``) are sent, one record per company together with its
    ticker, in ``Prompts.SELECT_TOP_FINANCIALS_PROMPT``. The response is
    turned into a DataFrame and merged back with the financial columns.

    Args:
        data: Main sheet; must contain a ``"ticker"`` column in addition to
            the columns reported by ``get_financial_columns``.

    Returns:
        A one-entry dict mapping ``Config.SELECTED_BY_FINANCIALS`` to the
        DataFrame of selected companies.

    Raises:
        KeyError: If the response has no ``"justification"`` column or
            ``data`` has no ``"ticker"`` column.
    """
    # Build a fresh list instead of calling .remove() so the list returned
    # by the helper is never mutated.
    excluded = (Config.FINANCE_UPDATE_TIME, "country")
    financial_columns = [
        column for column in get_financial_columns(data) if column not in excluded
    ]

    # One record per row, tagged with its ticker so the model can refer to it.
    financial_data = data[financial_columns].to_dict(orient="records")
    for company, ticker in zip(financial_data, data["ticker"]):
        company["ticker"] = ticker

    prompt = Prompts.SELECT_TOP_FINANCIALS_PROMPT.format(
        num_companies=Config.TEMP_NUMBER_TO_SELECT,
        companies=financial_data,
    )
    # The response is expected to be record-like with "ticker" and
    # "justification" fields -- NOTE(review): confirm against the prompt.
    response = send_openai_request(prompt=prompt, max_tokens=3000)

    selected_df = pd.DataFrame(response)
    selected_df["justification"] = selected_df["justification"].apply(
        _join_justification
    )
    # Default inner merge: tickers in the response that are absent from
    # `data` are dropped.
    selected_df = pd.merge(
        selected_df, data[financial_columns + ["ticker"]], on="ticker"
    )
    return {Config.SELECTED_BY_FINANCIALS: selected_df}