Spaces:
Sleeping
Sleeping
Artem Kalyta committed on
Commit ·
a3d6903
1
Parent(s): fce23bc
scoring update
Browse files- .env +1 -0
- .gitignore +1 -0
- __pycache__/basic_handler.cpython-311.pyc +0 -0
- __pycache__/common_functionality.cpython-311.pyc +0 -0
- __pycache__/config.cpython-311.pyc +0 -0
- __pycache__/finance_handler.cpython-311.pyc +0 -0
- __pycache__/prompts.cpython-311.pyc +0 -0
- __pycache__/rolling_update.cpython-311.pyc +0 -0
- __pycache__/select_top_financials.cpython-311.pyc +0 -0
- __pycache__/strategy_handler.cpython-311.pyc +0 -0
- main_router.py → app.py +51 -21
- basic+financial.xlsx +0 -0
- basic_handler.py +10 -11
- common_functionality.py +23 -0
- config.py +8 -2
- data_formatting.ipynb +0 -0
- prompts.py +6 -0
- rolling_update.py +6 -15
- rolling_update.xlsx +0 -0
- select_top_financials.py +33 -0
- selected_companies.xlsx +0 -0
- updated_strategic_data.xlsx +0 -0
- workflow.ipynb +599 -0
.env
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
OPENAI_API_KEY=<redacted-openai-api-key>
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/__pycache__
|
__pycache__/basic_handler.cpython-311.pyc
ADDED
|
Binary file (1.62 kB). View file
|
|
|
__pycache__/common_functionality.cpython-311.pyc
ADDED
|
Binary file (2.56 kB). View file
|
|
|
__pycache__/config.cpython-311.pyc
ADDED
|
Binary file (1.01 kB). View file
|
|
|
__pycache__/finance_handler.cpython-311.pyc
ADDED
|
Binary file (1.91 kB). View file
|
|
|
__pycache__/prompts.cpython-311.pyc
ADDED
|
Binary file (2.42 kB). View file
|
|
|
__pycache__/rolling_update.cpython-311.pyc
ADDED
|
Binary file (6.92 kB). View file
|
|
|
__pycache__/select_top_financials.cpython-311.pyc
ADDED
|
Binary file (2.84 kB). View file
|
|
|
__pycache__/strategy_handler.cpython-311.pyc
ADDED
|
Binary file (832 Bytes). View file
|
|
|
main_router.py → app.py
RENAMED
|
@@ -1,17 +1,25 @@
|
|
| 1 |
from basic_handler import get_basic_info
|
| 2 |
from strategy_handler import get_strategy_info
|
| 3 |
from finance_handler import get_financial_info
|
| 4 |
-
from rolling_update import update_financial_info
|
|
|
|
| 5 |
from config import Config
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
import openai
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# TODO: add an identifier for financial columns for better repeat parsing
|
| 14 |
|
|
|
|
| 15 |
def process_file(file) -> dict:
|
| 16 |
input_data = pd.read_excel(file.name)
|
| 17 |
data_dict = get_formatting_columns(input_data)
|
|
@@ -30,51 +38,73 @@ def get_formatting_columns(data: pd.DataFrame) -> dict:
|
|
| 30 |
return data_dict
|
| 31 |
|
| 32 |
|
| 33 |
-
def shape_sector_info(data_dict: dict, processed_data: pd.DataFrame, action) -> pd.DataFrame:
|
| 34 |
if action == Config.ACTION_BASIC_INFO:
|
| 35 |
processed_data = get_basic_info(data_dict["sector"], data_dict["num_companies"],
|
| 36 |
data_dict[Config.ACTION_BASIC_INFO])
|
|
|
|
|
|
|
| 37 |
sector_data = pd.DataFrame(processed_data)
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
| 41 |
if action == Config.ACTION_STRATEGY_INFO:
|
| 42 |
[processed_data[c].update(get_strategy_info(company["name"], data_dict[Config.ACTION_STRATEGY_INFO])
|
| 43 |
) for c, company in enumerate(processed_data)]
|
| 44 |
-
elif action == Config.ACTION_FINANCIAL_INFO:
|
| 45 |
-
[processed_data[c].update(get_financial_info(company["ticker"], data_dict[Config.ACTION_FINANCIAL_INFO])
|
| 46 |
-
) for c, company in enumerate(processed_data)]
|
| 47 |
else:
|
| 48 |
-
return "Invalid action Error"
|
| 49 |
sector_data = pd.DataFrame(processed_data)
|
| 50 |
-
return sector_data
|
| 51 |
|
| 52 |
|
| 53 |
-
def main_interface(requirements_file, processed_file, action):
|
| 54 |
-
|
| 55 |
data_dict = process_file(requirements_file)
|
| 56 |
-
|
| 57 |
data_dict = {}
|
| 58 |
if action == Config.ACTION_BASIC_INFO:
|
| 59 |
-
|
| 60 |
-
|
|
|
|
| 61 |
sector_data = pd.read_excel(processed_file.name)
|
| 62 |
sector_data = shape_sector_info(data_dict=data_dict, processed_data=sector_data, action=action)
|
| 63 |
if type(sector_data) == str:
|
| 64 |
return sector_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
elif action == Config.ACTION_UPDATE_INFO:
|
| 66 |
filename = update_financial_info(processed_file)
|
| 67 |
return filename
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
iface = gr.Interface(
|
| 74 |
fn=main_interface,
|
| 75 |
inputs=[gr.inputs.File(label="Requirements file"), gr.inputs.File(label="(Optional) Populated file"),
|
| 76 |
-
gr.inputs.Dropdown([Config.ACTION_BASIC_INFO,
|
| 77 |
-
Config.
|
|
|
|
|
|
|
|
|
|
| 78 |
label="Select the type of information you want to extract")],
|
| 79 |
outputs=gr.outputs.File()
|
| 80 |
)
|
|
|
|
| 1 |
from basic_handler import get_basic_info
|
| 2 |
from strategy_handler import get_strategy_info
|
| 3 |
from finance_handler import get_financial_info
|
| 4 |
+
from rolling_update import update_financial_info, initial_update_flow
|
| 5 |
+
from select_top_financials import select_by_financials
|
| 6 |
from config import Config
|
| 7 |
+
from common_functionality import convert_to_excel
|
| 8 |
+
import os
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
|
| 11 |
import pandas as pd
|
| 12 |
import openai
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
+
# Read a local .env file (when one exists) before looking up the key below.
load_dotenv()
# Configure the OpenAI client from the environment.
# The key is intentionally NOT printed: echoing it to stdout leaks the
# credential into console output and hosting logs.
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 19 |
|
| 20 |
# TODO: add an identifier for financial columns for better repeat parsing
|
| 21 |
|
| 22 |
+
|
| 23 |
def process_file(file) -> dict:
|
| 24 |
input_data = pd.read_excel(file.name)
|
| 25 |
data_dict = get_formatting_columns(input_data)
|
|
|
|
| 38 |
return data_dict
|
| 39 |
|
| 40 |
|
| 41 |
+
def shape_sector_info(data_dict: dict, processed_data: pd.DataFrame, action) -> "str | dict[str, pd.DataFrame]":
    """
    Build the sector data for the requested action.

    Args:
        data_dict: parsed requirements; read here under the keys "sector",
            "num_companies", Config.ACTION_BASIC_INFO,
            Config.ACTION_FINANCIAL_INFO and Config.ACTION_STRATEGY_INFO.
        processed_data: previously generated company table; only used for the
            strategy action (the basic action builds its own list).
        action: one of the Config.ACTION_* labels.

    Returns:
        - basic action: the filename returned by convert_to_excel.
        - strategy action: {Config.MAIN_SHEET_NAME: DataFrame} with the
          strategy fields merged into every company row.
        - any other action: {"Invalid action Error": empty DataFrame}.
    """
    if action == Config.ACTION_BASIC_INFO:
        companies = get_basic_info(data_dict["sector"], data_dict["num_companies"],
                                   data_dict[Config.ACTION_BASIC_INFO])
        financial_fields = data_dict[Config.ACTION_FINANCIAL_INFO]
        # Enrich each company record in place with its financial data.
        for company in companies:
            company.update(get_financial_info(company["ticker"], financial_fields))
        sector_data = pd.DataFrame(companies)
        updated_data = initial_update_flow(sector_data)
        return convert_to_excel(updated_data, filename="basic+financial")

    # Reject unknown actions before touching processed_data, which may be None
    # here; previously `.to_dict` was called first and masked this result with
    # an AttributeError.
    if action != Config.ACTION_STRATEGY_INFO:
        return {"Invalid action Error": pd.DataFrame()}

    companies = processed_data.to_dict(orient="records")
    strategy_fields = data_dict[Config.ACTION_STRATEGY_INFO]
    for company in companies:
        company.update(get_strategy_info(company["name"], strategy_fields))
    return {Config.MAIN_SHEET_NAME: pd.DataFrame(companies)}
|
| 60 |
|
| 61 |
|
| 62 |
+
def main_interface(requirements_file, processed_file, action):
    """
    Gradio entry point: dispatch the selected action and return an output path.

    Args:
        requirements_file: uploaded requirements workbook (file object with a
            `.name` path) or a falsy value when none was provided.
        processed_file: uploaded, previously populated workbook (file object
            with a `.name` path); required by every action except the basic one.
        action: one of the Config.ACTION_* labels chosen in the dropdown.

    Returns:
        The filename of the Excel workbook produced for the action.

    Raises:
        ValueError: if `action` is not one of the supported actions. Previously
            this case fell through and failed on an undefined `sector_data`.
    """
    data_dict = process_file(requirements_file) if requirements_file else {}

    if action == Config.ACTION_BASIC_INFO:
        return shape_sector_info(data_dict=data_dict, processed_data=None, action=action)

    if action == Config.ACTION_SELECTED_STRATEGY_INFO:
        # sheet_name=None loads every sheet into a {sheet name: DataFrame} dict.
        workbook = pd.read_excel(processed_file.name, sheet_name=None)
        selected = workbook[Config.SELECTED_BY_FINANCIALS].to_dict(orient="records")
        strategy_fields = data_dict[Config.ACTION_STRATEGY_INFO]
        for company in selected:
            company.update(get_strategy_info(company["ticker"], strategy_fields))
        workbook[Config.SELECTED_BY_FINANCIALS] = pd.DataFrame(selected)
        return convert_to_excel(workbook, filename="updated_strategic_data", index=False)

    if action == Config.ACTION_UPDATE_INFO:
        return update_financial_info(processed_file)

    if action == Config.ACTION_SELECT_BY_FINANCIALS:
        return select_by_financials(processed_file)

    if action != Config.ACTION_STRATEGY_INFO:
        raise ValueError(f"Unsupported action: {action!r}")

    # Strategy action: enrich the first sheet, then write it back next to any
    # additional sheets the uploaded workbook already contains.
    sector_data = pd.read_excel(processed_file.name)
    sector_data = shape_sector_info(data_dict=data_dict, processed_data=sector_data, action=action)
    if isinstance(sector_data, str):
        return sector_data

    processed_dataframes = pd.read_excel(processed_file.name, sheet_name=None)
    if len(processed_dataframes) > 1:
        # Drop the stale main sheet (if present) so the freshly built one
        # takes its place at the front of the workbook.
        processed_dataframes.pop(Config.MAIN_SHEET_NAME, None)
        processed_dataframes = {**sector_data, **processed_dataframes}
        return convert_to_excel(processed_dataframes, filename="updated_data", index=False)
    return convert_to_excel(sector_data, filename="updated_data", index=False)
|
| 98 |
|
| 99 |
|
| 100 |
# Gradio UI: two file uploads plus an action selector, wired to main_interface;
# the function's return value is served back as a downloadable file.
iface = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.inputs.File(label="Requirements file"),
        gr.inputs.File(label="(Optional) Populated file"),
        gr.inputs.Dropdown(
            [
                Config.ACTION_BASIC_INFO,
                Config.ACTION_STRATEGY_INFO,
                Config.ACTION_SELECTED_STRATEGY_INFO,
                Config.ACTION_UPDATE_INFO,
                Config.ACTION_SELECT_BY_FINANCIALS,
            ],
            label="Select the type of information you want to extract",
        ),
    ],
    outputs=gr.outputs.File(),
)
|
basic+financial.xlsx
ADDED
|
Binary file (7.27 kB). View file
|
|
|
basic_handler.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
| 1 |
from prompts import Prompts
|
| 2 |
from common_functionality import send_openai_request
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list:
|
|
@@ -9,18 +13,13 @@ def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list
|
|
| 9 |
ignore_companies = []
|
| 10 |
for _ in range(n_companies // max_num_companies):
|
| 11 |
prompt = Prompts.BASIC_INFORMATION_PROMPT.format(num_companies=max_num_companies,
|
| 12 |
-
|
| 13 |
required_fields=required_fields)
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
resp = send_openai_request(prompt)
|
| 20 |
-
if type(resp) == dict:
|
| 21 |
-
data.append(resp)
|
| 22 |
-
else:
|
| 23 |
-
data.extend(resp)
|
| 24 |
else:
|
| 25 |
prompt = Prompts.BASIC_INFORMATION_PROMPT_SINGLE_USE.format(num_companies=n_companies, sector=sector,
|
| 26 |
required_fields=required_fields)
|
|
|
|
| 1 |
from prompts import Prompts
|
| 2 |
from common_functionality import send_openai_request
|
| 3 |
+
import openai
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 7 |
|
| 8 |
|
| 9 |
def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list:
|
|
|
|
| 13 |
ignore_companies = []
|
| 14 |
for _ in range(n_companies // max_num_companies):
|
| 15 |
prompt = Prompts.BASIC_INFORMATION_PROMPT.format(num_companies=max_num_companies,
|
| 16 |
+
ignore_companies=ignore_companies,
|
| 17 |
required_fields=required_fields)
|
| 18 |
+
resp = send_openai_request(prompt)
|
| 19 |
+
if type(resp) == dict:
|
| 20 |
+
data.append(resp)
|
| 21 |
+
else:
|
| 22 |
+
data.extend(resp)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
else:
|
| 24 |
prompt = Prompts.BASIC_INFORMATION_PROMPT_SINGLE_USE.format(num_companies=n_companies, sector=sector,
|
| 25 |
required_fields=required_fields)
|
common_functionality.py
CHANGED
|
@@ -1,5 +1,21 @@
|
|
| 1 |
import json
|
| 2 |
import openai
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def send_openai_request(prompt: str, max_tokens=2000, temperature=0.1) -> dict:
|
|
@@ -18,3 +34,10 @@ def send_openai_request(prompt: str, max_tokens=2000, temperature=0.1) -> dict:
|
|
| 18 |
presence_penalty=0
|
| 19 |
)
|
| 20 |
return json.loads(response.choices[0]["message"]["content"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import openai
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import yfinance as yf
|
| 5 |
+
|
| 6 |
+
from config import Config
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def convert_to_excel(dataframes: dict[str, pd.DataFrame], filename="updated_data", index=True) -> str:
    """
    Write a dictionary of dataframes to a single Excel workbook.

    Args:
        dataframes: mapping of sheet name -> dataframe; one sheet is written
            per entry, in the mapping's iteration order.
        filename: output name without extension; ".xlsx" is appended.
        index: whether to write each dataframe's index as a column.

    Returns:
        The name of the written file, including the ".xlsx" extension.
    """
    filename = f"{filename}.xlsx"
    # The context manager closes the writer even if a sheet fails to write,
    # so the file handle is never left open.
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        for sheet_name, df in dataframes.items():
            df.to_excel(writer, index=index, sheet_name=sheet_name)
    return filename
|
| 19 |
|
| 20 |
|
| 21 |
def send_openai_request(prompt: str, max_tokens=2000, temperature=0.1) -> dict:
|
|
|
|
| 34 |
presence_penalty=0
|
| 35 |
)
|
| 36 |
return json.loads(response.choices[0]["message"]["content"])
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_financial_columns(data: pd.DataFrame) -> list:
    """
    Return the columns of `data` that hold financial information.

    A column qualifies when its name is one of the keys of a yfinance
    `Ticker.info` dictionary, or when its name contains
    Config.FINANCE_HISTORICAL_RETURN. The "Country" column is always excluded.

    Args:
        data: company table whose column names are inspected.

    Returns:
        List of matching column names, in the order they appear in `data`.
    """
    # AAPL serves only as a reference ticker for the set of known info keys.
    # Read it once instead of once per column.
    known_fields = set(yf.Ticker("AAPL").info.keys())
    # The membership test and the substring test are separate alternatives;
    # a misplaced parenthesis previously collapsed them into
    # `x in (keys or ...)`, which silently dropped historical-return columns.
    return [
        column for column in data.columns
        if (column in known_fields
            or (isinstance(column, str) and Config.FINANCE_HISTORICAL_RETURN in column))
        and column != "Country"
    ]
|
config.py
CHANGED
|
@@ -1,11 +1,17 @@
|
|
| 1 |
class Config:
|
| 2 |
|
| 3 |
-
ACTION_BASIC_INFO = "Basic"
|
| 4 |
ACTION_STRATEGY_INFO = "Strategy"
|
| 5 |
-
|
|
|
|
| 6 |
ACTION_UPDATE_INFO = "Update financials"
|
|
|
|
|
|
|
| 7 |
|
| 8 |
FINANCE_UPDATE_TIME = "last_updated"
|
| 9 |
FINANCE_HISTORICAL_RETURN = "historical_return"
|
| 10 |
|
| 11 |
MAIN_SHEET_NAME = "Companies"
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
class Config:

    # --- Action labels ---------------------------------------------------
    # Compared against the `action` argument of the handlers; some also
    # double as keys into the parsed requirements dictionary.
    ACTION_BASIC_INFO = "Basic+Financial"
    ACTION_STRATEGY_INFO = "Strategy"
    ACTION_SELECTED_STRATEGY_INFO = "Selected Companies Strategy"
    ACTION_FINANCIAL_INFO = "Financial"
    ACTION_UPDATE_INFO = "Update financials"
    ACTION_SELECT_BY_STRATEGY = "Select top best strategy companies"
    ACTION_SELECT_BY_FINANCIALS = "Select top best financials companies"

    # --- Financial column names ------------------------------------------
    FINANCE_UPDATE_TIME = "last_updated"
    FINANCE_HISTORICAL_RETURN = "historical_return"

    # --- Workbook sheet names --------------------------------------------
    MAIN_SHEET_NAME = "Companies"
    SELECTED_BY_FINANCIALS = "Top Financials"

    # Number of companies requested from the top-financials selection prompt.
    TEMP_NUMBER_TO_SELECT = 2
|
data_formatting.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
prompts.py
CHANGED
|
@@ -20,4 +20,10 @@ class Prompts:
|
|
| 20 |
"flat JSON with the keys being the provided categories description and score. " \
|
| 21 |
"Do not nest values within the JSON."
|
| 22 |
FINANCIAL_INFORMATION_PROMPT = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
|
|
|
| 20 |
"flat JSON with the keys being the provided categories description and score. " \
|
| 21 |
"Do not nest values within the JSON."
|
| 22 |
FINANCIAL_INFORMATION_PROMPT = ""
|
| 23 |
+
SELECT_TOP_FINANCIALS_PROMPT = "Select the top {num_companies} companies with the best financials out of the " \
|
| 24 |
+
"companies in the provided list based on the financial metrics provided. For each" \
|
| 25 |
+
" company provide a short 2-3 bulletpoints justification why this company was " \
|
| 26 |
+
"selected. Format your answer as the following JSON:\n" \
|
| 27 |
+
"[{{ticker: company_ticker, justification: justification}}, ...]\n" \
|
| 28 |
+
"List of companies with financials: {companies}"
|
| 29 |
|
rolling_update.py
CHANGED
|
@@ -2,6 +2,7 @@ import pandas as pd
|
|
| 2 |
import yfinance as yf
|
| 3 |
from finance_handler import get_financial_info
|
| 4 |
from config import Config
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
def update_financial_info(data) -> str:
|
|
@@ -41,8 +42,7 @@ def initial_update_flow(data: pd.DataFrame) -> dict:
|
|
| 41 |
dataframes: dictionary of dataframes with the updated info
|
| 42 |
"""
|
| 43 |
tickers = data["ticker"].unique()
|
| 44 |
-
|
| 45 |
-
financial_columns = [x for x in data.columns if x in test.info.keys() or Config.FINANCE_HISTORICAL_RETURN in x]
|
| 46 |
processed_data = data.to_dict(orient="records")
|
| 47 |
current_data = [data[data["ticker"] == ticker][financial_columns + [Config.FINANCE_UPDATE_TIME]
|
| 48 |
].to_dict(orient="records")[0] for ticker in tickers]
|
|
@@ -66,7 +66,8 @@ def add_financial_history(dataframes: dict[pd.DataFrame]) -> dict[pd.DataFrame]:
|
|
| 66 |
2. Updates the main tab with the most recent financial info for each company
|
| 67 |
|
| 68 |
"""
|
| 69 |
-
|
|
|
|
| 70 |
for name, df in dataframes.items():
|
| 71 |
if name == Config.MAIN_SHEET_NAME:
|
| 72 |
df_dict = df.to_dict(orient="records")
|
|
@@ -74,20 +75,10 @@ def add_financial_history(dataframes: dict[pd.DataFrame]) -> dict[pd.DataFrame]:
|
|
| 74 |
for c, company in enumerate(df_dict)]
|
| 75 |
dataframes[name] = pd.DataFrame(df_dict)
|
| 76 |
continue
|
|
|
|
|
|
|
| 77 |
df_dict = df.to_dict(orient="records")
|
| 78 |
updated_info = dataframes[Config.MAIN_SHEET_NAME][dataframes[Config.MAIN_SHEET_NAME]["ticker"] == name][financial_columns].to_dict(orient="records")[0]
|
| 79 |
df_dict.append(updated_info)
|
| 80 |
dataframes[name] = pd.DataFrame(df_dict)
|
| 81 |
return dataframes
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
def convert_to_excel(dataframes: dict[pd.DataFrame], filename="updated_data", index=True) -> str:
|
| 85 |
-
"""
|
| 86 |
-
Accepts a dictionary of dataframes and converts it to an Excel file
|
| 87 |
-
"""
|
| 88 |
-
filename = f"{filename}.xlsx"
|
| 89 |
-
writer = pd.ExcelWriter(filename, engine='openpyxl')
|
| 90 |
-
for ticker, df in dataframes.items():
|
| 91 |
-
df.to_excel(writer, index=index, sheet_name=ticker)
|
| 92 |
-
writer.close()
|
| 93 |
-
return filename
|
|
|
|
| 2 |
import yfinance as yf
|
| 3 |
from finance_handler import get_financial_info
|
| 4 |
from config import Config
|
| 5 |
+
from common_functionality import convert_to_excel, get_financial_columns
|
| 6 |
|
| 7 |
|
| 8 |
def update_financial_info(data) -> str:
|
|
|
|
| 42 |
dataframes: dictionary of dataframes with the updated info
|
| 43 |
"""
|
| 44 |
tickers = data["ticker"].unique()
|
| 45 |
+
financial_columns = get_financial_columns(data)
|
|
|
|
| 46 |
processed_data = data.to_dict(orient="records")
|
| 47 |
current_data = [data[data["ticker"] == ticker][financial_columns + [Config.FINANCE_UPDATE_TIME]
|
| 48 |
].to_dict(orient="records")[0] for ticker in tickers]
|
|
|
|
| 66 |
2. Updates the main tab with the most recent financial info for each company
|
| 67 |
|
| 68 |
"""
|
| 69 |
+
companies_tickers_filtered = [i for i in dataframes.keys() if i not in Config.__dict__.values()][0]
|
| 70 |
+
financial_columns = dataframes[companies_tickers_filtered].columns
|
| 71 |
for name, df in dataframes.items():
|
| 72 |
if name == Config.MAIN_SHEET_NAME:
|
| 73 |
df_dict = df.to_dict(orient="records")
|
|
|
|
| 75 |
for c, company in enumerate(df_dict)]
|
| 76 |
dataframes[name] = pd.DataFrame(df_dict)
|
| 77 |
continue
|
| 78 |
+
if name == Config.SELECTED_BY_FINANCIALS:
|
| 79 |
+
continue
|
| 80 |
df_dict = df.to_dict(orient="records")
|
| 81 |
updated_info = dataframes[Config.MAIN_SHEET_NAME][dataframes[Config.MAIN_SHEET_NAME]["ticker"] == name][financial_columns].to_dict(orient="records")[0]
|
| 82 |
df_dict.append(updated_info)
|
| 83 |
dataframes[name] = pd.DataFrame(df_dict)
|
| 84 |
return dataframes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rolling_update.xlsx
ADDED
|
Binary file (8.32 kB). View file
|
|
|
select_top_financials.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
from config import Config
|
| 5 |
+
from common_functionality import convert_to_excel, get_financial_columns, send_openai_request
|
| 6 |
+
from prompts import Prompts
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def select_by_financials(processed_file):
    """
    Add a sheet of top-financials companies to an uploaded workbook.

    Reads every sheet of `processed_file`, runs get_selected_companies on the
    Config.MAIN_SHEET_NAME sheet, and writes the original sheets plus the
    selection sheet to "selected_companies.xlsx" (without index columns).

    Returns:
        The filename returned by convert_to_excel.
    """
    workbook = pd.read_excel(processed_file.name, sheet_name=None)
    selection_sheet = get_selected_companies(workbook[Config.MAIN_SHEET_NAME])
    # Keep the original sheets first; a sheet with the same name as the
    # selection sheet is overwritten by the fresh result.
    combined = dict(workbook)
    combined.update(selection_sheet)
    return convert_to_excel(combined, filename="selected_companies", index=False)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_selected_companies(data: pd.DataFrame) -> dict:
    """
    Ask the model to pick the companies with the best financials.

    Args:
        data: main company table; must contain a "ticker" column in addition
            to the financial columns found by get_financial_columns.

    Returns:
        {Config.SELECTED_BY_FINANCIALS: DataFrame} where the dataframe holds,
        per selected ticker, the model's justification merged with that
        company's financial columns.
    """
    # The update timestamp and the country are not financial metrics, so they
    # are left out of both the prompt and the merged result.
    excluded = (Config.FINANCE_UPDATE_TIME, "country")
    financial_columns = [column for column in get_financial_columns(data) if column not in excluded]

    financial_data = data[financial_columns].to_dict(orient="records")
    # Records are produced in row order, so they pair up with the ticker column.
    for company, ticker in zip(financial_data, data["ticker"]):
        company["ticker"] = ticker

    prompt = Prompts.SELECT_TOP_FINANCIALS_PROMPT.format(num_companies=Config.TEMP_NUMBER_TO_SELECT,
                                                         companies=financial_data)
    response = send_openai_request(prompt=prompt)
    # A single selection may come back as one JSON object instead of a list.
    if isinstance(response, dict):
        response = [response]
    selected_df = pd.DataFrame(response)
    # Bullet points may arrive as a list of strings or as one string; joining
    # a plain string would insert a space between every character.
    selected_df["justification"] = selected_df["justification"].apply(
        lambda points: points if isinstance(points, str) else " ".join(points))
    selected_df = pd.merge(selected_df, data[financial_columns + ["ticker"]], on="ticker")
    return {Config.SELECTED_BY_FINANCIALS: selected_df}
|
selected_companies.xlsx
ADDED
|
Binary file (8.26 kB). View file
|
|
|
updated_strategic_data.xlsx
ADDED
|
Binary file (9.86 kB). View file
|
|
|
workflow.ipynb
ADDED
|
@@ -0,0 +1,599 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 24,
|
| 6 |
+
"metadata": {
|
| 7 |
+
"ExecuteTime": {
|
| 8 |
+
"end_time": "2023-09-27T02:30:24.971742Z",
|
| 9 |
+
"start_time": "2023-09-27T02:30:24.766127Z"
|
| 10 |
+
}
|
| 11 |
+
},
|
| 12 |
+
"outputs": [],
|
| 13 |
+
"source": [
|
| 14 |
+
"import pandas as pd\n",
|
| 15 |
+
"import json\n",
|
| 16 |
+
"import datetime\n",
|
| 17 |
+
"import yfinance as yf\n",
|
| 18 |
+
"import openai\n",
|
| 19 |
+
"openai.api_key = \"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\""
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "code",
|
| 24 |
+
"execution_count": 41,
|
| 25 |
+
"metadata": {
|
| 26 |
+
"ExecuteTime": {
|
| 27 |
+
"end_time": "2023-09-21T02:11:08.848554Z",
|
| 28 |
+
"start_time": "2023-09-21T02:11:08.820339Z"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"outputs": [
|
| 32 |
+
{
|
| 33 |
+
"data": {
|
| 34 |
+
"text/plain": " Sector\n0 Cybersecurity\n1 2\n2 Name, ticker, subsector, country, description \n3 currentPrice, trailingPE, forwardPE, marketCap...\n4 Leadership and Governance, Market Position and...",
|
| 35 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Sector</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Cybersecurity</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Name, ticker, subsector, country, description</td>\n </tr>\n <tr>\n <th>3</th>\n <td>currentPrice, trailingPE, forwardPE, marketCap...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Leadership and Governance, Market Position and...</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
| 36 |
+
},
|
| 37 |
+
"execution_count": 41,
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"output_type": "execute_result"
|
| 40 |
+
}
|
| 41 |
+
],
|
| 42 |
+
"source": [
|
| 43 |
+
"with open(\"/Users/artemkalyta/Downloads/input_v0.1-example.xlsx\", 'rb') as f:\n",
|
| 44 |
+
" temp_data = pd.read_excel(f)\n",
|
| 45 |
+
"temp_data"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": 42,
|
| 51 |
+
"metadata": {
|
| 52 |
+
"ExecuteTime": {
|
| 53 |
+
"end_time": "2023-09-21T02:11:15.777916Z",
|
| 54 |
+
"start_time": "2023-09-21T02:11:15.763160Z"
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
"outputs": [
|
| 58 |
+
{
|
| 59 |
+
"data": {
|
| 60 |
+
"text/plain": " Sector\n0 Cybersecurity\n1 2\n2 Name, ticker, subsector, country, description \n3 currentPrice, trailingPE, forwardPE, marketCap...\n4 Leadership and Governance, Market Position and...",
|
| 61 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Sector</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Cybersecurity</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Name, ticker, subsector, country, description</td>\n </tr>\n <tr>\n <th>3</th>\n <td>currentPrice, trailingPE, forwardPE, marketCap...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Leadership and Governance, Market Position and...</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
| 62 |
+
},
|
| 63 |
+
"execution_count": 42,
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"output_type": "execute_result"
|
| 66 |
+
}
|
| 67 |
+
],
|
| 68 |
+
"source": [
|
| 69 |
+
"data = pd.read_excel(\"/Users/artemkalyta/Downloads/input_v0.1-example.xlsx\")\n",
|
| 70 |
+
"data.head()"
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"cell_type": "code",
|
| 75 |
+
"execution_count": 52,
|
| 76 |
+
"metadata": {
|
| 77 |
+
"ExecuteTime": {
|
| 78 |
+
"end_time": "2023-09-21T02:16:38.142654Z",
|
| 79 |
+
"start_time": "2023-09-21T02:16:17.490453Z"
|
| 80 |
+
}
|
| 81 |
+
},
|
| 82 |
+
"outputs": [],
|
| 83 |
+
"source": [
|
| 84 |
+
"def get_formatting_columns(data: pd.DataFrame) -> dict:\n",
|
| 85 |
+
" data_dict = {\"sector\": None, \"num_companies\": None, \"basic_info\": None, \"financial_info\": None, \"strategy_info\": None}\n",
|
| 86 |
+
" data_dict[\"sector\"] = data[\"Sector\"].values.tolist()[0]\n",
|
| 87 |
+
" data_dict[\"num_companies\"] = data[\"Sector\"].values.tolist()[1]\n",
|
| 88 |
+
" basic_info = [x.strip().lower() for x in data[\"Sector\"].values.tolist()[2].split(\", \")]\n",
|
| 89 |
+
" financial_info = [x.strip() for x in data[\"Sector\"].values.tolist()[3].split(\", \")]\n",
|
| 90 |
+
" strategy_info = [x.strip() for x in data[\"Sector\"].values.tolist()[4].split(\", \")]\n",
|
| 91 |
+
" data_dict[\"basic_info\"] = basic_info\n",
|
| 92 |
+
" data_dict[\"financial_info\"] = financial_info\n",
|
| 93 |
+
" data_dict[\"strategy_info\"] = strategy_info\n",
|
| 94 |
+
" return data_dict\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"def send_openai_request(prompt, max_tokens=2000):\n",
|
| 97 |
+
" response = openai.ChatCompletion.create(\n",
|
| 98 |
+
" model=\"gpt-4\",\n",
|
| 99 |
+
" messages=[\n",
|
| 100 |
+
" {\n",
|
| 101 |
+
" \"role\": \"user\",\n",
|
| 102 |
+
" \"content\": f\"{prompt}\"\n",
|
| 103 |
+
" }\n",
|
| 104 |
+
" ],\n",
|
| 105 |
+
" temperature=0.1,\n",
|
| 106 |
+
" max_tokens=max_tokens,\n",
|
| 107 |
+
" top_p=1,\n",
|
| 108 |
+
" frequency_penalty=0,\n",
|
| 109 |
+
" presence_penalty=0\n",
|
| 110 |
+
" )\n",
|
| 111 |
+
" return json.loads(response.choices[0][\"message\"][\"content\"])\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"\n",
|
| 114 |
+
"def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list:\n",
|
| 115 |
+
" max_num_companies = 20\n",
|
| 116 |
+
" data = []\n",
|
| 117 |
+
" if n_companies > max_num_companies:\n",
|
| 118 |
+
" ignore_companies = []\n",
|
| 119 |
+
" for _ in range(n_companies // max_num_companies): \n",
|
| 120 |
+
" prompt = f\"\"\"Provide a list of the {max_num_companies} most prominent companies in {sector} with the best potential growth. Companies that you should not mention: {ignore_companies}. Include all the relevant, concise, and descriptive information and format the response as the valid JSON with the following keys: {required_fields}\"\"\"\n",
|
| 121 |
+
" data.extend(send_openai_request(prompt))\n",
|
| 122 |
+
" ignore_companies = [i[\"name\"] for i in data]\n",
|
| 123 |
+
" prompt = f\"\"\"Provide a list of the {n_companies % max_num_companies} most prominent companies in {sector} with the best potential growth. Companies that you should not mention: {ignore_companies}. Include all the relevant, concise, and descriptive information and format the response as the valid JSON with the following keys: {required_fields}\"\"\"\n",
|
| 124 |
+
" resp = send_openai_request(prompt)\n",
|
| 125 |
+
" if type(resp) == dict:\n",
|
| 126 |
+
" data.append(resp)\n",
|
| 127 |
+
" else:\n",
|
| 128 |
+
" data.extend(resp)\n",
|
| 129 |
+
" else:\n",
|
| 130 |
+
" prompt = f\"\"\"Provide a list of the {n_companies} most prominent companies in {sector} with the best potential growth. Include all the relevant, concise, and descriptive information and format the response as the valid JSON with the following keys: {required_fields}\"\"\"\n",
|
| 131 |
+
" data.extend(send_openai_request(prompt))\n",
|
| 132 |
+
" return data\n",
|
| 133 |
+
"\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"def get_strategy_info(company_name: str, categories: list) -> dict:\n",
|
| 136 |
+
" prompt = f\"\"\"Please analyze {company_name} and give a descriptive, detailed, and very specific analysis (at least 5-6 sentences or more) using numbers when appropriate (only if you're absolutely confident in the numbers) on each of the following categories: {categories}. Each category should be described in detail. All of the information should be useful to a potential investor and should describe the advantages and drawbacks of investing in the company, you should approach every category only from a business and potential profit perspective. Please format your answer as a valid JSON with the keys being the provided categories.\"\"\"\n",
|
| 137 |
+
" response = send_openai_request(prompt)\n",
|
| 138 |
+
" return response\n",
|
| 139 |
+
"\n",
|
| 140 |
+
"def historical_return(company: yf.Ticker, history_years: int) -> float:\n",
|
| 141 |
+
" history = company.history(period=f\"{history_years}y\")\n",
|
| 142 |
+
" return (history[\"Close\"].iloc[-1] - history[\"Close\"].iloc[0]) / history[\"Close\"].iloc[0]\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"def get_financial_info(ticker: str, metrics: list) -> dict:\n",
|
| 145 |
+
" company = yf.Ticker(ticker)\n",
|
| 146 |
+
" financial_info = {}\n",
|
| 147 |
+
" for metric in metrics:\n",
|
| 148 |
+
" if company.info.get(metric, None):\n",
|
| 149 |
+
" financial_info.update({metric: company.info.get(metric)})\n",
|
| 150 |
+
" continue\n",
|
| 151 |
+
" if \"historical_return\" in metric:\n",
|
| 152 |
+
" financial_info.update({metric: historical_return(company, metric[-1])})\n",
|
| 153 |
+
" return financial_info\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"def shape_sector_info(data_dict: dict) -> pd.DataFrame:\n",
|
| 156 |
+
" sector_data = get_basic_info(data_dict[\"sector\"], data_dict[\"num_companies\"], data_dict[\"basic_info\"])\n",
|
| 157 |
+
" for c, company in enumerate(sector_data):\n",
|
| 158 |
+
" #sector_data[c].update(get_strategy_info(company[\"name\"], data_dict[\"strategy_info\"]))\n",
|
| 159 |
+
" sector_data[c].update(get_financial_info(company[\"ticker\"], data_dict[\"financial_info\"]))\n",
|
| 160 |
+
" sector_data = pd.DataFrame(sector_data)\n",
|
| 161 |
+
" return sector_data\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"\n",
|
| 165 |
+
"data_dict = get_formatting_columns(data)\n",
|
| 166 |
+
"sector_data = shape_sector_info(data_dict)\n"
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
{
|
| 170 |
+
"cell_type": "code",
|
| 171 |
+
"execution_count": 53,
|
| 172 |
+
"outputs": [
|
| 173 |
+
{
|
| 174 |
+
"data": {
|
| 175 |
+
"text/plain": " name ticker subsector country \\\n0 CrowdStrike Holdings Inc. CRWD Software-Infrastructure United States \n1 Palo Alto Networks Inc. PANW Software-Infrastructure United States \n\n description currentPrice forwardPE \\\n0 CrowdStrike is a leading cybersecurity company... 163.89 46.825714 \n1 Palo Alto Networks is a global cybersecurity l... 235.30 36.765625 \n\n marketCap beta recommendationMean historical_return1 \\\n0 39134965760 0.959392 1.8 -0.031669 \n1 72880881664 1.191908 1.8 0.375058 \n\n historical_return5 trailingPE \n0 1.825690 NaN \n1 2.075549 188.24 ",
|
| 176 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>ticker</th>\n <th>subsector</th>\n <th>country</th>\n <th>description</th>\n <th>currentPrice</th>\n <th>forwardPE</th>\n <th>marketCap</th>\n <th>beta</th>\n <th>recommendationMean</th>\n <th>historical_return1</th>\n <th>historical_return5</th>\n <th>trailingPE</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>CrowdStrike Holdings Inc.</td>\n <td>CRWD</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>CrowdStrike is a leading cybersecurity company...</td>\n <td>163.89</td>\n <td>46.825714</td>\n <td>39134965760</td>\n <td>0.959392</td>\n <td>1.8</td>\n <td>-0.031669</td>\n <td>1.825690</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Palo Alto Networks Inc.</td>\n <td>PANW</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>Palo Alto Networks is a global cybersecurity l...</td>\n <td>235.30</td>\n <td>36.765625</td>\n <td>72880881664</td>\n <td>1.191908</td>\n <td>1.8</td>\n <td>0.375058</td>\n <td>2.075549</td>\n <td>188.24</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
| 177 |
+
},
|
| 178 |
+
"execution_count": 53,
|
| 179 |
+
"metadata": {},
|
| 180 |
+
"output_type": "execute_result"
|
| 181 |
+
}
|
| 182 |
+
],
|
| 183 |
+
"source": [
|
| 184 |
+
"sector_data"
|
| 185 |
+
],
|
| 186 |
+
"metadata": {
|
| 187 |
+
"collapsed": false,
|
| 188 |
+
"ExecuteTime": {
|
| 189 |
+
"end_time": "2023-09-21T02:17:24.897424Z",
|
| 190 |
+
"start_time": "2023-09-21T02:17:24.888427Z"
|
| 191 |
+
}
|
| 192 |
+
}
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "code",
|
| 196 |
+
"execution_count": 54,
|
| 197 |
+
"metadata": {
|
| 198 |
+
"ExecuteTime": {
|
| 199 |
+
"end_time": "2023-09-21T02:17:28.507658Z",
|
| 200 |
+
"start_time": "2023-09-21T02:17:28.488571Z"
|
| 201 |
+
}
|
| 202 |
+
},
|
| 203 |
+
"outputs": [
|
| 204 |
+
{
|
| 205 |
+
"data": {
|
| 206 |
+
"text/plain": " name ticker subsector country \\\n0 CrowdStrike Holdings Inc. CRWD Software-Infrastructure United States \n1 Palo Alto Networks Inc. PANW Software-Infrastructure United States \n\n description currentPrice forwardPE \\\n0 CrowdStrike is a leading cybersecurity company... 163.89 46.825714 \n1 Palo Alto Networks is a global cybersecurity l... 235.30 36.765625 \n\n marketCap beta recommendationMean historical_return1 \\\n0 39134965760 0.959392 1.8 -0.031669 \n1 72880881664 1.191908 1.8 0.375058 \n\n historical_return5 trailingPE \n0 1.825690 NaN \n1 2.075549 188.24 ",
|
| 207 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>ticker</th>\n <th>subsector</th>\n <th>country</th>\n <th>description</th>\n <th>currentPrice</th>\n <th>forwardPE</th>\n <th>marketCap</th>\n <th>beta</th>\n <th>recommendationMean</th>\n <th>historical_return1</th>\n <th>historical_return5</th>\n <th>trailingPE</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>CrowdStrike Holdings Inc.</td>\n <td>CRWD</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>CrowdStrike is a leading cybersecurity company...</td>\n <td>163.89</td>\n <td>46.825714</td>\n <td>39134965760</td>\n <td>0.959392</td>\n <td>1.8</td>\n <td>-0.031669</td>\n <td>1.825690</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Palo Alto Networks Inc.</td>\n <td>PANW</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>Palo Alto Networks is a global cybersecurity l...</td>\n <td>235.30</td>\n <td>36.765625</td>\n <td>72880881664</td>\n <td>1.191908</td>\n <td>1.8</td>\n <td>0.375058</td>\n <td>2.075549</td>\n <td>188.24</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
| 208 |
+
},
|
| 209 |
+
"execution_count": 54,
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"output_type": "execute_result"
|
| 212 |
+
}
|
| 213 |
+
],
|
| 214 |
+
"source": [
|
| 215 |
+
"sector_data.to_excel(\"/Users/artemkalyta/Downloads/output_v0.1-example.xlsx\")\n",
|
| 216 |
+
"sector_data"
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
"cell_type": "code",
|
| 221 |
+
"execution_count": 23,
|
| 222 |
+
"metadata": {
|
| 223 |
+
"ExecuteTime": {
|
| 224 |
+
"end_time": "2023-09-27T02:13:50.891551Z",
|
| 225 |
+
"start_time": "2023-09-27T02:13:50.886535Z"
|
| 226 |
+
}
|
| 227 |
+
},
|
| 228 |
+
"outputs": [
|
| 229 |
+
{
|
| 230 |
+
"name": "stdout",
|
| 231 |
+
"output_type": "stream",
|
| 232 |
+
"text": [
|
| 233 |
+
"['__class__', '__delattr__', '__delete__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__get__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__isabstractmethod__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__set__', '__set_name__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'deleter', 'fdel', 'fget', 'fset', 'getter', 'setter']\n"
|
| 234 |
+
]
|
| 235 |
+
}
|
| 236 |
+
],
|
| 237 |
+
"source": [
|
| 238 |
+
"import yfinance as yf\n",
|
| 239 |
+
"print(dir(yf.ticker.Ticker.financials))\n"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": 27,
|
| 245 |
+
"metadata": {
|
| 246 |
+
"ExecuteTime": {
|
| 247 |
+
"end_time": "2023-09-21T01:56:55.816136Z",
|
| 248 |
+
"start_time": "2023-09-21T01:56:55.777294Z"
|
| 249 |
+
}
|
| 250 |
+
},
|
| 251 |
+
"outputs": [
|
| 252 |
+
{
|
| 253 |
+
"data": {
|
| 254 |
+
"text/plain": "0.3457653580944472"
|
| 255 |
+
},
|
| 256 |
+
"execution_count": 27,
|
| 257 |
+
"metadata": {},
|
| 258 |
+
"output_type": "execute_result"
|
| 259 |
+
}
|
| 260 |
+
],
|
| 261 |
+
"source": [
|
| 262 |
+
"def historical_return(ticker: str, history_years: int) -> float:\n",
|
| 263 |
+
" company = yf.Ticker(ticker)\n",
|
| 264 |
+
" history = company.history(period=f\"{history_years}y\")\n",
|
| 265 |
+
" return (history[\"Close\"].iloc[-1] - history[\"Close\"].iloc[0]) / history[\"Close\"].iloc[0]\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"historical_return(\"GOOG\", 1)"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": 51,
|
| 273 |
+
"metadata": {
|
| 274 |
+
"ExecuteTime": {
|
| 275 |
+
"end_time": "2023-09-21T02:15:43.563900Z",
|
| 276 |
+
"start_time": "2023-09-21T02:15:43.372194Z"
|
| 277 |
+
}
|
| 278 |
+
},
|
| 279 |
+
"outputs": [
|
| 280 |
+
{
|
| 281 |
+
"data": {
|
| 282 |
+
"text/plain": "1.8"
|
| 283 |
+
},
|
| 284 |
+
"execution_count": 51,
|
| 285 |
+
"metadata": {},
|
| 286 |
+
"output_type": "execute_result"
|
| 287 |
+
}
|
| 288 |
+
],
|
| 289 |
+
"source": [
|
| 290 |
+
"tick = yf.Ticker(\"PANW\").info[\"recommendationMean\"]\n",
|
| 291 |
+
"tick"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"cell_type": "code",
|
| 296 |
+
"execution_count": 4,
|
| 297 |
+
"metadata": {
|
| 298 |
+
"ExecuteTime": {
|
| 299 |
+
"end_time": "2023-09-21T01:29:10.489157Z",
|
| 300 |
+
"start_time": "2023-09-21T01:29:10.032458Z"
|
| 301 |
+
}
|
| 302 |
+
},
|
| 303 |
+
"outputs": [
|
| 304 |
+
{
|
| 305 |
+
"ename": "KeyError",
|
| 306 |
+
"evalue": "'regularMarketPrice'",
|
| 307 |
+
"output_type": "error",
|
| 308 |
+
"traceback": [
|
| 309 |
+
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
| 310 |
+
"\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)",
|
| 311 |
+
"Cell \u001B[0;32mIn[4], line 3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01myfinance\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01myf\u001B[39;00m\n\u001B[1;32m 2\u001B[0m ticker \u001B[38;5;241m=\u001B[39m yf\u001B[38;5;241m.\u001B[39mTicker(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mGOOGL\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39minfo\n\u001B[0;32m----> 3\u001B[0m marketPrice \u001B[38;5;241m=\u001B[39m \u001B[43mticker\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mregularMarketPrice\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\n\u001B[1;32m 4\u001B[0m previousClosePrice \u001B[38;5;241m=\u001B[39m ticker[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mregularMarketPreviousClose\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mTicker Value: GOOGL\u001B[39m\u001B[38;5;124m'\u001B[39m)\n",
|
| 312 |
+
"\u001B[0;31mKeyError\u001B[0m: 'regularMarketPrice'"
|
| 313 |
+
]
|
| 314 |
+
}
|
| 315 |
+
],
|
| 316 |
+
"source": [
|
| 317 |
+
"[\"currentPrice\", \"trailingPE\", \"forwardPE\", \"marketCap\", \"beta\", \"recommendationMean\", \"historical_return1\", \"historical_return5\"]"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"cell_type": "code",
|
| 322 |
+
"execution_count": 36,
|
| 323 |
+
"metadata": {
|
| 324 |
+
"ExecuteTime": {
|
| 325 |
+
"end_time": "2023-09-21T02:04:30.806580Z",
|
| 326 |
+
"start_time": "2023-09-21T02:04:30.802510Z"
|
| 327 |
+
}
|
| 328 |
+
},
|
| 329 |
+
"outputs": [
|
| 330 |
+
{
|
| 331 |
+
"data": {
|
| 332 |
+
"text/plain": "dict"
|
| 333 |
+
},
|
| 334 |
+
"execution_count": 36,
|
| 335 |
+
"metadata": {},
|
| 336 |
+
"output_type": "execute_result"
|
| 337 |
+
}
|
| 338 |
+
],
|
| 339 |
+
"source": [
|
| 340 |
+
"a = ticker.get(\"ebitdaa\", None)\n",
|
| 341 |
+
"print(a)"
|
| 342 |
+
]
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"cell_type": "code",
|
| 346 |
+
"execution_count": 12,
|
| 347 |
+
"outputs": [
|
| 348 |
+
{
|
| 349 |
+
"data": {
|
| 350 |
+
"text/plain": "{'sector': 'Cybersecurity',\n 'num_companies': 2,\n 'basic_info': ['name', 'ticker', 'subsector', 'country', 'description'],\n 'financian_info': ['P/E',\n 'Market Cap',\n 'Beta',\n 'Analyst Recommendation',\n '1-year Return',\n '5-year Return'],\n 'strategy_info': ['Leadership and Governance',\n 'Market Position and Competitive Advantage',\n 'R&D and Innovation',\n 'ESG Factors']}"
|
| 351 |
+
},
|
| 352 |
+
"execution_count": 12,
|
| 353 |
+
"metadata": {},
|
| 354 |
+
"output_type": "execute_result"
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"source": [
|
| 358 |
+
"data_dict"
|
| 359 |
+
],
|
| 360 |
+
"metadata": {
|
| 361 |
+
"collapsed": false,
|
| 362 |
+
"ExecuteTime": {
|
| 363 |
+
"end_time": "2023-09-21T01:36:45.859093Z",
|
| 364 |
+
"start_time": "2023-09-21T01:36:45.852532Z"
|
| 365 |
+
}
|
| 366 |
+
}
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"cell_type": "code",
|
| 370 |
+
"execution_count": 13,
|
| 371 |
+
"outputs": [
|
| 372 |
+
{
|
| 373 |
+
"data": {
|
| 374 |
+
"text/plain": "dict_keys(['__module__', 'ACTION_BASIC_INFO', 'ACTION_STRATEGY_INFO', 'ACTION_FINANCIAL_INFO', 'ACTION_UPDATE_INFO', 'ACTION_SELECT_BY_STRATEGY', 'ACTION_SELECT_BY_FINANCIALS', 'FINANCE_UPDATE_TIME', 'FINANCE_HISTORICAL_RETURN', 'MAIN_SHEET_NAME', 'SELECTED_BY_FINANCIALS', '__dict__', '__weakref__', '__doc__', '__annotations__'])"
|
| 375 |
+
},
|
| 376 |
+
"execution_count": 13,
|
| 377 |
+
"metadata": {},
|
| 378 |
+
"output_type": "execute_result"
|
| 379 |
+
}
|
| 380 |
+
],
|
| 381 |
+
"source": [
|
| 382 |
+
"from config import Config\n",
|
| 383 |
+
"\n",
|
| 384 |
+
"Config.__dict__.keys()"
|
| 385 |
+
],
|
| 386 |
+
"metadata": {
|
| 387 |
+
"collapsed": false,
|
| 388 |
+
"ExecuteTime": {
|
| 389 |
+
"end_time": "2023-10-02T03:08:06.743908Z",
|
| 390 |
+
"start_time": "2023-10-02T03:08:06.740531Z"
|
| 391 |
+
}
|
| 392 |
+
}
|
| 393 |
+
},
|
| 394 |
+
{
|
| 395 |
+
"cell_type": "code",
|
| 396 |
+
"execution_count": 7,
|
| 397 |
+
"outputs": [
|
| 398 |
+
{
|
| 399 |
+
"data": {
|
| 400 |
+
"text/plain": "['a', 'b']"
|
| 401 |
+
},
|
| 402 |
+
"execution_count": 7,
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"output_type": "execute_result"
|
| 405 |
+
}
|
| 406 |
+
],
|
| 407 |
+
"source": [
|
| 408 |
+
"a = [\"a\", \"b\", \"c\"]\n",
|
| 409 |
+
"a.remove(\"c\")\n",
|
| 410 |
+
"a"
|
| 411 |
+
],
|
| 412 |
+
"metadata": {
|
| 413 |
+
"collapsed": false,
|
| 414 |
+
"ExecuteTime": {
|
| 415 |
+
"end_time": "2023-10-02T02:42:04.990078Z",
|
| 416 |
+
"start_time": "2023-10-02T02:42:04.987609Z"
|
| 417 |
+
}
|
| 418 |
+
}
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"cell_type": "code",
|
| 422 |
+
"execution_count": 22,
|
| 423 |
+
"outputs": [
|
| 424 |
+
{
|
| 425 |
+
"data": {
|
| 426 |
+
"text/plain": "1.9"
|
| 427 |
+
},
|
| 428 |
+
"execution_count": 22,
|
| 429 |
+
"metadata": {},
|
| 430 |
+
"output_type": "execute_result"
|
| 431 |
+
}
|
| 432 |
+
],
|
| 433 |
+
"source": [
|
| 434 |
+
"ticker[\"recommendationMean\"]"
|
| 435 |
+
],
|
| 436 |
+
"metadata": {
|
| 437 |
+
"collapsed": false,
|
| 438 |
+
"ExecuteTime": {
|
| 439 |
+
"end_time": "2023-09-21T01:51:38.725734Z",
|
| 440 |
+
"start_time": "2023-09-21T01:51:38.723808Z"
|
| 441 |
+
}
|
| 442 |
+
}
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"cell_type": "code",
|
| 446 |
+
"execution_count": 2,
|
| 447 |
+
"outputs": [
|
| 448 |
+
{
|
| 449 |
+
"name": "stdout",
|
| 450 |
+
"output_type": "stream",
|
| 451 |
+
"text": [
|
| 452 |
+
"Select the top 5 companies with the best financials out of the companies in the provided list based on the financial metrics provided. For each company provide a short 2-3 bulletpoint justification why this company was selected. Format your answer as the following JSON:[{company_name: justification}, {company_name2: justification}]\n"
|
| 453 |
+
]
|
| 454 |
+
}
|
| 455 |
+
],
|
| 456 |
+
"source": [
|
| 457 |
+
"SELECT_TOP_FINANCIALS_PROMPT = \"Select the top {num_companies} companies with the best financials out of the \" \\\n",
|
| 458 |
+
" \"companies in the provided list based on the financial metrics provided. For each\" \\\n",
|
| 459 |
+
" \" company provide a short 2-3 bulletpoint justification why this company was \" \\\n",
|
| 460 |
+
" \"selected. Format your answer as the following JSON:\" \\\n",
|
| 461 |
+
" \"[{{company_name: justification}}, {{company_name2: justification}}]\"\n",
|
| 462 |
+
"print(SELECT_TOP_FINANCIALS_PROMPT.format(num_companies=5))"
|
| 463 |
+
],
|
| 464 |
+
"metadata": {
|
| 465 |
+
"collapsed": false,
|
| 466 |
+
"ExecuteTime": {
|
| 467 |
+
"end_time": "2023-10-02T02:24:51.683836Z",
|
| 468 |
+
"start_time": "2023-10-02T02:24:51.679391Z"
|
| 469 |
+
}
|
| 470 |
+
}
|
| 471 |
+
},
|
| 472 |
+
{
|
| 473 |
+
"cell_type": "code",
|
| 474 |
+
"execution_count": 4,
|
| 475 |
+
"outputs": [
|
| 476 |
+
{
|
| 477 |
+
"data": {
|
| 478 |
+
"text/plain": "False"
|
| 479 |
+
},
|
| 480 |
+
"execution_count": 4,
|
| 481 |
+
"metadata": {},
|
| 482 |
+
"output_type": "execute_result"
|
| 483 |
+
}
|
| 484 |
+
],
|
| 485 |
+
"source": [
|
| 486 |
+
"import yfinance as yf\n",
|
| 487 |
+
"\n",
|
| 488 |
+
"test = yf.Ticker(\"AAPL\")\n",
|
| 489 |
+
"\"ticker\" in test.info.keys()"
|
| 490 |
+
],
|
| 491 |
+
"metadata": {
|
| 492 |
+
"collapsed": false,
|
| 493 |
+
"ExecuteTime": {
|
| 494 |
+
"end_time": "2023-10-02T02:39:36.762388Z",
|
| 495 |
+
"start_time": "2023-10-02T02:39:36.559113Z"
|
| 496 |
+
}
|
| 497 |
+
}
|
| 498 |
+
},
|
| 499 |
+
{
|
| 500 |
+
"cell_type": "code",
|
| 501 |
+
"execution_count": 61,
|
| 502 |
+
"outputs": [
|
| 503 |
+
{
|
| 504 |
+
"data": {
|
| 505 |
+
"text/plain": "[{'name': 'CrowdStrike Holdings Inc.',\n 'ticker': 'CRWD',\n 'subsector': 'Software-Infrastructure',\n 'country': 'United States',\n 'description': 'CrowdStrike is a leading cybersecurity company that provides cloud-delivered solutions for endpoint protection, threat intelligence, and cyber attack response. Their AI-powered technology is designed to stop breaches and their Falcon platform offers instant visibility and protection across the enterprise and prevents attacks on endpoints on or off the network.',\n 'currentPrice': 163.89,\n 'forwardPE': 46.825714,\n 'marketCap': 39134965760,\n 'beta': 0.959392,\n 'recommendationMean': 1.8,\n 'historical_return1': -0.03166913211433715,\n 'historical_return5': 1.825689644649111,\n 'trailingPE': nan},\n {'name': 'Palo Alto Networks Inc.',\n 'ticker': 'PANW',\n 'subsector': 'Software-Infrastructure',\n 'country': 'United States',\n 'description': 'Palo Alto Networks is a global cybersecurity leader known for always challenging the security status quo. Their mission is to protect the digital way of life by preventing successful cyberattacks. They have pioneered the next generation of network security with innovative products that deliver cybersecurity across clouds, networks, and mobile devices.',\n 'currentPrice': 235.3,\n 'forwardPE': 36.765625,\n 'marketCap': 72880881664,\n 'beta': 1.191908,\n 'recommendationMean': 1.8,\n 'historical_return1': 0.3750584955932131,\n 'historical_return5': 2.075548954405327,\n 'trailingPE': 188.24}]"
|
| 506 |
+
},
|
| 507 |
+
"execution_count": 61,
|
| 508 |
+
"metadata": {},
|
| 509 |
+
"output_type": "execute_result"
|
| 510 |
+
}
|
| 511 |
+
],
|
| 512 |
+
"source": [
|
| 513 |
+
"temp = sector_data.to_dict(orient='records')\n",
|
| 514 |
+
"temp"
|
| 515 |
+
],
|
| 516 |
+
"metadata": {
|
| 517 |
+
"collapsed": false,
|
| 518 |
+
"ExecuteTime": {
|
| 519 |
+
"end_time": "2023-09-26T18:55:54.094789Z",
|
| 520 |
+
"start_time": "2023-09-26T18:55:54.094116Z"
|
| 521 |
+
}
|
| 522 |
+
}
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"cell_type": "code",
|
| 526 |
+
"execution_count": 28,
|
| 527 |
+
"outputs": [
|
| 528 |
+
{
|
| 529 |
+
"data": {
|
| 530 |
+
"text/plain": " price market_cap\nupdated \n2021-09-01 90 90000\n2021-09-02 100 100000",
|
| 531 |
+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>price</th>\n <th>market_cap</th>\n </tr>\n <tr>\n <th>updated</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2021-09-01</th>\n <td>90</td>\n <td>90000</td>\n </tr>\n <tr>\n <th>2021-09-02</th>\n <td>100</td>\n <td>100000</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
| 532 |
+
},
|
| 533 |
+
"execution_count": 28,
|
| 534 |
+
"metadata": {},
|
| 535 |
+
"output_type": "execute_result"
|
| 536 |
+
}
|
| 537 |
+
],
|
| 538 |
+
"source": [
|
| 539 |
+
"a = {\"price\": 90, \"market_cap\": 90000, \"updated\": \"2021-09-01\"}\n",
|
| 540 |
+
"b = {\"price\": 100, \"market_cap\": 100000, \"updated\": \"2021-09-02\"}\n",
|
| 541 |
+
"df = pd.DataFrame([a, b]).set_index(\"updated\")\n",
|
| 542 |
+
"df"
|
| 543 |
+
],
|
| 544 |
+
"metadata": {
|
| 545 |
+
"collapsed": false,
|
| 546 |
+
"ExecuteTime": {
|
| 547 |
+
"end_time": "2023-09-27T02:34:11.587757Z",
|
| 548 |
+
"start_time": "2023-09-27T02:34:11.581806Z"
|
| 549 |
+
}
|
| 550 |
+
}
|
| 551 |
+
},
|
| 552 |
+
{
|
| 553 |
+
"cell_type": "code",
|
| 554 |
+
"execution_count": 31,
|
| 555 |
+
"outputs": [
|
| 556 |
+
{
|
| 557 |
+
"data": {
|
| 558 |
+
"text/plain": "{'price': {'2021-09-01': 90, '2021-09-02': 100},\n 'market_cap': {'2021-09-01': 90000, '2021-09-02': 100000}}"
|
| 559 |
+
},
|
| 560 |
+
"execution_count": 31,
|
| 561 |
+
"metadata": {},
|
| 562 |
+
"output_type": "execute_result"
|
| 563 |
+
}
|
| 564 |
+
],
|
| 565 |
+
"source": [
|
| 566 |
+
"df.to_dict()"
|
| 567 |
+
],
|
| 568 |
+
"metadata": {
|
| 569 |
+
"collapsed": false,
|
| 570 |
+
"ExecuteTime": {
|
| 571 |
+
"end_time": "2023-09-27T18:19:53.124974Z",
|
| 572 |
+
"start_time": "2023-09-27T18:19:53.114800Z"
|
| 573 |
+
}
|
| 574 |
+
}
|
| 575 |
+
}
|
| 576 |
+
],
|
| 577 |
+
"metadata": {
|
| 578 |
+
"kernelspec": {
|
| 579 |
+
"display_name": "investor-env",
|
| 580 |
+
"language": "python",
|
| 581 |
+
"name": "python3"
|
| 582 |
+
},
|
| 583 |
+
"language_info": {
|
| 584 |
+
"codemirror_mode": {
|
| 585 |
+
"name": "ipython",
|
| 586 |
+
"version": 3
|
| 587 |
+
},
|
| 588 |
+
"file_extension": ".py",
|
| 589 |
+
"mimetype": "text/x-python",
|
| 590 |
+
"name": "python",
|
| 591 |
+
"nbconvert_exporter": "python",
|
| 592 |
+
"pygments_lexer": "ipython3",
|
| 593 |
+
"version": "3.11.5"
|
| 594 |
+
},
|
| 595 |
+
"orig_nbformat": 4
|
| 596 |
+
},
|
| 597 |
+
"nbformat": 4,
|
| 598 |
+
"nbformat_minor": 2
|
| 599 |
+
}
|