Artem Kalyta committed on
Commit
a3d6903
·
1 Parent(s): fce23bc

scoring update

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY=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
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /__pycache__
__pycache__/basic_handler.cpython-311.pyc ADDED
Binary file (1.62 kB). View file
 
__pycache__/common_functionality.cpython-311.pyc ADDED
Binary file (2.56 kB). View file
 
__pycache__/config.cpython-311.pyc ADDED
Binary file (1.01 kB). View file
 
__pycache__/finance_handler.cpython-311.pyc ADDED
Binary file (1.91 kB). View file
 
__pycache__/prompts.cpython-311.pyc ADDED
Binary file (2.42 kB). View file
 
__pycache__/rolling_update.cpython-311.pyc ADDED
Binary file (6.92 kB). View file
 
__pycache__/select_top_financials.cpython-311.pyc ADDED
Binary file (2.84 kB). View file
 
__pycache__/strategy_handler.cpython-311.pyc ADDED
Binary file (832 Bytes). View file
 
main_router.py → app.py RENAMED
@@ -1,17 +1,25 @@
1
  from basic_handler import get_basic_info
2
  from strategy_handler import get_strategy_info
3
  from finance_handler import get_financial_info
4
- from rolling_update import update_financial_info
 
5
  from config import Config
 
 
 
6
 
7
  import pandas as pd
8
  import openai
9
  import gradio as gr
10
 
11
-
 
 
 
12
 
13
  # TODO: add an identifier for financial columns for better repeat parsing
14
 
 
15
  def process_file(file) -> dict:
16
  input_data = pd.read_excel(file.name)
17
  data_dict = get_formatting_columns(input_data)
@@ -30,51 +38,73 @@ def get_formatting_columns(data: pd.DataFrame) -> dict:
30
  return data_dict
31
 
32
 
33
- def shape_sector_info(data_dict: dict, processed_data: pd.DataFrame, action) -> pd.DataFrame:
34
  if action == Config.ACTION_BASIC_INFO:
35
  processed_data = get_basic_info(data_dict["sector"], data_dict["num_companies"],
36
  data_dict[Config.ACTION_BASIC_INFO])
 
 
37
  sector_data = pd.DataFrame(processed_data)
38
- return sector_data
39
- else:
40
- processed_data = processed_data.to_dict(orient="records")
 
 
41
  if action == Config.ACTION_STRATEGY_INFO:
42
  [processed_data[c].update(get_strategy_info(company["name"], data_dict[Config.ACTION_STRATEGY_INFO])
43
  ) for c, company in enumerate(processed_data)]
44
- elif action == Config.ACTION_FINANCIAL_INFO:
45
- [processed_data[c].update(get_financial_info(company["ticker"], data_dict[Config.ACTION_FINANCIAL_INFO])
46
- ) for c, company in enumerate(processed_data)]
47
  else:
48
- return "Invalid action Error"
49
  sector_data = pd.DataFrame(processed_data)
50
- return sector_data
51
 
52
 
53
- def main_interface(requirements_file, processed_file, action):
54
- try:
55
  data_dict = process_file(requirements_file)
56
- except Exception as e:
57
  data_dict = {}
58
  if action == Config.ACTION_BASIC_INFO:
59
- sector_data = shape_sector_info(data_dict=data_dict, processed_data=None, action=action)
60
- elif action in [Config.ACTION_STRATEGY_INFO, Config.ACTION_FINANCIAL_INFO]:
 
61
  sector_data = pd.read_excel(processed_file.name)
62
  sector_data = shape_sector_info(data_dict=data_dict, processed_data=sector_data, action=action)
63
  if type(sector_data) == str:
64
  return sector_data
 
 
 
 
 
 
 
 
 
65
  elif action == Config.ACTION_UPDATE_INFO:
66
  filename = update_financial_info(processed_file)
67
  return filename
68
- file_name = "processed_file.xlsx"
69
- sector_data.to_excel(file_name, index=False)
70
- return file_name
 
 
 
 
 
 
 
 
71
 
72
 
73
  iface = gr.Interface(
74
  fn=main_interface,
75
  inputs=[gr.inputs.File(label="Requirements file"), gr.inputs.File(label="(Optional) Populated file"),
76
- gr.inputs.Dropdown([Config.ACTION_BASIC_INFO, Config.ACTION_STRATEGY_INFO, Config.ACTION_FINANCIAL_INFO,
77
- Config.ACTION_UPDATE_INFO],
 
 
 
78
  label="Select the type of information you want to extract")],
79
  outputs=gr.outputs.File()
80
  )
 
1
  from basic_handler import get_basic_info
2
  from strategy_handler import get_strategy_info
3
  from finance_handler import get_financial_info
4
+ from rolling_update import update_financial_info, initial_update_flow
5
+ from select_top_financials import select_by_financials
6
  from config import Config
7
+ from common_functionality import convert_to_excel
8
+ import os
9
+ from dotenv import load_dotenv
10
 
11
  import pandas as pd
12
  import openai
13
  import gradio as gr
14
 
15
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
# Never echo the key itself: anything printed here ends up in the console / hosting logs.
# Only report the case where the key is missing.
if not openai.api_key:
    print("OPENAI_API_KEY is not set; requests to OpenAI will fail.")
19
 
20
  # TODO: add an identifier for financial columns for better repeat parsing
21
 
22
+
23
  def process_file(file) -> dict:
24
  input_data = pd.read_excel(file.name)
25
  data_dict = get_formatting_columns(input_data)
 
38
  return data_dict
39
 
40
 
41
def shape_sector_info(data_dict: dict, processed_data: pd.DataFrame, action) -> "str | dict[str, pd.DataFrame]":
    """
    Build the company data for the requested action.

    Args:
        data_dict: parsed requirements; read here under the keys "sector", "num_companies",
            Config.ACTION_BASIC_INFO, Config.ACTION_FINANCIAL_INFO and Config.ACTION_STRATEGY_INFO.
        processed_data: previously populated companies table; only used for the strategy action
            (pass None for the basic action).
        action: one of the Config.ACTION_* values.

    Returns:
        - Config.ACTION_BASIC_INFO: the name of the Excel file written by ``convert_to_excel``.
        - Config.ACTION_STRATEGY_INFO: ``{Config.MAIN_SHEET_NAME: dataframe}`` with the strategy
          fields merged into every company row.
        - any other action: ``{"Invalid action Error": empty dataframe}``.
    """
    if action == Config.ACTION_BASIC_INFO:
        companies = get_basic_info(data_dict["sector"], data_dict["num_companies"],
                                   data_dict[Config.ACTION_BASIC_INFO])
        # Enrich every company record in place with its financial metrics.
        for company in companies:
            company.update(get_financial_info(company["ticker"], data_dict[Config.ACTION_FINANCIAL_INFO]))
        updated_data = initial_update_flow(pd.DataFrame(companies))
        return convert_to_excel(updated_data, filename="basic+financial")

    # Validate the action before touching processed_data, so an invalid action never
    # fails on a missing dataframe.
    if action != Config.ACTION_STRATEGY_INFO:
        return {"Invalid action Error": pd.DataFrame()}

    companies = processed_data.to_dict(orient="records")
    for company in companies:
        company.update(get_strategy_info(company["name"], data_dict[Config.ACTION_STRATEGY_INFO]))
    return {Config.MAIN_SHEET_NAME: pd.DataFrame(companies)}
60
 
61
 
62
def main_interface(requirements_file, processed_file, action):  # file, file, str
    """
    Dispatch the action selected in the interface and return the produced file name.

    Args:
        requirements_file: uploaded requirements workbook; parsed with ``process_file`` when given.
        processed_file: uploaded, previously populated workbook. Read here by the two strategy
            actions and handed over unchanged to the update / selection actions.
        action: one of the Config.ACTION_* values.

    Returns:
        The name of the Excel file produced by the action.

    Raises:
        ValueError: if ``action`` is not handled here, or a strategy action is requested
            without a populated file.
    """
    data_dict = process_file(requirements_file) if requirements_file else {}

    if action == Config.ACTION_BASIC_INFO:
        return shape_sector_info(data_dict=data_dict, processed_data=None, action=action)
    if action == Config.ACTION_UPDATE_INFO:
        return update_financial_info(processed_file)
    if action == Config.ACTION_SELECT_BY_FINANCIALS:
        return select_by_financials(processed_file)
    if action not in (Config.ACTION_STRATEGY_INFO, Config.ACTION_SELECTED_STRATEGY_INFO):
        # An unhandled action used to fall through to code that read an unassigned variable.
        raise ValueError(f"Unsupported action: {action!r}")
    if processed_file is None:
        raise ValueError(f"A populated file is required for the action {action!r}")

    # All sheets of the populated workbook, keyed by sheet name (in workbook order).
    workbook = pd.read_excel(processed_file.name, sheet_name=None)

    if action == Config.ACTION_SELECTED_STRATEGY_INFO:
        selected = workbook[Config.SELECTED_BY_FINANCIALS].to_dict(orient="records")
        for company in selected:
            company.update(get_strategy_info(company["ticker"], data_dict[Config.ACTION_STRATEGY_INFO]))
        workbook[Config.SELECTED_BY_FINANCIALS] = pd.DataFrame(selected)
        return convert_to_excel(workbook, filename="updated_strategic_data", index=False)

    # Config.ACTION_STRATEGY_INFO: the first sheet is the companies table to enrich.
    first_sheet = next(iter(workbook.values()))
    sector_data = shape_sector_info(data_dict=data_dict, processed_data=first_sheet, action=action)
    if isinstance(sector_data, str):
        # Kept for compatibility with a shape_sector_info that reports its result as a plain string.
        return sector_data
    if len(workbook) > 1:
        # Replace the stale main sheet with the enriched one and keep every other sheet.
        workbook.pop(Config.MAIN_SHEET_NAME, None)
        sheets = {**sector_data, **workbook}
    else:
        sheets = sector_data
    return convert_to_excel(sheets, filename="updated_data", index=False)
98
 
99
 
100
  iface = gr.Interface(
101
  fn=main_interface,
102
  inputs=[gr.inputs.File(label="Requirements file"), gr.inputs.File(label="(Optional) Populated file"),
103
+ gr.inputs.Dropdown([Config.ACTION_BASIC_INFO,
104
+ Config.ACTION_STRATEGY_INFO,
105
+ Config.ACTION_SELECTED_STRATEGY_INFO,
106
+ Config.ACTION_UPDATE_INFO,
107
+ Config.ACTION_SELECT_BY_FINANCIALS],
108
  label="Select the type of information you want to extract")],
109
  outputs=gr.outputs.File()
110
  )
basic+financial.xlsx ADDED
Binary file (7.27 kB). View file
 
basic_handler.py CHANGED
@@ -1,5 +1,9 @@
1
  from prompts import Prompts
2
  from common_functionality import send_openai_request
 
 
 
 
3
 
4
 
5
  def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list:
@@ -9,18 +13,13 @@ def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list
9
  ignore_companies = []
10
  for _ in range(n_companies // max_num_companies):
11
  prompt = Prompts.BASIC_INFORMATION_PROMPT.format(num_companies=max_num_companies,
12
- sector=sector, ignore_companies=ignore_companies,
13
  required_fields=required_fields)
14
- data.extend(send_openai_request(prompt))
15
- ignore_companies = [i["name"] for i in data]
16
- prompt = Prompts.BASIC_INFORMATION_PROMPT.format(num_companies=n_companies, sector=sector,
17
- ignore_companies=ignore_companies,
18
- required_fields=required_fields)
19
- resp = send_openai_request(prompt)
20
- if type(resp) == dict:
21
- data.append(resp)
22
- else:
23
- data.extend(resp)
24
  else:
25
  prompt = Prompts.BASIC_INFORMATION_PROMPT_SINGLE_USE.format(num_companies=n_companies, sector=sector,
26
  required_fields=required_fields)
 
1
  from prompts import Prompts
2
  from common_functionality import send_openai_request
3
+ import openai
4
+ import os
5
+
6
+ openai.api_key = os.getenv("OPENAI_API_KEY")
7
 
8
 
9
  def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list:
 
13
  ignore_companies = []
14
  for _ in range(n_companies // max_num_companies):
15
  prompt = Prompts.BASIC_INFORMATION_PROMPT.format(num_companies=max_num_companies,
16
+ ignore_companies=ignore_companies,
17
  required_fields=required_fields)
18
+ resp = send_openai_request(prompt)
19
+ if type(resp) == dict:
20
+ data.append(resp)
21
+ else:
22
+ data.extend(resp)
 
 
 
 
 
23
  else:
24
  prompt = Prompts.BASIC_INFORMATION_PROMPT_SINGLE_USE.format(num_companies=n_companies, sector=sector,
25
  required_fields=required_fields)
common_functionality.py CHANGED
@@ -1,5 +1,21 @@
1
  import json
2
  import openai
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def send_openai_request(prompt: str, max_tokens=2000, temperature=0.1) -> dict:
@@ -18,3 +34,10 @@ def send_openai_request(prompt: str, max_tokens=2000, temperature=0.1) -> dict:
18
  presence_penalty=0
19
  )
20
  return json.loads(response.choices[0]["message"]["content"])
 
 
 
 
 
 
 
 
1
  import json
2
  import openai
3
+ import pandas as pd
4
+ import yfinance as yf
5
+
6
+ from config import Config
7
+
8
+
9
def convert_to_excel(dataframes: dict[str, pd.DataFrame], filename="updated_data", index=True) -> str:
    """
    Accepts a dictionary of dataframes and converts it to an Excel file

    Args:
        dataframes: mapping of sheet name to the dataframe written to that sheet
        filename: output file name without the ".xlsx" extension
        index: whether the dataframe index is written to the sheets

    Returns:
        filename: name of the written file (".xlsx" appended)
    """
    filename = f"{filename}.xlsx"
    # The context manager closes the writer even when writing one of the sheets raises.
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        for sheet_name, df in dataframes.items():
            df.to_excel(writer, index=index, sheet_name=sheet_name)
    return filename
19
 
20
 
21
  def send_openai_request(prompt: str, max_tokens=2000, temperature=0.1) -> dict:
 
34
  presence_penalty=0
35
  )
36
  return json.loads(response.choices[0]["message"]["content"])
37
+
38
+
39
def get_financial_columns(data: pd.DataFrame) -> list:
    """
    Return the names of the columns of ``data`` that hold financial metrics.

    A column is considered financial when its name is a key of the yfinance ``info`` mapping
    of a reference ticker (AAPL), or when its name contains Config.FINANCE_HISTORICAL_RETURN.
    A column named "Country" is always excluded.
    """
    # The reference ticker is only used to learn which metric names yfinance exposes.
    info_keys = set(yf.Ticker("AAPL").info.keys())

    def is_financial(column) -> bool:
        # NOTE(review): the exclusion is case-sensitive, so a lower-case "country" column still
        # passes when yfinance lists it as an info key - confirm whether that is intended.
        if column == "Country":
            return False
        if column in info_keys:
            return True
        # The substring test is only meaningful for string labels.
        return isinstance(column, str) and Config.FINANCE_HISTORICAL_RETURN in column

    # The two membership tests are alternatives; a misplaced parenthesis previously reduced
    # the condition to the info-key test alone and dropped the historical-return columns.
    return [column for column in data.columns if is_financial(column)]
config.py CHANGED
@@ -1,11 +1,17 @@
1
  class Config:
2
 
3
- ACTION_BASIC_INFO = "Basic"
4
  ACTION_STRATEGY_INFO = "Strategy"
5
- ACTION_FINANCIAL_INFO = "Financial (Rolling)"
 
6
  ACTION_UPDATE_INFO = "Update financials"
 
 
7
 
8
  FINANCE_UPDATE_TIME = "last_updated"
9
  FINANCE_HISTORICAL_RETURN = "historical_return"
10
 
11
  MAIN_SHEET_NAME = "Companies"
 
 
 
 
1
  class Config:
2
 
3
+ ACTION_BASIC_INFO = "Basic+Financial"
4
  ACTION_STRATEGY_INFO = "Strategy"
5
+ ACTION_SELECTED_STRATEGY_INFO = "Selected Companies Strategy"
6
+ ACTION_FINANCIAL_INFO = "Financial"
7
  ACTION_UPDATE_INFO = "Update financials"
8
+ ACTION_SELECT_BY_STRATEGY = "Select top best strategy companies"
9
+ ACTION_SELECT_BY_FINANCIALS = "Select top best financials companies"
10
 
11
  FINANCE_UPDATE_TIME = "last_updated"
12
  FINANCE_HISTORICAL_RETURN = "historical_return"
13
 
14
  MAIN_SHEET_NAME = "Companies"
15
+
16
+ SELECTED_BY_FINANCIALS = "Top Financials"
17
+ TEMP_NUMBER_TO_SELECT = 2
data_formatting.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
prompts.py CHANGED
@@ -20,4 +20,10 @@ class Prompts:
20
  "flat JSON with the keys being the provided categories description and score. " \
21
  "Do not nest values within the JSON."
22
  FINANCIAL_INFORMATION_PROMPT = ""
 
 
 
 
 
 
23
 
 
20
  "flat JSON with the keys being the provided categories description and score. " \
21
  "Do not nest values within the JSON."
22
  FINANCIAL_INFORMATION_PROMPT = ""
23
+ SELECT_TOP_FINANCIALS_PROMPT = "Select the top {num_companies} companies with the best financials out of the " \
24
+ "companies in the provided list based on the financial metrics provided. For each" \
25
+ " company provide a short 2-3 bulletpoints justification why this company was " \
26
+ "selected. Format your answer as the following JSON:\n" \
27
+ "[{{ticker: company_ticker, justification: justification}}, ...]\n" \
28
+ "List of companies with financials: {companies}"
29
 
rolling_update.py CHANGED
@@ -2,6 +2,7 @@ import pandas as pd
2
  import yfinance as yf
3
  from finance_handler import get_financial_info
4
  from config import Config
 
5
 
6
 
7
  def update_financial_info(data) -> str:
@@ -41,8 +42,7 @@ def initial_update_flow(data: pd.DataFrame) -> dict:
41
  dataframes: dictionary of dataframes with the updated info
42
  """
43
  tickers = data["ticker"].unique()
44
- test = yf.Ticker("AAPL")
45
- financial_columns = [x for x in data.columns if x in test.info.keys() or Config.FINANCE_HISTORICAL_RETURN in x]
46
  processed_data = data.to_dict(orient="records")
47
  current_data = [data[data["ticker"] == ticker][financial_columns + [Config.FINANCE_UPDATE_TIME]
48
  ].to_dict(orient="records")[0] for ticker in tickers]
@@ -66,7 +66,8 @@ def add_financial_history(dataframes: dict[pd.DataFrame]) -> dict[pd.DataFrame]:
66
  2. Updates the main tab with the most recent financial info for each company
67
 
68
  """
69
- financial_columns = dataframes[list(dataframes.keys())[-1]].columns
 
70
  for name, df in dataframes.items():
71
  if name == Config.MAIN_SHEET_NAME:
72
  df_dict = df.to_dict(orient="records")
@@ -74,20 +75,10 @@ def add_financial_history(dataframes: dict[pd.DataFrame]) -> dict[pd.DataFrame]:
74
  for c, company in enumerate(df_dict)]
75
  dataframes[name] = pd.DataFrame(df_dict)
76
  continue
 
 
77
  df_dict = df.to_dict(orient="records")
78
  updated_info = dataframes[Config.MAIN_SHEET_NAME][dataframes[Config.MAIN_SHEET_NAME]["ticker"] == name][financial_columns].to_dict(orient="records")[0]
79
  df_dict.append(updated_info)
80
  dataframes[name] = pd.DataFrame(df_dict)
81
  return dataframes
82
-
83
-
84
- def convert_to_excel(dataframes: dict[pd.DataFrame], filename="updated_data", index=True) -> str:
85
- """
86
- Accepts a dictionary of dataframes and converts it to an Excel file
87
- """
88
- filename = f"{filename}.xlsx"
89
- writer = pd.ExcelWriter(filename, engine='openpyxl')
90
- for ticker, df in dataframes.items():
91
- df.to_excel(writer, index=index, sheet_name=ticker)
92
- writer.close()
93
- return filename
 
2
  import yfinance as yf
3
  from finance_handler import get_financial_info
4
  from config import Config
5
+ from common_functionality import convert_to_excel, get_financial_columns
6
 
7
 
8
  def update_financial_info(data) -> str:
 
42
  dataframes: dictionary of dataframes with the updated info
43
  """
44
  tickers = data["ticker"].unique()
45
+ financial_columns = get_financial_columns(data)
 
46
  processed_data = data.to_dict(orient="records")
47
  current_data = [data[data["ticker"] == ticker][financial_columns + [Config.FINANCE_UPDATE_TIME]
48
  ].to_dict(orient="records")[0] for ticker in tickers]
 
66
  2. Updates the main tab with the most recent financial info for each company
67
 
68
  """
69
+ companies_tickers_filtered = [i for i in dataframes.keys() if i not in Config.__dict__.values()][0]
70
+ financial_columns = dataframes[companies_tickers_filtered].columns
71
  for name, df in dataframes.items():
72
  if name == Config.MAIN_SHEET_NAME:
73
  df_dict = df.to_dict(orient="records")
 
75
  for c, company in enumerate(df_dict)]
76
  dataframes[name] = pd.DataFrame(df_dict)
77
  continue
78
+ if name == Config.SELECTED_BY_FINANCIALS:
79
+ continue
80
  df_dict = df.to_dict(orient="records")
81
  updated_info = dataframes[Config.MAIN_SHEET_NAME][dataframes[Config.MAIN_SHEET_NAME]["ticker"] == name][financial_columns].to_dict(orient="records")[0]
82
  df_dict.append(updated_info)
83
  dataframes[name] = pd.DataFrame(df_dict)
84
  return dataframes
 
 
 
 
 
 
 
 
 
 
 
 
rolling_update.xlsx ADDED
Binary file (8.32 kB). View file
 
select_top_financials.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import pandas as pd
3
+
4
+ from config import Config
5
+ from common_functionality import convert_to_excel, get_financial_columns, send_openai_request
6
+ from prompts import Prompts
7
+
8
+
9
def select_by_financials(processed_file):
    """
    Add a sheet with the companies selected by their financials to the populated workbook.

    Args:
        processed_file: uploaded workbook object; its ``name`` attribute is the path that is read.

    Returns:
        The name of the written Excel file: every sheet of the input plus the
        Config.SELECTED_BY_FINANCIALS sheet (replacing an existing sheet of that name).

    Raises:
        ValueError: if no file was provided.
        KeyError: if the workbook has no Config.MAIN_SHEET_NAME sheet.
    """
    if processed_file is None:
        raise ValueError("A populated file is required to select companies by financials")
    dataframes = pd.read_excel(processed_file.name, sheet_name=None)
    if Config.MAIN_SHEET_NAME not in dataframes:
        raise KeyError(f"The populated file has no '{Config.MAIN_SHEET_NAME}' sheet")
    selected_companies = get_selected_companies(dataframes[Config.MAIN_SHEET_NAME])
    # Later keys win, so a previous selection sheet is overwritten by the new one.
    joined_data = {**dataframes, **selected_companies}
    return convert_to_excel(joined_data, filename="selected_companies", index=False)
15
+
16
+
17
def get_selected_companies(data: pd.DataFrame) -> dict:
    """
    Ask the model for the companies with the best financials and return them as one sheet.

    Args:
        data: companies table; must contain a "ticker" column next to the financial columns.

    Returns:
        ``{Config.SELECTED_BY_FINANCIALS: dataframe}`` where the dataframe holds the model's
        answer (ticker, justification) merged on "ticker" with the companies' financial columns.
    """
    def as_text(justification):
        # The model may return the bullet points either as a list or as one string;
        # joining a plain string would put a space between every character.
        if isinstance(justification, (list, tuple)):
            return " ".join(str(point) for point in justification)
        return justification

    excluded = (Config.FINANCE_UPDATE_TIME, "country")
    financial_columns = [column for column in get_financial_columns(data) if column not in excluded]

    financial_data = data[financial_columns].to_dict(orient="records")
    for company, ticker in zip(financial_data, data["ticker"]):
        company["ticker"] = ticker

    prompt = Prompts.SELECT_TOP_FINANCIALS_PROMPT.format(num_companies=Config.TEMP_NUMBER_TO_SELECT,
                                                         companies=financial_data)
    response = send_openai_request(prompt=prompt)
    if isinstance(response, dict):
        # A single selected company can come back as one object instead of a list.
        response = [response]

    selected_df = pd.DataFrame(response)
    selected_df["justification"] = selected_df["justification"].apply(as_text)
    selected_df = pd.merge(selected_df, data[financial_columns + ["ticker"]], on="ticker")
    return {Config.SELECTED_BY_FINANCIALS: selected_df}
selected_companies.xlsx ADDED
Binary file (8.26 kB). View file
 
updated_strategic_data.xlsx ADDED
Binary file (9.86 kB). View file
 
workflow.ipynb ADDED
@@ -0,0 +1,599 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 24,
6
+ "metadata": {
7
+ "ExecuteTime": {
8
+ "end_time": "2023-09-27T02:30:24.971742Z",
9
+ "start_time": "2023-09-27T02:30:24.766127Z"
10
+ }
11
+ },
12
+ "outputs": [],
13
+ "source": [
14
+ "import pandas as pd\n",
15
+ "import json\n",
16
+ "import datetime\n",
17
+ "import yfinance as yf\n",
18
+ "import openai\n",
19
+ "openai.api_key = \"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\""
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 41,
25
+ "metadata": {
26
+ "ExecuteTime": {
27
+ "end_time": "2023-09-21T02:11:08.848554Z",
28
+ "start_time": "2023-09-21T02:11:08.820339Z"
29
+ }
30
+ },
31
+ "outputs": [
32
+ {
33
+ "data": {
34
+ "text/plain": " Sector\n0 Cybersecurity\n1 2\n2 Name, ticker, subsector, country, description \n3 currentPrice, trailingPE, forwardPE, marketCap...\n4 Leadership and Governance, Market Position and...",
35
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Sector</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Cybersecurity</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Name, ticker, subsector, country, description</td>\n </tr>\n <tr>\n <th>3</th>\n <td>currentPrice, trailingPE, forwardPE, marketCap...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Leadership and Governance, Market Position and...</td>\n </tr>\n </tbody>\n</table>\n</div>"
36
+ },
37
+ "execution_count": 41,
38
+ "metadata": {},
39
+ "output_type": "execute_result"
40
+ }
41
+ ],
42
+ "source": [
43
+ "with open(\"/Users/artemkalyta/Downloads/input_v0.1-example.xlsx\", 'rb') as f:\n",
44
+ " temp_data = pd.read_excel(f)\n",
45
+ "temp_data"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 42,
51
+ "metadata": {
52
+ "ExecuteTime": {
53
+ "end_time": "2023-09-21T02:11:15.777916Z",
54
+ "start_time": "2023-09-21T02:11:15.763160Z"
55
+ }
56
+ },
57
+ "outputs": [
58
+ {
59
+ "data": {
60
+ "text/plain": " Sector\n0 Cybersecurity\n1 2\n2 Name, ticker, subsector, country, description \n3 currentPrice, trailingPE, forwardPE, marketCap...\n4 Leadership and Governance, Market Position and...",
61
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Sector</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Cybersecurity</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Name, ticker, subsector, country, description</td>\n </tr>\n <tr>\n <th>3</th>\n <td>currentPrice, trailingPE, forwardPE, marketCap...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Leadership and Governance, Market Position and...</td>\n </tr>\n </tbody>\n</table>\n</div>"
62
+ },
63
+ "execution_count": 42,
64
+ "metadata": {},
65
+ "output_type": "execute_result"
66
+ }
67
+ ],
68
+ "source": [
69
+ "data = pd.read_excel(\"/Users/artemkalyta/Downloads/input_v0.1-example.xlsx\")\n",
70
+ "data.head()"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": 52,
76
+ "metadata": {
77
+ "ExecuteTime": {
78
+ "end_time": "2023-09-21T02:16:38.142654Z",
79
+ "start_time": "2023-09-21T02:16:17.490453Z"
80
+ }
81
+ },
82
+ "outputs": [],
83
+ "source": [
84
+ "def get_formatting_columns(data: pd.DataFrame) -> dict:\n",
85
+ " data_dict = {\"sector\": None, \"num_companies\": None, \"basic_info\": None, \"financial_info\": None, \"strategy_info\": None}\n",
86
+ " data_dict[\"sector\"] = data[\"Sector\"].values.tolist()[0]\n",
87
+ " data_dict[\"num_companies\"] = data[\"Sector\"].values.tolist()[1]\n",
88
+ " basic_info = [x.strip().lower() for x in data[\"Sector\"].values.tolist()[2].split(\", \")]\n",
89
+ " financial_info = [x.strip() for x in data[\"Sector\"].values.tolist()[3].split(\", \")]\n",
90
+ " strategy_info = [x.strip() for x in data[\"Sector\"].values.tolist()[4].split(\", \")]\n",
91
+ " data_dict[\"basic_info\"] = basic_info\n",
92
+ " data_dict[\"financial_info\"] = financial_info\n",
93
+ " data_dict[\"strategy_info\"] = strategy_info\n",
94
+ " return data_dict\n",
95
+ "\n",
96
+ "def send_openai_request(prompt, max_tokens=2000):\n",
97
+ " response = openai.ChatCompletion.create(\n",
98
+ " model=\"gpt-4\",\n",
99
+ " messages=[\n",
100
+ " {\n",
101
+ " \"role\": \"user\",\n",
102
+ " \"content\": f\"{prompt}\"\n",
103
+ " }\n",
104
+ " ],\n",
105
+ " temperature=0.1,\n",
106
+ " max_tokens=max_tokens,\n",
107
+ " top_p=1,\n",
108
+ " frequency_penalty=0,\n",
109
+ " presence_penalty=0\n",
110
+ " )\n",
111
+ " return json.loads(response.choices[0][\"message\"][\"content\"])\n",
112
+ "\n",
113
+ "\n",
114
+ "def get_basic_info(sector: str, n_companies: int, required_fields: list) -> list:\n",
115
+ " max_num_companies = 20\n",
116
+ " data = []\n",
117
+ " if n_companies > max_num_companies:\n",
118
+ " ignore_companies = []\n",
119
+ " for _ in range(n_companies // max_num_companies): \n",
120
+ " prompt = f\"\"\"Provide a list of the {max_num_companies} most prominent companies in {sector} with the best potential growth. Companies that you should not mention: {ignore_companies}. Include all the relevant, concise, and descriptive information and format the response as the valid JSON with the following keys: {required_fields}\"\"\"\n",
121
+ " data.extend(send_openai_request(prompt))\n",
122
+ " ignore_companies = [i[\"name\"] for i in data]\n",
123
+ " prompt = f\"\"\"Provide a list of the {n_companies % max_num_companies} most prominent companies in {sector} with the best potential growth. Companies that you should not mention: {ignore_companies}. Include all the relevant, concise, and descriptive information and format the response as the valid JSON with the following keys: {required_fields}\"\"\"\n",
124
+ " resp = send_openai_request(prompt)\n",
125
+ " if type(resp) == dict:\n",
126
+ " data.append(resp)\n",
127
+ " else:\n",
128
+ " data.extend(resp)\n",
129
+ " else:\n",
130
+ " prompt = f\"\"\"Provide a list of the {n_companies} most prominent companies in {sector} with the best potential growth. Include all the relevant, concise, and descriptive information and format the response as the valid JSON with the following keys: {required_fields}\"\"\"\n",
131
+ " data.extend(send_openai_request(prompt))\n",
132
+ " return data\n",
133
+ "\n",
134
+ "\n",
135
+ "def get_strategy_info(company_name: str, categories: list) -> dict:\n",
136
+ " prompt = f\"\"\"Please analyze {company_name} and give a descriptive, detailed, and very specific analysis (at least 5-6 sentences or more) using numbers when appropriate (only if you're absolutely confident in the numbers) on each of the following categories: {categories}. Each category should be described in detail. All of the information should be useful to a potential investor and should describe the advantages and drawbacks of investing in the company, you should approach every category only from a business and potential profit perspective. Please format your answer as a valid JSON with the keys being the provided categories.\"\"\"\n",
137
+ " response = send_openai_request(prompt)\n",
138
+ " return response\n",
139
+ "\n",
140
+ "def historical_return(company: yf.Ticker, history_years: int) -> float:\n",
141
+ " history = company.history(period=f\"{history_years}y\")\n",
142
+ " return (history[\"Close\"].iloc[-1] - history[\"Close\"].iloc[0]) / history[\"Close\"].iloc[0]\n",
143
+ "\n",
144
+ "def get_financial_info(ticker: str, metrics: list) -> dict:\n",
145
+ " company = yf.Ticker(ticker)\n",
146
+ " financial_info = {}\n",
147
+ " for metric in metrics:\n",
148
+ " if company.info.get(metric, None):\n",
149
+ " financial_info.update({metric: company.info.get(metric)})\n",
150
+ " continue\n",
151
+ " if \"historical_return\" in metric:\n",
152
+ " financial_info.update({metric: historical_return(company, metric[-1])})\n",
153
+ " return financial_info\n",
154
+ "\n",
155
+ "def shape_sector_info(data_dict: dict) -> pd.DataFrame:\n",
156
+ " sector_data = get_basic_info(data_dict[\"sector\"], data_dict[\"num_companies\"], data_dict[\"basic_info\"])\n",
157
+ " for c, company in enumerate(sector_data):\n",
158
+ " #sector_data[c].update(get_strategy_info(company[\"name\"], data_dict[\"strategy_info\"]))\n",
159
+ " sector_data[c].update(get_financial_info(company[\"ticker\"], data_dict[\"financial_info\"]))\n",
160
+ " sector_data = pd.DataFrame(sector_data)\n",
161
+ " return sector_data\n",
162
+ "\n",
163
+ "\n",
164
+ "\n",
165
+ "data_dict = get_formatting_columns(data)\n",
166
+ "sector_data = shape_sector_info(data_dict)\n"
167
+ ]
168
+ },
169
+ {
170
+ "cell_type": "code",
171
+ "execution_count": 53,
172
+ "outputs": [
173
+ {
174
+ "data": {
175
+ "text/plain": " name ticker subsector country \\\n0 CrowdStrike Holdings Inc. CRWD Software-Infrastructure United States \n1 Palo Alto Networks Inc. PANW Software-Infrastructure United States \n\n description currentPrice forwardPE \\\n0 CrowdStrike is a leading cybersecurity company... 163.89 46.825714 \n1 Palo Alto Networks is a global cybersecurity l... 235.30 36.765625 \n\n marketCap beta recommendationMean historical_return1 \\\n0 39134965760 0.959392 1.8 -0.031669 \n1 72880881664 1.191908 1.8 0.375058 \n\n historical_return5 trailingPE \n0 1.825690 NaN \n1 2.075549 188.24 ",
176
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>ticker</th>\n <th>subsector</th>\n <th>country</th>\n <th>description</th>\n <th>currentPrice</th>\n <th>forwardPE</th>\n <th>marketCap</th>\n <th>beta</th>\n <th>recommendationMean</th>\n <th>historical_return1</th>\n <th>historical_return5</th>\n <th>trailingPE</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>CrowdStrike Holdings Inc.</td>\n <td>CRWD</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>CrowdStrike is a leading cybersecurity company...</td>\n <td>163.89</td>\n <td>46.825714</td>\n <td>39134965760</td>\n <td>0.959392</td>\n <td>1.8</td>\n <td>-0.031669</td>\n <td>1.825690</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Palo Alto Networks Inc.</td>\n <td>PANW</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>Palo Alto Networks is a global cybersecurity l...</td>\n <td>235.30</td>\n <td>36.765625</td>\n <td>72880881664</td>\n <td>1.191908</td>\n <td>1.8</td>\n <td>0.375058</td>\n <td>2.075549</td>\n <td>188.24</td>\n </tr>\n </tbody>\n</table>\n</div>"
177
+ },
178
+ "execution_count": 53,
179
+ "metadata": {},
180
+ "output_type": "execute_result"
181
+ }
182
+ ],
183
+ "source": [
184
+ "sector_data"
185
+ ],
186
+ "metadata": {
187
+ "collapsed": false,
188
+ "ExecuteTime": {
189
+ "end_time": "2023-09-21T02:17:24.897424Z",
190
+ "start_time": "2023-09-21T02:17:24.888427Z"
191
+ }
192
+ }
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 54,
197
+ "metadata": {
198
+ "ExecuteTime": {
199
+ "end_time": "2023-09-21T02:17:28.507658Z",
200
+ "start_time": "2023-09-21T02:17:28.488571Z"
201
+ }
202
+ },
203
+ "outputs": [
204
+ {
205
+ "data": {
206
+ "text/plain": " name ticker subsector country \\\n0 CrowdStrike Holdings Inc. CRWD Software-Infrastructure United States \n1 Palo Alto Networks Inc. PANW Software-Infrastructure United States \n\n description currentPrice forwardPE \\\n0 CrowdStrike is a leading cybersecurity company... 163.89 46.825714 \n1 Palo Alto Networks is a global cybersecurity l... 235.30 36.765625 \n\n marketCap beta recommendationMean historical_return1 \\\n0 39134965760 0.959392 1.8 -0.031669 \n1 72880881664 1.191908 1.8 0.375058 \n\n historical_return5 trailingPE \n0 1.825690 NaN \n1 2.075549 188.24 ",
207
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>ticker</th>\n <th>subsector</th>\n <th>country</th>\n <th>description</th>\n <th>currentPrice</th>\n <th>forwardPE</th>\n <th>marketCap</th>\n <th>beta</th>\n <th>recommendationMean</th>\n <th>historical_return1</th>\n <th>historical_return5</th>\n <th>trailingPE</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>CrowdStrike Holdings Inc.</td>\n <td>CRWD</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>CrowdStrike is a leading cybersecurity company...</td>\n <td>163.89</td>\n <td>46.825714</td>\n <td>39134965760</td>\n <td>0.959392</td>\n <td>1.8</td>\n <td>-0.031669</td>\n <td>1.825690</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Palo Alto Networks Inc.</td>\n <td>PANW</td>\n <td>Software-Infrastructure</td>\n <td>United States</td>\n <td>Palo Alto Networks is a global cybersecurity l...</td>\n <td>235.30</td>\n <td>36.765625</td>\n <td>72880881664</td>\n <td>1.191908</td>\n <td>1.8</td>\n <td>0.375058</td>\n <td>2.075549</td>\n <td>188.24</td>\n </tr>\n </tbody>\n</table>\n</div>"
208
+ },
209
+ "execution_count": 54,
210
+ "metadata": {},
211
+ "output_type": "execute_result"
212
+ }
213
+ ],
214
+ "source": [
215
+ "sector_data.to_excel(\"/Users/artemkalyta/Downloads/output_v0.1-example.xlsx\")\n",
216
+ "sector_data"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 23,
222
+ "metadata": {
223
+ "ExecuteTime": {
224
+ "end_time": "2023-09-27T02:13:50.891551Z",
225
+ "start_time": "2023-09-27T02:13:50.886535Z"
226
+ }
227
+ },
228
+ "outputs": [
229
+ {
230
+ "name": "stdout",
231
+ "output_type": "stream",
232
+ "text": [
233
+ "['__class__', '__delattr__', '__delete__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__get__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__isabstractmethod__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__set__', '__set_name__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'deleter', 'fdel', 'fget', 'fset', 'getter', 'setter']\n"
234
+ ]
235
+ }
236
+ ],
237
+ "source": [
238
+ "import yfinance as yf\n",
239
+ "print(dir(yf.ticker.Ticker.financials))\n"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 27,
245
+ "metadata": {
246
+ "ExecuteTime": {
247
+ "end_time": "2023-09-21T01:56:55.816136Z",
248
+ "start_time": "2023-09-21T01:56:55.777294Z"
249
+ }
250
+ },
251
+ "outputs": [
252
+ {
253
+ "data": {
254
+ "text/plain": "0.3457653580944472"
255
+ },
256
+ "execution_count": 27,
257
+ "metadata": {},
258
+ "output_type": "execute_result"
259
+ }
260
+ ],
261
+ "source": [
262
+ "def historical_return(ticker: str, history_years: int) -> float:\n",
263
+ " company = yf.Ticker(ticker)\n",
264
+ " history = company.history(period=f\"{history_years}y\")\n",
265
+ " return (history[\"Close\"].iloc[-1] - history[\"Close\"].iloc[0]) / history[\"Close\"].iloc[0]\n",
266
+ "\n",
267
+ "historical_return(\"GOOG\", 1)"
268
+ ]
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "execution_count": 51,
273
+ "metadata": {
274
+ "ExecuteTime": {
275
+ "end_time": "2023-09-21T02:15:43.563900Z",
276
+ "start_time": "2023-09-21T02:15:43.372194Z"
277
+ }
278
+ },
279
+ "outputs": [
280
+ {
281
+ "data": {
282
+ "text/plain": "1.8"
283
+ },
284
+ "execution_count": 51,
285
+ "metadata": {},
286
+ "output_type": "execute_result"
287
+ }
288
+ ],
289
+ "source": [
290
+ "tick = yf.Ticker(\"PANW\").info[\"recommendationMean\"]\n",
291
+ "tick"
292
+ ]
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "execution_count": 4,
297
+ "metadata": {
298
+ "ExecuteTime": {
299
+ "end_time": "2023-09-21T01:29:10.489157Z",
300
+ "start_time": "2023-09-21T01:29:10.032458Z"
301
+ }
302
+ },
303
+ "outputs": [
304
+ {
305
+ "ename": "KeyError",
306
+ "evalue": "'regularMarketPrice'",
307
+ "output_type": "error",
308
+ "traceback": [
309
+ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
310
+ "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)",
311
+ "Cell \u001B[0;32mIn[4], line 3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01myfinance\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01myf\u001B[39;00m\n\u001B[1;32m 2\u001B[0m ticker \u001B[38;5;241m=\u001B[39m yf\u001B[38;5;241m.\u001B[39mTicker(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mGOOGL\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39minfo\n\u001B[0;32m----> 3\u001B[0m marketPrice \u001B[38;5;241m=\u001B[39m \u001B[43mticker\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mregularMarketPrice\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\n\u001B[1;32m 4\u001B[0m previousClosePrice \u001B[38;5;241m=\u001B[39m ticker[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mregularMarketPreviousClose\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mTicker Value: GOOGL\u001B[39m\u001B[38;5;124m'\u001B[39m)\n",
312
+ "\u001B[0;31mKeyError\u001B[0m: 'regularMarketPrice'"
313
+ ]
314
+ }
315
+ ],
316
+ "source": [
317
+ "[\"currentPrice\", \"trailingPE\", \"forwardPE\", \"marketCap\", \"beta\", \"recommendationMean\", \"historical_return1\", \"historical_return5\"]"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 36,
323
+ "metadata": {
324
+ "ExecuteTime": {
325
+ "end_time": "2023-09-21T02:04:30.806580Z",
326
+ "start_time": "2023-09-21T02:04:30.802510Z"
327
+ }
328
+ },
329
+ "outputs": [
330
+ {
331
+ "data": {
332
+ "text/plain": "dict"
333
+ },
334
+ "execution_count": 36,
335
+ "metadata": {},
336
+ "output_type": "execute_result"
337
+ }
338
+ ],
339
+ "source": [
340
+ "a = ticker.get(\"ebitdaa\", None)\n",
341
+ "print(a)"
342
+ ]
343
+ },
344
+ {
345
+ "cell_type": "code",
346
+ "execution_count": 12,
347
+ "outputs": [
348
+ {
349
+ "data": {
350
+ "text/plain": "{'sector': 'Cybersecurity',\n 'num_companies': 2,\n 'basic_info': ['name', 'ticker', 'subsector', 'country', 'description'],\n 'financian_info': ['P/E',\n 'Market Cap',\n 'Beta',\n 'Analyst Recommendation',\n '1-year Return',\n '5-year Return'],\n 'strategy_info': ['Leadership and Governance',\n 'Market Position and Competitive Advantage',\n 'R&D and Innovation',\n 'ESG Factors']}"
351
+ },
352
+ "execution_count": 12,
353
+ "metadata": {},
354
+ "output_type": "execute_result"
355
+ }
356
+ ],
357
+ "source": [
358
+ "data_dict"
359
+ ],
360
+ "metadata": {
361
+ "collapsed": false,
362
+ "ExecuteTime": {
363
+ "end_time": "2023-09-21T01:36:45.859093Z",
364
+ "start_time": "2023-09-21T01:36:45.852532Z"
365
+ }
366
+ }
367
+ },
368
+ {
369
+ "cell_type": "code",
370
+ "execution_count": 13,
371
+ "outputs": [
372
+ {
373
+ "data": {
374
+ "text/plain": "dict_keys(['__module__', 'ACTION_BASIC_INFO', 'ACTION_STRATEGY_INFO', 'ACTION_FINANCIAL_INFO', 'ACTION_UPDATE_INFO', 'ACTION_SELECT_BY_STRATEGY', 'ACTION_SELECT_BY_FINANCIALS', 'FINANCE_UPDATE_TIME', 'FINANCE_HISTORICAL_RETURN', 'MAIN_SHEET_NAME', 'SELECTED_BY_FINANCIALS', '__dict__', '__weakref__', '__doc__', '__annotations__'])"
375
+ },
376
+ "execution_count": 13,
377
+ "metadata": {},
378
+ "output_type": "execute_result"
379
+ }
380
+ ],
381
+ "source": [
382
+ "from config import Config\n",
383
+ "\n",
384
+ "Config.__dict__.keys()"
385
+ ],
386
+ "metadata": {
387
+ "collapsed": false,
388
+ "ExecuteTime": {
389
+ "end_time": "2023-10-02T03:08:06.743908Z",
390
+ "start_time": "2023-10-02T03:08:06.740531Z"
391
+ }
392
+ }
393
+ },
394
+ {
395
+ "cell_type": "code",
396
+ "execution_count": 7,
397
+ "outputs": [
398
+ {
399
+ "data": {
400
+ "text/plain": "['a', 'b']"
401
+ },
402
+ "execution_count": 7,
403
+ "metadata": {},
404
+ "output_type": "execute_result"
405
+ }
406
+ ],
407
+ "source": [
408
+ "a = [\"a\", \"b\", \"c\"]\n",
409
+ "a.remove(\"c\")\n",
410
+ "a"
411
+ ],
412
+ "metadata": {
413
+ "collapsed": false,
414
+ "ExecuteTime": {
415
+ "end_time": "2023-10-02T02:42:04.990078Z",
416
+ "start_time": "2023-10-02T02:42:04.987609Z"
417
+ }
418
+ }
419
+ },
420
+ {
421
+ "cell_type": "code",
422
+ "execution_count": 22,
423
+ "outputs": [
424
+ {
425
+ "data": {
426
+ "text/plain": "1.9"
427
+ },
428
+ "execution_count": 22,
429
+ "metadata": {},
430
+ "output_type": "execute_result"
431
+ }
432
+ ],
433
+ "source": [
434
+ "ticker[\"recommendationMean\"]"
435
+ ],
436
+ "metadata": {
437
+ "collapsed": false,
438
+ "ExecuteTime": {
439
+ "end_time": "2023-09-21T01:51:38.725734Z",
440
+ "start_time": "2023-09-21T01:51:38.723808Z"
441
+ }
442
+ }
443
+ },
444
+ {
445
+ "cell_type": "code",
446
+ "execution_count": 2,
447
+ "outputs": [
448
+ {
449
+ "name": "stdout",
450
+ "output_type": "stream",
451
+ "text": [
452
+ "Select the top 5 companies with the best financials out of the companies in the provided list based on the financial metrics provided. For each company provide a short 2-3 bulletpoint justification why this company was selected. Format your answer as the following JSON:[{company_name: justification}, {company_name2: justification}]\n"
453
+ ]
454
+ }
455
+ ],
456
+ "source": [
457
+ "SELECT_TOP_FINANCIALS_PROMPT = \"Select the top {num_companies} companies with the best financials out of the \" \\\n",
458
+ " \"companies in the provided list based on the financial metrics provided. For each\" \\\n",
459
+ " \" company provide a short 2-3 bulletpoint justification why this company was \" \\\n",
460
+ " \"selected. Format your answer as the following JSON:\" \\\n",
461
+ " \"[{{company_name: justification}}, {{company_name2: justification}}]\"\n",
462
+ "print(SELECT_TOP_FINANCIALS_PROMPT.format(num_companies=5))"
463
+ ],
464
+ "metadata": {
465
+ "collapsed": false,
466
+ "ExecuteTime": {
467
+ "end_time": "2023-10-02T02:24:51.683836Z",
468
+ "start_time": "2023-10-02T02:24:51.679391Z"
469
+ }
470
+ }
471
+ },
472
+ {
473
+ "cell_type": "code",
474
+ "execution_count": 4,
475
+ "outputs": [
476
+ {
477
+ "data": {
478
+ "text/plain": "False"
479
+ },
480
+ "execution_count": 4,
481
+ "metadata": {},
482
+ "output_type": "execute_result"
483
+ }
484
+ ],
485
+ "source": [
486
+ "import yfinance as yf\n",
487
+ "\n",
488
+ "test = yf.Ticker(\"AAPL\")\n",
489
+ "\"ticker\" in test.info.keys()"
490
+ ],
491
+ "metadata": {
492
+ "collapsed": false,
493
+ "ExecuteTime": {
494
+ "end_time": "2023-10-02T02:39:36.762388Z",
495
+ "start_time": "2023-10-02T02:39:36.559113Z"
496
+ }
497
+ }
498
+ },
499
+ {
500
+ "cell_type": "code",
501
+ "execution_count": 61,
502
+ "outputs": [
503
+ {
504
+ "data": {
505
+ "text/plain": "[{'name': 'CrowdStrike Holdings Inc.',\n 'ticker': 'CRWD',\n 'subsector': 'Software-Infrastructure',\n 'country': 'United States',\n 'description': 'CrowdStrike is a leading cybersecurity company that provides cloud-delivered solutions for endpoint protection, threat intelligence, and cyber attack response. Their AI-powered technology is designed to stop breaches and their Falcon platform offers instant visibility and protection across the enterprise and prevents attacks on endpoints on or off the network.',\n 'currentPrice': 163.89,\n 'forwardPE': 46.825714,\n 'marketCap': 39134965760,\n 'beta': 0.959392,\n 'recommendationMean': 1.8,\n 'historical_return1': -0.03166913211433715,\n 'historical_return5': 1.825689644649111,\n 'trailingPE': nan},\n {'name': 'Palo Alto Networks Inc.',\n 'ticker': 'PANW',\n 'subsector': 'Software-Infrastructure',\n 'country': 'United States',\n 'description': 'Palo Alto Networks is a global cybersecurity leader known for always challenging the security status quo. Their mission is to protect the digital way of life by preventing successful cyberattacks. They have pioneered the next generation of network security with innovative products that deliver cybersecurity across clouds, networks, and mobile devices.',\n 'currentPrice': 235.3,\n 'forwardPE': 36.765625,\n 'marketCap': 72880881664,\n 'beta': 1.191908,\n 'recommendationMean': 1.8,\n 'historical_return1': 0.3750584955932131,\n 'historical_return5': 2.075548954405327,\n 'trailingPE': 188.24}]"
506
+ },
507
+ "execution_count": 61,
508
+ "metadata": {},
509
+ "output_type": "execute_result"
510
+ }
511
+ ],
512
+ "source": [
513
+ "temp = sector_data.to_dict(orient='records')\n",
514
+ "temp"
515
+ ],
516
+ "metadata": {
517
+ "collapsed": false,
518
+ "ExecuteTime": {
519
+ "end_time": "2023-09-26T18:55:54.094789Z",
520
+ "start_time": "2023-09-26T18:55:54.094116Z"
521
+ }
522
+ }
523
+ },
524
+ {
525
+ "cell_type": "code",
526
+ "execution_count": 28,
527
+ "outputs": [
528
+ {
529
+ "data": {
530
+ "text/plain": " price market_cap\nupdated \n2021-09-01 90 90000\n2021-09-02 100 100000",
531
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>price</th>\n <th>market_cap</th>\n </tr>\n <tr>\n <th>updated</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2021-09-01</th>\n <td>90</td>\n <td>90000</td>\n </tr>\n <tr>\n <th>2021-09-02</th>\n <td>100</td>\n <td>100000</td>\n </tr>\n </tbody>\n</table>\n</div>"
532
+ },
533
+ "execution_count": 28,
534
+ "metadata": {},
535
+ "output_type": "execute_result"
536
+ }
537
+ ],
538
+ "source": [
539
+ "a = {\"price\": 90, \"market_cap\": 90000, \"updated\": \"2021-09-01\"}\n",
540
+ "b = {\"price\": 100, \"market_cap\": 100000, \"updated\": \"2021-09-02\"}\n",
541
+ "df = pd.DataFrame([a, b]).set_index(\"updated\")\n",
542
+ "df"
543
+ ],
544
+ "metadata": {
545
+ "collapsed": false,
546
+ "ExecuteTime": {
547
+ "end_time": "2023-09-27T02:34:11.587757Z",
548
+ "start_time": "2023-09-27T02:34:11.581806Z"
549
+ }
550
+ }
551
+ },
552
+ {
553
+ "cell_type": "code",
554
+ "execution_count": 31,
555
+ "outputs": [
556
+ {
557
+ "data": {
558
+ "text/plain": "{'price': {'2021-09-01': 90, '2021-09-02': 100},\n 'market_cap': {'2021-09-01': 90000, '2021-09-02': 100000}}"
559
+ },
560
+ "execution_count": 31,
561
+ "metadata": {},
562
+ "output_type": "execute_result"
563
+ }
564
+ ],
565
+ "source": [
566
+ "df.to_dict()"
567
+ ],
568
+ "metadata": {
569
+ "collapsed": false,
570
+ "ExecuteTime": {
571
+ "end_time": "2023-09-27T18:19:53.124974Z",
572
+ "start_time": "2023-09-27T18:19:53.114800Z"
573
+ }
574
+ }
575
+ }
576
+ ],
577
+ "metadata": {
578
+ "kernelspec": {
579
+ "display_name": "investor-env",
580
+ "language": "python",
581
+ "name": "python3"
582
+ },
583
+ "language_info": {
584
+ "codemirror_mode": {
585
+ "name": "ipython",
586
+ "version": 3
587
+ },
588
+ "file_extension": ".py",
589
+ "mimetype": "text/x-python",
590
+ "name": "python",
591
+ "nbconvert_exporter": "python",
592
+ "pygments_lexer": "ipython3",
593
+ "version": "3.11.5"
594
+ },
595
+ "orig_nbformat": 4
596
+ },
597
+ "nbformat": 4,
598
+ "nbformat_minor": 2
599
+ }