Shantala committed on
Commit
c9b9e42
·
verified ·
1 Parent(s): 182c379

Create ETF_sector_data_prep.py

Browse files
Files changed (1) hide show
  1. ETF_sector_data_prep.py +44 -0
ETF_sector_data_prep.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import yahooquery as yq
import pandas as pd

# Asia-Pacific country ETFs and the country each one tracks; the two lists
# are parallel (same position = same fund).
tickers = ['EWA', 'EWH', 'EWJ', 'EWM', 'EWS', 'EWT', 'EWY', 'THD']
countries = ['Australia', 'Hong Kong', 'Japan', 'Malaysia', 'Singapore', 'Taiwan', 'South Korea', 'Thailand']

# One sector-weighting frame per ETF, collected for concatenation below.
all_dataframes = []

for ticker, country in zip(tickers, countries):
    # Per the original author's note, this is a single-column DataFrame of
    # weights indexed by sector key.
    frame = yq.Ticker(ticker).fund_sector_weightings
    # Name the single weight column.
    frame.columns = ['Weight']
    # Copy the sector keys out of the index into a regular column and tag
    # every row with the ETF's country.
    all_dataframes.append(frame.assign(Sector=frame.index, Country=country))

# Stack the per-ETF frames into one long table with a fresh integer index.
sector_df = pd.concat(all_dataframes, ignore_index=True)

# Maps the raw sector keys to the display names required by the assignment.
sector_dict = {
    'realestate': 'Real Estate',
    'consumer_cyclical': 'Consumer Cyclical',
    'basic_materials': 'Basic Materials',
    'consumer_defensive': 'Consumer Defensive',
    'technology': 'Technology',
    'communication_services': 'Communication Services',
    'financial_services': 'Financial Services',
    'utilities': 'Utilities',
    'industrials': 'Industrials',
    'energy': 'Energy',
    'healthcare': 'Healthcare',
}

# Create a new column called 'Sector_Name' holding the display name of each
# raw sector key.
sector_df['Sector_Name'] = sector_df['Sector'].map(sector_dict)

# Reorder the columns, discard the raw 'Sector' key, express the weights as
# percentages, and rename the weight column to say so.
new_order = ['Country', 'Sector_Name', 'Weight', 'Sector']
sector_df = (
    sector_df[new_order]
    .drop(columns='Sector')
    .assign(Weight=lambda table: table['Weight'] * 100)
    .rename(columns={'Weight': 'Weight(%)'})
)