hkayabilisim committed on
Commit
3ecc6cd
·
1 Parent(s): ad72737

Added exposure generation functionality

Browse files

Embedded the data generation code from [1] into the backend.

[1] https://github.com/TomorrowsCities/DataProductionPython

tomorrowcities/backend/engine.py CHANGED
@@ -7,6 +7,18 @@ from scipy.stats import norm
7
  from scipy.interpolate import interp1d
8
  import networkx as nx
9
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def compute_road_infra(buildings, household, individual,
11
  nodes, edges, intensity, fragility, hazard,
12
  road_water_height_threshold,
@@ -915,4 +927,1702 @@ def calculate_metrics(gdf_buildings, df_household, df_individual, infra, hazard_
915
  metrics["metric7"]["value"] = int(df_metric7['metric7'].sum())
916
  metrics["metric8"]["value"] = int(df_metric8['metric8'].sum())
917
 
918
- return metrics, df_metrics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from scipy.interpolate import interp1d
8
  import networkx as nx
9
 
10
+ import time
11
+ import sys
12
+ import uuid
13
+ import os.path
14
+ import random
15
+ from random import sample
16
+ from numpy.random import multinomial, randint
17
+ from math import ceil
18
+ import math
19
+ from itertools import repeat, chain
20
+ from .utils import ParameterFile
21
+
22
  def compute_road_infra(buildings, household, individual,
23
  nodes, edges, intensity, fragility, hazard,
24
  road_water_height_threshold,
 
927
  metrics["metric7"]["value"] = int(df_metric7['metric7'].sum())
928
  metrics["metric8"]["value"] = int(df_metric8['metric8'].sum())
929
 
930
+ return metrics, df_metrics
931
+
932
+
933
def dist2vector(d_value, d_number, d_limit, shuffle_or_not):
    """Expand a discrete distribution into a flat list of string labels.

    Each entry of ``d_value`` is repeated according to the matching
    (possibly fractional) count in ``d_number``. Counts are rounded to
    integers and the last category absorbs the rounding error so that the
    output always holds exactly ``d_limit`` items.

    Args:
        d_value: Category values (array-like); extra singleton dimensions
            are removed.
        d_number: Desired number of repetitions per category, same length
            as ``d_value``; may contain floats.
        d_limit: Total number of items the result must contain.
        shuffle_or_not: ``'shuffle'`` shuffles the result in place using the
            ``random`` module's global state; any other value (for example
            ``'DoNotShuffle'``) keeps the categories in input order.

    Returns:
        A list of ``d_limit`` strings, each the ``str()`` of one category
        value.

    Note:
        If rounding the leading counts already exceeds ``d_limit``, the last
        count becomes negative and ``np.repeat`` rejects it.
    """
    # np.squeeze turns a single-category input (shape (1,)) into a 0-d
    # value, which cannot be indexed with [-1] below; atleast_1d restores
    # the one-element vector while leaving longer inputs untouched.
    values = np.atleast_1d(np.squeeze(d_value))
    weights = np.atleast_1d(np.squeeze(d_number))

    # Round fractional repetition counts to whole numbers.
    reps = np.round(weights).astype('int32')
    # Make the counts sum to d_limit again after rounding.
    reps[-1] = d_limit - np.sum(reps[:-1])

    # Repeat values[i] exactly reps[i] times for every i.
    expanded = np.repeat(values, reps)
    if shuffle_or_not == 'shuffle':
        random.shuffle(expanded)
    return [str(element) for element in expanded]
956
+
957
+ def generate_exposure(parameter_file: ParameterFile, land_use_file: gpd.GeoDataFrame, population_calculate=False, seed=42):
958
+ # To re-generate a desired state comment above line and use: rng = int(seed_value_in_result)
959
+ tic = time.time()
960
+ print('1 -------', end=' ')
961
+ random.seed(seed)
962
+ np.random.seed(seed)
963
+ df_nc, ipdf, df1, df2, df3 = parameter_file.get_sheets()
964
+
965
+
966
+ # Convert both to the same target coordinate system
967
+ landuse_shp = land_use_file.set_crs("EPSG:4326",allow_override=True)
968
+ landuse_shp = landuse_shp.to_crs(f"EPSG:3857")
969
+
970
+
971
+ # Extract the nomenclature for load resisting system and land use types
972
+ startmarker = '\['
973
+ startidx = df_nc[df_nc.apply(lambda row: row.astype(str).str.contains(\
974
+ startmarker,case=False).any(), axis=1)]
975
+
976
+ endmarker = '\]'
977
+ endidx = df_nc[df_nc.apply(lambda row: row.astype(str).str.contains(\
978
+ endmarker,case=False).any(), axis=1)]
979
+
980
+ # Load resisting system types
981
+ lrs_types_temp = df_nc.loc[list(range(startidx.index[0]+1,endidx.index[0]))]
982
+ lrs_types = lrs_types_temp[1].to_numpy().astype(str)
983
+ lrsidx = {}
984
+ count = 0
985
+ for key in lrs_types:
986
+ lrsidx[str(key)] = count
987
+ count+=1
988
+
989
+ # Landuse Types
990
+ lut_types_temp = df_nc.loc[list(range(startidx.index[1]+1,endidx.index[1]))]
991
+ lut_types = lut_types_temp[1].astype(str)
992
+ lutidx = {}
993
+ count = 0
994
+ for key in lut_types:
995
+ lutidx[key] = count
996
+ count+=1
997
+
998
+ # Income types is hardcoded
999
+ avg_income_types =np.array(['lowIncomeA','lowIncomeB','midIncome','highIncome'])
1000
+
1001
+ #Average dwelling area (sqm) wrt income type (44 for LI, 54 for MI,
1002
+ #67 for HI in Tomorrowville)
1003
+ #Range of footprint area fpt_area (sqm) wrt. income type (32-66 for LI,
1004
+ # 32-78 for MI and 70-132 for HI in Tomorrowville)
1005
+ average_dwelling_area = np.array([ipdf.iloc[13,2],ipdf.iloc[13,3],\
1006
+ ipdf.iloc[13,4],ipdf.iloc[13,5]])
1007
+
1008
+ fpt_area = {'lowIncomeA':np.fromstring(ipdf.iloc[14,2],dtype=float,sep=','),
1009
+ 'lowIncomeB':np.fromstring(ipdf.iloc[14,3],dtype=float,sep=','),
1010
+ 'midIncome':np.fromstring(ipdf.iloc[14,4],dtype=float,sep=','),
1011
+ 'highIncome':np.fromstring(ipdf.iloc[14,5],dtype=float,sep=',')}
1012
+
1013
+ # Storey definition 1- Low rise (LR) 1-4, 2- Mid rise (MR) 5-8,
1014
+ # 3- High rise (HR) 9-19
1015
+ storey_range = {0:np.fromstring(ipdf.iloc[17,2],dtype=int,sep=','),
1016
+ 1:np.fromstring(ipdf.iloc[17,3],dtype=int,sep=','),
1017
+ 2:np.fromstring(ipdf.iloc[17,4],dtype=int,sep=',')}
1018
+
1019
+ # Code Compliance Levels (Low, Medium, High): 1 - LC, 2 - MC, 3 - HC
1020
+ code_level = np.array(['LC','MC','HC'])
1021
+
1022
+ # Nr of commercial buildings per 1000 individuals
1023
+ numb_com = ipdf.iloc[2,1]
1024
+ # Nr of industrial buildings per 1000 individuals
1025
+ numb_ind = ipdf.iloc[3,1]
1026
+
1027
+ # Area constraints in percentage (AC) for residential and commercial zones.
1028
+ # Total built-up areas in these zones cannot exceed (AC*available area)
1029
+ AC_com = ipdf.iloc[6,1] # in percent
1030
+ AC_ind = ipdf.iloc[7,1] # in percent
1031
+
1032
+ # Assumption 14 and 15: Number of individuals per school and hospitals
1033
+ nsch_pi = ipdf.iloc[9,1]
1034
+ nhsp_pi = ipdf.iloc[10,1]
1035
+
1036
+ # Unit price for replacement wrt occupancy type and special facility
1037
+ # status of the building
1038
+ # Occupancy type is unchangeable, only replacement value is taken from user input
1039
+ Unit_price={'Res':ipdf.iloc[20,2],'Com':ipdf.iloc[20,3],'Ind':ipdf.iloc[20,4],
1040
+ 'ResCom':ipdf.iloc[20,5],'Edu':ipdf.iloc[20,6],'Hea':ipdf.iloc[20,7]}
1041
+
1042
+ #household_building_match = 'footprint' # 'footprint' or 'number_of_units'
1043
+
1044
+ print(time.time() - tic)
1045
+ tic = time.time()
1046
+ print('2 ------',end=' ')
1047
+ #%% Read the landuse shapefile
1048
+
1049
+
1050
+
1051
+ #Calculate area of landuse zones using polygons only if area is not already.
1052
+ # First, convert coordinate system to cartesian
1053
+ if 'area' not in landuse_shp.columns:
1054
+ landuse_shp_cartesian = landuse_shp.copy()
1055
+ landuse_shp_cartesian = landuse_shp_cartesian.to_crs({'init': 'epsg:3857'})
1056
+ landuse_shp_cartesian['area']=landuse_shp_cartesian['geometry'].area # m^2
1057
+ landuse_shp_cartesian['area']=landuse_shp_cartesian['area']/10**4 # Hectares
1058
+ landuse_shp_cartesian = landuse_shp_cartesian.drop(columns=['geometry'])
1059
+ landuse = landuse_shp_cartesian.copy()
1060
+ else:
1061
+ landuse = landuse_shp.copy()
1062
+ landuse = landuse.drop(columns=['geometry'])
1063
+
1064
+ # In the landuse shape file, if avgincome = lowIncome, replace it by lowIncomeA
1065
+ lowIncome_mask = landuse['avgincome'] == 'lowIncome'
1066
+ landuse.loc[lowIncome_mask,'avgincome'] = 'lowIncomeA'
1067
+
1068
+ # Typecast the various fields in landuse shapefile
1069
+ landuse['population'] = landuse['population'].astype(int)
1070
+ landuse['densitycap'] = landuse['densitycap'].astype(float)
1071
+ landuse['area'] = landuse['area'].astype(float)
1072
+ landuse['zoneid'] = landuse['zoneid'].astype(int)
1073
+ landuse['floorarear'] = landuse['floorarear'].astype(float)
1074
+ landuse['setback'] = landuse['setback'].astype(float)
1075
+
1076
+
1077
+
1078
+ #%% Read the landuse table (if xlsx file instead of shapefile is available)
1079
+ #landuse = pd.read_excel(os.path.join(ippath,ipfile_landuse),sheet_name=0)
1080
+
1081
+ #%% Concatenate the dataframes and process the data
1082
+ tabledf = pd.concat([df1,df2,df3]).reset_index(drop=True)
1083
+
1084
+ # Define a dictionary containing data distribution tables
1085
+ # Table names sorted according to the order in the excel input spreadsheet
1086
+ tables_temp = {
1087
+ 't1':[],'t2':[],'t3':[],'t4':[],'t5':[],'t5a':[],'t6':[],'t9':[],
1088
+ 't12':[],'t13':[],'t7':[],'t8':[],'t11':[],'t10':[],'t14':[]
1089
+ }
1090
+ startmarker = '\['
1091
+ startidx = tabledf[tabledf.apply(lambda row: row.astype(str).str.contains(\
1092
+ startmarker,case=False).any(), axis=1)]
1093
+
1094
+ endmarker = '\]'
1095
+ endidx = tabledf[tabledf.apply(lambda row: row.astype(str).str.contains(\
1096
+ endmarker,case=False).any(), axis=1)]
1097
+
1098
+ count=0
1099
+ for key in tables_temp:
1100
+ #print(startidx.index[count], endidx.index[count])
1101
+ tablepart = tabledf.loc[list(range(startidx.index[count]+1,endidx.index[count]))]
1102
+ tablepart = tablepart.drop(columns =0 )
1103
+ tablepart = tablepart.dropna(axis=1).reset_index(drop=True).values.tolist()
1104
+ tables_temp[key].append(tablepart)
1105
+ count+=1
1106
+
1107
+ tables = tables_temp
1108
+
1109
+ print(time.time() - tic)
1110
+ tic = time.time()
1111
+ print('3 ------',end=' ')
1112
+ #%% Basic exception handling to check improper inputs in the spreadsheet
1113
+ input_error_flag = False
1114
+ input_error_flag_shp = False
1115
+
1116
+ if numb_com ==0:
1117
+ print('The number of commercial buildings cannot be zero.')
1118
+ input_error_flag = True
1119
+ if numb_ind == 0:
1120
+ print('The number of industrial buildings cannot be zero.')
1121
+ input_error_flag = True
1122
+
1123
+ if len(lutidx) != len(tables['t7'][0]) or len(lutidx) != len(tables['t8'][0])\
1124
+ or len(lutidx) != len(tables['t9'][0]) or len(lutidx) != len(tables['t11'][0]):
1125
+ print('The number of rows in Tables 7,8,9 and 11 must be equal to '\
1126
+ 'the number of land use types (LUT) in Nomenclature sheet.\n')
1127
+ input_error_flag = True
1128
+
1129
+ if len(lrsidx)!=len(tables['t7'][0][0]) or len(lrsidx)!=len(tables['t8'][0][0])\
1130
+ or len(lrsidx)!=len(tables['t11'][0][0]):
1131
+ print('The number of columns in Tables 7,8 and 11 must be equal to '\
1132
+ 'the number of load resisting system (LRS) types in '\
1133
+ 'Nomenclature sheet. \n')
1134
+ input_error_flag = True
1135
+
1136
+ # Check if avgincome values are missing for fields in the nomenclature list
1137
+ for val in lut_types:
1138
+ avgInc_mask = landuse['luf'] == val
1139
+ incomeval4lut = landuse.loc[avgInc_mask,'avgincome']
1140
+ if incomeval4lut.isnull().values.any():
1141
+ print('avgincome field missing for ',val,'\n')
1142
+ input_error_flag_shp = True
1143
+
1144
+ if input_error_flag:
1145
+ print('Please correct the faulty inputs in the input spreadsheet.\n')
1146
+ sys.exit(1)
1147
+
1148
+ if input_error_flag_shp:
1149
+ print('Please correct the faulty inputs in the input shapefile.\n')
1150
+ sys.exit(1)
1151
+
1152
+
1153
+ print(time.time() - tic)
1154
+ tic = time.time()
1155
+ print('3 ------',end=' ')
1156
+
1157
+ #%% Note on definition of data layers
1158
+ # The household layer is initialized as Pandas dataframe in Step 2
1159
+ # The individual layer is initialized as Pandas dataframe in Step 5
1160
+ # The building layer is initialized as Pandas dataframe in Step 12
1161
+ # landuse_res_df (residential zone landuse subdataframe) is defined in step 11
1162
+ # landuse_ic_df (commercial/industrial) is also defined in step 11
1163
+
1164
+ #%% Function definition: dist2vector
1165
+
1166
+ print(time.time() - tic)
1167
+ tic = time.time()
1168
+ print('4 ------',end=' ')
1169
+ #%% The data generation process begins here____________________________________
1170
+
1171
+ #%% Step 1: Calculate maximum population (nPeople)
1172
+ if population_calculate:
1173
+ landuse['population'] = landuse['population'].astype(int)
1174
+ # Subtracts existing population from projected population
1175
+ nPeople = round(landuse['densitycap']*landuse['area']-landuse['population'])
1176
+ nPeople[nPeople<0]=0
1177
+ else:
1178
+ landuse['population'] = landuse['population'].astype(int)
1179
+ nPeople = landuse['population']
1180
+
1181
+ print(time.time() - tic)
1182
+ tic = time.time()
1183
+ print('5 ------',end=' ')
1184
+ #%% Step 2: Calculate the number of households (nhouse), hhid
1185
+ # Assumption 1: Household size distribution is same for different income types
1186
+ # Question: How to ensure that there are no NaNs while assigning zone type?
1187
+
1188
+ # Convert Table 1 to numpy array
1189
+ t1_list = tables['t1'][0]
1190
+ # No. of individuals
1191
+ t1_l1 = np.array(t1_list[0], dtype=int)
1192
+ t1_l2 = np.array(t1_list[1], dtype=float) # Probabilities
1193
+
1194
+ # Compute the probability of X number of people living in a household
1195
+ household_prop = t1_l2/sum(t1_l2)
1196
+ # Total number of households for all zones
1197
+ nhouse_all = round(nPeople/(sum(household_prop*t1_l1)))
1198
+ nhouse_all = nhouse_all.astype('int32')
1199
+ nhouse = nhouse_all[nhouse_all>0] # Exclude zones with zero households
1200
+ nhouseidx = nhouse.index
1201
+ #Preallocate a dataframe with nan to hold the household layer
1202
+ household_df = pd.DataFrame(np.nan, index = range(sum(nhouse)),
1203
+ columns=['bldid','hhid','income','nind','commfacid',
1204
+ 'income_numb','zonetype','zoneid',
1205
+ 'approxFootprint'])
1206
+ #Calculate a list of cumulative sum of nhouse
1207
+ nhouse_cuml = np.cumsum(nhouse)
1208
+
1209
+ # Assign household id (hhid)
1210
+ a = 0
1211
+ for i in nhouseidx:
1212
+ b = nhouse_cuml[i]
1213
+ household_df.loc[range(a,b),'hhid'] = range(a+1,b+1) # First hhid index =1
1214
+ household_df.loc[range(a,b),'zoneid'] = landuse.loc[i,'zoneid']
1215
+ household_df.loc[range(a,b),'zonetype'] = landuse.loc[i,'avgincome']
1216
+ a = b
1217
+
1218
+ del a,b
1219
+ household_df['hhid'] = household_df['hhid'].astype(int)
1220
+
1221
+ print(time.time() - tic)
1222
+ tic = time.time()
1223
+ print('6 ------',end=' ')
1224
+ #%% Step 3: Identify the household size and assign "nInd" values to each household
1225
+ a_g = 0
1226
+ for i in nhouseidx:
1227
+ b_g = nhouse_cuml[i]
1228
+ # Find Total of every different nInd number for households
1229
+ household_num = nhouse[i] * household_prop
1230
+ # Round the household numbers for various numbers of individuals
1231
+ # without exceeding total household number
1232
+ cumsum_household_num = np.round_(np.cumsum(household_num)).astype('int32')
1233
+ cumsum_household_num_diff = np.diff(cumsum_household_num)
1234
+ first_val = nhouse[i] - sum(cumsum_household_num_diff)
1235
+ household_num_round = np.insert(cumsum_household_num_diff,0,first_val)
1236
+
1237
+ #Generate a column vector
1238
+ d_value = t1_l1
1239
+ d_number = cumsum_household_num
1240
+ insert_vector = np.ones(d_number[-1])
1241
+ a, count =0, 0
1242
+ for value in d_value:
1243
+ b = d_number[count]
1244
+ #This works for numbers but not for strings
1245
+ subvector = np.empty(household_num_round[count]) #
1246
+ subvector.fill(value) #
1247
+ insert_vector[a:b] = subvector #
1248
+ a = b
1249
+ count+=1
1250
+ del a,b
1251
+ insert_vector = np.random.permutation(insert_vector)
1252
+
1253
+ household_df.loc[range(a_g,b_g), 'nind'] = insert_vector
1254
+ a_g = b_g
1255
+
1256
+ del a_g, b_g, count,insert_vector,subvector
1257
+
1258
+ household_df['nind'] = household_df['nind'].astype(int)
1259
+
1260
+ print(time.time() - tic)
1261
+ tic = time.time()
1262
+ print('7 ------',end=' ')
1263
+ #%% Step 4: Identify and assign income type of the households
1264
+ # Table 2 states the % of various income groups in different income zones
1265
+ # Convert Table 2 to numpy array
1266
+ # for row in range((len(tables['t2'][0]))):
1267
+ # tables['t2'][0][row]=np.fromstring(tables['t2'][0][row],dtype=float,sep=',')
1268
+
1269
+ t2 = np.array(tables['t2'][0])
1270
+
1271
+ count = 0
1272
+
1273
+ for inc in avg_income_types:
1274
+ #Find indices corresponding to a zone type
1275
+ itidx = household_df['zonetype'] == inc
1276
+ if sum(itidx) ==0: #i.e. this income zone doesn't exist in the landuse data
1277
+ count+=1
1278
+ continue
1279
+
1280
+ income_entries = t2[count]*sum(itidx)
1281
+ d_limit = sum(itidx) # Size of array to match after rounding off
1282
+ d_value = avg_income_types[income_entries!=0]
1283
+ d_number = income_entries[income_entries!=0] #ip
1284
+
1285
+ insert_vector = dist2vector(d_value, d_number,d_limit,'shuffle')
1286
+ count+=1
1287
+ household_df.loc[itidx, 'income'] = insert_vector
1288
+ print(time.time() - tic)
1289
+ tic = time.time()
1290
+ print('8 ------',end=' ')
1291
+ del count,insert_vector
1292
+
1293
+
1294
+ #%% Step 5: Identify and assign a unique ID for each individual
1295
+
1296
+ #Assumption 2: Gender distribution is same for different income types
1297
+
1298
+ #Preallocate a dataframe with nan to hold the individual layer
1299
+ nindiv = int(sum(household_df['nind'])) # Total number of individuals
1300
+ individual_df = pd.DataFrame(np.nan, index = range(nindiv),
1301
+ columns=['hhid', 'individ', 'gender', 'age','head',
1302
+ 'eduattstat','indivfacid_1','indivfacid_2',
1303
+ 'indivfacid',
1304
+ 'schoolenrollment','labourForce','employed'])
1305
+ individual_df.loc[range(nindiv),'individ'] = [range(1,nindiv+1)]
1306
+ individual_df['individ'].astype('int')
1307
+ print(time.time() - tic)
1308
+ tic = time.time()
1309
+ print('9 ------',end=' ')
1310
+ #%% Step 6: Identify and assign gender for each individual
1311
+ # Convert the gender distribution table 3 to numpy array
1312
+ tables['t3'][0] = np.array(tables['t3'][0][0],dtype=float)
1313
+ female_p = tables['t3'][0][0]
1314
+ male_p = 1-female_p
1315
+ gender_value = np.array([1,2], dtype=int) # 1=Female, 2=Male
1316
+ gender_number = np.array([female_p, male_p])*nindiv
1317
+
1318
+ d_limit = nindiv # Size of array to match after rounding off
1319
+ d_value = gender_value
1320
+ d_number = gender_number
1321
+
1322
+ insert_vector = dist2vector(d_value, d_number,d_limit,'shuffle')
1323
+ individual_df.loc[range(nindiv),'gender'] = insert_vector
1324
+ individual_df['gender'] = individual_df['gender'].astype('int')
1325
+
1326
+ #%% Step 7: Identify and assign age for each individual
1327
+ #Assumption 3: Age profile is same for different income types
1328
+ #Convert the age profile wrt gender distribution table 4 to numpy array
1329
+ ageprofile_value = np.array([1,2,3,4,5,6,7,8,9,10], dtype=int)
1330
+ t4_l1_f = np.array(tables['t4'][0][0], dtype=float) #For female
1331
+ t4_l2_m = np.array(tables['t4'][0][1], dtype=float) #For male
1332
+ t4 = np.array([t4_l1_f, t4_l2_m])
1333
+
1334
+ for i in range(len(gender_value)):
1335
+ gidx = individual_df['gender'] == gender_value[i]
1336
+ d_limit = sum(gidx)
1337
+ d_value = ageprofile_value
1338
+ d_number = t4[i]*sum(gidx)
1339
+ insert_vector = dist2vector(d_value, d_number,d_limit,'shuffle')
1340
+ individual_df.loc[gidx,'age'] = insert_vector
1341
+
1342
+ individual_df['age'] = individual_df['age'].astype(int)
1343
+ print(time.time() - tic)
1344
+ tic = time.time()
1345
+ print('10 ------',end=' ')
1346
+ #%% Step 8: Identify and assign education attainment status for each individual
1347
+
1348
+ # Assumption 4: Education Attainment status is same for different income types
1349
+ # Education Attainment Status (Meta Data)
1350
+ # 1 - Only literate
1351
+ # 2 - Primary school
1352
+ # 3 - Elementary sch.
1353
+ # 4 - High school
1354
+ # 5 - University and above
1355
+ #Convert the educational status distribution table 5 to numpy array
1356
+ education_value = np.array([1,2,3,4,5], dtype=int)
1357
+ t5_l1_f = np.array(tables['t5'][0][0], dtype=float) #For female
1358
+ t5_l2_m = np.array(tables['t5'][0][1], dtype=float) #For male
1359
+ t5 = np.array([t5_l1_f, t5_l2_m])
1360
+
1361
+ for i in range(len(gender_value)):
1362
+ gidx = individual_df['gender'] == gender_value[i]
1363
+ d_limit = sum(gidx)
1364
+ d_value = education_value
1365
+ d_number = t5[i]*sum(gidx)
1366
+ insert_vector = dist2vector(d_value, d_number,d_limit,'shuffle')
1367
+ individual_df.loc[gidx,'eduattstat'] = insert_vector
1368
+
1369
+ individual_df['eduattstat'] = individual_df['eduattstat'].astype(int)
1370
+
1371
+ print(time.time() - tic)
1372
+ tic = time.time()
1373
+ print('11 ------',end=' ')
1374
+ #%% Step 9: Identify and assign the head of household to corresponding hhid
1375
+
1376
+ # Assumption 5: Head of household is dependent on gender
1377
+ # Assumption 6: Only (age>20) can be head of households
1378
+ #Convert the head of household distribution table 6 to numpy array
1379
+ tables['t6'][0] = np.array(tables['t6'][0][0],dtype=float)
1380
+ female_hh = tables['t6'][0][0]
1381
+ male_hh = 1-female_hh
1382
+
1383
+ # Calculate the number of household heads by gender
1384
+ hh_number= np.array([female_hh, male_hh])*sum(nhouse)
1385
+ hh_number= hh_number.astype(int)
1386
+ hh_number[0] = sum(nhouse) - hh_number[1]
1387
+
1388
+ for i in range(len(gender_value)): #Assign female and male candidates
1389
+ gaidx= (individual_df['gender'] == gender_value[i]) & \
1390
+ (individual_df['age']>4) # '>4' denotes above age group '18-20'
1391
+ #Index of household head candidates in individual_df
1392
+ hh_candidate_idx = list(individual_df.loc[gaidx,'gender'].index)
1393
+ # Take a random permutation sample to obtain household head indices from
1394
+ # the index of possible household candidates in individual_df
1395
+ ga_hh_idx = random.sample(hh_candidate_idx, hh_number[i])
1396
+ #print('gaidx=',sum(gaidx), 'ga_hh_idx', len(ga_hh_idx))
1397
+
1398
+ individual_df.loc[ga_hh_idx,'head'] = 1
1399
+
1400
+
1401
+
1402
+ # 1 = household head, 0 = household members other than the head
1403
+ individual_df.loc[individual_df['head'] != 1,'head'] =0
1404
+
1405
+ #Assign household ID (hhid) randomly
1406
+ hhid_temp = household_df['hhid'].tolist()
1407
+ random.shuffle(hhid_temp)
1408
+ individual_df.loc[individual_df['head'] == 1,'hhid'] = hhid_temp
1409
+ print(time.time() - tic)
1410
+ tic = time.time()
1411
+ print('12 ------',end=' ')
1412
+ #%% Step 10: Identify and assign the household that each individual belongs to
1413
+ # In relation with Assumption 6, no individuals under 20 years of age can live
1414
+ # alone in an household
1415
+ individual_df_temp = individual_df[individual_df['head']==0]
1416
+ individual_df_temp_idx = list(individual_df_temp.index)
1417
+ #hhidlist = household_df['hhid'].tolist()
1418
+ for i in range(1,len(t1_l1)): #Loop through household numbers >1
1419
+ hh_nind = t1_l1[i] # Number of individuals in households
1420
+ # Find hhid corresponding to household numbers
1421
+ hh_df_idx = household_df['nind']== hh_nind
1422
+ hhidx = household_df.loc[hh_df_idx,'hhid'].tolist()
1423
+ #Random shuffle hhidx here
1424
+ amph = hh_nind -1 # additional member per household
1425
+ for j in range(amph):
1426
+ # Randomly select len(hhidx) number of indices from individual_df_temp_idx
1427
+ idtidx = random.sample(individual_df_temp_idx, len(hhidx))
1428
+ individual_df.loc[idtidx,'hhid'] = hhidx
1429
+ #Remove idtidx before next iteration
1430
+ individual_df_temp = individual_df_temp.drop(index=idtidx)
1431
+ individual_df_temp_idx = list(individual_df_temp.index)
1432
+
1433
+ individual_df['hhid'] = individual_df['hhid'].astype(int)
1434
+
1435
+ print(time.time() - tic)
1436
+ tic = time.time()
1437
+ print('13 ------',end=' ')
1438
+ #%% Step 10a: Identify school enrollment for each individual
1439
+ # Final output 0 = not enrolled in school, 1 = enrolled in school
1440
+ # Assumption 16: Schooling age limits- AP2 and AP3 ( 5 to 18 years old)
1441
+ # can go to school
1442
+ # Convert distribution table 5a to numpy array
1443
+ # Table 5a contains school enrollment probability
1444
+ for row in range((len(tables['t5a'][0]))):
1445
+ tables['t5a'][0][row]=np.array(tables['t5a'][0][row],dtype=float)
1446
+ t5a = np.array(tables['t5a'][0]) # Table 5a
1447
+ # Find individuals with age between 5-18 (these are students)
1448
+ # Also find individual Id of students and household Id of students
1449
+ agemask = (individual_df['age'] == 2) | (individual_df['age']==3)
1450
+ school_df = pd.DataFrame(np.nan, index = range(sum(agemask)),
1451
+ columns=['individ','hhid','eduattstath','income','enrollment'])
1452
+ school_df_idx = individual_df.loc[agemask,'individ'].index
1453
+ school_df.set_index(school_df_idx, inplace=True)
1454
+ school_df['individ'] = individual_df.loc[agemask,'individ']
1455
+ school_df['hhid'] = individual_df.loc[agemask,'hhid']
1456
+ # Then, pick a slice of individual_df corresponding to the household a student
1457
+ # belongs to. From there, Pick eduAtt status of head of household. To expedite
1458
+ # computation, dataframe columns have been converted to list
1459
+ school_df_hhid_list = list(school_df['hhid'])
1460
+ temp_df = individual_df[individual_df['hhid'].isin(school_df_hhid_list)]
1461
+ head4school_df = temp_df[temp_df['head'] == 1]
1462
+ head4school_df_hhid_list = list(head4school_df['hhid'])
1463
+ head4school_df_edus_list = list(head4school_df['eduattstat'])
1464
+ school_df_edu_list = np.ones(len(school_df_hhid_list))*np.nan
1465
+
1466
+ # Label 'lowIncomeA' and 'lowIncomeB' = 1, 'midIncome' =2, 'highIncome' =3
1467
+ household_df_hhid_list = list(household_df['hhid'])
1468
+ #Use .copy() to avoid SettingwithCopyWarning
1469
+ income4school_df=household_df[household_df['hhid'].\
1470
+ isin(school_df_hhid_list)].copy()
1471
+ li_mask = (income4school_df['income'] == avg_income_types[0]) |\
1472
+ (income4school_df['income'] == avg_income_types[1])
1473
+ lm_mask = income4school_df['income'] == avg_income_types[2]
1474
+ lh_mask = income4school_df['income'] == avg_income_types[3]
1475
+ income4school_df.loc[li_mask,'income'] = 1
1476
+ income4school_df.loc[lm_mask,'income'] = 2
1477
+ income4school_df.loc[lh_mask,'income'] = 3
1478
+ income4school_df_income_list = list(income4school_df['income'])
1479
+ income4school_df_hhid_list = list(income4school_df['hhid'])
1480
+ school_df_income_list = np.ones(len(school_df_hhid_list))*np.nan
1481
+
1482
+ # Faster way
1483
+ #school_df
1484
+ #head4school_df
1485
+ school_df_edu_list_df = school_df[['hhid']].merge(head4school_df[['hhid','eduattstat']], how='left', on='hhid')
1486
+ school_df_edu_list= list(school_df_edu_list_df['eduattstat'])
1487
+
1488
+ school_df_income_list_df = school_df[['hhid']].merge(income4school_df[['hhid','income']], how='left', on='hhid')
1489
+ school_df_income_list= list(school_df_income_list_df['income'])
1490
+
1491
+ #count=0
1492
+ # NOTE: If the operation inside this for loop can be replaced with indexing
1493
+ # operation the computation time for this code can be further reduced.
1494
+ #for hhid in school_df_hhid_list:
1495
+ # #print('hhid',hhid, count, len(school_df_hhid_list))
1496
+ # #assign education attained by head of household to school_df
1497
+ # hhid_temp = [i for i, value in enumerate(head4school_df_hhid_list)\
1498
+ # if value == hhid ]
1499
+ # school_df_edu_list[count] = head4school_df_edus_list[hhid_temp[0]]
1500
+ # #assign income type of household to school_df
1501
+ # hhid_temp2 = [i for i, value in enumerate(income4school_df_hhid_list)\
1502
+ # if value == hhid ]
1503
+ # school_df_income_list[count] = income4school_df_income_list[hhid_temp2[0]]
1504
+ # count+=1
1505
+
1506
+
1507
+
1508
+ #print('original edu')
1509
+ #print(len(school_df_edu_list), school_df_edu_list[:10],school_df_edu_list[-10:])
1510
+ #print('original income')
1511
+ #print(len(school_df_income_list), school_df_income_list[:10],school_df_income_list[-10:])
1512
+
1513
+ school_df.loc[school_df.index, 'eduattstath'] = school_df_edu_list
1514
+ school_df['eduattstath'] = school_df['eduattstath'].astype(int)
1515
+ school_df['income'] = school_df_income_list
1516
+ school_df['income'] = school_df['income'].astype(int)
1517
+
1518
+ print(time.time() - tic)
1519
+ tic = time.time()
1520
+ print('14 ------',end=' ')
1521
+
1522
+ #assign school enrollment (1 = enrolled, 0 = not enrolled)
1523
+ for incomeclass in range(1,4): # Income class 1,2,3
1524
+ for head_eduattstat in range(1,6): # Education attainment category 1 to 5
1525
+ enrmask = (school_df['income'] == incomeclass) &\
1526
+ (school_df['eduattstath'] == head_eduattstat)
1527
+ no_of_pstudents = sum(enrmask) # Number of potential students
1528
+ if no_of_pstudents ==0: #continue if no students exist for given case
1529
+ continue
1530
+ i,j = incomeclass-1, head_eduattstat-1 # indices to access table 5a
1531
+ d_limit = no_of_pstudents # Size of array to match after rounding off
1532
+ d_value = [1,0] #1= enrolled, 0 = not enrolled
1533
+ d_number = np.array([t5a[i,j], 1-t5a[i,j]])*no_of_pstudents
1534
+ insert_vector = dist2vector(d_value, d_number,d_limit,'shuffle')
1535
+ school_df.loc[enrmask,'enrollment'] = insert_vector
1536
+
1537
+ school_df['enrollment']= school_df['enrollment'].astype(int)
1538
+ # Substitute the enrollment status back to individual_df dataframe
1539
+ individual_df.loc[school_df.index,'schoolenrollment']= school_df['enrollment']
1540
+
1541
+ print(time.time() - tic)
1542
+ tic = time.time()
1543
+ print('15 ------',end=' ')
1544
+ #%% Step 11: Identify approximate total residential building area needed
1545
+ # (approxDwellingAreaNeeded_sqm)
1546
+ # Assumption 7a on Average dwelling area (sqm) for different income types.
1547
+
1548
+ # The output is stored in the column 'totalbldarea_res' in landuse_res_df,
1549
+ # which represents the total buildable area
1550
+
1551
+ #Sub dataframe of landuse type containing only residential areas
1552
+ landuse_res_df = landuse.loc[nhouse.index].copy()
1553
+ landuse_res_df.loc[nhouse.index,'nhousehold'] = nhouse
1554
+ hh_temp_df = household_df.copy()
1555
+
1556
+ for i in range(0,len(avg_income_types)):
1557
+ hh_temp_df['income'] = hh_temp_df['income'].replace(avg_income_types[i],\
1558
+ average_dwelling_area[i])
1559
+ for index in landuse_res_df.index: # Loop through each residential zone
1560
+ zoneid = landuse_res_df['zoneid'][index]
1561
+ sum_part = hh_temp_df.loc[hh_temp_df['zoneid']==zoneid,'income'].sum()
1562
+ landuse_res_df.loc[index, 'approxDwellingAreaNeeded_sqm'] = sum_part
1563
+
1564
+ # Zones where no households live i.e. potential commercial or industrial zones
1565
+ noHH = nhouse_all[nhouse_all<=0].index
1566
+ landuse_ic_df = landuse.loc[noHH].copy()
1567
+ landuse_ic_df['area'] = landuse_ic_df['area']*10000 # Convert hectare to sq m
1568
+
1569
+
1570
+ print(time.time() - tic)
1571
+ tic = time.time()
1572
+ print('16 ------',end=' ')
1573
+
1574
+ #%% Steps 12,13,14,15:
1575
+ # Identify number of residential buildings and generate building layer
1576
+
1577
+ # Table 7 contains Number of storeys distribution for various LRS and LUT
1578
+ # Table 11 contains code compliance distribution for various LRS and LUT
1579
+ t7= tables['t7'][0]
1580
+ t11 = tables['t11'][0]
1581
+
1582
+ # Convert Table 8 to numpy array
1583
+ # Table8 contains LRS distribution with respect to various LUT
1584
+ for row in range((len(tables['t8'][0]))):
1585
+ tables['t8'][0][row]=np.array(tables['t8'][0][row],dtype=float)
1586
+ t8 = np.array(tables['t8'][0]) # Table 8
1587
+
1588
+ # Determine the number of buildings in each zone based on average income class
1589
+ # building footprint range for each landuse zone and Tables 7 and 8
1590
+ no_of_resbldg = 0 # Total residential buildings in all zones
1591
+ footprint_base_sum = 0 # footprint at base, not multiplied by storeys
1592
+ footprint_base_L,storey_L,lrs_L,zoneid_L,codelevel_L = [],[],[],[],[]
1593
+
1594
+ print(time.time() - tic)
1595
+ tic = time.time()
1596
+ print('17 ------',end=' ')
1597
+ for i in landuse_res_df.index: #Loop through zones
1598
+ zoneid = landuse_res_df['zoneid'][i]
1599
+ #totalbldarea_res = landuse_res_df['totalbldarea_res'][i]
1600
+ #totalbldarea_res is the total residential area that needs to be built
1601
+ totalbldarea_res = landuse_res_df.loc[i,'approxDwellingAreaNeeded_sqm']
1602
+ avgincome = landuse_res_df['avgincome'][i]
1603
+ lut_zone = landuse_res_df['luf'][i]
1604
+ fpt_range = fpt_area[avgincome]
1605
+ # Generate a vector of candidate footprints. lenmax is the maximum possible
1606
+ # number of buildings, i.e. the count if every footprint were the minimum size
1607
+ lenmax = int(totalbldarea_res/np.min(fpt_range))
1608
+ footprints_temp = np.random.uniform(np.min(fpt_range),\
1609
+ np.max(fpt_range), size=(lenmax,1))
1610
+ footprints_temp = footprints_temp.reshape(len(footprints_temp),)
1611
+ # Select LRS using multinomial distribution and Table 8
1612
+ lrs_number=multinomial(len(footprints_temp), t8[lutidx[lut_zone]],size=1)
1613
+ lrs_vector=np.array(dist2vector(lrs_types,lrs_number,\
1614
+ np.sum(lrs_number),'shuffle'))
1615
+
1616
+ # Select storeys in a zone for various LRS using multinomial distribution
1617
+ #storey_vector = np.array([],dtype=int)
1618
+ storey_vector = np.array(np.zeros(len(lrs_vector),dtype=int)) #must be assigned after loop
1619
+ for lrs in lrs_types: # Loop through LRS types in a zone
1620
+ t7row = t7[lutidx[lut_zone]] #Extract row for LUT
1621
+ #Extract storey distribution in row for LRS
1622
+ t7dist = np.fromstring(t7row[lrsidx[lrs]],dtype=float, sep=',')
1623
+ lrs_pos = lrs_vector==lrs
1624
+ storey_number = multinomial(sum(lrs_pos),t7dist,size=1)
1625
+ storey_vector_part = np.array([],dtype=int)
1626
+ for idx,st_range in storey_range.items(): #Loop through storey classes
1627
+ sv_temp = \
1628
+ randint(st_range[0],st_range[1]+1,storey_number[0][idx])
1629
+ storey_vector_part = \
1630
+ np.concatenate((storey_vector_part,sv_temp),axis =0)
1631
+ # Need to shuffle storey_vector before multiplying and deleting
1632
+ #extra values, otherwise 100% of storeys will be low rise, resulting in
1633
+ #larger number of buildings
1634
+ np.random.shuffle(storey_vector_part)
1635
+ storey_vector[lrs_pos] =storey_vector_part
1636
+ # Select code compliance level for various LRS using multinomial dist
1637
+ cc_vector = [] # code compliance vector for a zone
1638
+ for lrs in lrs_types: # for each LRS in a zone
1639
+ t11row = t11[lutidx[lut_zone]]
1640
+ t11dist = np.fromstring(t11row[lrsidx[lrs]],dtype=float, sep=',')
1641
+ lrs_pos = lrs_vector==lrs
1642
+ cc_number = multinomial(sum(lrs_pos),t11dist,size=1)
1643
+ cc_part = dist2vector(code_level, cc_number,sum(lrs_pos),'shuffle')
1644
+ cc_vector += cc_part
1645
+ random.shuffle(cc_vector)
1646
+
1647
+ #If it is necessary to equalize number of storeys = number of households
1648
+ storey_vector_cs = np.cumsum(storey_vector)
1649
+ stmask = storey_vector_cs <= landuse_res_df.loc[i,'nhousehold']
1650
+ if sum(stmask)>0:
1651
+ stlimit_idx = np.max(np.where(stmask))+1
1652
+ stlimit_idx_range = range(stlimit_idx+1,len(footprints_temp))
1653
+ else:
1654
+ stlimit_idx_range = range(1,len(footprints_temp))
1655
+
1656
+ footprints_base = footprints_temp #Footprints without storey
1657
+ dwellingArea_temp= footprints_temp*storey_vector
1658
+ dwellingArea_temp_cs = np.cumsum(dwellingArea_temp)
1659
+
1660
+ #If it is necessary to equalize required footprint = provided footprint
1661
+ #OPTIONAL:Here, introduce a method to match total buildable area (dwelling)
1662
+ # fpmask = dwellingArea_temp_cs <= totalbldarea_res
1663
+ # #Indices of footprints whose sum <= dwelling area needed in a zone
1664
+ # # '+ 1' provides slightly more dwelling area than needed
1665
+ # footprints_idx = np.max(np.where(fpmask)) + 1
1666
+
1667
+ # Delete additional entries in the vectors for footprint, lrs and storeys
1668
+ # which do not fit into total buildable area
1669
+ #ftrange = range(footprints_idx+1,len(dwellingArea_temp))
1670
+
1671
+ ftrange = stlimit_idx_range
1672
+
1673
+ dwellingArea = np.delete(dwellingArea_temp,ftrange)
1674
+ footprints_base = np.delete(footprints_base,ftrange)
1675
+ lrs_vector_final = np.delete(lrs_vector,ftrange)
1676
+ storey_vector_final = np.delete(storey_vector,ftrange)
1677
+ cc_vector = np.array(cc_vector)
1678
+ cc_vector_final = np.delete(cc_vector,ftrange)
1679
+ no_of_resbldg += len(dwellingArea)
1680
+
1681
+ #footprint_base_sum+=np.sum(footprints_base)
1682
+ # Store the vectors in lists for substitution in dataframe
1683
+ footprint_base_L += list(footprints_base)
1684
+ storey_L += list(storey_vector_final)
1685
+ lrs_L += list(lrs_vector_final)
1686
+ zoneid_L += [zoneid]*len(dwellingArea)
1687
+ codelevel_L += list(cc_vector_final)
1688
+
1689
+ landuse_res_df.loc[i,'footprint_sqm'] = np.sum(footprints_base)
1690
+ landuse_res_df.loc[i,'dwellingAreaProvided_sqm'] = np.sum(dwellingArea)
1691
+
1692
+ landuse_res_df.loc[i, 'Storey_units'] = sum(storey_vector_final)
1693
+ #'No_of_res_buildings' denotes total residential + ResCom buildings
1694
+ landuse_res_df.loc[i, 'No_of_res_buildings'] = len(footprints_base)
1695
+ # Check distribution after deletion (for debugging) by counting LR
1696
+ #print(sum(storey_vector_final<5)/len(storey_vector_final))
1697
+
1698
+ print(time.time() - tic)
1699
+ tic = time.time()
1700
+ print('18 ------',end=' ')
1701
+ # landuse_res_df['area'] denotes the total buildable area
1702
+ landuse_res_df['area'] *= 10000 # Convert hectares to sq m, 1ha =10^4 sqm
1703
+
1704
+ # landuse_res_df['builtArea_percent'] denotes the percentage of total
1705
+ # buildable area that needs to be built to accommodate the projected population
1706
+ landuse_res_df['builtArea_percent'] =\
1707
+ landuse_res_df['footprint_sqm']/landuse_res_df['area']*100
1708
+
1709
+ #ADD HERE : EXCEPTION HANDLING for built area exceeding available area
1710
+
1711
+ #print(no_of_resbldg)
1712
+
1713
+ #ADD: Check if calculated footprint exceeds total buildable area (landuse.area)
1714
+
1715
+ #Create and populate the building layer, with unassigned values as NaN
1716
+ resbld_df = pd.DataFrame(np.nan, index = range(0, no_of_resbldg),
1717
+ columns=['zoneid', 'bldid', 'specialfac', 'repvalue',
1718
+ 'nhouse', 'residents', 'expstr','fptarea',
1719
+ 'occbld','lrstype','codelevel',
1720
+ 'nstoreys'])
1721
+ resbld_range = range(0,no_of_resbldg)
1722
+ #resbld_df.loc[resbld_range,'bldid'] = list(range(1,no_of_resbldg+1))
1723
+ resbld_df.loc[resbld_range,'zoneid'] = zoneid_L
1724
+ resbld_df['zoneid'] = resbld_df['zoneid'].astype('int')
1725
+ resbld_df.loc[resbld_range,'occbld'] = 'Res'
1726
+ resbld_df.loc[resbld_range,'specialfac'] = 0
1727
+ resbld_df.loc[resbld_range,'fptarea'] = footprint_base_L
1728
+ resbld_df.loc[resbld_range,'nstoreys'] = storey_L
1729
+ resbld_df.loc[resbld_range,'lrstype'] = lrs_L
1730
+ resbld_df.loc[resbld_range,'codelevel'] = codelevel_L
1731
+ print(time.time() - tic)
1732
+ tic = time.time()
1733
+ print('19 ------',end=' ')
1734
+ #%% Assign zoneids and building IDs for Res and ResCom
1735
+ # Assign 'ResCom' status based on Table 9
1736
+ # Assumption: Total residential buildings = Res + ResCom
1737
+ # Convert Table 9 to numpy array
1738
+ # Table 9 contains occupancy type with respect to various LUT
1739
+ # Occupancy types: Residential (Res), Industrial (Ind), Commercial (Com)
1740
+ # Residential and commercial mixed (ResCom)
1741
+ for row in range((len(tables['t9'][0]))):
1742
+ tables['t9'][0][row]=np.array(tables['t9'][0][row],dtype=float)
1743
+ t9 = np.array(tables['t9'][0]) # Table 9
1744
+
1745
+ #available_LUT = list(set(landuse_res_df['luf']))
1746
+ available_zoneid = list(set(resbld_df['zoneid']))
1747
+ for zoneid in available_zoneid: #Loop through zones
1748
+ zonemask = resbld_df['zoneid'] == zoneid
1749
+ zone_idx = list(zonemask.index.values[zonemask])
1750
+ lutlrdidx=landuse_res_df[landuse_res_df['zoneid']==zoneid].index.values[0]
1751
+ #Occupancy type distribution for a zone
1752
+ occtypedist = t9[lutidx[ landuse_res_df['luf'][lutlrdidx]]]
1753
+ no_of_resbld = sum(zonemask) # Number of residential buildings in a zone
1754
+ # if mixed residential+commercial buildings as well as residential buildings exist
1755
+ if occtypedist[3] !=0 and occtypedist[0] !=0 :
1756
+ # nrc = number of mixed res+com buildings in a zone
1757
+ nrc = int(occtypedist[3]/occtypedist[0]*no_of_resbld)
1758
+ elif occtypedist[3] !=0 and occtypedist[0] ==0:
1759
+ nrc = int(no_of_resbld)
1760
+ else: # if only residential buildings exist
1761
+ continue
1762
+ nrc_idx = sample(zone_idx,nrc)
1763
+ resbld_df.loc[nrc_idx,'occbld'] = 'ResCom'
1764
+
1765
+ print(time.time() - tic)
1766
+ tic = time.time()
1767
+ print('20 ------',end=' ')
1768
+ #Assign building Ids for res and rescom buildings
1769
+ lenresbld = len(resbld_df)
1770
+ resbld_df.loc[range(0,lenresbld),'bldid'] = list(range(1,lenresbld+1))
1771
+ resbld_df['bldid'] = resbld_df['bldid'].astype('int')
1772
+
1773
+ #%% STEP16: Identify and assign number of households and residents for each
1774
+ #residential building
1775
+ #Assign nhouse, residents. All the households and residents must be assigned
1776
+ #to this layer.
1777
+ print(time.time() - tic)
1778
+ tic = time.time()
1779
+ print('20.2 ------',end=' ')
1780
+ dwellings_str=dist2vector(resbld_df['bldid'],np.array(storey_L),\
1781
+ np.sum(np.array(storey_L)),'DoNotShuffle')
1782
+ print(time.time() - tic)
1783
+ tic = time.time()
1784
+ print('20.3 ------',end=' ')
1785
+ dwellings = list(map(int,dwellings_str))
1786
+ #dwellings.sort()
1787
+ dwellings_selected = dwellings[0:len(household_df)]
1788
+ print(time.time() - tic)
1789
+ tic = time.time()
1790
+ print('20.4 ------',end=' ')
1791
+ random.shuffle(dwellings_selected)
1792
+ #Assign building IDs to all households
1793
+ household_df.loc[:,'bldid'] = dwellings_selected
1794
+
1795
+
1796
+ # Assign number of households and residents to residential buildings resbld_df
1797
+ # This loop must be optimized for speed
1798
+
1799
+ print(time.time() - tic)
1800
+ tic = time.time()
1801
+ print('20.5 ------',end=' ')
1802
+
1803
+ # Alternative
1804
+ # Drop the columns which I'll already generate in a second
1805
+ resbld_df = resbld_df.drop(columns=['nhouse','residents'])
1806
+ # Get nind information from household table
1807
+ resbld_w_household = resbld_df[['bldid']].merge(household_df[['bldid','hhid','nind']], how='inner', on='bldid')
1808
+ # Aggregate by bldid. nhouse: count of household, residents: number of individuals
1809
+ resbld_w_household = resbld_w_household.groupby('bldid').agg({'hhid':'count','nind':'sum'}).reset_index().rename(columns={'hhid':'nhouse','nind':'residents'})
1810
+ # Merge nhouse and residents columns back into building table
1811
+ resbld_df = resbld_df.merge(resbld_w_household,how='inner',on='bldid')
1812
+
1813
+ print(time.time() - tic)
1814
+ tic = time.time()
1815
+ print('21 ------',end=' ')
1816
+ # Rows in resbld_df which contain no residents were already removed by the inner merge above
1817
+
1818
+
1819
+
1820
+ #%% Step 17,18: Identify and generate commercial and industrial buildings
1821
+ # No household or individual lives in com, ind, hosp, sch zones
1822
+ # Assumption 10 and 11: Assume a certain number of commercial and industrial
1823
+ # buildings per 1000 individuals
1824
+
1825
+ # No commercial and industrial buildings in:recreational areas,agriculture,
1826
+ # residential (gated neighbourhood), residential (low-density)
1827
+ # But com and ind buildings can occur in any zone where permitted by Table 9
1828
+ ncom = round(nindiv/1000*numb_com)
1829
+ nind = round(nindiv/1000*numb_ind)
1830
+ nci = np.array([ncom,nind])
1831
+ occbld_label = ['Com','Ind']
1832
+ nci_cs = np.cumsum(nci)
1833
+ indcom_df = pd.DataFrame(np.nan, index = range(0, ncom+nind),
1834
+ columns=['zoneid', 'bldid', 'specialfac', 'repvalue',
1835
+ 'nhouse', 'residents', 'expstr','fptarea',
1836
+ 'lut_number','occbld','lrstype','codelevel',
1837
+ 'nstoreys'])
1838
+
1839
+ t10= tables['t10'][0] # Extract Table 10
1840
+ a = 0
1841
+ for i in range(0,len(nci)): # First commercial, then industrial
1842
+ attr = t10[i]
1843
+ #Extract distributions for footprint, storeys, code compliance and LRS
1844
+ fpt_ic = np.fromstring(attr[0], dtype=float, sep=',')
1845
+ nstorey_ic = np.fromstring(attr[1], dtype=int, sep=',')
1846
+ codelevel_ic = np.fromstring(attr[2], dtype=float, sep=',')
1847
+ lrs_ic = np.fromstring(attr[3], dtype=float, sep=',')
1848
+ range_ic = range(a,nci_cs[i])
1849
+ a = nci_cs[i]
1850
+ # Generate footprints
1851
+ indcom_df.loc[range_ic,'fptarea'] = np.random.uniform(\
1852
+ np.min(fpt_ic),np.max(fpt_ic), size=(nci[i],1)).reshape(nci[i],)
1853
+ # Generate number of storeys
1854
+ indcom_df.loc[range_ic,'nstoreys'] =randint(np.min(nstorey_ic),\
1855
+ np.max(nstorey_ic)+1,size=(nci[i],1)).reshape(nci[i],)
1856
+ # Generate code compliance
1857
+ cc_number_ic = multinomial(nci[i],codelevel_ic,size=1)
1858
+ indcom_df.loc[range_ic,'codelevel'] =\
1859
+ dist2vector(code_level, cc_number_ic,nci[i],'shuffle')
1860
+ # Generate LRS
1861
+ lrs_number_ic = multinomial(nci[i],lrs_ic,size=1)
1862
+ indcom_df.loc[range_ic,'lrstype'] =\
1863
+ dist2vector(lrs_types,lrs_number_ic,nci[i],'shuffle')
1864
+ indcom_df.loc[range_ic,'occbld']= occbld_label[i]
1865
+
1866
+ print(time.time() - tic)
1867
+ tic = time.time()
1868
+ print('22 ------',end=' ')
1869
+ # Assign number of households, Residents, special facility label
1870
+ range_all_ic = range(0,len(indcom_df))
1871
+ indcom_df.loc[range_all_ic,'nhouse'] = 0
1872
+ indcom_df.loc[range_all_ic,'residents'] = 0
1873
+ indcom_df.loc[range_all_ic,'specialfac'] = 0
1874
+
1875
+ ind_df = indcom_df[indcom_df['occbld'] == 'Ind'].copy()
1876
+ com_df = indcom_df[indcom_df['occbld'] == 'Com'].copy()
1877
+ ind_df.reset_index(drop=True,inplace=True)
1878
+ com_df.reset_index(drop=True,inplace=True)
1879
+
1880
+ #%% Step 19,20 Generate school and hospitals along with their attributes
1881
+
1882
+ # Assumption 14 and 15: For example : 1 school per 10000 individuals,
1883
+ # 1 hospital per 25000 individuals
1884
+ nsch = round(nindiv/nsch_pi) # Number of schools
1885
+ nhsp = round(nindiv/nhsp_pi) # Number of hospitals
1886
+
1887
+ if nsch == 0:
1888
+ print("WARNING: Total population",nindiv,"is less than the user-specified "\
1889
+ "number of individuals per school",nsch_pi,". So, total school for "\
1890
+ "this population = 1 (by default) \n")
1891
+ nsch = 1
1892
+
1893
+ if nhsp == 0:
1894
+ print("WARNING: Total population",nindiv,"is less than the user-specified "\
1895
+ "number of individuals per hospital",nhsp_pi,". So, total hospital for "\
1896
+ "this population = 1 (by default) \n ")
1897
+ nhsp = 1
1898
+
1899
+ nsh = np.array([nsch,nhsp])
1900
+ nsh_cs = np.cumsum(nsh)
1901
+ occbld_label_sh = ['Edu','Hea']
1902
+ specialfac = [1,2] # Special facility label
1903
+ schhsp_df = pd.DataFrame(np.nan, index = range(0, nsch+nhsp),
1904
+ columns=['zoneid', 'bldid', 'specialfac', 'repvalue',
1905
+ 'nhouse', 'residents', 'expstr','fptarea',
1906
+ 'lut_number','occbld','lrstype','codelevel',
1907
+ 'nstoreys'])
1908
+ t14= tables['t14'][0] # Extract Table 14
1909
+ print(time.time() - tic)
1910
+ tic = time.time()
1911
+ print('23 ------',end=' ')
1912
+ a=0
1913
+ for i in range(0,len(t14)): # First school, then hospital
1914
+ attr_sh = t14[i]
1915
+ #Extract distributions for footprint, storeys, code compliance and LRS
1916
+ fpt_sh = np.fromstring(attr_sh[0], dtype=float, sep=',')
1917
+ nstorey_sh = np.fromstring(attr_sh[1], dtype=int, sep=',')
1918
+ codelevel_sh = np.fromstring(attr_sh[2], dtype=float, sep=',')
1919
+ lrs_sh = np.fromstring(attr_sh[3], dtype=float, sep=',')
1920
+ range_sh = range(a,nsh_cs[i])
1921
+ a = nsh_cs[i]
1922
+ # Generate footprints
1923
+ schhsp_df.loc[range_sh,'fptarea'] = np.random.uniform(\
1924
+ np.min(fpt_sh),np.max(fpt_sh), size=(nsh[i],1)).reshape(nsh[i],)
1925
+ # Generate number of storeys
1926
+ schhsp_df.loc[range_sh,'nstoreys'] =randint(np.min(nstorey_sh),\
1927
+ np.max(nstorey_sh)+1,size=(nsh[i],1)).reshape(nsh[i],)
1928
+ # Generate code compliance
1929
+ cc_number_sh = multinomial(nsh[i],codelevel_sh,size=1)
1930
+ schhsp_df.loc[range_sh,'codelevel'] =\
1931
+ dist2vector(code_level, cc_number_sh,nsh[i],'shuffle')
1932
+ # Generate LRS
1933
+ lrs_number_sh = multinomial(nsh[i],lrs_sh,size=1)
1934
+ schhsp_df.loc[range_sh,'lrstype'] =\
1935
+ dist2vector(lrs_types,lrs_number_sh,nsh[i],'shuffle')
1936
+ schhsp_df.loc[range_sh,'occbld']= occbld_label_sh[i]
1937
+
1938
+ # Assign special facility label
1939
+ schhsp_df.loc[range_sh,'specialfac'] = specialfac[i]
1940
+
1941
+ # Assign number of households, Residents,
1942
+ range_all_sh = range(0,len(schhsp_df))
1943
+ schhsp_df.loc[range_all_sh,'nhouse'] = 0
1944
+ schhsp_df.loc[range_all_sh,'residents'] = 0
1945
+
1946
+ print(time.time() - tic)
1947
+ tic = time.time()
1948
+ print('24 ------',end=' ')
1949
+ #%% Assign zoneIds for Industrial and Commercial buildings
1950
+
1951
+ # The number of industrial and commercial buildings are estimated using the
1952
+ # following 2 methods:
1953
+ # Method 1: Assumption of number of industrial or commercial building per
1954
+ # 1000 individuals. (Done in steps 17,18)
1955
+ # Method 2: Table 9 specifies what the occupancy type distribution should be
1956
+ # in different land use types. This gives a different estimate of the
1957
+ # number of the buildings as compared to Method 1. (Done here)
1958
+ # To make these two Methods compatible, the value from Method 1 is treated as
1959
+ # the actual value of the buildings, and Method 2 is used to ensure that
1960
+ # these buildings are distributed in such a way that they follow Table 9.
1961
+ #
1962
+ # The following method of assigning the ZoneIDs treats the mixed used zones
1963
+ # (residential, residential+commercial) and purely industrial or commercial
1964
+ # zones as 2 separate cases.
1965
+ #
1966
+ # For each of the following 2 cases, we need to first find the number of
1967
+ # industrial and commercial buildings in each zone
1968
+
1969
+ # Case 1: For industrial/commercial buildings in residential areas_____________
1970
+ for i in landuse_res_df.index:
1971
+ #Occupancy type distribution for a zone
1972
+ otd = t9[lutidx[landuse_res_df.loc[i,'luf']]]
1973
+ if otd[1]==0 and otd[2]==0:
1974
+ # If neither industrial nor commercial buildings exist
1975
+ landuse_res_df.loc[i,'ind_weightage'] = 0
1976
+ landuse_res_df.loc[i,'com_weightage'] = 0
1977
+ continue
1978
+ # Number of residential + rescom building
1979
+ Nrc = landuse_res_df.loc[i, 'No_of_res_buildings']
1980
+
1981
+ # Tb = total possible number of buildings in a zone (all occupancy types)
1982
+ # This is used as weightage factor to distribute the buildings
1983
+ # according to Method 2.
1984
+ if otd[0] == 0 and otd[3]==0:
1985
+ Tb = Nrc # If neither residential nor res+com exist
1986
+ print('Warning: If population exists, but neither residential nor '\
1987
+ 'residential+commercial buildings are allowed, there is '\
1988
+ 'inconsistency between population and current row in table 9.'\
1989
+ 'Therefore, it is assumed that total number of buildings in '\
1990
+ 'zoneid', landuse_res_df.loc[i,'zoneid'],\
1991
+ '= no. of residential buildings in this zone.')
1992
+ print('Also, consider allowing residential and/or res+com building '\
1993
+ 'to this zone in Table 9, if it is assigned population.\n')
1994
+ else:
1995
+ Tb = Nrc/(otd[0]+otd[3]) # If either residential or res+com exist
1996
+
1997
+ #Calculate the number of industrial buildings using Table 9
1998
+ if otd[1]>0:
1999
+ landuse_res_df.loc[i,'ind_weightage'] = ceil(Tb * otd[1])
2000
+ #landuse_res_df.loc[i,'no_of_ind_buildings'] = ceil(Tb * otd[1])
2001
+ else:
2002
+ # landuse_res_df.loc[i,'no_of_ind_buildings'] = 0
2003
+ landuse_res_df.loc[i,'ind_weightage'] = 0
2004
+
2005
+ #Calculate the number of commercial buildings using Table 9
2006
+ if otd[2]>0:
2007
+ landuse_res_df.loc[i,'com_weightage'] = ceil(Tb * otd[2])
2008
+ #landuse_res_df.loc[i,'no_of_com_buildings'] = ceil(Tb * otd[2])
2009
+ else:
2010
+ landuse_res_df.loc[i,'com_weightage'] = 0
2011
+ #landuse_res_df.loc[i,'no_of_com_buildings'] = 0
2012
+
2013
+ print(time.time() - tic)
2014
+ tic = time.time()
2015
+ print('25 ------',end=' ')
2016
+ # If number of buildings (industrial/commercial) estimated from Method 2(in the
2017
+ # above steps of Case 1) exceeds the number of buildings estimated from
2018
+ # Method 1, treat the value from Method 1 as the upper limit.
2019
+ # Then, using the number of buildings from Method 2 as weightage factor,
2020
+ # distribute the number of buildings from Method 1 proportionally to
2021
+ # all the mixed use zones. This situation arises if the number of
2022
+ # industrial/commercial buildings per 1000 people is low.
2023
+ #
2024
+ # Otherwise, if the number of industrial/commercial buildings estimated from
2025
+ # Method 1 is larger than that estimated from Method 2, it is assumed that the
2026
+ # number of buildings is large enough not to fit into the mixed use zones
2027
+ # being considered under Case 1, and the additional buildings not assigned into
2028
+ # mixed use zones is assigned under case 2 in the following section.
2029
+ #
2030
+ # This method requires the area of industrial/commercial buildings in the
2031
+ # mixed use zones to be checked separately to see if they fit into these zones.
2032
+
2033
+ com_wt = landuse_res_df['com_weightage'].copy()
2034
+ if com_wt.sum() > ncom:
2035
+ landuse_res_df['no_of_com_buildings'] = np.floor(ncom*com_wt/com_wt.sum())
2036
+ else:
2037
+ landuse_res_df['no_of_com_buildings'] = com_wt
2038
+
2039
+ ind_wt = landuse_res_df['ind_weightage'].copy()
2040
+ if ind_wt.sum() > nind:
2041
+ landuse_res_df['no_of_ind_buildings'] = np.floor(nind*ind_wt/ind_wt.sum())
2042
+ else:
2043
+ landuse_res_df['no_of_ind_buildings'] = ind_wt
2044
+
2045
+
2046
+ landuse_res_df['no_of_ind_buildings'] =\
2047
+ landuse_res_df['no_of_ind_buildings'].astype('int')
2048
+ landuse_res_df['no_of_com_buildings'] =\
2049
+ landuse_res_df['no_of_com_buildings'].astype('int')
2050
+
2051
+ # Number and area of commercial buildings to be assigned
2052
+ nCom_asgn = landuse_res_df['no_of_com_buildings'].sum()
2053
+ nCom_asgn_area = com_df.loc[range(0, nCom_asgn),'fptarea'].sum()
2054
+ # Number and area of industrial buildings to be assigned
2055
+ nInd_asgn = landuse_res_df['no_of_ind_buildings'].sum()
2056
+ nInd_asgn_area = ind_df.loc[range(0,nInd_asgn),'fptarea'].sum()
2057
+
2058
+
2059
+ # Assign zoneid to industrial buildings (if any) in residential areas
2060
+ zoneid_r_i = dist2vector(list(landuse_res_df['zoneid']),\
2061
+ list(landuse_res_df['no_of_ind_buildings']),nInd_asgn,'shuffle')
2062
+ ind_df.loc[range(0,nInd_asgn),'zoneid'] = list(map(int,zoneid_r_i))
2063
+
2064
+ # Assign zoneid to commercial buildings (if any) in residential areas
2065
+ zoneid_r_c = dist2vector(list(landuse_res_df['zoneid']),\
2066
+ list(landuse_res_df['no_of_com_buildings']),nCom_asgn,'shuffle')
2067
+ com_df.loc[range(0,nCom_asgn),'zoneid'] = list(map(int,zoneid_r_c))
2068
+
2069
+
2070
+ # Back-calculated number of commercial buildings per 1000 people
2071
+ #nCom_asgn/(len(individual_df)/1000)
2072
+
2073
+ # Case 2 For industrial/commercial buildings in non-residential areas__________
2074
+
2075
+ # Number of industrial buildings that have not been assigned
2076
+ nInd_tba = int(len(ind_df) - nInd_asgn)
2077
+ # Number of commercial buildings that have not been assigned
2078
+ nCom_tba = int(len(com_df) - nCom_asgn)
2079
+
2080
+ print(time.time() - tic)
2081
+ tic = time.time()
2082
+ print('26 ------',end=' ')
2083
+ # Before assigning zones to buildings, find out the area available for buildings
2084
+ # in each zone. Since no population is assigned to industrial and commercial
2085
+ # buildings, the number of buildings in a zone is controlled solely by area.
2086
+ for i in landuse_ic_df.index:
2087
+ #Occupancy type distribution for a zone
2088
+ try:
2089
+ otd = t9[lutidx[landuse_ic_df.loc[i,'luf']]]
2090
+ except KeyError:
2091
+ continue
2092
+
2093
+ if otd[1]>0:
2094
+ landuse_ic_df.loc[i,'areaavailableforind']=\
2095
+ AC_ind/100*landuse_ic_df.loc[i,'area']
2096
+ else:
2097
+ landuse_ic_df.loc[i,'areaavailableforind']=0
2098
+
2099
+ if otd[2]>0:
2100
+ landuse_ic_df.loc[i,'areaavailableforcom']=\
2101
+ AC_com/100*landuse_ic_df.loc[i,'area']
2102
+ else:
2103
+ landuse_ic_df.loc[i,'areaavailableforcom']=0
2104
+
2105
+ print(time.time() - tic)
2106
+ tic = time.time()
2107
+ print('27 ------',end=' ')
2108
+ # Check how many of the generated com/ind buildings fit into the available area
2109
+ ind_fptarea_cs = list(np.cumsum(ind_df['fptarea']))
2110
+ com_fptarea_cs = list(np.cumsum(com_df['fptarea']))
2111
+
2112
+ # Total areas available for commercial and industrial buildings in all zones
2113
+ At_c= landuse_ic_df['areaavailableforcom'].sum()
2114
+ At_i = landuse_ic_df['areaavailableforind'].sum()
2115
+ licidx = landuse_ic_df.index
2116
+
2117
+ #Assign number of industrial buildings to industrial zones____
2118
+ # Unassigned area (c or i) = Total footprint (c or i) - area to be assigned(c or i)
2119
+ unassigned_ind_area = ind_fptarea_cs[-1]-nInd_asgn_area # Total - assigned
2120
+ # if unassigned_ind_area <= At_i:
2121
+ # landuse_ic_df.loc[licidx,'no_of_ind_buildings'] =\
2122
+ # landuse_ic_df['areaavailableforind']/At_i*nInd_tba
2123
+ # landuse_ic_df['no_of_ind_buildings'] =\
2124
+ # landuse_ic_df['no_of_ind_buildings'].fillna(0)
2125
+ # landuse_ic_df['no_of_ind_buildings']=\
2126
+ # landuse_ic_df['no_of_ind_buildings'].astype('int')
2127
+ # else:
2128
+ # print('Required industrial buildings do not fit into available land area.')
2129
+ # sys.exit(1)
2130
+
2131
+ if unassigned_ind_area > At_i:
2132
+ # Need to truncate excess industrial buildings
2133
+ print('WARNING: Required industrial buildings do not fit into available '\
2134
+ 'land area. So, excess industrial buildings have been removed.')
2135
+ ind_df_unassignedArea = np.cumsum(ind_df.loc[range(nInd_asgn,len(ind_df)),\
2136
+ 'fptarea'])
2137
+ ind_df_UAmask = ind_df_unassignedArea < At_i
2138
+ nInd_tba = sum(ind_df_UAmask)
2139
+
2140
+ landuse_ic_df.loc[licidx,'no_of_ind_buildings'] =\
2141
+ landuse_ic_df['areaavailableforind']/At_i*nInd_tba
2142
+ landuse_ic_df['no_of_ind_buildings'] =\
2143
+ landuse_ic_df['no_of_ind_buildings'].fillna(0)
2144
+ landuse_ic_df['no_of_ind_buildings']=\
2145
+ landuse_ic_df['no_of_ind_buildings'].astype('int')
2146
+
2147
+ #Assign number of commercial buildings to commercial zones____
2148
+ unassigned_com_area = com_fptarea_cs[-1]-nCom_asgn_area
2149
+
2150
+ if unassigned_com_area > At_c:
2151
+ # Need to truncate excess commercial buildings
2152
+ print('WARNING: Required commercial buildings do not fit into available '\
2153
+ 'land area. So, excess commerical buildings have been removed.')
2154
+ com_df_unassignedArea = np.cumsum(com_df.loc[range(nCom_asgn,len(com_df)),\
2155
+ 'fptarea'])
2156
+ com_df_UAmask = com_df_unassignedArea < At_c
2157
+ nCom_tba = sum(com_df_UAmask)
2158
+
2159
+ landuse_ic_df.loc[licidx,'no_of_com_buildings'] =\
2160
+ landuse_ic_df['areaavailableforcom']/At_c*nCom_tba
2161
+ landuse_ic_df['no_of_com_buildings'] =\
2162
+ landuse_ic_df['no_of_com_buildings'].fillna(0)
2163
+ landuse_ic_df['no_of_com_buildings']=\
2164
+ landuse_ic_df['no_of_com_buildings'].astype('int')
2165
+
2166
+
2167
+ print(time.time() - tic)
2168
+ tic = time.time()
2169
+ print('28 ------',end=' ')
2170
+ # Begin assigning buildings to zones
2171
+ # Assign zoneid to industrial buildings (if any) in industrial areas
2172
+ limit_zoneid_ic_i = landuse_ic_df['no_of_ind_buildings'].sum()
2173
+ zoneid_ic_i = dist2vector(list(landuse_ic_df['zoneid']),\
2174
+ list(landuse_ic_df['no_of_ind_buildings']),\
2175
+ limit_zoneid_ic_i,'shuffle')
2176
+ ind_df.loc[range(nInd_asgn,nInd_asgn+limit_zoneid_ic_i),'zoneid']=list(map(int,zoneid_ic_i))
2177
+ ind_df = ind_df[ind_df['zoneid'].notna()] #Remove unassigned buildings
2178
+
2179
+ # Assign zoneid to commercial buildings (if any) in commercial areas
2180
+ limit_zoneid_ic_c = landuse_ic_df['no_of_com_buildings'].sum()
2181
+ zoneid_ic_c = dist2vector(list(landuse_ic_df['zoneid']),\
2182
+ list(landuse_ic_df['no_of_com_buildings']),\
2183
+ limit_zoneid_ic_c,'shuffle')
2184
+ com_df.loc[range(nCom_asgn,nCom_asgn+limit_zoneid_ic_c),'zoneid']=list(map(int,zoneid_ic_c))
2185
+ com_df = com_df[com_df['zoneid'].notna()] #Remove unassigned buildings
2186
+
2187
+
2188
+ print(time.time() - tic)
2189
+ tic = time.time()
2190
+ print('29 ------',end=' ')
2191
+ #%% Find populations in each zones and assign it back to landuse layer
2192
+ for i in landuse.index:
2193
+ zidmask = resbld_df['zoneid'] == landuse.loc[i,'zoneid']
2194
+ if sum(zidmask) == 0: # if no population has been added to the zone
2195
+ landuse.loc[i,'populationAdded'] = 0
2196
+ continue
2197
+ else: # if new population has been added to the zone
2198
+ zone_nInd = resbld_df['residents'][zidmask]
2199
+ landuse.loc[i,'populationAdded'] = int(zone_nInd.sum())
2200
+ # population=Existing population, populationAdded=Projected future population
2201
+ # populationFinal = existing + future projected population
2202
+ landuse['populationfinal'] = landuse['population']+landuse['populationAdded']
2203
+ landuse['populationfinal'] = landuse['populationfinal'].astype('int')
2204
+
2205
+ #%% Assign zoneIds for schools and hospitals
2206
+ # Assign schools and hospitals to zones starting from the highest
2207
+ # population until the number of schools and hospitals are reached
2208
+ landuse_sorted = landuse.sort_values(by=['populationfinal'],\
2209
+ ascending=False).copy()
2210
+ landuse_sorted.reset_index(inplace=True, drop=True)
2211
+ #Remove zones without population
2212
+ no_popl_zones = landuse_sorted['populationfinal']==0
2213
+ landuse_sorted =landuse_sorted.drop(index=landuse_sorted.index[no_popl_zones])
2214
+
2215
+ sch_df = schhsp_df[schhsp_df['occbld']=='Edu'].copy() #Educational institutions
2216
+ hsp_df = schhsp_df[schhsp_df['occbld']=='Hea'].copy() #Health institutions
2217
+
2218
+ sch_df.reset_index(drop=True,inplace=True)
2219
+ hsp_df.reset_index(drop=True,inplace=True)
2220
+
2221
+ # Assign zoneids for schools/educational institutions
2222
+ sch_range = range(0,len(sch_df))
2223
+ if len(sch_df) <= len(landuse_sorted):
2224
+ sch_df.loc[sch_range, 'zoneid'] = landuse_sorted.loc[sch_range,'zoneid']
2225
+ else:
2226
+ iterations_s = ceil(len(sch_df)/len(landuse_sorted))
2227
+ a1_s= list(repeat(landuse_sorted['zoneid'].tolist(),iterations_s))
2228
+ a_s = list(chain(*a1_s))
2229
+ sch_df.loc[sch_range, 'zoneid'] = a_s[0:len(sch_df)]
2230
+
2231
+ # Assign zoneids for hospitals/health institutions
2232
+ hsp_range= range(0,len(hsp_df))
2233
+ if len(hsp_df) <= len(landuse_sorted):
2234
+ hsp_range = range(0,len(hsp_df))
2235
+ hsp_df.loc[hsp_range, 'zoneid'] = landuse_sorted.loc[hsp_range,'zoneid']
2236
+ else:
2237
+ iterations_h = ceil(len(hsp_df)/len(landuse_sorted))
2238
+ a1_h= list(repeat(landuse_sorted['zoneid'].tolist(),iterations_h))
2239
+ a_h = list(chain(*a1_h))
2240
+ hsp_df.loc[hsp_range, 'zoneid'] = a_h[0:len(hsp_df)]
2241
+
2242
+
2243
+ print(time.time() - tic)
2244
+ tic = time.time()
2245
+ print('30 ------',end=' ')
2246
+ #%% Concatenate the residential, industrial/commercial and special facilities
2247
+ # dataframes to obtain the complete building dataframe
2248
+ building_df=pd.concat([resbld_df,ind_df,com_df,sch_df,\
2249
+ hsp_df]).reset_index(drop=True)
2250
+ #building_df=pd.concat([resbld_df,sch_df, hsp_df]).reset_index(drop=True)
2251
+ building_df['nstoreys'] = building_df['nstoreys'].astype(int)
2252
+
2253
+ #Assign exposure string
2254
+ building_df['expstr'] = building_df['lrstype'].astype(str)+'+'+\
2255
+ building_df['codelevel'].astype(str)+'+'+\
2256
+ building_df['nstoreys'].astype(str)+'s'+'+'+\
2257
+ building_df['occbld'].astype(str)
2258
+ # Assign building ids
2259
+ # lenbdf = len(building_df)
2260
+ # building_df.loc[range(0,lenbdf),'bldid'] = list(range(1,lenbdf+1))
2261
+ building_df.loc[range(len(resbld_df),len(building_df)),'bldid'] =\
2262
+ list(range(len(resbld_df)+1,len(building_df)+1))
2263
+ building_df['bldid'] = building_df['bldid'].astype('int')
2264
+
2265
+ #%% Step 21 Employment status of the individuals
2266
+ # Assumption 9: Only 20-65 years old individuals can work
2267
+ # Extract Tables 12 and 13
2268
+ t12 = np.array(tables['t12'][0][0],dtype=float) #[Female, Male]
2269
+
2270
+ t13_f = np.array(tables['t13'][0][0],dtype=float) #Female
2271
+ t13_m = np.array(tables['t13'][0][1],dtype=float) #Male
2272
+ t13 = [t13_f,t13_m]
2273
+
2274
+ # Identify individuals who can work
2275
+ working_females_mask = (individual_df['gender']==1) & \
2276
+ (individual_df['age']>=5) & (individual_df['age']<=9)
2277
+ working_males_mask = (individual_df['gender']==2) & \
2278
+ (individual_df['age']>=5) & (individual_df['age']<=9)
2279
+ potential_female_workers = individual_df.index[working_females_mask]
2280
+ potential_male_workers = individual_df.index[working_males_mask]
2281
+
2282
+ # But according to Table 12, not all individuals who can work are employed,
2283
+ # so the labour force is less than 100%
2284
+ labourforce_female = sample(list(potential_female_workers),\
2285
+ int(t12[0]*len(potential_female_workers)))
2286
+ labourforce_male = sample(list(potential_male_workers),\
2287
+ int(t12[1]*len(potential_male_workers)))
2288
+ # labourForce = 1 indicates that an individual is a part of labour force, but
2289
+ # not necessarily employed.
2290
+ individual_df.loc[labourforce_female,'labourForce'] =1
2291
+ individual_df.loc[labourforce_male,'labourForce'] =1
2292
+
2293
+ print(time.time() - tic)
2294
+ tic = time.time()
2295
+ print('31 ------',end=' ')
2296
+ # According to Table 13, the employment probability for labourforce differs
2297
+ # based on educational attainment status
2298
+ for epd_array in t13: #Employment probability distribution for female and male
2299
+ count = 0
2300
+ ind_employed_idx =[]
2301
+ for epd in epd_array: # EPD for various educational attainment status
2302
+ # Individuals in labour force that belong to current EPD
2303
+ eamask = (individual_df['eduattstat'] == education_value[count]) & \
2304
+ (individual_df['labourForce']==1)
2305
+ nInd_in_epd = sum(eamask)
2306
+ if nInd_in_epd == 0:
2307
+ continue
2308
+
2309
+ nInd_employed = int(epd*nInd_in_epd)
2310
+ if nInd_employed == 0:
2311
+ continue
2312
+ ind_ea_labourforce = list(individual_df.index[eamask])
2313
+ ind_employed_idx = sample(ind_ea_labourforce, nInd_employed)
2314
+ individual_df.loc[ind_employed_idx,'employed'] = 1
2315
+
2316
+ #Check ouput epd (for debugging)
2317
+ #print(epd,':',len(ind_employed_idx)/len(ind_ea_labourforce))
2318
+
2319
+ count+=1
2320
+
2321
+ print(time.time() - tic)
2322
+ tic = time.time()
2323
+ print('32 ------',end=' ')
2324
+ #%% Step 22 Assign IndividualFacID
2325
+ # bld_ID of the building that the individual regularly visits
2326
+ # (can be workplace, school, etc.)
2327
+ # Assumption 13: Each individual is working within the total study area extent.
2328
+ # Assumption 17: Each individual (within schooling age limits) goes to
2329
+ # school within the total study area extent.
2330
+
2331
+ # indivfacid_1 denotes bldid of the schools
2332
+ # students (schoolenrollment=1) go to, whereas, indivfacid_2 denotes bldid of
2333
+ # com, ind and rescom buildings where working people go to (workplace bldid).
2334
+
2335
+ # Assign working places to employed people in indivfacid_2_________________
2336
+ # Working places are defined as occupancy types 'Ind','Com' and 'ResCom'
2337
+
2338
+ workplacemask=(building_df['occbld']=='Ind') | (building_df['occbld']=='Com')\
2339
+ | (building_df['occbld'] == 'ResCom')
2340
+ workplaceidx = building_df.index[workplacemask]
2341
+ workplace_bldid = building_df['bldid'][workplaceidx].tolist()
2342
+
2343
+ employedmask = individual_df['employed'] ==1
2344
+ employedidx = individual_df.index[employedmask]
2345
+ if len(employedidx)>len(workplaceidx):
2346
+ repetition = ceil(len(employedidx)/len(workplaceidx))
2347
+ workplace_sample_temp = list(repeat(workplace_bldid,repetition))
2348
+ workplace_sample = list(chain(*workplace_sample_temp))
2349
+ else:
2350
+ workplace_sample = workplace_bldid
2351
+ random.shuffle(workplace_sample)
2352
+
2353
+ print(time.time() - tic)
2354
+ tic = time.time()
2355
+ print('33 ------',end=' ')
2356
+
2357
+ individual_df.loc[employedidx,'indivfacid_2'] = \
2358
+ workplace_sample[0:sum(employedmask)]
2359
+
2360
+ individual_df.loc[employedidx,'indivfacid'] = \
2361
+ workplace_sample[0:sum(employedmask)]
2362
+
2363
+ # Assign school bldids to enrolled students in indivfacid_1________________
2364
+ schoolmask = building_df['occbld']=='Edu'
2365
+ schoolidx = building_df.index[schoolmask]
2366
+ school_bldid = building_df['bldid'][schoolidx].tolist()
2367
+
2368
+ studentmask = individual_df['schoolenrollment'] ==1
2369
+ studentidx = individual_df.index[studentmask]
2370
+ if len(studentidx)>len(schoolidx):
2371
+ repetition = ceil(len(studentidx)/len(schoolidx))
2372
+ school_sample_temp = list(repeat(school_bldid,repetition))
2373
+ school_sample = list(chain(*school_sample_temp))
2374
+ else:
2375
+ school_sample = school_bldid
2376
+ random.shuffle(school_sample)
2377
+
2378
+ individual_df.loc[studentidx,'indivfacid_1'] = \
2379
+ school_sample[0:sum(studentmask)]
2380
+ individual_df.loc[studentidx,'indivfacid'] = \
2381
+ school_sample[0:sum(studentmask)]
2382
+
2383
+ # Replace missing values with -1 instead of NaN
2384
+ individual_df['indivfacid_1'] = individual_df['indivfacid_1'].fillna(-1)
2385
+ individual_df['indivfacid_2'] = individual_df['indivfacid_2'].fillna(-1)
2386
+ individual_df['indivfacid'] = individual_df['indivfacid'].fillna(-1)
2387
+
2388
+ print(time.time() - tic)
2389
+ tic = time.time()
2390
+ print('34 ------',end=' ')
2391
+ #%% Step 23 Assign community facility ID (commfacid) to household layer
2392
+ # commfacid denotes the bldid of the hospital the households usually go to.
2393
+
2394
+ # In this case, randomly assign bldid of hospitals to the households, but in
2395
+ # next version, households must be assigned hospitals closest to their location
2396
+ hospitalmask = building_df['occbld']=='Hea'
2397
+ hospitalidx = building_df.index[hospitalmask]
2398
+ hospital_bldid = building_df['bldid'][hospitalidx].tolist()
2399
+ repetition = ceil(len(household_df)/len(hospitalidx))
2400
+ hospital_sample_temp = list(repeat(hospital_bldid,repetition))
2401
+ hospital_sample = list(chain(*hospital_sample_temp))
2402
+ random.shuffle(hospital_sample)
2403
+
2404
+ household_df.loc[household_df.index,'commfacid'] =\
2405
+ hospital_sample[0:len(household_df)]
2406
+
2407
+ print(time.time() - tic)
2408
+ tic = time.time()
2409
+ print('34.5 ------',end=' ')
2410
+ #%% Step 24 Assign repvalue
2411
+ # Assumption 12: Unit price for replacement wrt occupation type and
2412
+ # special facility status of the building
2413
+
2414
+ # Assign unit price
2415
+ for occtype in Unit_price:
2416
+ occmask = building_df['occbld'] == occtype
2417
+ occidx = building_df.index[occmask]
2418
+ building_df.loc[occidx, 'unit_price'] = Unit_price[occtype]
2419
+
2420
+ building_df['repvalue'] = building_df['fptarea'] *\
2421
+ building_df['nstoreys']* building_df['unit_price']
2422
+
2423
+
2424
+ print(time.time() - tic)
2425
+ tic = time.time()
2426
+ print('35 ------',end=' ')
2427
+ #%% Remove unnecessary columns and save the results
2428
+ # building_df = building_df.drop(columns=\
2429
+ # ['lut_number','lrstype','codelevel','nstoreys','occbld','unit_price'])
2430
+ building_df = building_df.drop(columns=['lut_number'])
2431
+ household_df = household_df.drop(columns=\
2432
+ ['income_numb','zonetype','zoneid','approxFootprint'])
2433
+ individual_df = individual_df.drop(columns=\
2434
+ ['schoolenrollment','labourForce','employed'])
2435
+
2436
+ # Rename indices to convert all header names to lowercase
2437
+ building_df.rename(columns={'zoneid':'zoneid','bldID':'bldid','expStr':'expstr',\
2438
+ 'specialFac':'specialfac','repValue':'repvalue','nHouse':'nhouse'},\
2439
+ inplace=True)
2440
+ household_df.rename(columns={'bldID':'bldid','hhID':'hhid','nIND':'nind',\
2441
+ 'CommFacID':'commfacid'}, inplace=True)
2442
+ individual_df.rename(columns={'hhID':'hhid','indivID':'individ',\
2443
+ 'eduAttStat':'eduattstat','indivFacID_1':'indivfacid_1',\
2444
+ 'indivFacID_2':'indivfacid_2'}, inplace=True)
2445
+
2446
+
2447
+ #%% Generate building centroid coordinates
2448
+
2449
+ histo = building_df.groupby(['zoneid'])['zoneid'].count()
2450
+ max_val = building_df.groupby(['zoneid'])['fptarea'].max()
2451
+ landuse_layer = landuse_shp
2452
+ building_layer = building_df
2453
+ final_list = []
2454
+ skipped_buildings_count = 0
2455
+ for i in range(len(histo)):
2456
+ df = landuse_layer[landuse_layer['zoneid'] == histo.index[i]].copy()
2457
+ bui_indx = building_layer['zoneid'] == histo.index[i]
2458
+ bui_attr = building_layer.loc[bui_indx].copy()
2459
+
2460
+ rot_a = random.randint(10, 40)
2461
+ rot_a_rad = rot_a*math.pi/180
2462
+
2463
+ separation_val = math.sqrt(max_val.values[i])/abs(math.cos(rot_a_rad))
2464
+ separation_val = round(separation_val, 2)
2465
+ boundary_approach = (math.sqrt(max_val.values[i])/2)*math.sqrt(2)
2466
+ boundary_approach = round(boundary_approach, 2)
2467
+
2468
+ df2 = df.buffer(-boundary_approach)
2469
+ df2 = gpd.GeoDataFrame(gpd.GeoSeries(df2))
2470
+ df2 = df2.rename(columns={0:'geometry'}).set_geometry('geometry')
2471
+
2472
+ #Continue the loop if buffered dataframe df2 is empty -PR
2473
+ if df2.is_empty[df2.index[0]]:
2474
+ print('Dataframe index ', df.index[0], 'is empty after buffering.\n')
2475
+ skipped_buildings_count +=\
2476
+ len(building_df.loc[building_df['zoneid'] == df.index[0],'zoneid'])
2477
+ continue
2478
+
2479
+ xmin, ymin, xmax, ymax = df2.total_bounds
2480
+ xcoords = [ii for ii in np.arange(xmin, xmax, separation_val)]
2481
+ ycoords = [ii for ii in np.arange(ymin, ymax, separation_val)]
2482
+
2483
+ pointcoords = np.array(np.meshgrid(xcoords, ycoords)).T.reshape(-1, 2)
2484
+ points = gpd.points_from_xy(x=pointcoords[:,0], y=pointcoords[:,1])
2485
+ grid = gpd.GeoSeries(points, crs=df.crs)
2486
+ grid.name = 'geometry'
2487
+
2488
+ gridinside = gpd.sjoin(gpd.GeoDataFrame(grid), df2[['geometry']], how="inner")
2489
+
2490
+ def buff(row):
2491
+ return row.geometry.buffer(row.buff_val, cap_style = 3)
2492
+
2493
+ if len(gridinside) >= histo.values[i]:
2494
+ gridinside = gridinside.sample(min(len(gridinside), histo.values[i]))
2495
+ gridinside['xcoord'] = gridinside.geometry.x
2496
+ gridinside['ycoord'] = gridinside.geometry.y
2497
+
2498
+ buffer_val = np.sqrt(list(bui_attr.fptarea))/2
2499
+ buffered = gridinside.copy()
2500
+ buffered['buff_val'] = buffer_val[0:len(gridinside)]
2501
+
2502
+ if buffered.shape[0]==0: #PR
2503
+ print('Dataframe index ', df.index[0], 'is empty after buffering.\n')
2504
+ skipped_buildings_count +=\
2505
+ len(building_df.loc[building_df['zoneid'] == df.index[0],'zoneid'])
2506
+ continue
2507
+
2508
+ buffered['geometry'] = buffered.apply(buff, axis=1)
2509
+ polyinside = buffered.rotate(rot_a, origin='centroid')
2510
+
2511
+ polyinside2 = gpd.GeoDataFrame(gpd.GeoSeries(polyinside))
2512
+ polyinside2 = polyinside2.rename(columns={0:'geometry'}).set_geometry('geometry')
2513
+ polyinside2['fid'] = list(range(1,len(polyinside2)+1))
2514
+
2515
+ bui_attr['fid'] = list(range(1,len(bui_attr)+1))
2516
+ bui_joined = polyinside2.merge(bui_attr, on='fid')
2517
+ bui_joined = bui_joined.drop(columns=['fid'])
2518
+
2519
+ bui_joined['xcoord'] = list(round(gridinside.geometry.x, 3))
2520
+ bui_joined['ycoord'] = list(round(gridinside.geometry.y, 3))
2521
+
2522
+ elif len(gridinside) < histo.values[i]:
2523
+ separation_val = math.sqrt(max_val.values[i])
2524
+ separation_val = round(separation_val, 2)
2525
+ boundary_approach = (math.sqrt(max_val.values[i])/2)*math.sqrt(2)
2526
+ boundary_approach = round(boundary_approach, 2)
2527
+
2528
+ df2 = df.buffer(-boundary_approach, 200)
2529
+ df2 = gpd.GeoDataFrame(gpd.GeoSeries(df2))
2530
+ df2 = df2.rename(columns={0:'geometry'}).set_geometry('geometry')
2531
+
2532
+ xmin, ymin, xmax, ymax = df2.total_bounds
2533
+ xcoords = [ii for ii in np.arange(xmin, xmax, separation_val)]
2534
+ ycoords = [ii for ii in np.arange(ymin, ymax, separation_val)]
2535
+
2536
+ pointcoords = np.array(np.meshgrid(xcoords, ycoords)).T.reshape(-1, 2)
2537
+ points = gpd.points_from_xy(x=pointcoords[:,0], y=pointcoords[:,1])
2538
+ grid = gpd.GeoSeries(points, crs=df.crs)
2539
+ grid.name = 'geometry'
2540
+
2541
+ gridinside = gpd.sjoin(gpd.GeoDataFrame(grid), df2[['geometry']], how="inner")
2542
+
2543
+ gridinside = gridinside.sample(min(len(gridinside), histo.values[i]))
2544
+ gridinside['xcoord'] = gridinside.geometry.x
2545
+ gridinside['ycoord'] = gridinside.geometry.y
2546
+
2547
+ buffer_val = np.sqrt(list(bui_attr.fptarea))/2
2548
+ buffered = gridinside.copy()
2549
+ buffered['buff_val'] = buffer_val[0:len(gridinside)]
2550
+
2551
+ if buffered.shape[0]==0: #PR
2552
+ print('Dataframe index ', df.index[0], 'is empty after buffering.\n')
2553
+ skipped_buildings_count +=\
2554
+ len(building_df.loc[building_df['zoneid'] == df.index[0],'zoneid'])
2555
+ continue
2556
+
2557
+ buffered['geometry'] = buffered.apply(buff, axis=1)
2558
+ polyinside = buffered.rotate(0, origin='centroid')
2559
+
2560
+ polyinside2 = gpd.GeoDataFrame(gpd.GeoSeries(polyinside))
2561
+ polyinside2 = polyinside2.rename(columns={0:'geometry'}).set_geometry('geometry')
2562
+ polyinside2['fid'] = list(range(1,len(polyinside2)+1))
2563
+
2564
+ bui_attr['fid'] = list(range(1,len(bui_attr)+1))
2565
+ bui_joined = polyinside2.merge(bui_attr, on='fid')
2566
+ bui_joined = bui_joined.drop(columns=['fid'])
2567
+
2568
+ bui_joined['xcoord'] = list(round(gridinside.geometry.x, 3))
2569
+ bui_joined['ycoord'] = list(round(gridinside.geometry.y, 3))
2570
+
2571
+ final_list.append(bui_joined)
2572
+
2573
+ final = pd.concat(final_list)
2574
+ print(time.time() - tic)
2575
+ tic = time.time()
2576
+ print('36 ------',end=' ')
2577
+
2578
+ #print('\nTotal number of buildings generated:', len(building_layer))
2579
+ #print('Total number of coordinate pairs generated:', len(final), '\n')
2580
+
2581
+ # Remove fields corresponding to unassigned buildings from all layers
2582
+ # The footprint generation part of this program may not be able to assign
2583
+ # building footprint in some cases such as narrow strips or highly irregular
2584
+ # but small land areas. In this case, households and individuals
2585
+ # corresponding to buildings without footprint coordinates must also be deleted.
2586
+
2587
+ #Original dataframe which contains all generated buildings
2588
+ unique_building_df = set(building_df['bldid'])
2589
+ #Building dataframe that contains only the building with footprints
2590
+ unique_final = set(final['bldid'])
2591
+ # Calculate list of buildings that do not exist in the dataframe with building
2592
+ # footprints
2593
+ missing_buildings = np.array(list(set(unique_building_df).difference(unique_final)))
2594
+
2595
+ # Extract the list of households corresponding to missing buildings
2596
+ hh_missing_idx_list = []
2597
+ for mb in missing_buildings:
2598
+ hh_missing_mask = household_df['bldid'] == mb
2599
+ hh_missing_idx_list.append(household_df.index[hh_missing_mask].tolist())
2600
+
2601
+ # Flatten the list of lists to obtain indices and hhid of missing households
2602
+ hh_missing_idx = [single_value for sublist in hh_missing_idx_list \
2603
+ for single_value in sublist]
2604
+ hh_missing = household_df.loc[hh_missing_idx,'hhid'].tolist()
2605
+
2606
+ # Extract the list of individuals corresponding to missing buildings
2607
+ ind_missing_idx_list =[]
2608
+ for mh in hh_missing:
2609
+ ind_missing_mask =individual_df['hhid'] == mh
2610
+ ind_missing_idx_list.append(individual_df.index[ind_missing_mask].tolist())
2611
+
2612
+ ind_missing_idx = [single_value for sublist in ind_missing_idx_list\
2613
+ for single_value in sublist]
2614
+
2615
+ # Delete households corresponding to missing buildings
2616
+ household_df.drop(labels = hh_missing_idx, axis=0,inplace=True)
2617
+
2618
+ # Delete individuals corresponding to missing buildings
2619
+ individual_df.drop(labels = ind_missing_idx, axis=0, inplace=True)
2620
+
2621
+ final = final.to_crs("EPSG:4326")
2622
+
2623
+ print(time.time() - tic)
2624
+ tic = time.time()
2625
+ print('37 ------',end=' ')
2626
+ print(time.time() - tic)
2627
+ return final, household_df, individual_df
2628
+
tomorrowcities/backend/utils.py CHANGED
@@ -1,9 +1,33 @@
1
- def building_preprocess(df):
 
 
 
 
 
 
 
 
 
2
  df['occupancy'] = df['expstr'].apply(lambda x: x.split('+')[-1]).astype('category')
3
  df['storeys'] = df['expstr'].apply(lambda x: x.split('+')[-2])
4
  df['code_level'] = df['expstr'].apply(lambda x: x.split('+')[-3]).astype('category')
5
  df['material'] = df['expstr'].apply(lambda x: "+".join(x.split('+')[:-3])).astype('category')
 
 
 
 
 
 
 
6
  return df
7
 
8
- def identity_preprocess(df):
9
- return df
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import geopandas as gpd
3
+
4
def inject_columns(df, extra_cols=None):
    """Add constant-valued columns to a (Geo)DataFrame in place.

    Args:
        df: Object to augment. Only ``pandas.DataFrame`` instances are
            modified (this includes ``geopandas.GeoDataFrame``, which is a
            ``DataFrame`` subclass); any other object is returned untouched.
        extra_cols: Mapping of column name -> value to assign to that column.
            ``None`` means "nothing to add".

    Returns:
        The very same ``df`` object that was passed in.
    """
    # Nothing to inject: avoid calling .items() on None.
    if extra_cols is None:
        return df
    # GeoDataFrame derives from DataFrame, so a single check covers both.
    if isinstance(df, pd.DataFrame):
        for col, val in extra_cols.items():
            df[col] = val
    return df
9
+
10
def building_preprocess(df, extra_cols=None):
    """Derive building attribute columns from the ``expstr`` column.

    Each ``expstr`` value is a '+'-separated string whose last three fields
    are, in order, code level, storeys and occupancy; everything before those
    three fields (which may itself contain '+') is the material.

    The frame is modified in place: columns ``occupancy``, ``storeys``,
    ``code_level`` and ``material`` are (re)written.  ``occupancy``,
    ``code_level`` and ``material`` are stored as categoricals.

    Args:
        df: DataFrame with a string column ``expstr``.
        extra_cols: Optional mapping of column name -> value that is passed
            on to ``inject_columns``.  ``None`` (the default) skips injection,
            which keeps the original one-argument call form working.

    Returns:
        The DataFrame with the derived (and any injected) columns.

    Raises:
        IndexError: If an ``expstr`` value has fewer than three
            '+'-separated fields.
    """
    # Split every exposure string once, from the right, so the material part
    # (which may contain '+') stays in one piece at position 0.
    parts = df['expstr'].apply(lambda x: x.rsplit('+', 3))

    df['occupancy'] = parts.apply(lambda p: p[-1]).astype('category')
    df['storeys'] = parts.apply(lambda p: p[-2])
    df['code_level'] = parts.apply(lambda p: p[-3]).astype('category')
    # With exactly three fields p[:-3] is empty and the material is ''.
    df['material'] = parts.apply(lambda p: "+".join(p[:-3])).astype('category')

    if extra_cols is not None:
        df = inject_columns(df, extra_cols)

    return df
19
+
20
def identity_preprocess(df, extra_cols=None):
    """Return ``df`` unchanged apart from optionally injected columns.

    Args:
        df: Layer data to pass through.
        extra_cols: Optional mapping of column name -> value that is passed
            on to ``inject_columns``.  ``None`` (the default) skips injection,
            which keeps the original one-argument call form working.

    Returns:
        ``df`` itself when ``extra_cols`` is ``None``; otherwise whatever
        ``inject_columns(df, extra_cols)`` returns.
    """
    if extra_cols is None:
        return df
    return inject_columns(df, extra_cols)
23
 
24
class ParameterFile:
    """Holds five header-less sheets read from an Excel parameter workbook.

    The sheets at zero-based positions 1 to 5 are loaded (position 0, the
    first sheet, is not read) and exposed as the attributes ``df_nc``,
    ``ipdf``, ``df1``, ``df2`` and ``df3`` respectively.
    """

    def __init__(self, content: bytes):
        """Parse the workbook.

        Args:
            content: Raw bytes of the Excel file.  Anything else
                ``pandas.read_excel`` accepts (path, file-like object) is
                forwarded unchanged.
        """
        # Local import keeps this block self-contained without touching the
        # module's import section.
        import io

        # pandas deprecated passing raw bytes to read_excel; wrap them in a
        # seekable buffer instead.
        if isinstance(content, (bytes, bytearray)):
            source = io.BytesIO(content)
        else:
            source = content

        # One call with a list of sheet positions opens and parses the
        # workbook once and returns a dict keyed by those positions.
        sheets = pd.read_excel(source, sheet_name=[1, 2, 3, 4, 5], header=None)
        self.df_nc = sheets[1]
        self.ipdf = sheets[2]
        self.df1 = sheets[3]
        self.df2 = sheets[4]
        self.df3 = sheets[5]

    def get_sheets(self):
        """Return ``(df_nc, ipdf, df1, df2, df3)`` as a tuple."""
        return (self.df_nc, self.ipdf, self.df1, self.df2, self.df3)
tomorrowcities/pages/engine.py CHANGED
The diff for this file is too large to render. See raw diff