Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| import plotly.express as px | |
| from scipy import stats | |
# Name of the derived column used to color-code crash severity.
_SEVERITY_COL = 'Serious cyclist injury or death'

# Zoom used when the plotted points have no geographic extent (e.g. a
# single crash), so the zoom formula below cannot be evaluated.
# NOTE(review): 12 is a judgment call for a city-level view - adjust to taste.
_FALLBACK_ZOOM = 12


def _drop_coordinate_outliers(df, max_abs_z=4):
    """
    Drop rows whose latitude or longitude is an extreme outlier.

    The latitude filter is applied first and the longitude z-scores
    are then computed on the remaining rows.

    Rows with a missing coordinate are dropped, since they cannot be
    placed on a map. When a z-score is undefined for the other rows
    (single row, or all coordinates identical) nothing is treated as
    an outlier; comparing NaN z-scores with `<=` would otherwise
    discard every row.
    """
    for col in ('DEC_LAT', 'DEC_LONG'):
        if df.empty:
            break
        coords = df[col].to_numpy(dtype=float)
        with np.errstate(invalid='ignore', divide='ignore'):
            z = np.abs(stats.zscore(coords, nan_policy='omit'))
            is_outlier = z > max_abs_z  # NaN compares False -> row is kept
        df = df[~np.isnan(coords) & ~is_outlier]
    return df


def _map_view(df):
    """
    Compute (zoom, center, width_mult) for the map from the lat/lon
    ranges of df. Falls back to safe defaults when the ranges are
    zero or undefined instead of producing inf/NaN values.
    """
    max_lat, min_lat = df.DEC_LAT.max(), df.DEC_LAT.min()
    max_lon, min_lon = df.DEC_LONG.max(), df.DEC_LONG.min()
    lat_span = max_lat - min_lat
    lon_span = max_lon - min_lon

    # 2^(zoom) = 360/(longitude width of 1 tile)
    extent = max(lon_span, lat_span)
    zoom = float(np.log2(360/extent)) if extent > 0 else _FALLBACK_ZOOM

    lat_center = (max_lat+min_lat)/2
    lon_center = (max_lon+min_lon)/2
    if np.isfinite(lat_center) and np.isfinite(lon_center):
        center = {'lat': lat_center, 'lon': lon_center}
    else:
        # No usable coordinates (e.g. empty selection): let plotly decide
        center = None

    # Adjust width so that aspect ratio matches shape of the plotted area
    if lat_span > 0 and lon_span > 0:
        width_mult = lon_span/lat_span
    else:
        width_mult = 1.0
    return zoom, center, width_mult


def plot_map(df,city=None,county=None,animate=True,color_dots=True,animate_by='year',show_fig=True,return_fig=False):
    """
    Displays a plotly.express.scatter_mapbox interactive map
    of crashes in a municipality if specified, or otherwise
    statewide.  Can be animated over time or static.

    Parameters:
    -----------
    df : pd.DataFrame
        dataframe of crash samples
    city or county : tuple or None
        if provided, must be a tuple (code,name)
        - code : str
            the code corresponding to the desired municipality/county
            (see the data dictionary)
        - name : str
            the name you want to use for the municipality/county
            in plot title
        * At most one of these can be not None!
    animate : bool
        if animate==True, then the map will animate using
        the frequency provided in animate_by
    color_dots : bool
        if color_dots==True, then dots will be color-coded by
        'serious injury or death' status.
        WARNING: if color_dots and animate, then all frames
        will be missing samples in 'serious injury or death'
        classes which aren't present in first frame - due to
        bug in plotly animation_frame implementation.
        Recommend only using both when geographic
        area is statewide or at least has all values of
        'serious injury or death' in first frame
    animate_by : str
        the desired animation frequency, must be
        either 'year' or 'month' (only checked when animate is True)
    show_fig : bool
        whether to display figure using fig.show()
    return_fig : bool
        whether to return the figure object

    Returns: Either figure or None
    --------

    Raises:
    -------
    ValueError
        if both city and county are provided, or if animate is True
        and animate_by is neither 'year' nor 'month'
    """
    # Explicit raise rather than assert: asserts are stripped under -O
    if city is not None and county is not None:
        raise ValueError('A city and county cannot both be provided.')
    if animate and animate_by not in ('year', 'month'):
        raise ValueError("animate_by must be 'year' or 'month'")

    # Copy df and create new column for color coding event type.
    # 'death' is assigned last so it wins over 'serious injury'.
    df = df.copy()
    df[_SEVERITY_COL] = 'neither'
    df.loc[df.BICYCLE_SUSP_SERIOUS_INJ_COUNT>0, _SEVERITY_COL] = 'serious injury'
    df.loc[df.BICYCLE_DEATH_COUNT>0, _SEVERITY_COL] = 'death'

    # Set animation parameters
    if not animate:
        animation_frame = None
        title_animate = ''
    elif animate_by == 'year':
        animation_frame = 'CRASH_YEAR'
        title_animate = ' by year'
    else:
        # Sort chronologically first so frames appear in time order,
        # then label each frame as 'YYYY-MM'
        df['DATE'] = pd.to_datetime(
            df['CRASH_MONTH'].astype('str')+'-'+df['CRASH_YEAR'].astype('str'),
            format="%m-%Y")
        df = df.sort_values(by='DATE')
        df['DATE'] = df['DATE'].dt.strftime('%Y-%m')
        animation_frame = 'DATE'
        title_animate = ' by month'

    color = _SEVERITY_COL if color_dots else None

    # Adjustments for when city or county are provided.
    # Extreme outlier samples are ignored - lat,lon may be incorrect.
    if city is not None:
        df = _drop_coordinate_outliers(df[df.MUNICIPALITY==city[0]])
        title_place = city[1]+', PA'
    elif county is not None:
        df = _drop_coordinate_outliers(df[df.COUNTY==county[0]])
        title_place = county[1]+' county, PA'
    else:
        title_place = 'PA'

    # Compute default zoom level, center and aspect ratio from lat,lon ranges
    zoom, center, width_mult = _map_view(df)

    # Plot mapbox
    fig = px.scatter_mapbox(df, lat='DEC_LAT',lon='DEC_LONG',
                            color=color,
                            color_discrete_map={'neither':'royalblue','serious injury':'orange','death':'crimson'},
                            mapbox_style='open-street-map',
                            animation_frame = animation_frame,
                            animation_group='CRN',
                            hover_data = {'DEC_LAT':False,'DEC_LONG':False,
                                          'CRASH_YEAR':True,'CRASH_MONTH':True,
                                          _SEVERITY_COL:True},
                            width = width_mult*500,height=700,zoom=zoom,
                            center=center,
                            title=f'Crashes involving bicycles{title_animate}<br> in {title_place}, 2002-2021')
    fig.update_layout(legend=dict(orientation='h',xanchor='right',yanchor='bottom',x=1,y=-0.12),
                      legend_title_side='top')
    if show_fig:
        fig.show()
    if return_fig:
        return fig
def feat_perc(feat, df, col_name='percentage', feat_name=None):
    """
    Build a one-column dataframe holding the relative frequency
    of each value found in df[feat], ordered by value.

    - 'feat' : name of the column of df to summarize.
    - 'df' : the input dataframe.
    - 'col_name' : label given to the single output column.
    - 'feat_name' : label given to the output index; when it is
      missing or empty, 'feat' is used as the index name instead.
    """
    # Fractions of the whole (they sum to 1), sorted by feature value
    shares = df[feat].value_counts(normalize=True).sort_index()
    result = pd.DataFrame({col_name: shares})
    result.index.name = feat_name or feat
    return result
def feat_perc_bar(feat,df,feat_name=None,cohort_name=None,show_fig=True,return_fig=False,sort=False):
    """
    Makes a stacked horizontal barplot of two series:
    - distribution of feature among all cyclists
    - distribution of feature among cyclists with serious injury or fatality

    Parameters:
    -----------
    feat : str
        The column name of the desired feature
    df : pd.DataFrame
        The input dataframe; must contain the column feat and
        a SERIOUS_OR_FATALITY column (rows equal to 1 form the
        'seriously injured or killed' cohort)
    feat_name : str or None
        The feature name to display in the title and
        legend. If None, will use feat
    cohort_name : str or None
        qualifier to use in front of 'cyclist' in the
        title, if provided, e.g. 'rural'
    show_fig : bool
        whether to finish with fig.show()
    return_fig : bool
        whether to return the fig object
    sort : bool
        whether to sort bars.  If False, will use default sorting
        by category name or feature value.  If True, will resort
        in descending order by percentage among all cyclists

    Returns: figure or None
    --------
    """
    if feat_name is None:
        feat_name=feat
    qualifier = f'{cohort_name.strip()} ' if cohort_name else ''

    # Distribution among all cyclists
    table = feat_perc(feat,df)
    table.loc[:,'cohort']='all'
    # Ordering is taken from the 'all' cohort before the tables are stacked
    ordering = list(table['percentage'].sort_values(ascending=False).index) if sort else None

    # Distribution among seriously injured or killed cyclists
    table_inj = feat_perc(feat,df.query('SERIOUS_OR_FATALITY==1'))
    table_inj.loc[:,'cohort']='seriously injured or killed'

    # Stack both cohorts; reset_index turns the feature values into column `feat`
    table = pd.concat([table,table_inj],axis=0).reset_index()

    category_orders = {'cohort':['all','seriously injured or killed']}
    if sort:
        category_orders[feat]=ordering

    fig = px.bar(table,y='cohort',x='percentage',color=feat,
                 barmode='stack',text_auto='.1%',
                 category_orders=category_orders,
                 # Show the friendly feature name in the legend/hover
                 labels={feat:feat_name},
                 title=f'Distributions of {feat_name} values within {qualifier}cyclist cohorts')
    fig.update_yaxes(tickangle=-90)
    fig.update_xaxes(tickformat=".0%")
    if show_fig:
        fig.show()
    if return_fig:
        return fig
| # def feat_perc_comp(feat,df,feat_name=None,cohort_name = None,merge_inj_death=True): | |
| # """ | |
| # Returns a styled dataframe (Styler object) | |
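| # whose underlying dataframe has two or three columns | |
| # (depending on merge_inj_death) containing value | |
| # counts of 'feat' among: | |
| # - all cyclists involved in crashes | |
| # - cyclists suffering serious injury or fatality | |
| # (one merged column, or one column for each outcome) | |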
| # each formatted as percentages of the series sum. | |
| # Styled with bars comparing percentages | |
| # Parameters: | |
| # ----------- | |
| # feat : str | |
| # The column name of the desired feature | |
| # df : pd.DataFrame | |
| # The input dataframe | |
| # feat_name : str or None | |
| # The feature name to use in the output dataframe | |
| # index name. If None, will use feat | |
| # cohort_name : str or None | |
| # qualifier to use in front of 'cyclists' | |
| # in titles, if provided, e.g. 'rural cyclists' | |
| # merge_inj_death : bool | |
| # whether to merge seriously injured and killed cohorts | |
| # Returns: | |
| # -------- | |
| # perc_comp : pd.Styler object | |
| # """ | |
| # # Need qualifier for titles if restricting cyclist cohort | |
| # qualifier = cohort_name if cohort_name is not None else '' | |
| # # Two columns or three, depending on merge_inj_death | |
| # if merge_inj_death: | |
| # perc_comp = feat_perc(feat,df=df,feat_name=feat_name, | |
| # col_name='all cyclists',)\ | |
| # .merge(feat_perc(feat,feat_name=feat_name, | |
| # df=df.query('SERIOUS_OR_FATALITY==1'), | |
| # col_name=qualifier+'cyclists with serious injury or fatality'), | |
| # on=feat,how='left') | |
| # perc_comp = perc_comp[perc_comp.max(axis=1)>=0.005] | |
| # else: | |
| # perc_comp = feat_perc(feat,df=df,feat_name=feat_name, | |
| # col_name='all cyclists')\ | |
| # .merge(feat_perc(feat,feat_name=feat_name, | |
| # df=df.query('INJ_SEVERITY=="susp_serious_injury"'), | |
| # col_name=qualifier+'cyclists with serious injury'), | |
| # on=feat,how='left')\ | |
| # .merge(feat_perc(feat,feat_name=feat_name, | |
| # df=df.query('INJ_SEVERITY=="killed"'), | |
| # col_name=qualifier+'cyclists with fatality'), | |
| # on=feat,how='left') | |
| # # If feature is not ordinal, sort rows descending by crash counts | |
| # if feat not in ['AGE_BINS','SPEED_LIMIT','DAY_OF_WEEK','HOUR_OF_DAY']: | |
| # perc_comp=perc_comp.sort_values(by='all cyclists',ascending=False) | |
| # # Relabel day numbers with strings | |
| # if feat == 'DAY_OF_WEEK': | |
| # perc_comp.index=['Sun','Mon','Tues','Wed','Thurs','Fri','Sat'] | |
| # perc_comp.index.name='DAY_OF_WEEK' | |
| # perc_comp=perc_comp.fillna(0) | |
| # table_columns = list(perc_comp.columns) | |
| # # Define format for displaying floats | |
| # format_dict={col:'{:.2%}' for col in perc_comp.columns} | |
| # # Define table styles | |
| # styles = [dict(selector="caption", | |
| # props=[("text-align", "center"), | |
| # ("font-size", "100%"), | |
| # ("color", 'black'), | |
| # ("text-decoration","underline"), | |
| # ("font-weight","bold")])] | |
| # # Return formatted dataframe | |
| # if feat_name is None: | |
| # feat_name=feat | |
| # caption = f'Breakdown of {feat_name} among cyclist groups' | |
| # return perc_comp.reset_index().style.set_table_attributes("style='display:inline'")\ | |
| # .format(format_dict).bar(color='powderblue', | |
| # subset=table_columns).hide().set_caption(caption)\ | |
| # .set_table_styles(styles) |