# load up the libraries
import panel as pn
import pandas as pd
import altair as alt
import math
from vega_datasets import data
# Define functions
def plot_event_distribution(df, eventName):
    """Return the rows of one event type, each tagged with a 5-minute time bucket.

    Despite the name, nothing is drawn here: the function only filters ``df``
    and adds the ``timeLabel`` column that the charts later group on.

    Args:
        df: DataFrame with ``eventName``, ``minute`` and ``matchPeriod`` columns.
        eventName: Value of the ``eventName`` column to keep.

    Returns:
        A copy of the matching rows with an extra ``timeLabel`` column.
        First-half rows (``matchPeriod == '1H'``) at minute 45 or later get
        ``'>45'``; every other row gets the 5-minute bucket containing its
        minute, with everything from minute 90 onwards collapsed into ``'>90'``.
        ``df`` itself is not modified.
    """
    time_labels = ['0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30-35', '35-40', '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80', '80-85', '85-90', '>90']
    overflow_bucket = len(time_labels) - 1

    event_data = df[df['eventName'] == eventName].copy()

    # Labels are collected positionally instead of being written back with
    # ``.loc[index, ...]``: a label-based write hits every row sharing that
    # index value, so a non-unique index would get wrong labels.
    labels = []
    for minute, match_period in zip(event_data['minute'], event_data['matchPeriod']):
        # ``>=`` (not ``>``): a first-half event at exactly minute 45 is
        # stoppage time and must not fall into the second-half '45-50' bucket.
        if match_period == '1H' and minute >= 45:
            labels.append('>45')
            continue
        bucket = math.floor(minute / 5)
        labels.append(time_labels[bucket] if bucket < overflow_bucket else '>90')

    # Explicit object dtype keeps the column string-typed even when no row matched.
    event_data['timeLabel'] = pd.Series(labels, dtype=object).to_numpy()
    return event_data
def create_event_distribution_df(event_dfs, event_names):
    """Count events per team, time bucket and match period for each event type.

    Args:
        event_dfs: Iterable of DataFrames, each with ``TeamName``,
            ``timeLabel`` and ``matchPeriod`` columns.
        event_names: Event name for each frame in ``event_dfs``, in the same
            order. The two are paired with ``zip``, so extra entries on the
            longer side are ignored.

    Returns:
        One DataFrame with columns ``TeamName``, ``eventName``, ``timeLabel``,
        ``total_counts`` and ``matchPeriod``, holding the group sizes of every
        input frame stacked in input order. Empty (but with those columns)
        when no frames are given.
    """
    columns = ['TeamName', 'eventName', 'timeLabel', 'total_counts', 'matchPeriod']

    # Gather the per-event counts first and concatenate once: concatenating
    # inside the loop re-copies the accumulated rows on every pass, and seeding
    # it with an empty frame relies on deprecated dtype handling of empty entries.
    frames = []
    for event_df, event_name in zip(event_dfs, event_names):
        group_counts = (
            event_df.groupby(['TeamName', 'timeLabel', 'matchPeriod'])
            .size()
            .reset_index(name='total_counts')
        )
        group_counts['eventName'] = event_name
        frames.append(group_counts[columns])

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)
def create_altair_chart(final_df, eventName, order, if_add_xticks=False):
    """Build the linked chart row for one event type.

    The row is a horizontal concatenation of two charts:

    * left: a bar chart of ``sum(total_counts)`` per ``timeLabel``, coloured by
      ``matchPeriod``, layered with an orange rule and a "Half Time" text mark
      drawn from the rows whose ``timeLabel`` is ``' '``;
    * right: a horizontal bar chart of teams ordered by their summed counts,
      restricted to the x-interval brushed on the left chart and to
      ``rank < 10``.

    Args:
        final_df: DataFrame with ``TeamName``, ``eventName``, ``timeLabel``,
            ``total_counts`` and ``matchPeriod`` columns.
        eventName: Event type to plot; also used in the axis titles.
        order: Ordered list of ``timeLabel`` values used as the x-scale domain.
        if_add_xticks: When true, the x axes get titles and (on the left
            chart) tick labels; otherwise the left x axis hides its labels.

    Returns:
        The concatenated Altair chart.
    """
    selection_interval = alt.selection_interval(encodings=["x"])
    mouse_hover = alt.selection_point(on="mouseover", empty=True)
    color_encode = alt.Color(
        'matchPeriod:N',
        title='',
        scale=alt.Scale(domain=['1H', '2H'], range=['rgb(76, 114, 176)', 'rgb(85, 168, 104)']),
    )

    # Filter once; the same subsets feed several charts below.
    event_label = '{}'.format(eventName)
    is_event = final_df['eventName'] == event_label
    event_rows = final_df[is_event]
    half_time_rows = final_df[is_event & (final_df['timeLabel'] == ' ')]

    # 1. Base bar plot
    base1 = alt.Chart(event_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=order, paddingInner=0.2), axis=alt.Axis(grid=True, labels=False)),
        y=alt.Y('sum(total_counts):Q', title='{} (n)'.format(eventName), axis=alt.Axis(tickCount=3, titleFontSize=24, labelFontSize=18)),
        color=alt.condition(selection_interval, color_encode, alt.value("lightgray")),
        opacity=alt.condition(mouse_hover, alt.value(1), alt.value(0.5)),
    ).add_params(  # add_selection is deprecated since Altair 5
        selection_interval,
        mouse_hover,
    )
    if if_add_xticks:
        # Replace the label-less x axis with a titled, labelled one.
        base1 = base1.encode(
            x=alt.X('timeLabel:O', title='match time (min)', scale=alt.Scale(domain=order, paddingInner=0.2), axis=alt.Axis(grid=True, titleFontSize=24, labelFontSize=18)),
        )
    bar_chart_yellow1 = base1.mark_bar()

    # 2. Add vertical line (only drawn at x == ' ')
    vertical_line1 = alt.Chart(half_time_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=[' ']), title=''),
    ).mark_rule(color='orange', strokeWidth=2)

    # 3. Add text (same position as the vertical line)
    text_chart1 = alt.Chart(half_time_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=[' ']), title=''),
    ).mark_text(align='center', baseline='middle', fontSize=23, color='orange', dy=-100, text='Half Time', font='Arial')

    # 4. Add the rank bar
    bar_rank_yellow = alt.Chart(event_rows).transform_filter(
        selection_interval
    ).transform_aggregate(
        sum_total_counts='sum(total_counts)',
        groupby=['TeamName'],
    ).transform_window(
        rank='rank(sum_total_counts)',
        sort=[alt.SortField('sum_total_counts', order='descending')],
    ).transform_filter(
        alt.datum.rank < 10  # Note: Change this to <= if you want to include the 10th position
    ).encode(
        x=alt.X('sum_total_counts:Q', title='', axis=alt.Axis(labelFontSize=9)),
        y=alt.Y('TeamName:N', sort='-x', title='Team Name', axis=alt.Axis(titleFontSize=24, labelFontSize=12, orient='right')),
    )
    if if_add_xticks:
        # NOTE(review): the field plotted here is a sum, yet the title says
        # "Average" — confirm which one is intended.
        bar_rank_yellow = bar_rank_yellow.encode(
            x=alt.X('sum_total_counts:Q', title='Average {}'.format(eventName), axis=alt.Axis(titleFontSize=24, labelFontSize=9)),
        )
    bar_rank_yellow = bar_rank_yellow.mark_bar(color='orange').properties(width=300, height=250)

    # 5. Combine all the charts
    first = bar_chart_yellow1 + vertical_line1 + text_chart1
    first = first.encode(tooltip=alt.Tooltip('sum(total_counts):Q', format='.0f'))
    yellow = first.properties(width=700, height=250) | bar_rank_yellow
    return yellow
# we want to use bootstrap/template, tell Panel to load up what we need
pn.extension(design='bootstrap')
# we want to use vega, tell Panel to load up what we need
pn.extension('vega')

# create a basic template using bootstrap
template = pn.template.BootstrapTemplate(
    title='SI649 Scientific Visualization Project',
)

# 0. the main column will hold our key content
maincol = pn.Column()

# 1. Load the Data
url = 'https://raw.githubusercontent.com/yanzhuo2001/SI_649_Projects/main/scientific%20viz%20project/final_data.csv'
df = pd.read_csv(url)

# Whole-minute version of ``minute``, computed column-wise instead of one
# ``.loc`` write per row. Kept as float, which is what the row-wise writes
# into a freshly created column produced.
df['minute1'] = df['minute'].map(math.floor).astype(float)

# Tag each event type's rows with its time bucket, then count per
# team / bucket / match period.
YC_df = plot_event_distribution(df, 'Yellow_Card')
RC_df = plot_event_distribution(df, 'Red_Card')
Goal_df = plot_event_distribution(df, 'Goal')
event_dfs = [YC_df, RC_df, Goal_df]
event_names = ['Yellow_Card', 'Red_Card', 'Goal']
final_df = create_event_distribution_df(event_dfs, event_names)
final_df = final_df[final_df['matchPeriod'].isin(['1H', '2H'])]

# Give every team a zero-count row at the ' ' (half-time) label for each
# event type; the charts draw their half-time marker from these rows.
# Built as one frame and concatenated once rather than once per team.
team_names = list(final_df['TeamName'].unique())
if team_names:
    half_time_rows = pd.DataFrame({
        'eventName': event_names * len(team_names),
        'timeLabel': ' ',
        'total_counts': 0,
        'matchPeriod': '1H',
        'TeamName': [team for team in team_names for _ in event_names],
    })
    final_df = pd.concat([final_df, half_time_rows], ignore_index=True)

# x-axis order: first-half buckets, the half-time gap, then second-half buckets
order = ['0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30-35', '35-40', '40-45', '>45', ' ', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80', '80-85', '85-90', '>90']

# Only the bottom row (red cards) carries x-axis titles and tick labels.
yellow = create_altair_chart(final_df, 'Yellow_Card', order, if_add_xticks=False)
red = create_altair_chart(final_df, 'Red_Card', order, if_add_xticks=True)
goal = create_altair_chart(final_df, 'Goal', order, if_add_xticks=False)

final = (goal & yellow & red).configure_legend(
    orient='top-left',  # legend placed in the top-left corner
    labelFontSize=18,  # legend label font size
    symbolSize=250,  # legend symbol size
    fillColor='white',  # legend background colour
    strokeWidth=2,  # legend border width
    padding=10,  # padding inside the legend
).configure_view(
    strokeWidth=1,  # chart border width
    stroke='black',
)

# 2. append the plot
maincol.append(final)
template.main.append(maincol)

# Indicate that the template object is the "application" and serve it
template.servable(title="SI649 Scientific Visualization Project")