# fengyzz's picture
# Update app.py
# 805cd82 verified
# load up the libraries
import panel as pn
import pandas as pd
import altair as alt
import math
from vega_datasets import data
# Define functions
def plot_event_distribution(df, eventName):
    """Return the rows of one event type, tagged with a 5-minute time bucket.

    Despite its name this function draws nothing: it filters ``df`` to the
    rows whose ``eventName`` column equals ``eventName`` and adds a
    ``timeLabel`` column to a copy of them.

    Bucketing rules, applied per row:
      * ``minute > 45`` while ``matchPeriod == '1H'`` -> ``'>45'``
        (presumably first-half stoppage time);
      * otherwise ``floor(minute / 5)`` selects one of the 5-minute labels
        ``'0-5'`` ... ``'85-90'``;
      * minute 90 and beyond -> ``'>90'``.

    Args:
        df: DataFrame with at least ``eventName``, ``minute`` and
            ``matchPeriod`` columns. It is not modified.
        eventName: Value of the ``eventName`` column to keep.

    Returns:
        A new DataFrame holding the matching rows (original index preserved)
        plus a string column ``timeLabel``.
    """
    time_labels = [
        '0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30-35', '35-40',
        '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75',
        '75-80', '80-85', '85-90', '>90',
    ]
    # The last entry is the overflow bucket, reached only via the '>90' branch.
    overflow_bucket = len(time_labels) - 1

    def _label_for(minute, match_period):
        if minute > 45 and match_period == '1H':
            return '>45'
        bucket = math.floor(minute / 5)
        if bucket < overflow_bucket:
            return time_labels[bucket]
        return '>90'

    event_data = df[df['eventName'] == eventName].copy()
    labels = [
        _label_for(minute, match_period)
        for minute, match_period in zip(event_data['minute'], event_data['matchPeriod'])
    ]
    # Assign positionally rather than through ``.loc[label]`` so rows that
    # share an index label cannot overwrite each other's bucket; the explicit
    # object dtype keeps the column string-typed even when no row matches.
    event_data['timeLabel'] = pd.Series(labels, dtype=object).to_numpy()
    return event_data
def create_event_distribution_df(event_dfs, event_names):
    """Count events per team, time bucket and match period for several events.

    Each frame in ``event_dfs`` is grouped by ``TeamName``, ``timeLabel`` and
    ``matchPeriod``; the group sizes become ``total_counts`` and the matching
    entry of ``event_names`` is stored in ``eventName``. Frames and names are
    paired positionally with ``zip`` (extra entries on either side are
    ignored).

    Args:
        event_dfs: Iterable of DataFrames, each with ``TeamName``,
            ``timeLabel`` and ``matchPeriod`` columns.
        event_names: Iterable of event names, parallel to ``event_dfs``.

    Returns:
        One DataFrame with columns ``TeamName``, ``eventName``, ``timeLabel``,
        ``total_counts``, ``matchPeriod`` (in that order) and a fresh
        0..n-1 index. It is empty, but still has those columns, when there is
        nothing to count.
    """
    columns = ['TeamName', 'eventName', 'timeLabel', 'total_counts', 'matchPeriod']

    frames = []
    for event_df, event_name in zip(event_dfs, event_names):
        group_counts = (
            event_df.groupby(['TeamName', 'timeLabel', 'matchPeriod'])
            .size()
            .reset_index(name='total_counts')
        )
        if group_counts.empty:
            # Contributes no rows; skipping it keeps empty frames out of concat.
            continue
        group_counts['eventName'] = event_name
        frames.append(group_counts)

    if not frames:
        return pd.DataFrame(columns=columns)

    # Concatenate once: no empty object-dtype seed frame (so the counts keep
    # their integer dtype) and no re-copying of accumulated rows per event.
    return pd.concat(frames, ignore_index=True)[columns]
def create_altair_chart(final_df, eventName, order, if_add_xticks=False):
    """Build one dashboard row: a time-bucket bar chart beside a team ranking.

    The left chart stacks ``total_counts`` per ``timeLabel`` for the rows of
    ``final_df`` whose ``eventName`` matches, coloured by ``matchPeriod``, with
    an orange rule and a "Half Time" label drawn at the ``' '`` time label.
    Brushing along its x axis filters the right chart, which shows the teams
    whose summed ``total_counts`` rank below 10 (descending).

    Args:
        final_df: DataFrame with ``TeamName``, ``eventName``, ``timeLabel``,
            ``total_counts`` and ``matchPeriod`` columns. Rows with
            ``timeLabel == ' '`` mark where the half-time rule is drawn.
        eventName: Event to plot; also used in the axis titles.
        order: Ordered list of ``timeLabel`` values used as the x domain.
        if_add_xticks: When true, show x tick labels and axis titles on both
            charts (intended for the bottom row of a stacked layout).

    Returns:
        An Altair horizontally-concatenated chart (bar chart | ranking).
    """
    selection_interval = alt.selection_interval(encodings=["x"])
    mouse_hover = alt.selection_point(on="mouseover", empty=True)
    color_encode = alt.Color(
        'matchPeriod:N',
        title='',
        scale=alt.Scale(
            domain=['1H', '2H'],
            range=['rgb(76, 114, 176)', 'rgb(85, 168, 104)'],
        ),
    )

    # Filter once and reuse; the half-time marker rows are a subset.
    event_rows = final_df[final_df['eventName'] == f'{eventName}']
    halftime_rows = event_rows[event_rows['timeLabel'] == ' ']

    # 1. Base bar plot. Tick labels and the axis title are shown only on request.
    time_scale = alt.Scale(domain=order, paddingInner=0.2)
    if if_add_xticks:
        time_axis = alt.X(
            'timeLabel:O',
            title='match time (min)',
            scale=time_scale,
            axis=alt.Axis(grid=True, titleFontSize=24, labelFontSize=18),
        )
    else:
        time_axis = alt.X(
            'timeLabel:O',
            scale=time_scale,
            axis=alt.Axis(grid=True, labels=False),
        )

    bars = alt.Chart(event_rows).encode(
        x=time_axis,
        y=alt.Y(
            'sum(total_counts):Q',
            title=f'{eventName} (n)',
            axis=alt.Axis(tickCount=3, titleFontSize=24, labelFontSize=18),
        ),
        color=alt.condition(selection_interval, color_encode, alt.value("lightgray")),
        opacity=alt.condition(mouse_hover, alt.value(1), alt.value(0.5)),
    ).add_params(
        # add_params is the Altair 5 replacement for the deprecated
        # add_selection, matching the selection_point API used above.
        selection_interval,
        mouse_hover,
    ).mark_bar()

    # 2. Vertical rule, drawn only at x == ' '.
    vertical_line = alt.Chart(halftime_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=[' ']), title=''),
    ).mark_rule(color='orange', strokeWidth=2)

    # 3. "Half Time" caption at the same x position, shifted up by dy.
    halftime_text = alt.Chart(halftime_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=[' ']), title=''),
    ).mark_text(
        align='center',
        baseline='middle',
        fontSize=23,
        color='orange',
        dy=-100,
        text='Half Time',
        font='Arial',
    )

    # 4. Team ranking, restricted to the brushed time range.
    # NOTE(review): the titled variant says "Average" but the value plotted is
    # a sum of total_counts -- confirm the intended wording.
    if if_add_xticks:
        rank_axis = alt.X(
            'sum_total_counts:Q',
            title='Average {}'.format(eventName),
            axis=alt.Axis(titleFontSize=24, labelFontSize=9),
        )
    else:
        rank_axis = alt.X('sum_total_counts:Q', title='', axis=alt.Axis(labelFontSize=9))

    ranking = alt.Chart(event_rows).transform_filter(
        selection_interval
    ).transform_aggregate(
        sum_total_counts='sum(total_counts)',
        groupby=['TeamName'],
    ).transform_window(
        rank='rank(sum_total_counts)',
        sort=[alt.SortField('sum_total_counts', order='descending')],
    ).transform_filter(
        alt.datum.rank < 10  # Note: change to <= to include the 10th position
    ).encode(
        x=rank_axis,
        y=alt.Y(
            'TeamName:N',
            sort='-x',
            title='Team Name',
            axis=alt.Axis(titleFontSize=24, labelFontSize=12, orient='right'),
        ),
    ).mark_bar(color='orange').properties(width=300, height=250)

    # 5. Layer the left-hand charts, share one tooltip, and place the ranking
    # to the right.
    layered = (bars + vertical_line + halftime_text).encode(
        tooltip=alt.Tooltip('sum(total_counts):Q', format='.0f')
    )
    return layered.properties(width=700, height=250) | ranking
# We want to use bootstrap/template, so tell Panel to load up what we need.
pn.extension(design='bootstrap')
# We want to use vega, so tell Panel to load up what we need.
pn.extension('vega')

# Create a basic template using bootstrap.
template = pn.template.BootstrapTemplate(
    title='SI649 Scientific Visualization Project',
)

# 0. The main column will hold our key content.
maincol = pn.Column()

# 1. Load the data.
url = 'https://raw.githubusercontent.com/yanzhuo2001/SI_649_Projects/main/scientific%20viz%20project/final_data.csv'
df = pd.read_csv(url)

# Whole-minute column, computed in one pass instead of a row-by-row
# iterrows()/.loc loop. Cast to float64 to match the dtype the previous
# row-wise assignment produced.
df['minute1'] = df['minute'].map(math.floor).astype('float64')

# Tag each event type with its time bucket, then count per team/bucket/period.
event_names = ['Yellow_Card', 'Red_Card', 'Goal']
YC_df = plot_event_distribution(df, 'Yellow_Card')
RC_df = plot_event_distribution(df, 'Red_Card')
Goal_df = plot_event_distribution(df, 'Goal')
event_dfs = [YC_df, RC_df, Goal_df]

final_df = create_event_distribution_df(event_dfs, event_names)
final_df = final_df[final_df['matchPeriod'].isin(['1H', '2H'])]

# Add a zero-count row at the ' ' time label for every team and event: these
# rows are what the charts use to draw the half-time marker. Built as a single
# frame and concatenated once rather than once per team.
team_names = final_df['TeamName'].unique()
if len(team_names):
    placeholders = pd.DataFrame({
        'eventName': event_names * len(team_names),
        'timeLabel': ' ',
        'total_counts': 0,
        'matchPeriod': '1H',
        'TeamName': [team for team in team_names for _ in event_names],
    })
    final_df = pd.concat([final_df, placeholders], ignore_index=True)

# X-axis order; ' ' sits between the halves so the marker lands at half time.
order = ['0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30-35', '35-40', '40-45', '>45', ' ', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80', '80-85', '85-90', '>90']

# Only the bottom row (red cards) carries x tick labels and axis titles.
yellow = create_altair_chart(final_df, 'Yellow_Card', order, if_add_xticks=False)
red = create_altair_chart(final_df, 'Red_Card', order, if_add_xticks=True)
goal = create_altair_chart(final_df, 'Goal', order, if_add_xticks=False)

final = (goal & yellow & red).configure_legend(
    orient='top-left',  # legend placed in the top-left corner
    labelFontSize=18,   # legend label font size
    symbolSize=250,     # legend symbol size
    fillColor='white',  # legend background colour
    strokeWidth=2,      # legend border thickness
    padding=10,         # padding inside the legend
).configure_view(
    strokeWidth=1,      # chart border thickness
    stroke='black'
)

# 2. Append the plot.
maincol.append(final)
template.main.append(maincol)

# Indicate that the template object is the "application" and serve it.
template.servable(title="SI649 Scientific Visualization Project")