File size: 7,778 Bytes
805cd82
f5b9ea1
805cd82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5b9ea1
805cd82
 
 
 
f5b9ea1
805cd82
 
 
 
 
 
 
f5b9ea1
 
805cd82
f5b9ea1
 
805cd82
 
 
 
 
 
 
 
 
 
 
 
 
f5b9ea1
805cd82
 
 
 
 
 
 
 
 
 
 
 
f5b9ea1
805cd82
 
 
 
f5b9ea1
805cd82
 
 
 
 
 
f5b9ea1
 
805cd82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5b9ea1
 
805cd82
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# load up the libraries
import panel as pn
import pandas as pd
import altair as alt
import math
from vega_datasets import data

# Define functions
def _time_label_for(minute, match_period, time_labels):
    """Return the time-bucket label for a single event."""
    # Anything past minute 45 that is still tagged as first half is
    # reported in its own '>45' bucket instead of a 5-minute bucket.
    if minute > 45 and match_period == '1H':
        return '>45'
    bucket = math.floor(minute / 5)
    # The last entry of time_labels ('>90') is the overflow bucket.
    if bucket < len(time_labels) - 1:
        return time_labels[bucket]
    return '>90'


def plot_event_distribution(df, eventName):
    """Select the rows of one event type and tag each with a time bucket.

    Args:
        df: DataFrame with at least the columns ``eventName``, ``minute``
            and ``matchPeriod``. It is not modified.
        eventName: Value of the ``eventName`` column to keep.

    Returns:
        A copy of the matching rows with an extra ``timeLabel`` column
        holding one of the 5-minute labels ('0-5' ... '85-90'), '>45'
        (minute above 45 while ``matchPeriod`` is '1H') or '>90'.
    """
    time_labels = ['0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30-35', '35-40', '40-45', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80', '80-85', '85-90', '>90']

    event_data = df[df['eventName'] == eventName].copy()

    # Labels are computed positionally and assigned in one step. Writing
    # them row by row through ``.loc[index, ...]`` would overwrite every row
    # sharing an index label whenever the index contains duplicates.
    # NOTE(review): minute == 45 in the first half lands in '45-50' because
    # the '>45' test is strict - confirm that this is intended.
    labels = [
        _time_label_for(minute, match_period, time_labels)
        for minute, match_period in zip(event_data['minute'], event_data['matchPeriod'])
    ]
    # Explicit object dtype keeps the column string-typed even when no rows match.
    event_data['timeLabel'] = pd.Series(labels, dtype=object).to_numpy()

    return event_data

def create_event_distribution_df(event_dfs, event_names):
    """Count events per team, time bucket and match period for each event type.

    Args:
        event_dfs: Iterable of DataFrames, each containing the columns
            ``TeamName``, ``timeLabel`` and ``matchPeriod``.
        event_names: Iterable of event names, paired positionally with
            ``event_dfs``.

    Returns:
        A DataFrame with the columns ``TeamName``, ``eventName``,
        ``timeLabel``, ``total_counts`` and ``matchPeriod``; one row per
        (team, time bucket, match period) group of each event type. An
        empty frame with those columns is returned when there is nothing
        to count.
    """
    columns = ['TeamName', 'eventName', 'timeLabel', 'total_counts', 'matchPeriod']

    per_event = []
    for event_df, event_name in zip(event_dfs, event_names):
        group_counts = (
            event_df.groupby(['TeamName', 'timeLabel', 'matchPeriod'])
            .size()
            .reset_index(name='total_counts')
        )
        if group_counts.empty:
            # Skipping empty pieces keeps them from influencing result dtypes.
            continue
        group_counts['eventName'] = event_name
        per_event.append(group_counts[columns])

    if not per_event:
        return pd.DataFrame(columns=columns)

    # A single concat avoids re-copying the accumulated frame on every
    # iteration and keeps total_counts as an integer column.
    return pd.concat(per_event, ignore_index=True)

def create_altair_chart(final_df, eventName, order, if_add_xticks=False):
    """Build one dashboard row for a single event type.

    The row is a stacked-by-period bar chart of event counts per time
    bucket (with a 'Half Time' marker layered on top), placed to the left
    of a bar chart ranking teams by their summed counts. Brushing an x
    interval on the left chart filters the data feeding the ranking.

    Args:
        final_df: DataFrame with the columns ``eventName``, ``timeLabel``,
            ``total_counts``, ``matchPeriod`` and ``TeamName``.
        eventName: Event type to plot; also used in the axis titles.
        order: Ordered list of ``timeLabel`` values used as the x domain.
        if_add_xticks: When True, the time axis shows its labels and title
            and the ranking chart's x axis gets a title.

    Returns:
        The horizontally concatenated Altair chart.
    """
    # Interval brush along x plus a hover selection used for bar opacity.
    brush = alt.selection_interval(encodings=["x"])
    hover = alt.selection_point(on="mouseover", empty=True)

    period_color = alt.Color('matchPeriod:N', title='', scale=alt.Scale(domain=['1H', '2H'], range=['rgb(76, 114, 176)', 'rgb(85, 168, 104)']))

    # Filter once and reuse: all rows of this event, and the subset whose
    # timeLabel is the ' ' placeholder where the half-time marker is drawn.
    is_event = final_df['eventName'] == '{}'.format(eventName)
    event_rows = final_df[is_event]
    marker_rows = final_df[is_event & (final_df['timeLabel'] == ' ')]

    # 1. Base bar plot: bars outside the brush turn light gray, bars not
    #    under the hover selection are drawn half-transparent.
    bars = alt.Chart(event_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=order, paddingInner=0.2), axis=alt.Axis(grid=True, labels=False)),
        y=alt.Y('sum(total_counts):Q', title='{} (n)'.format(eventName), axis=alt.Axis(tickCount=3, titleFontSize=24, labelFontSize=18)),
        color=alt.condition(brush, period_color, alt.value("lightgray")),
        opacity=alt.condition(hover, alt.value(1), alt.value(0.5)),
    ).add_params(
        # add_params is the Altair 5 replacement for the deprecated
        # add_selection; this file already relies on Altair 5's selection_point.
        brush,
        hover,
    )

    if if_add_xticks:
        bars = bars.encode(
            x=alt.X('timeLabel:O', title='match time (min)', scale=alt.Scale(domain=order, paddingInner=0.2), axis=alt.Axis(grid=True, titleFontSize=24, labelFontSize=18)),
        )

    bars = bars.mark_bar()

    # 2. Vertical rule, drawn only at the ' ' placeholder label.
    half_time_rule = alt.Chart(marker_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=[' ']), title=''),
    ).mark_rule(color='orange', strokeWidth=2)

    # 3. 'Half Time' caption at the same x position, shifted up by dy.
    half_time_text = alt.Chart(marker_rows).encode(
        x=alt.X('timeLabel:O', scale=alt.Scale(domain=[' ']), title=''),
    ).mark_text(align='center', baseline='middle', fontSize=23, color='orange', dy=-100, text='Half Time', font='Arial')

    # 4. Team ranking: sum counts per team inside the brushed interval,
    #    rank descending and keep ranks below 10.
    ranking = alt.Chart(event_rows).transform_filter(brush).transform_aggregate(
        sum_total_counts='sum(total_counts)',
        groupby=['TeamName']
    ).transform_window(
        rank='rank(sum_total_counts)',
        sort=[alt.SortField('sum_total_counts', order='descending')]
    ).transform_filter(
        alt.datum.rank < 10  # Note: Change this to <= if you want to include the 10th position
    ).encode(
        x=alt.X('sum_total_counts:Q', title='', axis=alt.Axis(labelFontSize=9)),
        y=alt.Y('TeamName:N', sort='-x', title='Team Name', axis=alt.Axis(titleFontSize=24, labelFontSize=12, orient='right'))
    )

    if if_add_xticks:
        # NOTE(review): the plotted quantity is a sum per team, while the
        # axis title says "Average" - confirm the intended wording.
        ranking = ranking.encode(
            x=alt.X('sum_total_counts:Q', title='Average {}'.format(eventName), axis=alt.Axis(titleFontSize=24, labelFontSize=9)),
        )

    ranking = ranking.mark_bar(color='orange').properties(width=300, height=250)

    # 5. Layer the distribution chart, attach the tooltip, and place the
    #    ranking chart to its right.
    distribution = (bars + half_time_rule + half_time_text)
    distribution = distribution.encode(tooltip=alt.Tooltip('sum(total_counts):Q', format='.0f'))

    return distribution.properties(width=700, height=250) | ranking

# Load Panel with the bootstrap design so the template below is styled with it
pn.extension(design='bootstrap')

# Load Panel's vega extension, needed to display the Altair (Vega) chart
pn.extension('vega')

# Create the page template; the title is shown by the template
template = pn.template.BootstrapTemplate(
    title='SI649 Scientific Visualization Project',
)

# 0. The main column holds the page content; it is filled further below
maincol = pn.Column()

# 1. Load the data
url = 'https://raw.githubusercontent.com/yanzhuo2001/SI_649_Projects/main/scientific%20viz%20project/final_data.csv'
df = pd.read_csv(url)

# Whole-minute version of every event time, computed in one pass over the
# column (stored as float64) rather than one .loc write per row.
df['minute1'] = df['minute'].map(math.floor).astype(float)

# Per-event frames, each row tagged with its time bucket
YC_df = plot_event_distribution(df, 'Yellow_Card')
RC_df = plot_event_distribution(df, 'Red_Card')
Goal_df = plot_event_distribution(df, 'Goal')

event_dfs = [YC_df, RC_df, Goal_df]
event_names = ['Yellow_Card', 'Red_Card', 'Goal']
final_df = create_event_distribution_df(event_dfs, event_names)
# Keep only rows recorded in the first or second half
final_df = final_df[final_df['matchPeriod'].isin(['1H', '2H'])]

# Add zero-count placeholder rows with timeLabel ' ' for every team and
# event type. ' ' sits between '>45' and '45-50' in `order`, and
# create_altair_chart draws its 'Half Time' marker on exactly these rows.
# All placeholders are appended with a single concat.
n_events = len(event_names)
placeholder_rows = [
    pd.DataFrame({
        'eventName': event_names,
        'timeLabel': [' '] * n_events,
        'total_counts': [0] * n_events,
        'matchPeriod': ['1H'] * n_events,
        'TeamName': [team] * n_events,
    })
    for team in final_df['TeamName'].unique()
]
final_df = pd.concat([final_df, *placeholder_rows], ignore_index=True)

# x-axis order of the time buckets, including the ' ' half-time placeholder
order = ['0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30-35', '35-40', '40-45', '>45', ' ', '45-50', '50-55', '55-60', '60-65', '65-70', '70-75', '75-80', '80-85', '85-90', '>90']

# Only the chart passed if_add_xticks=True (red cards) shows the time-axis labels and title
yellow = create_altair_chart(final_df, 'Yellow_Card', order, if_add_xticks=False)
red = create_altair_chart(final_df, 'Red_Card', order, if_add_xticks=True)
goal = create_altair_chart(final_df, 'Goal', order, if_add_xticks=False)

# Stack the three chart rows vertically (goals, yellow cards, red cards)
# and apply shared legend and view styling.
final = (goal & yellow & red).configure_legend(
    orient='top-left',     # place the legend in the top-left corner
    labelFontSize=18,      # font size of the legend labels
    symbolSize=250,        # size of the legend symbols
    fillColor='white',     # legend background color
    strokeWidth=2,         # legend border thickness
    padding=10,            # padding inside the legend
).configure_view(
    strokeWidth=1,         # chart border thickness
    stroke='black'
)

# 2. append the plot
maincol.append(final)   
template.main.append(maincol)

# Indicate that the template object is the "application" and serve it
template.servable(title="SI649 Scientific Visualization Project")