FantasticTony committed on
Commit
77f1587
·
1 Parent(s): 979f8ec

Limit data in visualizations to improve clarity

Browse files

Filtered license type counts to the top 10 for the bar chart and restricted the line chart to the key statuses (ACTIVE, NOT RENEWED, EXPIRED) to enhance readability. Added an introductory section and revised the chart descriptions to improve user understanding of the visualizations.

Files changed (1) hide show
  1. app.py +18 -78
app.py CHANGED
@@ -21,29 +21,31 @@ licenses_data_filtered['LastModifiedDate'] = pd.to_datetime(licenses_data_filter
21
  licenses_data_filtered.dropna(subset=['LastModifiedDate'], inplace=True)
22
 
23
  # chart 1
24
- license_type_counts = licenses_data_filtered['License Type'].value_counts().reset_index()
25
  license_type_counts.columns = ['License Type', 'Count']
26
 
27
  chart1 = alt.Chart(license_type_counts).mark_bar().encode(
28
- x=alt.X('License Type:N', sort='-y', title='License Type'),
29
  y=alt.Y('Count:Q', title='Number of Licenses'),
30
  color=alt.Color('License Type:N', legend=None)
31
  ).properties(
32
- title='Distribution of License Types',
33
  width=600,
34
  height=400
35
  )
36
 
37
  # chart 2
38
- licenses_data_filtered['Year'] = licenses_data_filtered['LastModifiedDate'].dt.year
39
- license_status_counts = licenses_data_filtered.groupby(['Year', 'License Status']).size().reset_index(name='Count')
 
 
40
 
41
  chart2 = alt.Chart(license_status_counts).mark_line().encode(
42
- x=alt.X('Year:O', title='Year'),
43
- y=alt.Y('Count:Q', title='Number of Licenses'),
44
  color=alt.Color('License Status:N', title='License Status')
45
  ).properties(
46
- title='License Status Over Time',
47
  width=600,
48
  height=400
49
  )
@@ -51,80 +53,18 @@ chart2 = alt.Chart(license_status_counts).mark_line().encode(
51
  # APP
52
  st.title('Licenses Data Visualization')
53
 
 
 
 
 
54
  st.write("## Visualization 1: Distribution of License Types")
55
  st.altair_chart(chart1, use_container_width=True)
56
 
57
- st.text("""
58
- This bar chart shows the distribution of different license types in the dataset. I chose a bar chart
59
- because it is effective for comparing categorical data like license types. The colors help to visually
60
- differentiate each category. If I had more time, I would add interactive tooltips to display additional
61
- information about each license type, such as the percentage of the total.
62
- """)
63
 
64
  st.write("## Visualization 2: License Status Over Time")
65
  st.altair_chart(chart2, use_container_width=True)
66
 
67
- st.text("""
68
- This line chart shows the number of licenses by status (e.g., Active, Not Renewed) over time.
69
- The chart helps to visualize trends in the renewal and activity status of licenses. I chose different colors
70
- for each status to make it easy to distinguish between them. If I had more time, I would include an option
71
- to filter by license type to see how specific types have changed over time.
72
- """)
73
-
74
- # source = data.seattle_weather()
75
- #
76
- # scale = alt.Scale(
77
- # domain=["sun", "fog", "drizzle", "rain", "snow"],
78
- # range=["#e7ba52", "#a7a7a7", "#aec7e8", "#1f77b4", "#9467bd"],
79
- # )
80
- # color = alt.Color("weather:N", scale=scale)
81
- #
82
- # # We create two selections:
83
- # # - a brush that is active on the top panel
84
- # # - a multi-click that is active on the bottom panel
85
- # brush = alt.selection_interval(encodings=["x"])
86
- # click = alt.selection_point(encodings=["color"])
87
- #
88
- # # Top panel is scatter plot of temperature vs time
89
- # points = (
90
- # alt.Chart()
91
- # .mark_point()
92
- # .encode(
93
- # alt.X("monthdate(date):T", title="Date (Month Year)"),
94
- # alt.Y(
95
- # "temp_max:Q",
96
- # title="Maximum Daily Temperature (C)",
97
- # scale=alt.Scale(domain=[-5, 40]),
98
- # ),
99
- # color=alt.condition(brush, color, alt.value("lightgray")),
100
- # size=alt.Size("precipitation:Q", scale=alt.Scale(range=[5, 200])),
101
- # )
102
- # .properties(width=550, height=300)
103
- # .add_params(brush)
104
- # .transform_filter(click)
105
- # )
106
- #
107
- # # Bottom panel is a bar chart of weather type
108
- # bars = (
109
- # alt.Chart()
110
- # .mark_bar()
111
- # .encode(
112
- # x="count()",
113
- # y="weather:N",
114
- # color=alt.condition(click, color, alt.value("lightgray")),
115
- # )
116
- # .transform_filter(brush)
117
- # .properties(
118
- # width=550,
119
- # )
120
- # .add_params(click)
121
- # )
122
- #
123
- # chart = alt.vconcat(points, bars, data=source, title="Seattle Weather - 2006 to 2014")
124
- #
125
- # tab1, tab2 = st.tabs(["Streamlit theme (default)", "Altair native theme"])
126
- #
127
- # with tab1:
128
- # st.altair_chart(chart, theme="streamlit", use_container_width=True)
129
- # with tab2:
130
- # st.altair_chart(chart, theme=None, use_container_width=True)
 
21
  licenses_data_filtered.dropna(subset=['LastModifiedDate'], inplace=True)
22
 
23
  # chart 1
24
+ license_type_counts = licenses_data_filtered['License Type'].value_counts().head(10).reset_index()
25
  license_type_counts.columns = ['License Type', 'Count']
26
 
27
  chart1 = alt.Chart(license_type_counts).mark_bar().encode(
28
+ x=alt.X('License Type:N', sort='-y', title='Top 10 License Types'),
29
  y=alt.Y('Count:Q', title='Number of Licenses'),
30
  color=alt.Color('License Type:N', legend=None)
31
  ).properties(
32
+ title='Distribution of Top 10 License Types',
33
  width=600,
34
  height=400
35
  )
36
 
37
  # chart 2
38
+ licenses_data_filtered.loc[:, 'Year'] = licenses_data_filtered['LastModifiedDate'].dt.year
39
+ license_status_counts = licenses_data_filtered[
40
+ licenses_data_filtered['License Status'].isin(['ACTIVE', 'NOT RENEWED', 'EXPIRED'])]
41
+ license_status_counts = license_status_counts.groupby(['Year', 'License Status']).size().reset_index(name='Count')
42
 
43
  chart2 = alt.Chart(license_status_counts).mark_line().encode(
44
+ x=alt.X('Year:O', title='Year (YYYY)'),
45
+ y=alt.Y('Count:Q', title='Number of Licenses Issued'),
46
  color=alt.Color('License Status:N', title='License Status')
47
  ).properties(
48
+ title='License Status Over Time (Filtered for Key Statuses)',
49
  width=600,
50
  height=400
51
  )
 
53
  # APP
54
  st.title('Licenses Data Visualization')
55
 
56
+ st.write("## Introduction")
57
+ st.text(
58
+ "This app provides visual insights into licensing data by focusing on the distribution of license types and tracking the trends of license statuses over time. The visualizations aim to simplify the exploration of the dataset and highlight key insights into how licenses are distributed and change year over year.")
59
+
60
  st.write("## Visualization 1: Distribution of License Types")
61
  st.altair_chart(chart1, use_container_width=True)
62
 
63
+ st.text(
64
+ "This bar chart shows the distribution of the top 10 license types in the dataset. I chose a bar chart because it is effective for comparing categorical data like license types. The colors help to visually differentiate each category, and focusing on the top 10 types makes the chart more readable and less cluttered. If I had more time, I would add interactive tooltips to display additional information about each license type, such as the percentage of the total.")
 
 
 
 
65
 
66
  st.write("## Visualization 2: License Status Over Time")
67
  st.altair_chart(chart2, use_container_width=True)
68
 
69
+ st.text(
70
+ "This line chart shows the number of licenses by key statuses over time. The chart helps to visualize trends in the renewal and activity status of licenses. I chose different colors for each status to make it easy to distinguish between them. Filtering for key statuses allows for a clearer view of the most important trends without overwhelming the viewer. If I had more time, I would include an option to filter by license type to see how specific types have changed over time.")