Shah-Miloni committed on
Commit
a137ed9
·
0 Parent(s):

Homework 5.1

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .bash_history +5 -0
  2. .cache/Microsoft/DeveloperTools/deviceid +1 -0
  3. .config/code-server/config.yaml +4 -0
  4. .local/share/code-server/CachedProfilesData/__default__profile__/extensions.builtin.cache +0 -0
  5. .local/share/code-server/CachedProfilesData/__default__profile__/extensions.user.cache +1 -0
  6. .local/share/code-server/User/History/450b0aa/0rFy.py +75 -0
  7. .local/share/code-server/User/History/450b0aa/2sco.py +73 -0
  8. .local/share/code-server/User/History/450b0aa/2wHf.py +74 -0
  9. .local/share/code-server/User/History/450b0aa/4TtO.py +73 -0
  10. .local/share/code-server/User/History/450b0aa/53Ql.py +75 -0
  11. .local/share/code-server/User/History/450b0aa/53ry.py +75 -0
  12. .local/share/code-server/User/History/450b0aa/8qMH.py +74 -0
  13. .local/share/code-server/User/History/450b0aa/8wPI.py +73 -0
  14. .local/share/code-server/User/History/450b0aa/AIA9.py +208 -0
  15. .local/share/code-server/User/History/450b0aa/BA55.py +73 -0
  16. .local/share/code-server/User/History/450b0aa/BvGZ.py +260 -0
  17. .local/share/code-server/User/History/450b0aa/C2VD.py +73 -0
  18. .local/share/code-server/User/History/450b0aa/Dqul.py +73 -0
  19. .local/share/code-server/User/History/450b0aa/GB0z.py +75 -0
  20. .local/share/code-server/User/History/450b0aa/HxIz.py +107 -0
  21. .local/share/code-server/User/History/450b0aa/I3Tc.py +108 -0
  22. .local/share/code-server/User/History/450b0aa/Km75.py +75 -0
  23. .local/share/code-server/User/History/450b0aa/O3TC.py +260 -0
  24. .local/share/code-server/User/History/450b0aa/P3er.py +107 -0
  25. .local/share/code-server/User/History/450b0aa/Q5uV.py +75 -0
  26. .local/share/code-server/User/History/450b0aa/QeUw.py +69 -0
  27. .local/share/code-server/User/History/450b0aa/RWSp.py +73 -0
  28. .local/share/code-server/User/History/450b0aa/RuPD.py +74 -0
  29. .local/share/code-server/User/History/450b0aa/TdSt.py +109 -0
  30. .local/share/code-server/User/History/450b0aa/Vrxk.py +74 -0
  31. .local/share/code-server/User/History/450b0aa/W0T9.py +73 -0
  32. .local/share/code-server/User/History/450b0aa/WCDW.py +73 -0
  33. .local/share/code-server/User/History/450b0aa/WLXq.py +70 -0
  34. .local/share/code-server/User/History/450b0aa/YJb8.py +73 -0
  35. .local/share/code-server/User/History/450b0aa/YSbm.py +75 -0
  36. .local/share/code-server/User/History/450b0aa/cWhv.py +208 -0
  37. .local/share/code-server/User/History/450b0aa/e7iD.py +73 -0
  38. .local/share/code-server/User/History/450b0aa/entries.json +1 -0
  39. .local/share/code-server/User/History/450b0aa/fdvg.py +107 -0
  40. .local/share/code-server/User/History/450b0aa/fjsF.py +75 -0
  41. .local/share/code-server/User/History/450b0aa/gJ7Y.py +75 -0
  42. .local/share/code-server/User/History/450b0aa/gwPw.py +72 -0
  43. .local/share/code-server/User/History/450b0aa/hisc.py +71 -0
  44. .local/share/code-server/User/History/450b0aa/iwOA.py +108 -0
  45. .local/share/code-server/User/History/450b0aa/jtCb.py +74 -0
  46. .local/share/code-server/User/History/450b0aa/ksQB.py +73 -0
  47. .local/share/code-server/User/History/450b0aa/mE68.py +75 -0
  48. .local/share/code-server/User/History/450b0aa/qzJp.py +75 -0
  49. .local/share/code-server/User/History/450b0aa/rykv.py +75 -0
  50. .local/share/code-server/User/History/450b0aa/s0Hs.py +250 -0
.bash_history ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit run app.py
2
+ streamlit run app.py
3
+ streamlit run app.py
4
+ streamlit run app.py
5
+ streamlit run app.py
.cache/Microsoft/DeveloperTools/deviceid ADDED
@@ -0,0 +1 @@
 
 
1
+ a5c936e7-e0c1-4067-98d1-c3fa3de57425
.config/code-server/config.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ bind-addr: 127.0.0.1:8080
2
+ auth: password
3
+ password: 903bae018fa9e47b8195dac4
4
+ cert: false
.local/share/code-server/CachedProfilesData/__default__profile__/extensions.builtin.cache ADDED
The diff for this file is too large to render. See raw diff
 
.local/share/code-server/CachedProfilesData/__default__profile__/extensions.user.cache ADDED
@@ -0,0 +1 @@
 
 
1
+ {"input":{"location":{"$mid":1,"fsPath":"/home/coder/.local/share/code-server/extensions/extensions.json","external":"file:///home/coder/.local/share/code-server/extensions/extensions.json","path":"/home/coder/.local/share/code-server/extensions/extensions.json","scheme":"file"},"mtime":1731806263514,"profile":true,"profileScanOptions":{"bailOutWhenFileNotFound":true},"type":1,"excludeObsolete":true,"validate":true,"productVersion":"1.91.1","productDate":"2024-07-15T18:41:47.410Z","productCommit":"1962f48b7f71772dc2c060dbaa5a6b4c0792a549","devMode":false,"language":"en","translations":{}},"result":[]}
.local/share/code-server/User/History/450b0aa/0rFy.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and dropping rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
69
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
70
+
71
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
72
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
73
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
74
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
75
+ """)
.local/share/code-server/User/History/450b0aa/2sco.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and dropping rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
34
+
35
+ **Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
36
+
37
+ **Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
38
+ """)
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
69
+
70
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
73
+ """)
.local/share/code-server/User/History/450b0aa/2wHf.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and dropping rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Create bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+
30
+ st.altair_chart(bar_chart, use_container_width=True)
31
+
32
+ # Markdown Write-Up for the bar chart
33
+ st.markdown("""
34
+ **Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
35
+
36
+ **Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
37
+
38
+ **Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
39
+ """)
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("4. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
70
+
71
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
72
+
73
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
74
+ """)
.local/share/code-server/User/History/450b0aa/4TtO.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and dropping rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. allowing us to easily identify which types of licenses are the most prevalent.
34
+
35
+ **Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
36
+
37
+ **Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
38
+ """)
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
69
+
70
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
73
+ """)
.local/share/code-server/User/History/450b0aa/53Ql.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and dropping rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
37
+ To improve this, I focused on the top 10 most frequent license types.
38
+ Some license names were long, so I made sure that full text is visible when you hover on the bars.
39
+ If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/53ry.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and dropping rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
39
+ If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/8qMH.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and dropping rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
69
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
70
+
71
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
72
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
73
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
74
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/8wPI.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime (unparseable values become NaT) and drop rows where it is missing
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
69
+
70
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
73
+ """)
.local/share/code-server/User/History/450b0aa/AIA9.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ #import the data
7
+ #url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
8
+ #df = pd.read_csv(url)
9
+ #df.sum().isnull()
10
+
11
+
12
+
13
+ # #Fill the missing data with Unknown
14
+ # df['County'].fillna('Not available', inplace=True)
15
+ # df['Rep Full Name'].fillna('Not available', inplace=True)
16
+ # df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
17
+ # df['Senator Full Name'].fillna('Unknown', inplace=True)
18
+ # df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
19
+ # df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
20
+ # df['Address'].fillna('Not available', inplace=True)
21
+ # df['Congressional Full Name'].fillna('Unknown', inplace=True)
22
+
23
+ # #Page Title
24
+ # st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
25
+
26
+ # st.subheader("Analyzing the Building Inventory Dataset")
27
+ # #Visualization 1
28
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
29
+
30
+ # df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
31
+ # df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
32
+ # df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
33
+
34
+
35
+ # line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
36
+ # x=alt.X('Year Constructed:Q', title='Year Constructed'),
37
+ # y=alt.Y('count()', title='Number of Buildings'),
38
+ # color=alt.Color('Bldg Status:N', title='Building Status'),
39
+ # tooltip=['Year Constructed', 'count()', 'Bldg Status']
40
+ # ).properties(
41
+ # width=700,
42
+ # height=400,
43
+ # title="Trend of Building Construction by Status"
44
+ # )
45
+ # st.altair_chart(line_chart, use_container_width=True)
46
+ # st.write("""
47
+ # This line chart highlights the trend in the number of buildings constructed over time,
48
+ # categorized by their building status (e.g., whether they are currently in use or not).
49
+ # The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
50
+ # The color encoding separates the buildings by their 'Bldg Status'
51
+ # I used a line plot with points to clearly indicate the number of buildings per year,
52
+ # which helps in identifying trends and peaks. Another reason is that line chart is ideal for
53
+ # visualizing time-series data. The colors are chosen to differentiate the building statuses
54
+ # effectively.
55
+
56
+ # If I had more time, I would consider adding labels to the data points for clarity and
57
+ # perhaps break down the data further by usage description or location for a more detailed analysis.
58
+ # Probably adding hover effects could display additional information, such as the exact count of
59
+ # buildings and their status, when hovering over each data point. This would allow users to gain
60
+ # deeper insights without cluttering the chart. I would also include filtering options so users
61
+ # could select specific building statuses or even a range of years to focus on, which would make
62
+ # the analysis more targeted.
63
+ # """)
64
+
65
+
66
+ # # Visualization 2
67
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
68
+ # county_agency_data = df.dropna(subset=['County', 'Agency Name'])
69
+ # county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
70
+ # county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
71
+
72
+ # county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
73
+ # x=alt.X('County:N', sort='-y', title="County"),
74
+ # y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
75
+ # color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
76
+ # tooltip=['County:N', 'Unique Agencies:Q']
77
+ # ).properties(
78
+ # width=700,
79
+ # height=400,
80
+ # title="Unique Agencies in Each County"
81
+ # )
82
+ # st.altair_chart(county_agency_chart, use_container_width=True)
83
+
84
+ # st.write("""
85
+ # This bar chart highlights the number of unique agencies operating in each county.
86
+ # The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
87
+ # I used a bar chart as it allows for easy comparison between counties based on the number of
88
+ # agencies, and sorting the counties by agency count makes it easier to identify areas with
89
+ # more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
90
+ # differs across counties.
91
+
92
+ # If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
93
+ # focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
94
+ # I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
95
+ # enabling them to analyze trends and compare the distribution of agencies by different time periods.
96
+ # I would also consider adding a search function so that users can look for specific counties
97
+ # or agency types.
98
+ # """)
99
+ # Load State GeoJSON for Map (example if supported)
100
+ # Visualization: Licenses by State
101
+
102
+
103
+ # Load Dataset
104
+ @st.cache
105
+ def load_data():
106
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
107
+ return pd.read_csv(url)
108
+
109
+ df = load_data()
110
+
111
+ # Handle Missing Values for Visualizations
112
+ # 1. Remove rows with missing `Original Issue Date`
113
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
114
+ df_time = df.dropna(subset=['Original Issue Date'])
115
+
116
+ # Title
117
+ st.title("Licenses Dataset Visualizations")
118
+ st.markdown("This app presents three visualizations of the licenses dataset.")
119
+
120
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
121
+ st.header("1. Distribution of Licenses by Type")
122
+ license_count = df['License Type'].value_counts().reset_index()
123
+ license_count.columns = ['License Type', 'Count']
124
+
125
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
126
+ x=alt.X('Count:Q', title='Number of Licenses'),
127
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
128
+ color=alt.Color('License Type:N', legend=None)
129
+ ).properties(title="Number of Licenses by Type")
130
+
131
+ st.altair_chart(bar_chart, use_container_width=True)
132
+
133
+ st.markdown("""
134
+ **Highlights**: This bar chart shows the distribution of licenses by type.
135
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
136
+ """)
137
+
138
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
139
+ st.header("2. Trend of Licenses Over Time")
140
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
141
+ time_data.columns = ['Year', 'Count']
142
+
143
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
144
+ x=alt.X('Year:O', title='Year'),
145
+ y=alt.Y('Count:Q', title='Number of Licenses'),
146
+ color=alt.value('blue')
147
+ ).properties(title="Trend of Licenses Issued Over Years")
148
+
149
+ st.altair_chart(line_chart, use_container_width=True)
150
+
151
+ st.markdown("""
152
+ **Highlights**: This line chart shows the trend of license issuances over time.
153
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
154
+ """)
155
+
156
+
157
+ # Count the number of licenses for each License Type and sort by count
158
+ license_type_counts = df['License Type'].value_counts().reset_index()
159
+ license_type_counts.columns = ['License Type', 'Count']
160
+
161
+ # Get the top 10 License Types
162
+ top_10_license_types = license_type_counts.head(10)
163
+
164
+ # Create the bar chart
165
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
166
+ x=alt.X('Count:Q', title='License Count'),
167
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
168
+ color=alt.Color('License Type:N', legend=None),
169
+ tooltip=['License Type', 'Count']
170
+ ).properties(
171
+ title='Top 10 License Types by Frequency'
172
+ )
173
+
174
+ # Show the chart in the Streamlit app
175
+ st.altair_chart(bar_chart, use_container_width=True)
176
+
177
+
178
+ # Convert the Expiration Date to datetime (if not already)
179
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
180
+
181
+ # Extract the year from Expiration Date
182
+ df['Expiration Year'] = df['Expiration Date'].dt.year
183
+
184
+ # Group by Expiration Year and count the number of licenses expiring
185
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
186
+
187
+ # Create the trend line (line chart) with interactive data points (circle marks)
188
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
189
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
190
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
191
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
192
+ ).properties(
193
+ title='Trend Line for Licenses Expiring Over Time'
194
+ )
195
+
196
+ # Add circle marks at data points
197
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
198
+ x=alt.X('Expiration Year:O'),
199
+ y=alt.Y('License Count:Q'),
200
+ tooltip=['Expiration Year', 'License Count']
201
+ )
202
+
203
+ # Combine the line and point marks
204
+ final_chart = line_chart + points
205
+
206
+ # Show the chart in the Streamlit app
207
+ st.altair_chart(final_chart, use_container_width=True)
208
+
.local/share/code-server/User/History/450b0aa/BA55.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime (unparseable values become NaT) and drop rows where it is missing
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ **Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
34
+
35
+ **Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
36
+
37
+ **Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
38
+ """)
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
69
+
70
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
73
+ """)
.local/share/code-server/User/History/450b0aa/BvGZ.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ #import the data
7
+ #url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
8
+ #df = pd.read_csv(url)
9
+ #df.sum().isnull()
10
+
11
+
12
+
13
+ # #Fill the missing data with Unknown
14
+ # df['County'].fillna('Not available', inplace=True)
15
+ # df['Rep Full Name'].fillna('Not available', inplace=True)
16
+ # df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
17
+ # df['Senator Full Name'].fillna('Unknown', inplace=True)
18
+ # df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
19
+ # df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
20
+ # df['Address'].fillna('Not available', inplace=True)
21
+ # df['Congressional Full Name'].fillna('Unknown', inplace=True)
22
+
23
+ # #Page Title
24
+ # st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
25
+
26
+ # st.subheader("Analyzing the Building Inventory Dataset")
27
+ # #Visualization 1
28
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
29
+
30
+ # df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
31
+ # df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
32
+ # df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
33
+
34
+
35
+ # line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
36
+ # x=alt.X('Year Constructed:Q', title='Year Constructed'),
37
+ # y=alt.Y('count()', title='Number of Buildings'),
38
+ # color=alt.Color('Bldg Status:N', title='Building Status'),
39
+ # tooltip=['Year Constructed', 'count()', 'Bldg Status']
40
+ # ).properties(
41
+ # width=700,
42
+ # height=400,
43
+ # title="Trend of Building Construction by Status"
44
+ # )
45
+ # st.altair_chart(line_chart, use_container_width=True)
46
+ # st.write("""
47
+ # This line chart highlights the trend in the number of buildings constructed over time,
48
+ # categorized by their building status (e.g., whether they are currently in use or not).
49
+ # The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
50
+ # The color encoding separates the buildings by their 'Bldg Status'
51
+ # I used a line plot with points to clearly indicate the number of buildings per year,
52
+ # which helps in identifying trends and peaks. Another reason is that line chart is ideal for
53
+ # visualizing time-series data. The colors are chosen to differentiate the building statuses
54
+ # effectively.
55
+
56
+ # If I had more time, I would consider adding labels to the data points for clarity and
57
+ # perhaps break down the data further by usage description or location for a more detailed analysis.
58
+ # Probably adding hover effects could display additional information, such as the exact count of
59
+ # buildings and their status, when hovering over each data point. This would allow users to gain
60
+ # deeper insights without cluttering the chart. I would also include filtering options so users
61
+ # could select specific building statuses or even a range of years to focus on, which would make
62
+ # the analysis more targeted.
63
+ # """)
64
+
65
+
66
+ # # Visualization 2
67
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
68
+ # county_agency_data = df.dropna(subset=['County', 'Agency Name'])
69
+ # county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
70
+ # county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
71
+
72
+ # county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
73
+ # x=alt.X('County:N', sort='-y', title="County"),
74
+ # y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
75
+ # color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
76
+ # tooltip=['County:N', 'Unique Agencies:Q']
77
+ # ).properties(
78
+ # width=700,
79
+ # height=400,
80
+ # title="Unique Agencies in Each County"
81
+ # )
82
+ # st.altair_chart(county_agency_chart, use_container_width=True)
83
+
84
+ # st.write("""
85
+ # This bar chart highlights the number of unique agencies operating in each county.
86
+ # The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
87
+ # I used a bar chart as it allows for easy comparison between counties based on the number of
88
+ # agencies, and sorting the counties by agency count makes it easier to identify areas with
89
+ # more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
90
+ # differs across counties.
91
+
92
+ # If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
93
+ # focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
94
+ # I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
95
+ # enabling them to analyze trends and compare the distribution of agencies by different time periods.
96
+ # I would also consider adding a search function so that users can look for specific counties
97
+ # or agency types.
98
+ # """)
99
+ # Load State GeoJSON for Map (example if supported)
100
+ # Visualization: Licenses by State
101
+
102
+
103
+ # Load Dataset
104
+ @st.cache
105
+ def load_data():
106
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
107
+ return pd.read_csv(url)
108
+
109
+ df = load_data()
110
+
111
+ # Handle Missing Values for Visualizations
112
+ # 1. Remove rows with missing `Original Issue Date`
113
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
114
+ df_time = df.dropna(subset=['Original Issue Date'])
115
+
116
+ # Title
117
+ st.title("Licenses Dataset Visualizations")
118
+ st.markdown("This app presents three visualizations of the licenses dataset.")
119
+
120
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
121
+ st.header("1. Distribution of Licenses by Type")
122
+ license_count = df['License Type'].value_counts().reset_index()
123
+ license_count.columns = ['License Type', 'Count']
124
+
125
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
126
+ x=alt.X('Count:Q', title='Number of Licenses'),
127
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
128
+ color=alt.Color('License Type:N', legend=None)
129
+ ).properties(title="Number of Licenses by Type")
130
+
131
+ st.altair_chart(bar_chart, use_container_width=True)
132
+
133
+ st.markdown("""
134
+ **Highlights**: This bar chart shows the distribution of licenses by type.
135
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
136
+ """)
137
+
138
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
139
+ st.header("2. Trend of Licenses Over Time")
140
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
141
+ time_data.columns = ['Year', 'Count']
142
+
143
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
144
+ x=alt.X('Year:O', title='Year'),
145
+ y=alt.Y('Count:Q', title='Number of Licenses'),
146
+ color=alt.value('blue')
147
+ ).properties(title="Trend of Licenses Issued Over Years")
148
+
149
+ st.altair_chart(line_chart, use_container_width=True)
150
+
151
+ st.markdown("""
152
+ **Highlights**: This line chart shows the trend of license issuances over time.
153
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
154
+ """)
155
+
156
+
157
+ # Count the number of licenses for each License Type and sort by count
158
+ license_type_counts = df['License Type'].value_counts().reset_index()
159
+ license_type_counts.columns = ['License Type', 'Count']
160
+
161
+ # Get the top 10 License Types
162
+ top_10_license_types = license_type_counts.head(10)
163
+
164
+ # Create the bar chart
165
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
166
+ x=alt.X('Count:Q', title='License Count'),
167
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
168
+ color=alt.Color('License Type:N', legend=None),
169
+ tooltip=['License Type', 'Count']
170
+ ).properties(
171
+ title='Top 10 License Types by Frequency'
172
+ )
173
+
174
+ # Show the chart in the Streamlit app
175
+ st.altair_chart(bar_chart, use_container_width=True)
176
+
177
+ # Convert the Expiration Date to datetime (if not already)
178
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
179
+
180
+ # Extract the month and year from Expiration Date
181
+ df['Expiration Year'] = df['Expiration Date'].dt.year
182
+ df['Expiration Month'] = df['Expiration Date'].dt.month
183
+
184
+ # Group by year and month and count the number of licenses expiring
185
+ expiration_counts = df.groupby(['Expiration Year', 'Expiration Month']).size().reset_index(name='License Count')
186
+
187
+ # Convert the Expiration Date to datetime (if not already)
188
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
189
+
190
+ # Extract the year from Expiration Date
191
+ df['Expiration Year'] = df['Expiration Date'].dt.year
192
+
193
+ # Group by Expiration Year and count the number of licenses expiring
194
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
195
+
196
+ # Create the trend line (line chart)
197
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
198
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
199
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
200
+ tooltip=['Expiration Year', 'License Count']
201
+ ).properties(
202
+ title='Trend Line for License Expirations Over Time'
203
+ )
204
+
205
+ # Show the chart in the Streamlit app
206
+ st.altair_chart(line_chart, use_container_width=True)
207
+ # Convert the Expiration Date to datetime (if not already)
208
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
209
+
210
+ # Extract the year from Expiration Date
211
+ df['Expiration Year'] = df['Expiration Date'].dt.year
212
+
213
+ # Group by Expiration Year and count the number of licenses expiring
214
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
215
+
216
+ # Create the trend line (line chart)
217
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
218
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
219
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
220
+ tooltip=['Expiration Year', 'License Count']
221
+ ).properties(
222
+ title='Trend Line for Licenses Expiring Over Time'
223
+ )
224
+
225
+ # Show the chart in the Streamlit app
226
+ st.altair_chart(line_chart, use_container_width=True)
227
+
228
+
229
+
230
+ # Convert the Expiration Date to datetime (if not already)
231
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
232
+
233
+ # Extract the year from Expiration Date
234
+ df['Expiration Year'] = df['Expiration Date'].dt.year
235
+
236
+ # Group by Expiration Year and count the number of licenses expiring
237
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
238
+
239
+ # Create the trend line (line chart) with interactive data points (circle marks)
240
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
241
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
242
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
243
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
244
+ ).properties(
245
+ title='Trend Line for Licenses Expiring Over Time'
246
+ ).interactive() # Make the chart interactive
247
+
248
+ # Add circle marks at data points
249
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
250
+ x=alt.X('Expiration Year:O'),
251
+ y=alt.Y('License Count:Q'),
252
+ tooltip=['Expiration Year', 'License Count']
253
+ )
254
+
255
+ # Combine the line and point marks
256
+ final_chart = line_chart + points
257
+
258
+ # Show the chart in the Streamlit app
259
+ st.altair_chart(final_chart, use_container_width=True)
260
+
.local/share/code-server/User/History/450b0aa/C2VD.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+
18
+
19
+
20
+ # Count the number of licenses for each License Type and sort by count
21
+ license_type_counts = df['License Type'].value_counts().reset_index()
22
+ license_type_counts.columns = ['License Type', 'Count']
23
+
24
+ # Get the top 10 License Types
25
+ top_10_license_types = license_type_counts.head(10)
26
+ st.header("3. Top 10 License Types by Frequency")
27
+ # Create the bar chart
28
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
29
+ x=alt.X('Count:Q', title='License Count'),
30
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
31
+ color=alt.Color('License Type:N', legend=None),
32
+ tooltip=['License Type', 'Count']
33
+ )
34
+
35
+
36
+ # Show the chart in the Streamlit app
37
+ st.altair_chart(bar_chart, use_container_width=True)
38
+
39
+ st.markdown("""
40
+ **Highlights**: This line chart shows .
41
+ **Design Choices**: Points are added to highlight.
42
+ """)
43
+
44
+ # Convert the Expiration Date to datetime (if not already)
45
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
46
+
47
+ # Extract the year from Expiration Date
48
+ df['Expiration Year'] = df['Expiration Date'].dt.year
49
+
50
+ # Group by Expiration Year and count the number of licenses expiring
51
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
52
+ st.header("4. Trend of Licenses Expiring Over Time")
53
+ # Create the trend line (line chart) with interactive data points (circle marks)
54
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
55
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
56
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
57
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
58
+ )
59
+
60
+
61
+ # Add circle marks at data points
62
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
63
+ x=alt.X('Expiration Year:O'),
64
+ y=alt.Y('License Count:Q'),
65
+ tooltip=['Expiration Year', 'License Count']
66
+ )
67
+
68
+ # Combine the line and point marks
69
+ final_chart = line_chart + points
70
+
71
+ # Show the chart in the Streamlit app
72
+ st.altair_chart(final_chart, use_container_width=True)
73
+
.local/share/code-server/User/History/450b0aa/Dqul.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Parse 'Original Issue Date' as datetime (unparseable values become NaT); df_time keeps only rows with a valid date
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for the bar chart
32
+ st.markdown("""
33
+ **Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
34
+
35
+ **Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
36
+
37
+ **Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
38
+ """)
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
69
+
70
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
73
+ """)
.local/share/code-server/User/History/450b0aa/GB0z.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Parse 'Original Issue Date' as datetime (unparseable values become NaT); df_time keeps only rows with a valid date
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
39
+ If I had more time, I would like to find a way to make the column names more readable and include all license types without making the visualization too cluttered""")
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/HxIz.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+ # Title
18
+ st.title("Licenses Dataset Visualizations")
19
+ st.markdown("This app presents three visualizations of the licenses dataset.")
20
+
21
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
22
+ st.header("1. Distribution of Licenses by Type")
23
+ license_count = df['License Type'].value_counts().reset_index()
24
+ license_count.columns = ['License Type', 'Count']
25
+
26
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
27
+ x=alt.X('Count:Q', title='Number of Licenses'),
28
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
29
+ color=alt.Color('License Type:N', legend=None)
30
+ ).properties(title="Number of Licenses by Type")
31
+
32
+ st.altair_chart(bar_chart, use_container_width=True)
33
+
34
+ st.markdown("""
35
+ **Highlights**: This bar chart shows the distribution of licenses by type.
36
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
37
+ """)
38
+
39
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
40
+ st.header("2. Trend of Licenses Over Time")
41
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
42
+ time_data.columns = ['Year', 'Count']
43
+
44
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
45
+ x=alt.X('Year:O', title='Year'),
46
+ y=alt.Y('Count:Q', title='Number of Licenses'),
47
+ color=alt.value('blue')
48
+ ).properties(title="Trend of Licenses Issued Over Years")
49
+
50
+ st.altair_chart(line_chart, use_container_width=True)
51
+
52
+ st.markdown("""
53
+ **Highlights**: This line chart shows the trend of license issuances over time.
54
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
55
+ """)
56
+
57
+
58
+ # Count the number of licenses for each License Type and sort by count
59
+ license_type_counts = df['License Type'].value_counts().reset_index()
60
+ license_type_counts.columns = ['License Type', 'Count']
61
+
62
+ # Get the top 10 License Types
63
+ top_10_license_types = license_type_counts.head(10)
64
+ st.header("3. Top 10 License Types by Frequency")
65
+ # Create the bar chart
66
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
67
+ x=alt.X('Count:Q', title='License Count'),
68
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
69
+ color=alt.Color('License Type:N', legend=None),
70
+ tooltip=['License Type', 'Count']
71
+ )
72
+
73
+
74
+ # Show the chart in the Streamlit app
75
+ st.altair_chart(bar_chart, use_container_width=True)
76
+
77
+
78
+ # Convert the Expiration Date to datetime (if not already)
79
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
80
+
81
+ # Extract the year from Expiration Date
82
+ df['Expiration Year'] = df['Expiration Date'].dt.year
83
+
84
+ # Group by Expiration Year and count the number of licenses expiring
85
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
86
+ st.header("4. Trend of Licenses Expiring Over Time")  # section heading for the expiration-year trend chart below
87
+ # Create the trend line (line chart) with interactive data points (circle marks)
88
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
89
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
90
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
91
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
92
+ )
93
+
94
+
95
+ # Add circle marks at data points
96
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
97
+ x=alt.X('Expiration Year:O'),
98
+ y=alt.Y('License Count:Q'),
99
+ tooltip=['Expiration Year', 'License Count']
100
+ )
101
+
102
+ # Combine the line and point marks
103
+ final_chart = line_chart + points
104
+
105
+ # Show the chart in the Streamlit app
106
+ st.altair_chart(final_chart, use_container_width=True)
107
+
.local/share/code-server/User/History/450b0aa/I3Tc.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+ # Title
18
+ st.title("Licenses Dataset Visualizations")
19
+ st.markdown("This app presents three visualizations of the licenses dataset.")
20
+
21
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
22
+ st.header("1. Distribution of Licenses by Type")
23
+ license_count = df['License Type'].value_counts().reset_index()
24
+ license_count.columns = ['License Type', 'Count']
25
+
26
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
27
+ x=alt.X('Count:Q', title='Number of Licenses'),
28
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
29
+ color=alt.Color('License Type:N', legend=None)
30
+ ).properties(title="Number of Licenses by Type")
31
+
32
+ st.altair_chart(bar_chart, use_container_width=True)
33
+
34
+ st.markdown("""
35
+ **Highlights**: This bar chart shows the distribution of licenses by type.
36
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
37
+ """)
38
+
39
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
40
+ st.header("2. Trend of Licenses Over Time")
41
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
42
+ time_data.columns = ['Year', 'Count']
43
+
44
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
45
+ x=alt.X('Year:O', title='Year'),
46
+ y=alt.Y('Count:Q', title='Number of Licenses'),
47
+ color=alt.value('blue')
48
+ ).properties(title="Trend of Licenses Issued Over Years")
49
+
50
+ st.altair_chart(line_chart, use_container_width=True)
51
+
52
+ st.markdown("""
53
+ **Highlights**: This line chart shows the trend of license issuances over time.
54
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
55
+ """)
56
+
57
+
58
+ # Count the number of licenses for each License Type and sort by count
59
+ license_type_counts = df['License Type'].value_counts().reset_index()
60
+ license_type_counts.columns = ['License Type', 'Count']
61
+
62
+ # Get the top 10 License Types
63
+ top_10_license_types = license_type_counts.head(10)
64
+
65
+ # Create the bar chart
66
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
67
+ x=alt.X('Count:Q', title='License Count'),
68
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
69
+ color=alt.Color('License Type:N', legend=None),
70
+ tooltip=['License Type', 'Count']
71
+ ).properties(
72
+ title='Top 10 License Types by Frequency'
73
+ )
74
+
75
+ # Show the chart in the Streamlit app
76
+ st.altair_chart(bar_chart, use_container_width=True)
77
+
78
+
79
+ # Convert the Expiration Date to datetime (if not already)
80
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
81
+
82
+ # Extract the year from Expiration Date
83
+ df['Expiration Year'] = df['Expiration Date'].dt.year
84
+
85
+ # Group by Expiration Year and count the number of licenses expiring
86
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
87
+
88
+ # Create the trend line (line chart) with interactive data points (circle marks)
89
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
90
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
91
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
92
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
93
+ )
94
+ )
95
+
96
+ # Add circle marks at data points
97
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
98
+ x=alt.X('Expiration Year:O'),
99
+ y=alt.Y('License Count:Q'),
100
+ tooltip=['Expiration Year', 'License Count']
101
+ )
102
+
103
+ # Combine the line and point marks
104
+ final_chart = line_chart + points
105
+
106
+ # Show the chart in the Streamlit app
107
+ st.altair_chart(final_chart, use_container_width=True)
108
+
.local/share/code-server/User/History/450b0aa/Km75.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Parse 'Original Issue Date' as datetime (unparseable values become NaT); df_time keeps only rows with a valid date
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
37
+ To improve this, I focused on the top 10 most frequent license types.
38
+ Some license names were long, so I made sure that full text is visible when you hover on the bars.
39
+ If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
40
+
41
+ # Second visualization: number of licenses expiring per year
42
+ # Converting 'Expiration Date' to datetime
43
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
44
+ df['Expiration Year'] = df['Expiration Date'].dt.year
45
+
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Trend line
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Write-Up for second plot
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/O3TC.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ #import the data
7
+ #url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
8
+ #df = pd.read_csv(url)
9
+ #df.sum().isnull()
10
+
11
+
12
+
13
+ # #Fill the missing data with Unknown
14
+ # df['County'].fillna('Not available', inplace=True)
15
+ # df['Rep Full Name'].fillna('Not available', inplace=True)
16
+ # df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
17
+ # df['Senator Full Name'].fillna('Unknown', inplace=True)
18
+ # df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
19
+ # df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
20
+ # df['Address'].fillna('Not available', inplace=True)
21
+ # df['Congressional Full Name'].fillna('Unknown', inplace=True)
22
+
23
+ # #Page Title
24
+ # st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
25
+
26
+ # st.subheader("Analyzing the Building Inventory Dataset")
27
+ # #Visualization 1
28
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
29
+
30
+ # df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
31
+ # df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
32
+ # df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
33
+
34
+
35
+ # line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
36
+ # x=alt.X('Year Constructed:Q', title='Year Constructed'),
37
+ # y=alt.Y('count()', title='Number of Buildings'),
38
+ # color=alt.Color('Bldg Status:N', title='Building Status'),
39
+ # tooltip=['Year Constructed', 'count()', 'Bldg Status']
40
+ # ).properties(
41
+ # width=700,
42
+ # height=400,
43
+ # title="Trend of Building Construction by Status"
44
+ # )
45
+ # st.altair_chart(line_chart, use_container_width=True)
46
+ # st.write("""
47
+ # This line chart highlights the trend in the number of buildings constructed over time,
48
+ # categorized by their building status (e.g., whether they are currently in use or not).
49
+ # The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
50
+ # The color encoding separates the buildings by their 'Bldg Status'
51
+ # I used a line plot with points to clearly indicate the number of buildings per year,
52
+ # which helps in identifying trends and peaks. Another reason is that line chart is ideal for
53
+ # visualizing time-series data. The colors are chosen to differentiate the building statuses
54
+ # effectively.
55
+
56
+ # If I had more time, I would consider adding labels to the data points for clarity and
57
+ # perhaps break down the data further by usage description or location for a more detailed analysis.
58
+ # Probably adding hover effects could display additional information, such as the exact count of
59
+ # buildings and their status, when hovering over each data point. This would allow users to gain
60
+ # deeper insights without cluttering the chart. I would also include filtering options so users
61
+ # could select specific building statuses or even a range of years to focus on, which would make
62
+ # the analysis more targeted.
63
+ # """)
64
+
65
+
66
+ # # Visualization 2
67
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
68
+ # county_agency_data = df.dropna(subset=['County', 'Agency Name'])
69
+ # county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
70
+ # county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
71
+
72
+ # county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
73
+ # x=alt.X('County:N', sort='-y', title="County"),
74
+ # y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
75
+ # color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
76
+ # tooltip=['County:N', 'Unique Agencies:Q']
77
+ # ).properties(
78
+ # width=700,
79
+ # height=400,
80
+ # title="Unique Agencies in Each County"
81
+ # )
82
+ # st.altair_chart(county_agency_chart, use_container_width=True)
83
+
84
+ # st.write("""
85
+ # This bar chart highlights the number of unique agencies operating in each county.
86
+ # The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
87
+ # I used a bar chart as it allows for easy comparison between counties based on the number of
88
+ # agencies, and sorting the counties by agency count makes it easier to identify areas with
89
+ # more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
90
+ # differs across counties.
91
+
92
+ # If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
93
+ # focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
94
+ # I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
95
+ # enabling them to analyze trends and compare the distribution of agencies by different time periods.
96
+ # I would also consider adding a search function so that users can look for specific counties
97
+ # or agency types.
98
+ # """)
99
+ # Load State GeoJSON for Map (example if supported)
100
+ # Visualization: Licenses by State
101
+
102
+
103
+ # Load Dataset
104
+ @st.cache
105
+ def load_data():
106
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
107
+ return pd.read_csv(url)
108
+
109
+ df = load_data()
110
+
111
+ # Handle Missing Values for Visualizations
112
+ # 1. Remove rows with missing `Original Issue Date`
113
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
114
+ df_time = df.dropna(subset=['Original Issue Date'])
115
+
116
+ # Title
117
+ st.title("Licenses Dataset Visualizations")
118
+ st.markdown("This app presents three visualizations of the licenses dataset.")
119
+
120
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
121
+ st.header("1. Distribution of Licenses by Type")
122
+ license_count = df['License Type'].value_counts().reset_index()
123
+ license_count.columns = ['License Type', 'Count']
124
+
125
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
126
+ x=alt.X('Count:Q', title='Number of Licenses'),
127
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
128
+ color=alt.Color('License Type:N', legend=None)
129
+ ).properties(title="Number of Licenses by Type")
130
+
131
+ st.altair_chart(bar_chart, use_container_width=True)
132
+
133
+ st.markdown("""
134
+ **Highlights**: This bar chart shows the distribution of licenses by type.
135
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
136
+ """)
137
+
138
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
139
+ st.header("2. Trend of Licenses Over Time")
140
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
141
+ time_data.columns = ['Year', 'Count']
142
+
143
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
144
+ x=alt.X('Year:O', title='Year'),
145
+ y=alt.Y('Count:Q', title='Number of Licenses'),
146
+ color=alt.value('blue')
147
+ ).properties(title="Trend of Licenses Issued Over Years")
148
+
149
+ st.altair_chart(line_chart, use_container_width=True)
150
+
151
+ st.markdown("""
152
+ **Highlights**: This line chart shows the trend of license issuances over time.
153
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
154
+ """)
155
+
156
+
157
+ # Count the number of licenses for each License Type and sort by count
158
+ license_type_counts = df['License Type'].value_counts().reset_index()
159
+ license_type_counts.columns = ['License Type', 'Count']
160
+
161
+ # Get the top 10 License Types
162
+ top_10_license_types = license_type_counts.head(10)
163
+
164
+ # Create the bar chart
165
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
166
+ x=alt.X('Count:Q', title='License Count'),
167
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
168
+ color=alt.Color('License Type:N', legend=None),
169
+ tooltip=['License Type', 'Count']
170
+ ).properties(
171
+ title='Top 10 License Types by Frequency'
172
+ )
173
+
174
+ # Show the chart in the Streamlit app
175
+ st.altair_chart(bar_chart, use_container_width=True)
176
+
177
+ # Convert the Expiration Date to datetime (if not already)
178
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
179
+
180
+ # Extract the month and year from Expiration Date
181
+ df['Expiration Year'] = df['Expiration Date'].dt.year
182
+ df['Expiration Month'] = df['Expiration Date'].dt.month
183
+
184
+ # Group by year and month and count the number of licenses expiring
185
+ expiration_counts = df.groupby(['Expiration Year', 'Expiration Month']).size().reset_index(name='License Count')
186
+
187
+ # Convert the Expiration Date to datetime (if not already)
188
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
189
+
190
+ # Extract the year from Expiration Date
191
+ df['Expiration Year'] = df['Expiration Date'].dt.year
192
+
193
+ # Group by Expiration Year and count the number of licenses expiring
194
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
195
+
196
+ # Create the trend line (line chart)
197
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
198
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
199
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
200
+ tooltip=['Expiration Year', 'License Count']
201
+ ).properties(
202
+ title='Trend Line for License Expirations Over Time'
203
+ )
204
+
205
+ # Show the chart in the Streamlit app
206
+ st.altair_chart(line_chart, use_container_width=True)
207
+ # Convert the Expiration Date to datetime (if not already)
208
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
209
+
210
+ # Extract the year from Expiration Date
211
+ df['Expiration Year'] = df['Expiration Date'].dt.year
212
+
213
+ # Group by Expiration Year and count the number of licenses expiring
214
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
215
+
216
+ # Create the trend line (line chart)
217
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
218
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
219
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
220
+ tooltip=['Expiration Year', 'License Count']
221
+ ).properties(
222
+ title='Trend Line for Licenses Expiring Over Time'
223
+ )
224
+
225
+ # Show the chart in the Streamlit app
226
+ st.altair_chart(line_chart, use_container_width=True)
227
+
228
+
229
+
230
+ # Convert the Expiration Date to datetime (if not already)
231
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
232
+
233
+ # Extract the year from Expiration Date
234
+ df['Expiration Year'] = df['Expiration Date'].dt.year
235
+
236
+ # Group by Expiration Year and count the number of licenses expiring
237
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
238
+
239
+ # Create the trend line (line chart) with interactive data points (circle marks)
240
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
241
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
242
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
243
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
244
+ ).properties(
245
+ title='Trend Line for Licenses Expiring Over Time'
246
+ ).interactive() # Make the chart interactive
247
+
248
+ # Add circle marks at data points
249
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=100).encode(
250
+ x=alt.X('Expiration Year:O'),
251
+ y=alt.Y('License Count:Q'),
252
+ tooltip=['Expiration Year', 'License Count']
253
+ )
254
+
255
+ # Combine the line and point marks
256
+ final_chart = line_chart + points
257
+
258
+ # Show the chart in the Streamlit app
259
+ st.altair_chart(final_chart, use_container_width=True)
260
+
.local/share/code-server/User/History/450b0aa/P3er.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+ # Title
18
+ st.title("Licenses Dataset Visualizations")
19
+ st.markdown("This app presents three visualizations of the licenses dataset.")
20
+
21
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
22
+ st.header("1. Distribution of Licenses by Type")
23
+ license_count = df['License Type'].value_counts().reset_index()
24
+ license_count.columns = ['License Type', 'Count']
25
+
26
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
27
+ x=alt.X('Count:Q', title='Number of Licenses'),
28
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
29
+ color=alt.Color('License Type:N', legend=None)
30
+ ).properties(title="Number of Licenses by Type")
31
+
32
+ st.altair_chart(bar_chart, use_container_width=True)
33
+
34
+ st.markdown("""
35
+ **Highlights**: This bar chart shows the distribution of licenses by type.
36
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
37
+ """)
38
+
39
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
40
+ st.header("2. Trend of Licenses Over Time")
41
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
42
+ time_data.columns = ['Year', 'Count']
43
+
44
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
45
+ x=alt.X('Year:O', title='Year'),
46
+ y=alt.Y('Count:Q', title='Number of Licenses'),
47
+ color=alt.value('blue')
48
+ ).properties(title="Trend of Licenses Issued Over Years")
49
+
50
+ st.altair_chart(line_chart, use_container_width=True)
51
+
52
+ st.markdown("""
53
+ **Highlights**: This line chart shows the trend of license issuances over time.
54
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
55
+ """)
56
+
57
+
58
+ # Count the number of licenses for each License Type and sort by count
59
+ license_type_counts = df['License Type'].value_counts().reset_index()
60
+ license_type_counts.columns = ['License Type', 'Count']
61
+
62
+ # Get the top 10 License Types
63
+ top_10_license_types = license_type_counts.head(10)
64
+ st.header("3. Top 10 License Types by Frequency")
65
+ # Create the bar chart
66
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
67
+ x=alt.X('Count:Q', title='License Count'),
68
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
69
+ color=alt.Color('License Type:N', legend=None),
70
+ tooltip=['License Type', 'Count']
71
+ )
72
+
73
+
74
+ # Show the chart in the Streamlit app
75
+ st.altair_chart(bar_chart, use_container_width=True)
76
+
77
+
78
+ # Convert the Expiration Date to datetime (if not already)
79
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
80
+
81
+ # Extract the year from Expiration Date
82
+ df['Expiration Year'] = df['Expiration Date'].dt.year
83
+
84
+ # Group by Expiration Year and count the number of licenses expiring
85
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
86
+
87
+ # Create the trend line (line chart) with interactive data points (circle marks)
88
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
89
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
90
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
91
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
92
+ )
93
+
94
+
95
+ # Add circle marks at data points
96
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
97
+ x=alt.X('Expiration Year:O'),
98
+ y=alt.Y('License Count:Q'),
99
+ tooltip=['Expiration Year', 'License Count']
100
+ )
101
+
102
+ # Combine the line and point marks
103
+ final_chart = line_chart + points
104
+
105
+ # Show the chart in the Streamlit app
106
+ st.altair_chart(final_chart, use_container_width=True)
107
+
.local/share/code-server/User/History/450b0aa/Q5uV.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
69
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
70
+
71
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
72
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
73
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
74
+ If I had more time, I would like to break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it less cluttered.
75
+ """)
.local/share/code-server/User/History/450b0aa/QeUw.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset.
34
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered and less informative. To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset. Since some license names were long, I made them hoverable to ensure that the full text is visible when interacting with the chart.""")
35
+
36
+ # Convert 'Expiration Date' to datetime and extract the year
37
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
38
+ df['Expiration Year'] = df['Expiration Date'].dt.year
39
+
40
+ # Group by Expiration Year and count the number of licenses expiring
41
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
42
+ st.header("4. Trend of Licenses Expiring Over Time")
43
+
44
+ # Create trend line (line chart) with circle marks at data points
45
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
46
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
47
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
48
+ tooltip=['Expiration Year', 'License Count']
49
+ )
50
+
51
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
52
+ x=alt.X('Expiration Year:O'),
53
+ y=alt.Y('License Count:Q'),
54
+ tooltip=['Expiration Year', 'License Count']
55
+ )
56
+
57
+ # Combine line and points
58
+ final_chart = line_chart + points
59
+
60
+ st.altair_chart(final_chart, use_container_width=True)
61
+
62
+ # Markdown Write-Up for the trend line chart
63
+ st.markdown("""
64
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
65
+
66
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
67
+
68
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
69
+ """)
.local/share/code-server/User/History/450b0aa/RWSp.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
69
+
70
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
73
+ """)
.local/share/code-server/User/History/450b0aa/RuPD.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows with missing data
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+
30
+ st.altair_chart(bar_chart, use_container_width=True)
31
+
32
+ # Markdown Write-Up for the bar chart
33
+ st.markdown("""
34
+ **Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
35
+
36
+ **Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
37
+
38
+ **Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
39
+ """)
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("4. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
70
+
71
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
72
+
73
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
74
+ """)
.local/share/code-server/User/History/450b0aa/TdSt.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+ # Title
18
+ st.title("Licenses Dataset Visualizations")
19
+ st.markdown("This app presents three visualizations of the licenses dataset.")
20
+
21
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
22
+ st.header("1. Distribution of Licenses by Type")
23
+ license_count = df['License Type'].value_counts().reset_index()
24
+ license_count.columns = ['License Type', 'Count']
25
+
26
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
27
+ x=alt.X('Count:Q', title='Number of Licenses'),
28
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
29
+ color=alt.Color('License Type:N', legend=None)
30
+ ).properties(title="Number of Licenses by Type")
31
+
32
+ st.altair_chart(bar_chart, use_container_width=True)
33
+
34
+ st.markdown("""
35
+ **Highlights**: This bar chart shows the distribution of licenses by type.
36
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
37
+ """)
38
+
39
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
40
+ st.header("2. Trend of Licenses Over Time")
41
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
42
+ time_data.columns = ['Year', 'Count']
43
+
44
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
45
+ x=alt.X('Year:O', title='Year'),
46
+ y=alt.Y('Count:Q', title='Number of Licenses'),
47
+ color=alt.value('blue')
48
+ ).properties(title="Trend of Licenses Issued Over Years")
49
+
50
+ st.altair_chart(line_chart, use_container_width=True)
51
+
52
+ st.markdown("""
53
+ **Highlights**: This line chart shows the trend of license issuances over time.
54
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
55
+ """)
56
+
57
+
58
+ # Count the number of licenses for each License Type and sort by count
59
+ license_type_counts = df['License Type'].value_counts().reset_index()
60
+ license_type_counts.columns = ['License Type', 'Count']
61
+
62
+ # Get the top 10 License Types
63
+ top_10_license_types = license_type_counts.head(10)
64
+
65
+ # Create the bar chart
66
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
67
+ x=alt.X('Count:Q', title='License Count'),
68
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
69
+ color=alt.Color('License Type:N', legend=None),
70
+ tooltip=['License Type', 'Count']
71
+ ).properties(
72
+ title='Top 10 License Types by Frequency'
73
+ )
74
+
75
+ # Show the chart in the Streamlit app
76
+ st.altair_chart(bar_chart, use_container_width=True)
77
+
78
+
79
+ # Convert the Expiration Date to datetime (if not already)
80
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
81
+
82
+ # Extract the year from Expiration Date
83
+ df['Expiration Year'] = df['Expiration Date'].dt.year
84
+
85
+ # Group by Expiration Year and count the number of licenses expiring
86
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
87
+
88
+ # Create the trend line (line chart) with interactive data points (circle marks)
89
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
90
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
91
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
92
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
93
+ ).properties(
94
+ title='Trend Line for Licenses Expiring Over Time'
95
+ )
96
+
97
+ # Add circle marks at data points
98
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
99
+ x=alt.X('Expiration Year:O'),
100
+ y=alt.Y('License Count:Q'),
101
+ tooltip=['Expiration Year', 'License Count']
102
+ )
103
+
104
+ # Combine the line and point marks
105
+ final_chart = line_chart + points
106
+
107
+ # Show the chart in the Streamlit app
108
+ st.altair_chart(final_chart, use_container_width=True)
109
+
.local/share/code-server/User/History/450b0aa/Vrxk.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the licenses CSV (licenses_fall2022.csv) as a DataFrame.

    Streamlit re-executes the whole script on every interaction, so the
    download is wrapped in ``st.cache_data`` to avoid fetching the file on
    each rerun. Callers receive their own copy of the cached frame, so adding
    columns to the result does not affect later calls.

    Returns:
        The table exactly as parsed by ``pandas.read_csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
9
+
10
# Streamlit page: two Altair charts over the licenses dataset, each followed
# by a short write-up.
df = load_data()

# ---- 1. Top 10 license types -------------------------------------------------
# value_counts() returns counts in descending order, so head(10) is the top 10.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
st.header("1. Top 10 License Types by Frequency")

bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    # sort='-x' orders the bars from the largest count to the smallest.
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)
st.altair_chart(bar_chart, use_container_width=True)

# Write-up for the first plot
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")

# ---- 2. Licenses expiring per year -------------------------------------------
# errors='coerce' turns unparseable dates into NaT; groupby() then leaves those
# rows out because it drops missing keys by default.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
st.header("2. Trend of Licenses Expiring Over Time")

# One shared base keeps the line and point layers on identical encodings.
trend_base = alt.Chart(expiration_counts).encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
)
line_chart = trend_base.mark_line()
points = trend_base.mark_point(shape='circle', filled=True, size=50)

# Layer the circles on top of the line
final_chart = line_chart + points

st.altair_chart(final_chart, use_container_width=True)

# Write-up for the trend line chart
st.markdown("""
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make all of it more organized.""")
.local/share/code-server/User/History/450b0aa/W0T9.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the licenses CSV (licenses_fall2022.csv) as a DataFrame.

    Streamlit re-executes the whole script on every interaction, so the
    download is wrapped in ``st.cache_data`` to avoid fetching the file on
    each rerun. Callers receive their own copy of the cached frame, so adding
    columns to the result does not affect later calls.

    Returns:
        The table exactly as parsed by ``pandas.read_csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
9
+
10
# Streamlit page: two Altair charts over the licenses dataset, each followed
# by a short write-up.
df = load_data()

# ---- 1. Top 10 license types -------------------------------------------------
# value_counts() returns counts in descending order, so head(10) is the top 10.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
st.header("1. Top 10 License Types by Frequency")

bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    # sort='-x' orders the bars from the largest count to the smallest.
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)
st.altair_chart(bar_chart, use_container_width=True)

# Write-up for the first plot
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")

# ---- 2. Licenses expiring per year -------------------------------------------
# errors='coerce' turns unparseable dates into NaT; groupby() then leaves those
# rows out because it drops missing keys by default.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
# This is the second chart on the page, so it is numbered 2.
st.header("2. Trend of Licenses Expiring Over Time")

# One shared base keeps the line and point layers on identical encodings.
trend_base = alt.Chart(expiration_counts).encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
)
line_chart = trend_base.mark_line()
points = trend_base.mark_point(shape='circle', filled=True, size=50)

# Layer the circles on top of the line
final_chart = line_chart + points

st.altair_chart(final_chart, use_container_width=True)

# Write-up for the trend line chart (counts are per year, so no seasonal claim)
st.markdown("""
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.

**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
.local/share/code-server/User/History/450b0aa/WCDW.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+
8
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the licenses CSV (licenses_fall2022.csv) as a DataFrame.

    Streamlit re-executes the whole script on every interaction, so the
    download is wrapped in ``st.cache_data`` to avoid fetching the file on
    each rerun. Callers receive their own copy of the cached frame, so adding
    columns to the result does not affect later calls.

    Returns:
        The table exactly as parsed by ``pandas.read_csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
11
+
12
# Streamlit page: two Altair charts over the licenses dataset.
df = load_data()

# ---- 1. Top 10 license types -------------------------------------------------
# value_counts() returns counts in descending order, so head(10) is the top 10.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
# Only two charts are rendered, so the sections are numbered 1 and 2.
st.header("1. Top 10 License Types by Frequency")

bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    # sort='-x' orders the bars from the largest count to the smallest.
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)

# Show the chart in the Streamlit app
st.altair_chart(bar_chart, use_container_width=True)

st.markdown("""
**Highlights**: This bar chart shows the 10 most frequent license types.
**Design Choices**: Bars are sorted by count, and tooltips show the license type and its count.
""")

# ---- 2. Licenses expiring per year -------------------------------------------
# errors='coerce' turns unparseable dates into NaT; groupby() then leaves those
# rows out because it drops missing keys by default.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
st.header("2. Trend of Licenses Expiring Over Time")

# One shared base keeps the line and point layers on identical encodings.
trend_base = alt.Chart(expiration_counts).encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']  # Tooltip for interactivity
)
line_chart = trend_base.mark_line()
points = trend_base.mark_point(shape='circle', filled=True, size=50)

# Layer the circles on top of the line
final_chart = line_chart + points

# Show the chart in the Streamlit app
st.altair_chart(final_chart, use_container_width=True)
73
+
.local/share/code-server/User/History/450b0aa/WLXq.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the licenses CSV (licenses_fall2022.csv) as a DataFrame.

    Streamlit re-executes the whole script on every interaction, so the
    download is wrapped in ``st.cache_data`` to avoid fetching the file on
    each rerun. Callers receive their own copy of the cached frame, so adding
    columns to the result does not affect later calls.

    Returns:
        The table exactly as parsed by ``pandas.read_csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
9
+
10
# Streamlit page: two Altair charts over the licenses dataset, each followed
# by a short write-up.
df = load_data()

# ---- 1. Top 10 license types -------------------------------------------------
# value_counts() returns counts in descending order, so head(10) is the top 10.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
st.header("1. Top 10 License Types by Frequency")

bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    # sort='-x' orders the bars from the largest count to the smallest.
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)
st.altair_chart(bar_chart, use_container_width=True)

# Write-up for the first plot
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset.
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset. Since some license names were long, I made them hoverable to ensure that the full text is visible when interacting with the chart.""")

# ---- 2. Licenses expiring per year -------------------------------------------
# errors='coerce' turns unparseable dates into NaT; groupby() then leaves those
# rows out because it drops missing keys by default.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
# This is the second chart on the page, so it is numbered 2.
st.header("2. Trend of Licenses Expiring Over Time")

# One shared base keeps the line and point layers on identical encodings.
trend_base = alt.Chart(expiration_counts).encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
)
line_chart = trend_base.mark_line()
points = trend_base.mark_point(shape='circle', filled=True, size=50)

# Layer the circles on top of the line
final_chart = line_chart + points

st.altair_chart(final_chart, use_container_width=True)

# Write-up for the trend line chart (counts are per year, so no seasonal claim)
st.markdown("""
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.

**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
.local/share/code-server/User/History/450b0aa/YJb8.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the licenses CSV (licenses_fall2022.csv) as a DataFrame.

    Streamlit re-executes the whole script on every interaction, so the
    download is wrapped in ``st.cache_data`` to avoid fetching the file on
    each rerun. Callers receive their own copy of the cached frame, so adding
    columns to the result does not affect later calls.

    Returns:
        The table exactly as parsed by ``pandas.read_csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
9
+
10
# Streamlit page: two Altair charts over the licenses dataset, each followed
# by a short write-up.
df = load_data()

# ---- 1. Top 10 license types -------------------------------------------------
# value_counts() returns counts in descending order, so head(10) is the top 10.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
st.header("1. Top 10 License Types by Frequency")

bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    # sort='-x' orders the bars from the largest count to the smallest.
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)
st.altair_chart(bar_chart, use_container_width=True)

# Write-up for the first plot
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")

# ---- 2. Licenses expiring per year -------------------------------------------
# errors='coerce' turns unparseable dates into NaT; groupby() then leaves those
# rows out because it drops missing keys by default.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
# This is the second chart on the page, so it is numbered 2.
st.header("2. Trend of Licenses Expiring Over Time")

# One shared base keeps the line and point layers on identical encodings.
trend_base = alt.Chart(expiration_counts).encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
)
line_chart = trend_base.mark_line()
points = trend_base.mark_point(shape='circle', filled=True, size=50)

# Layer the circles on top of the line
final_chart = line_chart + points

st.altair_chart(final_chart, use_container_width=True)

# Write-up for the trend line chart (counts are per year, so no seasonal claim)
st.markdown("""
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.

**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
.local/share/code-server/User/History/450b0aa/YSbm.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the licenses CSV (licenses_fall2022.csv) as a DataFrame.

    Streamlit re-executes the whole script on every interaction, so the
    download is wrapped in ``st.cache_data`` to avoid fetching the file on
    each rerun. Callers receive their own copy of the cached frame, so adding
    columns to the result does not affect later calls.

    Returns:
        The table exactly as parsed by ``pandas.read_csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
9
+
10
# Streamlit page: two Altair charts over the licenses dataset, each followed
# by a short write-up.
df = load_data()

# ---- 1. Top 10 license types -------------------------------------------------
# value_counts() returns counts in descending order, so head(10) is the top 10.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
st.header("1. Top 10 License Types by Frequency")

bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    # sort='-x' orders the bars from the largest count to the smallest.
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)
st.altair_chart(bar_chart, use_container_width=True)

# Write-up for the first plot
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
To improve this, I focused on the top 10 most frequent license types, allowing for a clearer and more insightful visualization.
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered.""")

# ---- 2. Licenses expiring per year -------------------------------------------
# errors='coerce' turns unparseable dates into NaT; groupby() then leaves those
# rows out because it drops missing keys by default.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
st.header("2. Trend of Licenses Expiring Over Time")

# One shared base keeps the line and point layers on identical encodings.
trend_base = alt.Chart(expiration_counts).encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
)
line_chart = trend_base.mark_line()
points = trend_base.mark_point(shape='circle', filled=True, size=50)

# Layer the circles on top of the line
final_chart = line_chart + points

st.altair_chart(final_chart, use_container_width=True)

# Write-up for the trend line chart
st.markdown("""
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make all of it more organized.""")
.local/share/code-server/User/History/450b0aa/cWhv.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ #import the data
7
+ #url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
8
+ #df = pd.read_csv(url)
9
+ #df.sum().isnull()
10
+
11
+
12
+
13
+ # #Fill the missing data with Unknown
14
+ # df['County'].fillna('Not available', inplace=True)
15
+ # df['Rep Full Name'].fillna('Not available', inplace=True)
16
+ # df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
17
+ # df['Senator Full Name'].fillna('Unknown', inplace=True)
18
+ # df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
19
+ # df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
20
+ # df['Address'].fillna('Not available', inplace=True)
21
+ # df['Congressional Full Name'].fillna('Unknown', inplace=True)
22
+
23
+ # #Page Title
24
+ # st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
25
+
26
+ # st.subheader("Analyzing the Building Inventory Dataset")
27
+ # #Visualization 1
28
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
29
+
30
+ # df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
31
+ # df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
32
+ # df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
33
+
34
+
35
+ # line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
36
+ # x=alt.X('Year Constructed:Q', title='Year Constructed'),
37
+ # y=alt.Y('count()', title='Number of Buildings'),
38
+ # color=alt.Color('Bldg Status:N', title='Building Status'),
39
+ # tooltip=['Year Constructed', 'count()', 'Bldg Status']
40
+ # ).properties(
41
+ # width=700,
42
+ # height=400,
43
+ # title="Trend of Building Construction by Status"
44
+ # )
45
+ # st.altair_chart(line_chart, use_container_width=True)
46
+ # st.write("""
47
+ # This line chart highlights the trend in the number of buildings constructed over time,
48
+ # categorized by their building status (e.g., whether they are currently in use or not).
49
+ # The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
50
+ # The color encoding separates the buildings by their 'Bldg Status'
51
+ # I used a line plot with points to clearly indicate the number of buildings per year,
52
+ # which helps in identifying trends and peaks. Another reason is that line chart is ideal for
53
+ # visualizing time-series data. The colors are chosen to differentiate the building statuses
54
+ # effectively.
55
+
56
+ # If I had more time, I would consider adding labels to the data points for clarity and
57
+ # perhaps break down the data further by usage description or location for a more detailed analysis.
58
+ # Probably adding hover effects could display additional information, such as the exact count of
59
+ # buildings and their status, when hovering over each data point. This would allow users to gain
60
+ # deeper insights without cluttering the chart. I would also include filtering options so users
61
+ # could select specific building statuses or even a range of years to focus on, which would make
62
+ # the analysis more targeted.
63
+ # """)
64
+
65
+
66
+ # # Visualization 2
67
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
68
+ # county_agency_data = df.dropna(subset=['County', 'Agency Name'])
69
+ # county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
70
+ # county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
71
+
72
+ # county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
73
+ # x=alt.X('County:N', sort='-y', title="County"),
74
+ # y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
75
+ # color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
76
+ # tooltip=['County:N', 'Unique Agencies:Q']
77
+ # ).properties(
78
+ # width=700,
79
+ # height=400,
80
+ # title="Unique Agencies in Each County"
81
+ # )
82
+ # st.altair_chart(county_agency_chart, use_container_width=True)
83
+
84
+ # st.write("""
85
+ # This bar chart highlights the number of unique agencies operating in each county.
86
+ # The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
87
+ # I used a bar chart as it allows for easy comparison between counties based on the number of
88
+ # agencies, and sorting the counties by agency count makes it easier to identify areas with
89
+ # more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
90
+ # differs across counties.
91
+
92
+ # If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
93
+ # focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
94
+ # I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
95
+ # enabling them to analyze trends and compare the distribution of agencies by different time periods.
96
+ # I would also consider adding a search function so that users can look for specific counties
97
+ # or agency types.
98
+ # """)
99
+ # Load State GeoJSON for Map (example if supported)
100
+ # Visualization: Licenses by State
101
+
102
+
103
+ # Load Dataset
104
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the licenses CSV (licenses_fall2022.csv) as a DataFrame.

    ``st.cache_data`` replaces the deprecated ``st.cache`` decorator. It
    caches the download across Streamlit reruns and hands each caller its own
    copy, so the script can add columns to the result without mutating the
    cached object.

    Returns:
        The table exactly as parsed by ``pandas.read_csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
108
+
109
# Streamlit page: four Altair charts over the licenses dataset.
# Work on a private copy: the date columns added below must not mutate the
# object held by load_data's cache.
df = load_data().copy()

# Rows whose issue date cannot be parsed become NaT and are dropped for the
# issue-date trend only; the other charts use the full frame.
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df_time = df.dropna(subset=['Original Issue Date'])

# Title
st.title("Licenses Dataset Visualizations")
st.markdown("This app presents four visualizations of the licenses dataset.")

# License counts per type, computed once and shared by charts 1 and 3.
# value_counts() returns counts in descending order.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']

# Visualization 1: Bar Chart (Distribution of Licenses by Type)
st.header("1. Distribution of Licenses by Type")
all_types_chart = alt.Chart(license_type_counts).mark_bar().encode(
    x=alt.X('Count:Q', title='Number of Licenses'),
    y=alt.Y('License Type:N', sort='-x', title='License Type'),
    color=alt.Color('License Type:N', legend=None)
).properties(title="Number of Licenses by Type")

st.altair_chart(all_types_chart, use_container_width=True)

st.markdown("""
**Highlights**: This bar chart shows the distribution of licenses by type.
**Design Choices**: A horizontal bar chart is used for better readability of license types.
""")

# Visualization 2: Line Chart (Trend of Licenses Over Time)
st.header("2. Trend of Licenses Over Time")
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
time_data.columns = ['Year', 'Count']

issued_chart = alt.Chart(time_data).mark_line(point=True).encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('Count:Q', title='Number of Licenses'),
    color=alt.value('blue')
).properties(title="Trend of Licenses Issued Over Years")

st.altair_chart(issued_chart, use_container_width=True)

st.markdown("""
**Highlights**: This line chart shows the trend of license issuances over time.
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
""")

# Visualization 3: Bar Chart (Top 10 License Types)
st.header("3. Top 10 License Types by Frequency")
top_10_license_types = license_type_counts.head(10)

top_10_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    # sort='-x' orders the bars from the largest count to the smallest.
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
).properties(
    title='Top 10 License Types by Frequency'
)

st.altair_chart(top_10_chart, use_container_width=True)

# Visualization 4: Line Chart (Licenses Expiring per Year)
st.header("4. Trend of Licenses Expiring Over Time")
# errors='coerce' turns unparseable dates into NaT; groupby() then leaves those
# rows out because it drops missing keys by default.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')

# One shared base keeps the line and point layers on identical encodings.
expiring_base = alt.Chart(expiration_counts).encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']  # Tooltip for interactivity
)
expiring_line = expiring_base.mark_line().properties(
    title='Trend Line for Licenses Expiring Over Time'
).interactive()  # Make the chart interactive
expiring_points = expiring_base.mark_point(shape='circle', filled=True, size=50)

# Layer the circles on top of the line
final_chart = expiring_line + expiring_points

st.altair_chart(final_chart, use_container_width=True)
208
+
.local/share/code-server/User/History/450b0aa/e7iD.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+
18
+
19
+
20
+ # Count the number of licenses for each License Type and sort by count
21
+ license_type_counts = df['License Type'].value_counts().reset_index()
22
+ license_type_counts.columns = ['License Type', 'Count']
23
+
24
+ # Get the top 10 License Types
25
+ top_10_license_types = license_type_counts.head(10)
26
+ st.header("3. Top 10 License Types by Frequency")
27
+ # Create the bar chart
28
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
29
+ x=alt.X('Count:Q', title='License Count'),
30
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
31
+ color=alt.Color('License Type:N', legend=None),
32
+ tooltip=['License Type', 'Count']
33
+ )
34
+
35
+
36
+ # Show the chart in the Streamlit app
37
+ st.altair_chart(bar_chart, use_container_width=True)
38
+
39
+ st.markdown("""
40
+ **Highlights**: This bar chart shows the ten most frequent license types.
41
+ **Design Choices**: Bars are sorted by count and colored by license type.
42
+ """)
43
+
44
+ # Convert the Expiration Date to datetime (if not already)
45
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
46
+
47
+ # Extract the year from Expiration Date
48
+ df['Expiration Year'] = df['Expiration Date'].dt.year
49
+
50
+ # Group by Expiration Year and count the number of licenses expiring
51
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
52
+ st.header("4. Trend of Licenses Expiring Over Time")
53
+ # Create the trend line (line chart) with interactive data points (circle marks)
54
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
55
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
56
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
57
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
58
+ )
59
+
60
+
61
+ # Add circle marks at data points
62
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
63
+ x=alt.X('Expiration Year:O'),
64
+ y=alt.Y('License Count:Q'),
65
+ tooltip=['Expiration Year', 'License Count']
66
+ )
67
+
68
+ # Combine the line and point marks
69
+ final_chart = line_chart + points
70
+
71
+ # Show the chart in the Streamlit app
72
+ st.altair_chart(final_chart, use_container_width=True)
73
+
.local/share/code-server/User/History/450b0aa/entries.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"version":1,"resource":"vscode-remote://us.prairielearn.com/home/coder/app.py","entries":[{"id":"s0Hs.py","timestamp":1732610271002},{"id":"O3TC.py","timestamp":1732610434817},{"id":"BvGZ.py","timestamp":1732610476290},{"id":"cWhv.py","timestamp":1732610582621},{"id":"AIA9.py","timestamp":1732610651134},{"id":"TdSt.py","timestamp":1732610735299},{"id":"I3Tc.py","timestamp":1732610806873},{"id":"iwOA.py","timestamp":1732610839592},{"id":"P3er.py","timestamp":1732610855923},{"id":"HxIz.py","timestamp":1732610881797},{"id":"fdvg.py","timestamp":1732610922722},{"id":"e7iD.py","timestamp":1732611034843},{"id":"WCDW.py","timestamp":1732611365984},{"id":"v1EL.py","timestamp":1732611436229},{"id":"gwPw.py","source":"undoRedo.source","timestamp":1732611467020},{"id":"C2VD.py","source":"undoRedo.source","timestamp":1732611484102},{"id":"vTno.py","timestamp":1732611509771},{"id":"jtCb.py","timestamp":1732611541992},{"id":"2wHf.py","timestamp":1732611585052},{"id":"RuPD.py","timestamp":1732611595384},{"id":"Dqul.py","timestamp":1732611615454},{"id":"BA55.py","timestamp":1732611631889},{"id":"2sco.py","timestamp":1732611708324},{"id":"4TtO.py","timestamp":1732611993598},{"id":"QeUw.py","timestamp":1732612015814},{"id":"WLXq.py","timestamp":1732612053461},{"id":"hisc.py","timestamp":1732612075256},{"id":"zuML.py","timestamp":1732612088958},{"id":"YJb8.py","timestamp":1732612185652},{"id":"ksQB.py","timestamp":1732612338428},{"id":"RWSp.py","source":"undoRedo.source","timestamp":1732612367906},{"id":"zPF4.py","timestamp":1732612468393},{"id":"W0T9.py","timestamp":1732612478852},{"id":"8wPI.py","timestamp":1732612490534},{"id":"vzqC.py","timestamp":1732612520566},{"id":"0rFy.py","timestamp":1732612794232},{"id":"gJ7Y.py","timestamp":1732612838289},{"id":"Q5uV.py","timestamp":1732612850712},{"id":"8qMH.py","timestamp":1732612947711},{"id":"Vrxk.py","timestamp":1732612978297},{"id":"GB0z.py","timestamp":1732613153833},{"id":"fjsF.py","timestamp":1732613170235},{"id":"53ry.py","tim
estamp":1732613225839},{"id":"YSbm.py","timestamp":1732613251281},{"id":"mE68.py","timestamp":1732613291160},{"id":"53Ql.py","timestamp":1732613323548},{"id":"rykv.py","timestamp":1732613389594},{"id":"qzJp.py","timestamp":1732613401918},{"id":"zoVr.py","timestamp":1732613418498},{"id":"Km75.py","timestamp":1732613439341}]}
.local/share/code-server/User/History/450b0aa/fdvg.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+ # Title
18
+ st.title("Licenses Dataset Visualizations")
19
+ st.markdown("This app presents three visualizations of the licenses dataset.")
20
+
21
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
22
+ st.header("1. Distribution of Licenses by Type")
23
+ license_count = df['License Type'].value_counts().reset_index()
24
+ license_count.columns = ['License Type', 'Count']
25
+
26
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
27
+ x=alt.X('Count:Q', title='Number of Licenses'),
28
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
29
+ color=alt.Color('License Type:N', legend=None)
30
+ ).properties(title="Number of Licenses by Type")
31
+
32
+ st.altair_chart(bar_chart, use_container_width=True)
33
+
34
+ st.markdown("""
35
+ **Highlights**: This bar chart shows the distribution of licenses by type.
36
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
37
+ """)
38
+
39
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
40
+ st.header("2. Trend of Licenses Over Time")
41
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
42
+ time_data.columns = ['Year', 'Count']
43
+
44
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
45
+ x=alt.X('Year:O', title='Year'),
46
+ y=alt.Y('Count:Q', title='Number of Licenses'),
47
+ color=alt.value('blue')
48
+ ).properties(title="Trend of Licenses Issued Over Years")
49
+
50
+ st.altair_chart(line_chart, use_container_width=True)
51
+
52
+ st.markdown("""
53
+ **Highlights**: This line chart shows the trend of license issuances over time.
54
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
55
+ """)
56
+
57
+
58
+ # Count the number of licenses for each License Type and sort by count
59
+ license_type_counts = df['License Type'].value_counts().reset_index()
60
+ license_type_counts.columns = ['License Type', 'Count']
61
+
62
+ # Get the top 10 License Types
63
+ top_10_license_types = license_type_counts.head(10)
64
+ st.header("3. Top 10 License Types by Frequency")
65
+ # Create the bar chart
66
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
67
+ x=alt.X('Count:Q', title='License Count'),
68
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
69
+ color=alt.Color('License Type:N', legend=None),
70
+ tooltip=['License Type', 'Count']
71
+ )
72
+
73
+
74
+ # Show the chart in the Streamlit app
75
+ st.altair_chart(bar_chart, use_container_width=True)
76
+
77
+
78
+ # Convert the Expiration Date to datetime (if not already)
79
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
80
+
81
+ # Extract the year from Expiration Date
82
+ df['Expiration Year'] = df['Expiration Date'].dt.year
83
+
84
+ # Group by Expiration Year and count the number of licenses expiring
85
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
86
+ st.header("4. Trend of Licenses Expiring Over Time")
87
+ # Create the trend line (line chart) with interactive data points (circle marks)
88
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
89
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
90
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
91
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
92
+ )
93
+
94
+
95
+ # Add circle marks at data points
96
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
97
+ x=alt.X('Expiration Year:O'),
98
+ y=alt.Y('License Count:Q'),
99
+ tooltip=['Expiration Year', 'License Count']
100
+ )
101
+
102
+ # Combine the line and point marks
103
+ final_chart = line_chart + points
104
+
105
+ # Show the chart in the Streamlit app
106
+ st.altair_chart(final_chart, use_container_width=True)
107
+
.local/share/code-server/User/History/450b0aa/fjsF.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where it is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
39
+ If I had more time, I would like to find a way to make the license type labels more readable on the y-axis and include all license types without making the visualization too cluttered.""")
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns, and find a way to make all of it more organized.""")
.local/share/code-server/User/History/450b0aa/gJ7Y.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where it is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
+ st.header("4. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
69
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
70
+
71
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
72
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
73
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
74
+ If I had more time, I would like to break down the expiration data further by month or quarter to detect more granular patterns.
75
+ """)
.local/share/code-server/User/History/450b0aa/gwPw.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ def load_data():
8
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
9
+ return pd.read_csv(url)
10
+
11
+ df = load_data()
12
+
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+
17
+
18
+
19
+ # Count the number of licenses for each License Type and sort by count
20
+ license_type_counts = df['License Type'].value_counts().reset_index()
21
+ license_type_counts.columns = ['License Type', 'Count']
22
+
23
+ # Get the top 10 License Types
24
+ top_10_license_types = license_type_counts.head(10)
25
+ st.header("3. Top 10 License Types by Frequency")
26
+ # Create the bar chart
27
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
28
+ x=alt.X('Count:Q', title='License Count'),
29
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
30
+ color=alt.Color('License Type:N', legend=None),
31
+ tooltip=['License Type', 'Count']
32
+ )
33
+
34
+
35
+ # Show the chart in the Streamlit app
36
+ st.altair_chart(bar_chart, use_container_width=True)
37
+
38
+ st.markdown("""
39
+ **Highlights**: This bar chart shows the ten most frequent license types.
40
+ **Design Choices**: Bars are sorted by count and colored by license type.
41
+ """)
42
+
43
+ # Convert the Expiration Date to datetime (if not already)
44
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
45
+
46
+ # Extract the year from Expiration Date
47
+ df['Expiration Year'] = df['Expiration Date'].dt.year
48
+
49
+ # Group by Expiration Year and count the number of licenses expiring
50
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
51
+ st.header("4. Trend of Licenses Expiring Over Time")
52
+ # Create the trend line (line chart) with interactive data points (circle marks)
53
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
54
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
55
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
56
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
57
+ )
58
+
59
+
60
+ # Add circle marks at data points
61
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
62
+ x=alt.X('Expiration Year:O'),
63
+ y=alt.Y('License Count:Q'),
64
+ tooltip=['Expiration Year', 'License Count']
65
+ )
66
+
67
+ # Combine the line and point marks
68
+ final_chart = line_chart + points
69
+
70
+ # Show the chart in the Streamlit app
71
+ st.altair_chart(final_chart, use_container_width=True)
72
+
.local/share/code-server/User/History/450b0aa/hisc.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where it is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset.
34
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
35
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
36
+ Since some license names were long, I made sure that the full text is visible when interacting with the chart.""")
37
+
38
+ # Convert 'Expiration Date' to datetime and extract the year
39
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
40
+ df['Expiration Year'] = df['Expiration Date'].dt.year
41
+
42
+ # Group by Expiration Year and count the number of licenses expiring
43
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
44
+ st.header("4. Trend of Licenses Expiring Over Time")
45
+
46
+ # Create trend line (line chart) with circle marks at data points
47
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
48
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
49
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
50
+ tooltip=['Expiration Year', 'License Count']
51
+ )
52
+
53
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
54
+ x=alt.X('Expiration Year:O'),
55
+ y=alt.Y('License Count:Q'),
56
+ tooltip=['Expiration Year', 'License Count']
57
+ )
58
+
59
+ # Combine line and points
60
+ final_chart = line_chart + points
61
+
62
+ st.altair_chart(final_chart, use_container_width=True)
63
+
64
+ # Markdown Write-Up for the trend line chart
65
+ st.markdown("""
66
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
67
+
68
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
69
+
70
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
71
+ """)
.local/share/code-server/User/History/450b0aa/iwOA.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ # Load Dataset
7
+ @st.cache
8
+ def load_data():
9
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
10
+ return pd.read_csv(url)
11
+
12
+ df = load_data()
13
+
14
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
15
+ df_time = df.dropna(subset=['Original Issue Date'])
16
+
17
+ # Title
18
+ st.title("Licenses Dataset Visualizations")
19
+ st.markdown("This app presents three visualizations of the licenses dataset.")
20
+
21
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
22
+ st.header("1. Distribution of Licenses by Type")
23
+ license_count = df['License Type'].value_counts().reset_index()
24
+ license_count.columns = ['License Type', 'Count']
25
+
26
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
27
+ x=alt.X('Count:Q', title='Number of Licenses'),
28
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
29
+ color=alt.Color('License Type:N', legend=None)
30
+ ).properties(title="Number of Licenses by Type")
31
+
32
+ st.altair_chart(bar_chart, use_container_width=True)
33
+
34
+ st.markdown("""
35
+ **Highlights**: This bar chart shows the distribution of licenses by type.
36
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
37
+ """)
38
+
39
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
40
+ st.header("2. Trend of Licenses Over Time")
41
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
42
+ time_data.columns = ['Year', 'Count']
43
+
44
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
45
+ x=alt.X('Year:O', title='Year'),
46
+ y=alt.Y('Count:Q', title='Number of Licenses'),
47
+ color=alt.value('blue')
48
+ ).properties(title="Trend of Licenses Issued Over Years")
49
+
50
+ st.altair_chart(line_chart, use_container_width=True)
51
+
52
+ st.markdown("""
53
+ **Highlights**: This line chart shows the trend of license issuances over time.
54
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
55
+ """)
56
+
57
+
58
+ # Count the number of licenses for each License Type and sort by count
59
+ license_type_counts = df['License Type'].value_counts().reset_index()
60
+ license_type_counts.columns = ['License Type', 'Count']
61
+
62
+ # Get the top 10 License Types
63
+ top_10_license_types = license_type_counts.head(10)
64
+ st.header(". Trend of Licenses Over Time")
65
+ # Create the bar chart
66
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
67
+ x=alt.X('Count:Q', title='License Count'),
68
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
69
+ color=alt.Color('License Type:N', legend=None),
70
+ tooltip=['License Type', 'Count']
71
+ ).properties(
72
+ title='Top 10 License Types by Frequency'
73
+ )
74
+
75
+ # Show the chart in the Streamlit app
76
+ st.altair_chart(bar_chart, use_container_width=True)
77
+
78
+
79
+ # Convert the Expiration Date to datetime (if not already)
80
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
81
+
82
+ # Extract the year from Expiration Date
83
+ df['Expiration Year'] = df['Expiration Date'].dt.year
84
+
85
+ # Group by Expiration Year and count the number of licenses expiring
86
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
87
+
88
+ # Create the trend line (line chart) with interactive data points (circle marks)
89
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
90
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
91
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
92
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
93
+ )
94
+
95
+
96
+ # Add circle marks at data points
97
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
98
+ x=alt.X('Expiration Year:O'),
99
+ y=alt.Y('License Count:Q'),
100
+ tooltip=['Expiration Year', 'License Count']
101
+ )
102
+
103
+ # Combine the line and point marks
104
+ final_chart = line_chart + points
105
+
106
+ # Show the chart in the Streamlit app
107
+ st.altair_chart(final_chart, use_container_width=True)
108
+
.local/share/code-server/User/History/450b0aa/jtCb.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
+ def load_data():
7
+ url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
8
+ return pd.read_csv(url)
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where it is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("3. Top 10 License Types by Frequency")
21
+
22
+ # Create bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+
30
+ st.altair_chart(bar_chart, use_container_width=True)
31
+
32
+ # Markdown Write-Up for the bar chart
33
+ st.markdown("""
34
+ **Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
35
+
36
+ **Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
37
+
38
+ **Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
39
+ """)
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("4. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
70
+
71
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
72
+
73
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
74
+ """)
.local/share/code-server/User/History/450b0aa/ksQB.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
def load_data():
    """Read licenses_fall2022.csv from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        The parsed CSV as a pandas DataFrame, exactly as ``pd.read_csv`` yields it.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where the date is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
37
+ To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
38
+ Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
39
+
40
+ # Convert 'Expiration Date' to datetime and extract the year
41
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
42
+ df['Expiration Year'] = df['Expiration Date'].dt.year
43
+
44
+ # Group by Expiration Year and count the number of licenses expiring
45
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
46
# This file renders only two sections, so the expiration trend follows "1." as section 2.
st.header("2. Trend of Licenses Expiring Over Time")
47
+
48
+ # Create trend line (line chart) with circle marks at data points
49
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
50
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
51
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
52
+ tooltip=['Expiration Year', 'License Count']
53
+ )
54
+
55
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
56
+ x=alt.X('Expiration Year:O'),
57
+ y=alt.Y('License Count:Q'),
58
+ tooltip=['Expiration Year', 'License Count']
59
+ )
60
+
61
+ # Combine line and points
62
+ final_chart = line_chart + points
63
+
64
+ st.altair_chart(final_chart, use_container_width=True)
65
+
66
+ # Markdown Write-Up for the trend line chart
67
+ st.markdown("""
68
+ **Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
69
+
70
+ **Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ **Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
73
+ """)
.local/share/code-server/User/History/450b0aa/mE68.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
def load_data():
    """Read licenses_fall2022.csv from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        The parsed CSV as a pandas DataFrame, exactly as ``pd.read_csv`` yields it.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where the date is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
37
+ To improve this, I focused on the top 10 most frequent license types.
38
+ Some license names were long, I made sure that the full text is visible when you hover on the bar chart.
39
+ If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Markdown Write-Up for the trend line chart
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/qzJp.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
def load_data():
    """Read licenses_fall2022.csv from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        The parsed CSV as a pandas DataFrame, exactly as ``pd.read_csv`` yields it.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where the date is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
37
+ To improve this, I focused on the top 10 most frequent license types.
38
+ Some license names were long, so I made sure that full text is visible when you hover on the bars.
39
+ If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Trend line
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Write-Up for second plot
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/rykv.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import altair as alt
3
+ import streamlit as st
4
+
5
+ # Load the dataset
6
def load_data():
    """Read licenses_fall2022.csv from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        The parsed CSV as a pandas DataFrame, exactly as ``pd.read_csv`` yields it.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
9
+
10
+ df = load_data()
11
+
12
+ # Convert 'Original Issue Date' to datetime and drop rows where the date is missing or unparseable
13
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
14
+ df_time = df.dropna(subset=['Original Issue Date'])
15
+
16
+ # Count the number of licenses for each License Type and get top 10
17
+ license_type_counts = df['License Type'].value_counts().reset_index()
18
+ license_type_counts.columns = ['License Type', 'Count']
19
+ top_10_license_types = license_type_counts.head(10)
20
+ st.header("1. Top 10 License Types by Frequency")
21
+
22
+ # Bar chart for the top 10 license types
23
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
24
+ x=alt.X('Count:Q', title='License Count'),
25
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
26
+ color=alt.Color('License Type:N', legend=None),
27
+ tooltip=['License Type', 'Count']
28
+ )
29
+ st.altair_chart(bar_chart, use_container_width=True)
30
+
31
+ # Write-Up for first plot
32
+ st.markdown("""
33
+ This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
34
+ The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
35
+
36
+ I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
37
+ To improve this, I focused on the top 10 most frequent license types.
38
+ Some license names were long, so I made sure that full text is visible when you hover on the bars.
39
+ If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
40
+
41
+ # Convert 'Expiration Date' to datetime and extract the year
42
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
43
+ df['Expiration Year'] = df['Expiration Date'].dt.year
44
+
45
+ # Group by Expiration Year and count the number of licenses expiring
46
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
47
+ st.header("2. Trend of Licenses Expiring Over Time")
48
+
49
+ # Create trend line (line chart) with circle marks at data points
50
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
51
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
52
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
53
+ tooltip=['Expiration Year', 'License Count']
54
+ )
55
+
56
+ points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
57
+ x=alt.X('Expiration Year:O'),
58
+ y=alt.Y('License Count:Q'),
59
+ tooltip=['Expiration Year', 'License Count']
60
+ )
61
+
62
+ # Combine line and points
63
+ final_chart = line_chart + points
64
+
65
+ st.altair_chart(final_chart, use_container_width=True)
66
+
67
+ # Write-Up for second plot
68
+ st.markdown("""
69
+ This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
70
+ The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
71
+
72
+ I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
73
+ By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
74
+ The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
75
+ If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
.local/share/code-server/User/History/450b0aa/s0Hs.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # put streamlit code here as needed
2
+ import pandas as pd
3
+ import altair as alt
4
+ import streamlit as st
5
+
6
+ #import the data
7
+ #url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
8
+ #df = pd.read_csv(url)
9
+ #df.sum().isnull()
10
+
11
+
12
+
13
+ # #Fill the missing data with Unknown
14
+ # df['County'].fillna('Not available', inplace=True)
15
+ # df['Rep Full Name'].fillna('Not available', inplace=True)
16
+ # df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
17
+ # df['Senator Full Name'].fillna('Unknown', inplace=True)
18
+ # df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
19
+ # df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
20
+ # df['Address'].fillna('Not available', inplace=True)
21
+ # df['Congressional Full Name'].fillna('Unknown', inplace=True)
22
+
23
+ # #Page Title
24
+ # st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
25
+
26
+ # st.subheader("Analyzing the Building Inventory Dataset")
27
+ # #Visualization 1
28
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
29
+
30
+ # df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
31
+ # df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
32
+ # df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
33
+
34
+
35
+ # line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
36
+ # x=alt.X('Year Constructed:Q', title='Year Constructed'),
37
+ # y=alt.Y('count()', title='Number of Buildings'),
38
+ # color=alt.Color('Bldg Status:N', title='Building Status'),
39
+ # tooltip=['Year Constructed', 'count()', 'Bldg Status']
40
+ # ).properties(
41
+ # width=700,
42
+ # height=400,
43
+ # title="Trend of Building Construction by Status"
44
+ # )
45
+ # st.altair_chart(line_chart, use_container_width=True)
46
+ # st.write("""
47
+ # This line chart highlights the trend in the number of buildings constructed over time,
48
+ # categorized by their building status (e.g., whether they are currently in use or not).
49
+ # The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
50
+ # The color encoding separates the buildings by their 'Bldg Status'
51
+ # I used a line plot with points to clearly indicate the number of buildings per year,
52
+ # which helps in identifying trends and peaks. Another reason is that line chart is ideal for
53
+ # visualizing time-series data. The colors are chosen to differentiate the building statuses
54
+ # effectively.
55
+
56
+ # If I had more time, I would consider adding labels to the data points for clarity and
57
+ # perhaps break down the data further by usage description or location for a more detailed analysis.
58
+ # Probably adding hover effects could display additional information, such as the exact count of
59
+ # buildings and their status, when hovering over each data point. This would allow users to gain
60
+ # deeper insights without cluttering the chart. I would also include filtering options so users
61
+ # could select specific building statuses or even a range of years to focus on, which would make
62
+ # the analysis more targeted.
63
+ # """)
64
+
65
+
66
+ # # Visualization 2
67
+ # st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
68
+ # county_agency_data = df.dropna(subset=['County', 'Agency Name'])
69
+ # county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
70
+ # county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
71
+
72
+ # county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
73
+ # x=alt.X('County:N', sort='-y', title="County"),
74
+ # y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
75
+ # color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
76
+ # tooltip=['County:N', 'Unique Agencies:Q']
77
+ # ).properties(
78
+ # width=700,
79
+ # height=400,
80
+ # title="Unique Agencies in Each County"
81
+ # )
82
+ # st.altair_chart(county_agency_chart, use_container_width=True)
83
+
84
+ # st.write("""
85
+ # This bar chart highlights the number of unique agencies operating in each county.
86
+ # The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
87
+ # I used a bar chart as it allows for easy comparison between counties based on the number of
88
+ # agencies, and sorting the counties by agency count makes it easier to identify areas with
89
+ # more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
90
+ # differs across counties.
91
+
92
+ # If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
93
+ # focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
94
+ # I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
95
+ # enabling them to analyze trends and compare the distribution of agencies by different time periods.
96
+ # I would also consider adding a search function so that users can look for specific counties
97
+ # or agency types.
98
+ # """)
99
+ # Load State GeoJSON for Map (example if supported)
100
+ # Visualization: Licenses by State
101
+
102
+
103
+ # Load Dataset
104
# st.cache is deprecated in favour of st.cache_data for serialisable values
# such as DataFrames. The script adds and overwrites columns on the returned
# frame, and st.cache_data returns a fresh copy on every call, so those
# in-place edits never touch the cached value.
@st.cache_data
def load_data():
    """Read licenses_fall2022.csv from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    The result is cached by Streamlit, so script reruns reuse the downloaded
    data instead of fetching the CSV again.

    Returns:
        The parsed CSV as a pandas DataFrame.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
108
+
109
+ df = load_data()
110
+
111
+ # Handle Missing Values for Visualizations
112
+ # 1. Remove rows with missing `Original Issue Date`
113
+ df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
114
+ df_time = df.dropna(subset=['Original Issue Date'])
115
+
116
+ # Title
117
+ st.title("Licenses Dataset Visualizations")
118
+ st.markdown("This app presents three visualizations of the licenses dataset.")
119
+
120
+ # Visualization 1: Bar Chart (Distribution of Licenses by Type)
121
+ st.header("1. Distribution of Licenses by Type")
122
+ license_count = df['License Type'].value_counts().reset_index()
123
+ license_count.columns = ['License Type', 'Count']
124
+
125
+ bar_chart = alt.Chart(license_count).mark_bar().encode(
126
+ x=alt.X('Count:Q', title='Number of Licenses'),
127
+ y=alt.Y('License Type:N', sort='-x', title='License Type'),
128
+ color=alt.Color('License Type:N', legend=None)
129
+ ).properties(title="Number of Licenses by Type")
130
+
131
+ st.altair_chart(bar_chart, use_container_width=True)
132
+
133
+ st.markdown("""
134
+ **Highlights**: This bar chart shows the distribution of licenses by type.
135
+ **Design Choices**: A horizontal bar chart is used for better readability of license types.
136
+ """)
137
+
138
+ # Visualization 2: Line Chart (Trend of Licenses Over Time)
139
+ st.header("2. Trend of Licenses Over Time")
140
+ time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
141
+ time_data.columns = ['Year', 'Count']
142
+
143
+ line_chart = alt.Chart(time_data).mark_line(point=True).encode(
144
+ x=alt.X('Year:O', title='Year'),
145
+ y=alt.Y('Count:Q', title='Number of Licenses'),
146
+ color=alt.value('blue')
147
+ ).properties(title="Trend of Licenses Issued Over Years")
148
+
149
+ st.altair_chart(line_chart, use_container_width=True)
150
+
151
+ st.markdown("""
152
+ **Highlights**: This line chart shows the trend of license issuances over time.
153
+ **Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
154
+ """)
155
+
156
+
157
+ # Count the number of licenses for each License Type and sort by count
158
+ license_type_counts = df['License Type'].value_counts().reset_index()
159
+ license_type_counts.columns = ['License Type', 'Count']
160
+
161
+ # Get the top 10 License Types
162
+ top_10_license_types = license_type_counts.head(10)
163
+
164
+ # Create the bar chart
165
+ bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
166
+ x=alt.X('Count:Q', title='License Count'),
167
+ y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
168
+ color=alt.Color('License Type:N', legend=None),
169
+ tooltip=['License Type', 'Count']
170
+ ).properties(
171
+ title='Top 10 License Types by Frequency'
172
+ )
173
+
174
+ # Show the chart in the Streamlit app
175
+ st.altair_chart(bar_chart, use_container_width=True)
176
+
177
+ # Convert the Expiration Date to datetime (if not already)
178
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
179
+
180
+ # Extract the month and year from Expiration Date
181
+ df['Expiration Year'] = df['Expiration Date'].dt.year
182
+ df['Expiration Month'] = df['Expiration Date'].dt.month
183
+
184
+ # Group by year and month and count the number of licenses expiring
185
+ expiration_counts = df.groupby(['Expiration Year', 'Expiration Month']).size().reset_index(name='License Count')
186
+
187
+ # Convert the Expiration Date to datetime (if not already)
188
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
189
+
190
+ # Extract the year from Expiration Date
191
+ df['Expiration Year'] = df['Expiration Date'].dt.year
192
+
193
+ # Group by Expiration Year and count the number of licenses expiring
194
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
195
+
196
+ # Create the trend line (line chart)
197
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
198
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
199
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
200
+ tooltip=['Expiration Year', 'License Count']
201
+ ).properties(
202
+ title='Trend Line for License Expirations Over Time'
203
+ )
204
+
205
+ # Show the chart in the Streamlit app
206
+ st.altair_chart(line_chart, use_container_width=True)
207
+ # Convert the Expiration Date to datetime (if not already)
208
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
209
+
210
+ # Extract the year from Expiration Date
211
+ df['Expiration Year'] = df['Expiration Date'].dt.year
212
+
213
+ # Group by Expiration Year and count the number of licenses expiring
214
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
215
+
216
+ # Create the trend line (line chart)
217
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
218
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
219
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
220
+ tooltip=['Expiration Year', 'License Count']
221
+ ).properties(
222
+ title='Trend Line for Licenses Expiring Over Time'
223
+ )
224
+
225
+ # Show the chart in the Streamlit app
226
+ st.altair_chart(line_chart, use_container_width=True)
227
+
228
+
229
+
230
+
231
+ # Convert the Expiration Date to datetime (if not already)
232
+ df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
233
+
234
+ # Extract the year from Expiration Date
235
+ df['Expiration Year'] = df['Expiration Date'].dt.year
236
+
237
+ # Group by Expiration Year and count the number of licenses expiring
238
+ expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
239
+
240
+ # Create the trend line (line chart) with tooltips; .interactive() below enables pan/zoom
241
+ line_chart = alt.Chart(expiration_counts).mark_line().encode(
242
+ x=alt.X('Expiration Year:O', title='Expiration Year'),
243
+ y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
244
+ tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
245
+ ).properties(
246
+ title='Trend Line for Licenses Expiring Over Time'
247
+ ).interactive() # Make the chart interactive
248
+
249
+ # Show the chart in the Streamlit app
250
+ st.altair_chart(line_chart, use_container_width=True)