Spaces:
Sleeping
Sleeping
Shah-Miloni committed on
Commit ·
a137ed9
0
Parent(s):
Homework 5.1
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .bash_history +5 -0
- .cache/Microsoft/DeveloperTools/deviceid +1 -0
- .config/code-server/config.yaml +4 -0
- .local/share/code-server/CachedProfilesData/__default__profile__/extensions.builtin.cache +0 -0
- .local/share/code-server/CachedProfilesData/__default__profile__/extensions.user.cache +1 -0
- .local/share/code-server/User/History/450b0aa/0rFy.py +75 -0
- .local/share/code-server/User/History/450b0aa/2sco.py +73 -0
- .local/share/code-server/User/History/450b0aa/2wHf.py +74 -0
- .local/share/code-server/User/History/450b0aa/4TtO.py +73 -0
- .local/share/code-server/User/History/450b0aa/53Ql.py +75 -0
- .local/share/code-server/User/History/450b0aa/53ry.py +75 -0
- .local/share/code-server/User/History/450b0aa/8qMH.py +74 -0
- .local/share/code-server/User/History/450b0aa/8wPI.py +73 -0
- .local/share/code-server/User/History/450b0aa/AIA9.py +208 -0
- .local/share/code-server/User/History/450b0aa/BA55.py +73 -0
- .local/share/code-server/User/History/450b0aa/BvGZ.py +260 -0
- .local/share/code-server/User/History/450b0aa/C2VD.py +73 -0
- .local/share/code-server/User/History/450b0aa/Dqul.py +73 -0
- .local/share/code-server/User/History/450b0aa/GB0z.py +75 -0
- .local/share/code-server/User/History/450b0aa/HxIz.py +107 -0
- .local/share/code-server/User/History/450b0aa/I3Tc.py +108 -0
- .local/share/code-server/User/History/450b0aa/Km75.py +75 -0
- .local/share/code-server/User/History/450b0aa/O3TC.py +260 -0
- .local/share/code-server/User/History/450b0aa/P3er.py +107 -0
- .local/share/code-server/User/History/450b0aa/Q5uV.py +75 -0
- .local/share/code-server/User/History/450b0aa/QeUw.py +69 -0
- .local/share/code-server/User/History/450b0aa/RWSp.py +73 -0
- .local/share/code-server/User/History/450b0aa/RuPD.py +74 -0
- .local/share/code-server/User/History/450b0aa/TdSt.py +109 -0
- .local/share/code-server/User/History/450b0aa/Vrxk.py +74 -0
- .local/share/code-server/User/History/450b0aa/W0T9.py +73 -0
- .local/share/code-server/User/History/450b0aa/WCDW.py +73 -0
- .local/share/code-server/User/History/450b0aa/WLXq.py +70 -0
- .local/share/code-server/User/History/450b0aa/YJb8.py +73 -0
- .local/share/code-server/User/History/450b0aa/YSbm.py +75 -0
- .local/share/code-server/User/History/450b0aa/cWhv.py +208 -0
- .local/share/code-server/User/History/450b0aa/e7iD.py +73 -0
- .local/share/code-server/User/History/450b0aa/entries.json +1 -0
- .local/share/code-server/User/History/450b0aa/fdvg.py +107 -0
- .local/share/code-server/User/History/450b0aa/fjsF.py +75 -0
- .local/share/code-server/User/History/450b0aa/gJ7Y.py +75 -0
- .local/share/code-server/User/History/450b0aa/gwPw.py +72 -0
- .local/share/code-server/User/History/450b0aa/hisc.py +71 -0
- .local/share/code-server/User/History/450b0aa/iwOA.py +108 -0
- .local/share/code-server/User/History/450b0aa/jtCb.py +74 -0
- .local/share/code-server/User/History/450b0aa/ksQB.py +73 -0
- .local/share/code-server/User/History/450b0aa/mE68.py +75 -0
- .local/share/code-server/User/History/450b0aa/qzJp.py +75 -0
- .local/share/code-server/User/History/450b0aa/rykv.py +75 -0
- .local/share/code-server/User/History/450b0aa/s0Hs.py +250 -0
.bash_history
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit run app.py
|
| 2 |
+
streamlit run app.py
|
| 3 |
+
streamlit run app.py
|
| 4 |
+
streamlit run app.py
|
| 5 |
+
streamlit run app.py
|
.cache/Microsoft/DeveloperTools/deviceid
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
a5c936e7-e0c1-4067-98d1-c3fa3de57425
|
.config/code-server/config.yaml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
bind-addr: 127.0.0.1:8080
|
| 2 |
+
auth: password
|
| 3 |
+
password: 903bae018fa9e47b8195dac4
|
| 4 |
+
cert: false
|
.local/share/code-server/CachedProfilesData/__default__profile__/extensions.builtin.cache
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
.local/share/code-server/CachedProfilesData/__default__profile__/extensions.user.cache
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"input":{"location":{"$mid":1,"fsPath":"/home/coder/.local/share/code-server/extensions/extensions.json","external":"file:///home/coder/.local/share/code-server/extensions/extensions.json","path":"/home/coder/.local/share/code-server/extensions/extensions.json","scheme":"file"},"mtime":1731806263514,"profile":true,"profileScanOptions":{"bailOutWhenFileNotFound":true},"type":1,"excludeObsolete":true,"validate":true,"productVersion":"1.91.1","productDate":"2024-07-15T18:41:47.410Z","productCommit":"1962f48b7f71772dc2c060dbaa5a6b4c0792a549","devMode":false,"language":"en","translations":{}},"result":[]}
|
.local/share/code-server/User/History/450b0aa/0rFy.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Download the fall-2022 licenses CSV and parse it with pandas.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the CSV hosted in the
        ``UIUC-iSchool-DataViz/is445_data`` GitHub repository.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
# Fetch the raw licenses table through the load_data() helper.
df = load_data()

# Parse the issue dates (values that cannot be parsed become NaT), then keep
# a view restricted to rows that still have an issue date.
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df_time = df.dropna(subset=['Original Issue Date'])

# Tally licenses per type and keep the ten most common.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.iloc[:10]
st.header("1. Top 10 License Types by Frequency")

# Horizontal bars ordered by descending count, one colour per license type.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        alt.X('Count:Q', title='License Count'),
        alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
        alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
)
st.altair_chart(bar_chart, use_container_width=True)

# Narrative for the bar chart.
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")

# Parse the expiration dates and derive the calendar year of each one.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# One row per expiration year with the number of licenses expiring in it.
expiration_counts = (
    df.groupby('Expiration Year')
    .size()
    .reset_index(name='License Count')
)
st.header("4. Trend of Licenses Expiring Over Time")

# Trend line over the yearly counts.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        alt.X('Expiration Year:O', title='Expiration Year'),
        alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Filled circle at every data point of the same series.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        alt.X('Expiration Year:O'),
        alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the markers on top of the line and render the result.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)

# Narrative for the trend chart.
st.markdown("""
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
|
.local/share/code-server/User/History/450b0aa/2sco.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Download the fall-2022 licenses CSV and parse it with pandas.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the CSV hosted in the
        ``UIUC-iSchool-DataViz/is445_data`` GitHub repository.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
# Fetch the raw licenses table through the load_data() helper.
df = load_data()

# Parse the issue dates (values that cannot be parsed become NaT), then keep
# a view restricted to rows that still have an issue date.
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df_time = df.dropna(subset=['Original Issue Date'])

# Tally licenses per type and keep the ten most common.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.iloc[:10]
st.header("1. Top 10 License Types by Frequency")

# Horizontal bars ordered by descending count, one colour per license type.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        alt.X('Count:Q', title='License Count'),
        alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
        alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
)
st.altair_chart(bar_chart, use_container_width=True)

# Narrative for the bar chart.
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.

**Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.

**Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
""")

# Parse the expiration dates and derive the calendar year of each one.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# One row per expiration year with the number of licenses expiring in it.
expiration_counts = (
    df.groupby('Expiration Year')
    .size()
    .reset_index(name='License Count')
)
st.header("4. Trend of Licenses Expiring Over Time")

# Trend line over the yearly counts.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        alt.X('Expiration Year:O', title='Expiration Year'),
        alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Filled circle at every data point of the same series.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        alt.X('Expiration Year:O'),
        alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the markers on top of the line and render the result.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)

# Narrative for the trend chart.
st.markdown("""
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.

**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
|
.local/share/code-server/User/History/450b0aa/2wHf.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Download the fall-2022 licenses CSV and parse it with pandas.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the CSV hosted in the
        ``UIUC-iSchool-DataViz/is445_data`` GitHub repository.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
# Fetch the raw licenses table through the load_data() helper.
df = load_data()

# Parse the issue dates (values that cannot be parsed become NaT), then keep
# a view restricted to rows that still have an issue date.
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df_time = df.dropna(subset=['Original Issue Date'])

# Tally licenses per type and keep the ten most common.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.iloc[:10]
st.header("1. Top 10 License Types by Frequency")

# Horizontal bars ordered by descending count, one colour per license type.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        alt.X('Count:Q', title='License Count'),
        alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
        alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
)

st.altair_chart(bar_chart, use_container_width=True)

# Narrative for the bar chart.
st.markdown("""
**Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.

**Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.

**Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
""")

# Parse the expiration dates and derive the calendar year of each one.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# One row per expiration year with the number of licenses expiring in it.
expiration_counts = (
    df.groupby('Expiration Year')
    .size()
    .reset_index(name='License Count')
)
st.header("4. Trend of Licenses Expiring Over Time")

# Trend line over the yearly counts.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        alt.X('Expiration Year:O', title='Expiration Year'),
        alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Filled circle at every data point of the same series.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        alt.X('Expiration Year:O'),
        alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the markers on top of the line and render the result.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)

# Narrative for the trend chart.
st.markdown("""
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.

**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
|
.local/share/code-server/User/History/450b0aa/4TtO.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Download the fall-2022 licenses CSV and parse it with pandas.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the CSV hosted in the
        ``UIUC-iSchool-DataViz/is445_data`` GitHub repository.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
# Fetch the raw licenses table through the load_data() helper.
df = load_data()

# Parse the issue dates (values that cannot be parsed become NaT), then keep
# a view restricted to rows that still have an issue date.
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df_time = df.dropna(subset=['Original Issue Date'])

# Tally licenses per type and keep the ten most common.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.iloc[:10]
st.header("1. Top 10 License Types by Frequency")

# Horizontal bars ordered by descending count, one colour per license type.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        alt.X('Count:Q', title='License Count'),
        alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
        alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
)
st.altair_chart(bar_chart, use_container_width=True)

# Narrative for the bar chart.
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. allowing us to easily identify which types of licenses are the most prevalent.

**Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.

**Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
""")

# Parse the expiration dates and derive the calendar year of each one.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# One row per expiration year with the number of licenses expiring in it.
expiration_counts = (
    df.groupby('Expiration Year')
    .size()
    .reset_index(name='License Count')
)
st.header("4. Trend of Licenses Expiring Over Time")

# Trend line over the yearly counts.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        alt.X('Expiration Year:O', title='Expiration Year'),
        alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Filled circle at every data point of the same series.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        alt.X('Expiration Year:O'),
        alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the markers on top of the line and render the result.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)

# Narrative for the trend chart.
st.markdown("""
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.

**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
|
.local/share/code-server/User/History/450b0aa/53Ql.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Download the fall-2022 licenses CSV and parse it with pandas.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the CSV hosted in the
        ``UIUC-iSchool-DataViz/is445_data`` GitHub repository.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
# Fetch the raw licenses table through the load_data() helper.
df = load_data()

# Parse the issue dates (values that cannot be parsed become NaT), then keep
# a view restricted to rows that still have an issue date.
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df_time = df.dropna(subset=['Original Issue Date'])

# Tally licenses per type and keep the ten most common.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.iloc[:10]
st.header("1. Top 10 License Types by Frequency")

# Horizontal bars ordered by descending count, one colour per license type.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        alt.X('Count:Q', title='License Count'),
        alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
        alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
)
st.altair_chart(bar_chart, use_container_width=True)

# Narrative for the bar chart.
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
To improve this, I focused on the top 10 most frequent license types.
Some license names were long, so I made sure that full text is visible when you hover on the bars.
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")

# Parse the expiration dates and derive the calendar year of each one.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# One row per expiration year with the number of licenses expiring in it.
expiration_counts = (
    df.groupby('Expiration Year')
    .size()
    .reset_index(name='License Count')
)
st.header("2. Trend of Licenses Expiring Over Time")

# Trend line over the yearly counts.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        alt.X('Expiration Year:O', title='Expiration Year'),
        alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Filled circle at every data point of the same series.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        alt.X('Expiration Year:O'),
        alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the markers on top of the line and render the result.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)

# Narrative for the trend chart.
st.markdown("""
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/53ry.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses CSV from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime (unparseable values become NaT) and drop rows where the date is missing
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
|
| 39 |
+
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
|
| 40 |
+
|
| 41 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 42 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 43 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 44 |
+
|
| 45 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 46 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 47 |
+
st.header("2. Trend of Licenses Expiring Over Time")
|
| 48 |
+
|
| 49 |
+
# Create trend line (line chart) with circle marks at data points
|
| 50 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 51 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 52 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 53 |
+
tooltip=['Expiration Year', 'License Count']
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 57 |
+
x=alt.X('Expiration Year:O'),
|
| 58 |
+
y=alt.Y('License Count:Q'),
|
| 59 |
+
tooltip=['Expiration Year', 'License Count']
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Combine line and points
|
| 63 |
+
final_chart = line_chart + points
|
| 64 |
+
|
| 65 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 66 |
+
|
| 67 |
+
# Markdown Write-Up for the trend line chart
|
| 68 |
+
st.markdown("""
|
| 69 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 70 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 73 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 74 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 75 |
+
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/8qMH.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses CSV from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime (unparseable values become NaT) and drop rows where the date is missing
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
# Second section of the page; numbered 2 so it follows "1. Top 10 License Types by Frequency".
st.header("2. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 69 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 70 |
+
|
| 71 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 72 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 73 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 74 |
+
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/8wPI.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses CSV from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime (unparseable values become NaT) and drop rows where the date is missing
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
# Second section of the page; numbered 2 so it follows "1. Top 10 License Types by Frequency".
st.header("2. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
|
| 69 |
+
|
| 70 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 73 |
+
""")
|
.local/share/code-server/User/History/450b0aa/AIA9.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
#import the data
|
| 7 |
+
#url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
|
| 8 |
+
#df = pd.read_csv(url)
|
| 9 |
+
#df.sum().isnull()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# #Fill the missing data with Unknown
|
| 14 |
+
# df['County'].fillna('Not available', inplace=True)
|
| 15 |
+
# df['Rep Full Name'].fillna('Not available', inplace=True)
|
| 16 |
+
# df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
|
| 17 |
+
# df['Senator Full Name'].fillna('Unknown', inplace=True)
|
| 18 |
+
# df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
|
| 19 |
+
# df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
|
| 20 |
+
# df['Address'].fillna('Not available', inplace=True)
|
| 21 |
+
# df['Congressional Full Name'].fillna('Unknown', inplace=True)
|
| 22 |
+
|
| 23 |
+
# #Page Title
|
| 24 |
+
# st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
|
| 25 |
+
|
| 26 |
+
# st.subheader("Analyzing the Building Inventory Dataset")
|
| 27 |
+
# #Visualization 1
|
| 28 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
|
| 29 |
+
|
| 30 |
+
# df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
|
| 31 |
+
# df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
|
| 32 |
+
# df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
|
| 36 |
+
# x=alt.X('Year Constructed:Q', title='Year Constructed'),
|
| 37 |
+
# y=alt.Y('count()', title='Number of Buildings'),
|
| 38 |
+
# color=alt.Color('Bldg Status:N', title='Building Status'),
|
| 39 |
+
# tooltip=['Year Constructed', 'count()', 'Bldg Status']
|
| 40 |
+
# ).properties(
|
| 41 |
+
# width=700,
|
| 42 |
+
# height=400,
|
| 43 |
+
# title="Trend of Building Construction by Status"
|
| 44 |
+
# )
|
| 45 |
+
# st.altair_chart(line_chart, use_container_width=True)
|
| 46 |
+
# st.write("""
|
| 47 |
+
# This line chart highlights the trend in the number of buildings constructed over time,
|
| 48 |
+
# categorized by their building status (e.g., whether they are currently in use or not).
|
| 49 |
+
# The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
|
| 50 |
+
# The color encoding separates the buildings by their 'Bldg Status'
|
| 51 |
+
# I used a line plot with points to clearly indicate the number of buildings per year,
|
| 52 |
+
# which helps in identifying trends and peaks. Another reason is that line chart is ideal for
|
| 53 |
+
# visualizing time-series data. The colors are chosen to differentiate the building statuses
|
| 54 |
+
# effectively.
|
| 55 |
+
|
| 56 |
+
# If I had more time, I would consider adding labels to the data points for clarity and
|
| 57 |
+
# perhaps break down the data further by usage description or location for a more detailed analysis.
|
| 58 |
+
# Probably adding hover effects could display additional information, such as the exact count of
|
| 59 |
+
# buildings and their status, when hovering over each data point. This would allow users to gain
|
| 60 |
+
# deeper insights without cluttering the chart. I would also include filtering options so users
|
| 61 |
+
# could select specific building statuses or even a range of years to focus on, which would make
|
| 62 |
+
# the analysis more targeted.
|
| 63 |
+
# """)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# # Visualization 2
|
| 67 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
|
| 68 |
+
# county_agency_data = df.dropna(subset=['County', 'Agency Name'])
|
| 69 |
+
# county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
|
| 70 |
+
# county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
|
| 71 |
+
|
| 72 |
+
# county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
|
| 73 |
+
# x=alt.X('County:N', sort='-y', title="County"),
|
| 74 |
+
# y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
|
| 75 |
+
# color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
|
| 76 |
+
# tooltip=['County:N', 'Unique Agencies:Q']
|
| 77 |
+
# ).properties(
|
| 78 |
+
# width=700,
|
| 79 |
+
# height=400,
|
| 80 |
+
# title="Unique Agencies in Each County"
|
| 81 |
+
# )
|
| 82 |
+
# st.altair_chart(county_agency_chart, use_container_width=True)
|
| 83 |
+
|
| 84 |
+
# st.write("""
|
| 85 |
+
# This bar chart highlights the number of unique agencies operating in each county.
|
| 86 |
+
# The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
|
| 87 |
+
# I used a bar chart as it allows for easy comparison between counties based on the number of
|
| 88 |
+
# agencies, and sorting the counties by agency count makes it easier to identify areas with
|
| 89 |
+
# more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
|
| 90 |
+
# differs across counties.
|
| 91 |
+
|
| 92 |
+
# If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
|
| 93 |
+
# focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
|
| 94 |
+
# I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
|
| 95 |
+
# enabling them to analyze trends and compare the distribution of agencies by different time periods.
|
| 96 |
+
# I would also consider adding a search function so that users can look for specific counties
|
| 97 |
+
# or agency types.
|
| 98 |
+
# """)
|
| 99 |
+
# Load State GeoJSON for Map (example if supported)
|
| 100 |
+
# Visualization: Licenses by State
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Load Dataset
|
| 104 |
+
# `st.cache` is deprecated in Streamlit; `st.cache_data` is its replacement for
# functions that return data. It hands every caller its own copy of the cached
# DataFrame, so the in-place column conversions performed on `df` later in this
# script do not modify the cached object.
@st.cache_data
def load_data():
    """Read the licenses CSV from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    The result is cached by Streamlit, so script reruns reuse the downloaded
    data instead of fetching it again.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 108 |
+
|
| 109 |
+
df = load_data()
|
| 110 |
+
|
| 111 |
+
# Handle Missing Values for Visualizations
|
| 112 |
+
# 1. Remove rows with missing `Original Issue Date`
|
| 113 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 114 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 115 |
+
|
| 116 |
+
# Title
|
| 117 |
+
st.title("Licenses Dataset Visualizations")
|
| 118 |
+
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 119 |
+
|
| 120 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 121 |
+
st.header("1. Distribution of Licenses by Type")
|
| 122 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 123 |
+
license_count.columns = ['License Type', 'Count']
|
| 124 |
+
|
| 125 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 126 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 127 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 128 |
+
color=alt.Color('License Type:N', legend=None)
|
| 129 |
+
).properties(title="Number of Licenses by Type")
|
| 130 |
+
|
| 131 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 132 |
+
|
| 133 |
+
st.markdown("""
|
| 134 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 135 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 136 |
+
""")
|
| 137 |
+
|
| 138 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 139 |
+
st.header("2. Trend of Licenses Over Time")
|
| 140 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 141 |
+
time_data.columns = ['Year', 'Count']
|
| 142 |
+
|
| 143 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 144 |
+
x=alt.X('Year:O', title='Year'),
|
| 145 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 146 |
+
color=alt.value('blue')
|
| 147 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 148 |
+
|
| 149 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 150 |
+
|
| 151 |
+
st.markdown("""
|
| 152 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 153 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 154 |
+
""")
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# Count the number of licenses for each License Type and sort by count
|
| 158 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 159 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 160 |
+
|
| 161 |
+
# Get the top 10 License Types
|
| 162 |
+
top_10_license_types = license_type_counts.head(10)
|
| 163 |
+
|
| 164 |
+
# Create the bar chart
|
| 165 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 166 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 167 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 168 |
+
color=alt.Color('License Type:N', legend=None),
|
| 169 |
+
tooltip=['License Type', 'Count']
|
| 170 |
+
).properties(
|
| 171 |
+
title='Top 10 License Types by Frequency'
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
# Show the chart in the Streamlit app
|
| 175 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 179 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 180 |
+
|
| 181 |
+
# Extract the year from Expiration Date
|
| 182 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 183 |
+
|
| 184 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 185 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 186 |
+
|
| 187 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 188 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 189 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 190 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 191 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 192 |
+
).properties(
|
| 193 |
+
title='Trend Line for Licenses Expiring Over Time'
|
| 194 |
+
)
|
| 195 |
+
|
| 196 |
+
# Add circle marks at data points
|
| 197 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 198 |
+
x=alt.X('Expiration Year:O'),
|
| 199 |
+
y=alt.Y('License Count:Q'),
|
| 200 |
+
tooltip=['Expiration Year', 'License Count']
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
# Combine the line and point marks
|
| 204 |
+
final_chart = line_chart + points
|
| 205 |
+
|
| 206 |
+
# Show the chart in the Streamlit app
|
| 207 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 208 |
+
|
.local/share/code-server/User/History/450b0aa/BA55.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses CSV from the UIUC-iSchool-DataViz/is445_data GitHub repository.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime (unparseable values become NaT) and drop rows where the date is missing
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
**Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
|
| 34 |
+
|
| 35 |
+
**Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
|
| 36 |
+
|
| 37 |
+
**Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
|
| 38 |
+
""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
# Second section of the page; numbered 2 so it follows "1. Top 10 License Types by Frequency".
st.header("2. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
|
| 69 |
+
|
| 70 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 73 |
+
""")
|
.local/share/code-server/User/History/450b0aa/BvGZ.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
#import the data
|
| 7 |
+
#url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
|
| 8 |
+
#df = pd.read_csv(url)
|
| 9 |
+
#df.sum().isnull()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# #Fill the missing data with Unknown
|
| 14 |
+
# df['County'].fillna('Not available', inplace=True)
|
| 15 |
+
# df['Rep Full Name'].fillna('Not available', inplace=True)
|
| 16 |
+
# df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
|
| 17 |
+
# df['Senator Full Name'].fillna('Unknown', inplace=True)
|
| 18 |
+
# df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
|
| 19 |
+
# df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
|
| 20 |
+
# df['Address'].fillna('Not available', inplace=True)
|
| 21 |
+
# df['Congressional Full Name'].fillna('Unknown', inplace=True)
|
| 22 |
+
|
| 23 |
+
# #Page Title
|
| 24 |
+
# st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
|
| 25 |
+
|
| 26 |
+
# st.subheader("Analyzing the Building Inventory Dataset")
|
| 27 |
+
# #Visualization 1
|
| 28 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
|
| 29 |
+
|
| 30 |
+
# df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
|
| 31 |
+
# df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
|
| 32 |
+
# df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
|
| 36 |
+
# x=alt.X('Year Constructed:Q', title='Year Constructed'),
|
| 37 |
+
# y=alt.Y('count()', title='Number of Buildings'),
|
| 38 |
+
# color=alt.Color('Bldg Status:N', title='Building Status'),
|
| 39 |
+
# tooltip=['Year Constructed', 'count()', 'Bldg Status']
|
| 40 |
+
# ).properties(
|
| 41 |
+
# width=700,
|
| 42 |
+
# height=400,
|
| 43 |
+
# title="Trend of Building Construction by Status"
|
| 44 |
+
# )
|
| 45 |
+
# st.altair_chart(line_chart, use_container_width=True)
|
| 46 |
+
# st.write("""
|
| 47 |
+
# This line chart highlights the trend in the number of buildings constructed over time,
|
| 48 |
+
# categorized by their building status (e.g., whether they are currently in use or not).
|
| 49 |
+
# The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
|
| 50 |
+
# The color encoding separates the buildings by their 'Bldg Status'
|
| 51 |
+
# I used a line plot with points to clearly indicate the number of buildings per year,
|
| 52 |
+
# which helps in identifying trends and peaks. Another reason is that line chart is ideal for
|
| 53 |
+
# visualizing time-series data. The colors are chosen to differentiate the building statuses
|
| 54 |
+
# effectively.
|
| 55 |
+
|
| 56 |
+
# If I had more time, I would consider adding labels to the data points for clarity and
|
| 57 |
+
# perhaps break down the data further by usage description or location for a more detailed analysis.
|
| 58 |
+
# Probably adding hover effects could display additional information, such as the exact count of
|
| 59 |
+
# buildings and their status, when hovering over each data point. This would allow users to gain
|
| 60 |
+
# deeper insights without cluttering the chart. I would also include filtering options so users
|
| 61 |
+
# could select specific building statuses or even a range of years to focus on, which would make
|
| 62 |
+
# the analysis more targeted.
|
| 63 |
+
# """)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# # Visualization 2
|
| 67 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
|
| 68 |
+
# county_agency_data = df.dropna(subset=['County', 'Agency Name'])
|
| 69 |
+
# county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
|
| 70 |
+
# county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
|
| 71 |
+
|
| 72 |
+
# county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
|
| 73 |
+
# x=alt.X('County:N', sort='-y', title="County"),
|
| 74 |
+
# y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
|
| 75 |
+
# color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
|
| 76 |
+
# tooltip=['County:N', 'Unique Agencies:Q']
|
| 77 |
+
# ).properties(
|
| 78 |
+
# width=700,
|
| 79 |
+
# height=400,
|
| 80 |
+
# title="Unique Agencies in Each County"
|
| 81 |
+
# )
|
| 82 |
+
# st.altair_chart(county_agency_chart, use_container_width=True)
|
| 83 |
+
|
| 84 |
+
# st.write("""
|
| 85 |
+
# This bar chart highlights the number of unique agencies operating in each county.
|
| 86 |
+
# The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
|
| 87 |
+
# I used a bar chart as it allows for easy comparison between counties based on the number of
|
| 88 |
+
# agencies, and sorting the counties by agency count makes it easier to identify areas with
|
| 89 |
+
# more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
|
| 90 |
+
# differs across counties.
|
| 91 |
+
|
| 92 |
+
# If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
|
| 93 |
+
# focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
|
| 94 |
+
# I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
|
| 95 |
+
# enabling them to analyze trends and compare the distribution of agencies by different time periods.
|
| 96 |
+
# I would also consider adding a search function so that users can look for specific counties
|
| 97 |
+
# or agency types.
|
| 98 |
+
# """)
|
| 99 |
+
# Load State GeoJSON for Map (example if supported)
|
| 100 |
+
# Visualization: Licenses by State
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Load Dataset
|
| 104 |
+
@st.cache_data
def load_data():
    """Download the licenses CSV and return it as a pandas DataFrame.

    Cached with ``st.cache_data`` instead of the deprecated ``st.cache``.
    The script adds and overwrites columns on the returned frame, and
    ``st.cache_data`` gives every call its own copy, so those in-place
    edits never touch the cached value.

    Returns:
        pandas.DataFrame: the parsed contents of the remote CSV file.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 108 |
+
|
| 109 |
+
df = load_data()

# Parse the issue dates; anything that cannot be parsed is coerced to NaT.
issue_dates = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df['Original Issue Date'] = issue_dates

# Time-based charts use only the rows that have a usable issue date.
df_time = df[issue_dates.notna()]

# Page title and short introduction.
st.title("Licenses Dataset Visualizations")
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 119 |
+
|
| 120 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 121 |
+
st.header("1. Distribution of Licenses by Type")

# Number of licenses per type as a two-column frame (most frequent first).
license_count = (
    df['License Type']
    .value_counts()
    .rename_axis('License Type')
    .reset_index(name='Count')
)

# Horizontal bars ordered by count; the colour legend is hidden because the
# y-axis already names every license type.
count_axis = alt.X('Count:Q', title='Number of Licenses')
type_axis = alt.Y('License Type:N', sort='-x', title='License Type')
type_color = alt.Color('License Type:N', legend=None)
bar_chart = (
    alt.Chart(license_count)
    .mark_bar()
    .encode(x=count_axis, y=type_axis, color=type_color)
    .properties(title="Number of Licenses by Type")
)

st.altair_chart(bar_chart, use_container_width=True)

st.markdown("""
**Highlights**: This bar chart shows the distribution of licenses by type.
**Design Choices**: A horizontal bar chart is used for better readability of license types.
""")
|
| 137 |
+
|
| 138 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 139 |
+
st.header("2. Trend of Licenses Over Time")

# Count licenses per issue year. Naming the grouping key 'Year' makes the
# resulting column carry that name without a separate rename step.
issue_year = df_time['Original Issue Date'].dt.year.rename('Year')
time_data = df_time.groupby(issue_year).size().reset_index(name='Count')

# Blue line with a point marker on every year.
line_chart = (
    alt.Chart(time_data)
    .mark_line(point=True)
    .encode(
        x=alt.X('Year:O', title='Year'),
        y=alt.Y('Count:Q', title='Number of Licenses'),
        color=alt.value('blue'),
    )
    .properties(title="Trend of Licenses Issued Over Years")
)

st.altair_chart(line_chart, use_container_width=True)

st.markdown("""
**Highlights**: This line chart shows the trend of license issuances over time.
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
""")
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# Count the number of licenses for each License Type and sort by count
|
| 158 |
+
license_type_counts = (
    df['License Type']
    .value_counts()
    .rename_axis('License Type')
    .reset_index(name='Count')
)

# value_counts() orders by frequency, so the first ten rows are the top ten.
top_10_license_types = license_type_counts.iloc[:10]

# Horizontal bar chart of the ten most frequent types, with hover details.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        x=alt.X('Count:Q', title='License Count'),
        y=alt.Y(
            'License Type:N',
            sort='-x',
            title='License Type',
            axis=alt.Axis(labelPadding=15),
        ),
        color=alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
    .properties(title='Top 10 License Types by Frequency')
)

# Render the chart in the Streamlit page.
st.altair_chart(bar_chart, use_container_width=True)
|
| 176 |
+
|
| 177 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 178 |
+
# Parse the expiration dates once; values that cannot be parsed become NaT.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')

# Derive the calendar year and month of each expiration date.
df['Expiration Year'] = df['Expiration Date'].dt.year
df['Expiration Month'] = df['Expiration Date'].dt.month

# Count the licenses expiring in each year.
# The earlier year/month grouping was removed: its result was overwritten by
# this yearly grouping before anything read it. The second, identical
# to_datetime call was removed because it re-parsed an already parsed column.
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')

# Trend line of yearly expiration counts.
line_chart = alt.Chart(expiration_counts).mark_line().encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
).properties(
    title='Trend Line for License Expirations Over Time'
)

# Show the chart in the Streamlit app
st.altair_chart(line_chart, use_container_width=True)
|
| 207 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 208 |
+
# Ensure the expiration dates are datetimes (invalid entries become NaT) and
# derive the year in which each license expires.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# Tally the licenses per expiration year.
expiration_counts = (
    df.groupby('Expiration Year').size().rename('License Count').reset_index()
)

# Line chart of the yearly totals with hover details.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        x=alt.X('Expiration Year:O', title='Expiration Year'),
        y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
    .properties(title='Trend Line for Licenses Expiring Over Time')
)

# Render the chart in the Streamlit page.
st.altair_chart(line_chart, use_container_width=True)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 231 |
+
# Parse the expiration dates (invalid values become NaT) and keep their year.
expiry_dates = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Date'] = expiry_dates
df['Expiration Year'] = expiry_dates.dt.year

# Licenses expiring per year, as a two-column frame.
expiration_counts = (
    df.groupby('Expiration Year').size().rename('License Count').reset_index()
)

# Interactive trend line with a tooltip on hover.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        x=alt.X('Expiration Year:O', title='Expiration Year'),
        y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
    .properties(title='Trend Line for Licenses Expiring Over Time')
    .interactive()
)

# Filled circles on every yearly data point.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        x=alt.X('Expiration Year:O'),
        y=alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the points over the line and render the combined chart.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)
|
| 260 |
+
|
.local/share/code-server/User/History/450b0aa/C2VD.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache_data
def load_data():
    """Fetch the licenses CSV and return it as a pandas DataFrame.

    Uses ``st.cache_data`` rather than the deprecated ``st.cache``. Each
    call gets its own copy of the cached frame, which matters here because
    the script assigns new columns to the returned frame in place.

    Returns:
        pandas.DataFrame: the parsed contents of the remote CSV file.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()

# Parse the issue dates (invalid values become NaT) and keep a frame that
# contains only the rows with a usable issue date.
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df_time = df.dropna(subset=['Original Issue Date'])

# Count the number of licenses for each License Type (most frequent first)
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']

# Get the top 10 License Types
top_10_license_types = license_type_counts.head(10)
st.header("3. Top 10 License Types by Frequency")

# Horizontal bar chart of the ten most frequent license types
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)

# Show the chart in the Streamlit app
st.altair_chart(bar_chart, use_container_width=True)

# The previous write-up was an unfinished placeholder copied from a line
# chart ("This line chart shows ."); it now describes the bar chart above.
st.markdown("""
**Highlights**: This bar chart shows the ten most frequent license types.
**Design Choices**: A horizontal bar chart is used for better readability of license types.
""")
|
| 43 |
+
|
| 44 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 45 |
+
# Parse the expiration dates (invalid values become NaT) and keep their year.
expiry_dates = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Date'] = expiry_dates
df['Expiration Year'] = expiry_dates.dt.year

# Licenses expiring per year.
expiration_counts = (
    df.groupby('Expiration Year').size().rename('License Count').reset_index()
)

st.header("4. Trend of Licenses Expiring Over Time")

# Trend line with a tooltip on hover.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        x=alt.X('Expiration Year:O', title='Expiration Year'),
        y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Filled circles on every yearly data point.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        x=alt.X('Expiration Year:O'),
        y=alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the points over the line and render the combined chart.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)
|
| 73 |
+
|
.local/share/code-server/User/History/450b0aa/Dqul.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses CSV from its GitHub URL into a pandas DataFrame."""
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()

# Parse the issue dates (invalid values become NaT); df_time keeps only the
# rows whose issue date could be parsed.
issue_dates = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df['Original Issue Date'] = issue_dates
df_time = df[issue_dates.notna()]

# Licenses per type, most frequent first; the first ten rows are the top ten.
license_type_counts = (
    df['License Type']
    .value_counts()
    .rename_axis('License Type')
    .reset_index(name='Count')
)
top_10_license_types = license_type_counts.iloc[:10]

st.header("1. Top 10 License Types by Frequency")

# Horizontal bars sorted by count, one colour per type, details on hover.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        x=alt.X('Count:Q', title='License Count'),
        y=alt.Y(
            'License Type:N',
            sort='-x',
            title='License Type',
            axis=alt.Axis(labelPadding=15),
        ),
        color=alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
)
st.altair_chart(bar_chart, use_container_width=True)

# Explanatory text displayed below the bar chart.
st.markdown("""
**Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.

**Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.

**Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
# Convert 'Expiration Date' to datetime (invalid values become NaT) and
# extract the year.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# Group by Expiration Year and count the number of licenses expiring
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')

# This is the second section on the page (it follows "1. Top 10 License Types
# by Frequency"), so it is numbered 2 rather than the leftover 4.
st.header("2. Trend of Licenses Expiring Over Time")

# Trend line of yearly expiration counts with hover details
line_chart = alt.Chart(expiration_counts).mark_line().encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
)

# Filled circles marking each yearly data point
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
    x=alt.X('Expiration Year:O'),
    y=alt.Y('License Count:Q'),
    tooltip=['Expiration Year', 'License Count']
)

# Combine line and points
final_chart = line_chart + points

st.altair_chart(final_chart, use_container_width=True)

# Markdown Write-Up for the trend line chart
st.markdown("""
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.

**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
""")
|
.local/share/code-server/User/History/450b0aa/GB0z.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Download the licenses CSV and hand it back as a pandas DataFrame."""
    csv_location = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    licenses = pd.read_csv(csv_location)
    return licenses
|
| 9 |
+
|
| 10 |
+
df = load_data()

# Parse the issue dates (invalid values become NaT); df_time keeps only the
# rows whose issue date could be parsed.
issue_dates = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df['Original Issue Date'] = issue_dates
df_time = df[issue_dates.notna()]

# Licenses per type, most frequent first; the first ten rows are the top ten.
license_type_counts = (
    df['License Type']
    .value_counts()
    .rename_axis('License Type')
    .reset_index(name='Count')
)
top_10_license_types = license_type_counts.iloc[:10]

st.header("1. Top 10 License Types by Frequency")

# Horizontal bars sorted by count, one colour per type, details on hover.
bar_chart = (
    alt.Chart(top_10_license_types)
    .mark_bar()
    .encode(
        x=alt.X('Count:Q', title='License Count'),
        y=alt.Y(
            'License Type:N',
            sort='-x',
            title='License Type',
            axis=alt.Axis(labelPadding=15),
        ),
        color=alt.Color('License Type:N', legend=None),
        tooltip=['License Type', 'Count'],
    )
)
st.altair_chart(bar_chart, use_container_width=True)

# Narrative for the first plot.
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
If I had more time, I would like to find a way to make the column names more readable and include all license types without making the visualization too cluttered""")
|
| 40 |
+
|
| 41 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 42 |
+
# Parse the expiration dates (invalid values become NaT) and keep their year.
expiry_dates = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Date'] = expiry_dates
df['Expiration Year'] = expiry_dates.dt.year

# Licenses expiring per year.
expiration_counts = (
    df.groupby('Expiration Year').size().rename('License Count').reset_index()
)

st.header("2. Trend of Licenses Expiring Over Time")

# Trend line with a tooltip on hover.
line_chart = (
    alt.Chart(expiration_counts)
    .mark_line()
    .encode(
        x=alt.X('Expiration Year:O', title='Expiration Year'),
        y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Filled circles on every yearly data point.
points = (
    alt.Chart(expiration_counts)
    .mark_point(shape='circle', filled=True, size=50)
    .encode(
        x=alt.X('Expiration Year:O'),
        y=alt.Y('License Count:Q'),
        tooltip=['Expiration Year', 'License Count'],
    )
)

# Layer the points over the line and render the combined chart.
final_chart = alt.layer(line_chart, points)
st.altair_chart(final_chart, use_container_width=True)

# Narrative for the trend chart.
st.markdown("""
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/HxIz.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache_data
def load_data():
    """Load the licenses CSV from GitHub into a pandas DataFrame.

    Decorated with ``st.cache_data``, the replacement for the deprecated
    ``st.cache``. The script overwrites a column of the returned frame, and
    ``st.cache_data`` returns a fresh copy per call, so the cached data is
    not modified by that.

    Returns:
        pandas.DataFrame: the parsed contents of the remote CSV file.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
# Title
|
| 18 |
+
st.title("Licenses Dataset Visualizations")
|
| 19 |
+
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 20 |
+
|
| 21 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 22 |
+
st.header("1. Distribution of Licenses by Type")
|
| 23 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 24 |
+
license_count.columns = ['License Type', 'Count']
|
| 25 |
+
|
| 26 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 27 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 28 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 29 |
+
color=alt.Color('License Type:N', legend=None)
|
| 30 |
+
).properties(title="Number of Licenses by Type")
|
| 31 |
+
|
| 32 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 33 |
+
|
| 34 |
+
st.markdown("""
|
| 35 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 36 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 37 |
+
""")
|
| 38 |
+
|
| 39 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 40 |
+
st.header("2. Trend of Licenses Over Time")
|
| 41 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 42 |
+
time_data.columns = ['Year', 'Count']
|
| 43 |
+
|
| 44 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 45 |
+
x=alt.X('Year:O', title='Year'),
|
| 46 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 47 |
+
color=alt.value('blue')
|
| 48 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 49 |
+
|
| 50 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 51 |
+
|
| 52 |
+
st.markdown("""
|
| 53 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 54 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 55 |
+
""")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Count the number of licenses for each License Type and sort by count
|
| 59 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 60 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 61 |
+
|
| 62 |
+
# Get the top 10 License Types
|
| 63 |
+
top_10_license_types = license_type_counts.head(10)
|
| 64 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 65 |
+
# Create the bar chart
|
| 66 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 67 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 68 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 69 |
+
color=alt.Color('License Type:N', legend=None),
|
| 70 |
+
tooltip=['License Type', 'Count']
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Show the chart in the Streamlit app
|
| 75 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 79 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 80 |
+
|
| 81 |
+
# Extract the year from Expiration Date
|
| 82 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 83 |
+
|
| 84 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 85 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 86 |
+
# Section header for the expiration trend chart built below (fourth chart in this script).
st.header("4. Trend of Licenses Expiring Over Time")
|
| 87 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 88 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 89 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 90 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 91 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# Add circle marks at data points
|
| 96 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 97 |
+
x=alt.X('Expiration Year:O'),
|
| 98 |
+
y=alt.Y('License Count:Q'),
|
| 99 |
+
tooltip=['Expiration Year', 'License Count']
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# Combine the line and point marks
|
| 103 |
+
final_chart = line_chart + points
|
| 104 |
+
|
| 105 |
+
# Show the chart in the Streamlit app
|
| 106 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 107 |
+
|
.local/share/code-server/User/History/450b0aa/I3Tc.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache
|
| 8 |
+
def load_data():
|
| 9 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 10 |
+
return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
# Title
|
| 18 |
+
st.title("Licenses Dataset Visualizations")
|
| 19 |
+
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 20 |
+
|
| 21 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 22 |
+
st.header("1. Distribution of Licenses by Type")
|
| 23 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 24 |
+
license_count.columns = ['License Type', 'Count']
|
| 25 |
+
|
| 26 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 27 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 28 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 29 |
+
color=alt.Color('License Type:N', legend=None)
|
| 30 |
+
).properties(title="Number of Licenses by Type")
|
| 31 |
+
|
| 32 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 33 |
+
|
| 34 |
+
st.markdown("""
|
| 35 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 36 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 37 |
+
""")
|
| 38 |
+
|
| 39 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 40 |
+
st.header("2. Trend of Licenses Over Time")
|
| 41 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 42 |
+
time_data.columns = ['Year', 'Count']
|
| 43 |
+
|
| 44 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 45 |
+
x=alt.X('Year:O', title='Year'),
|
| 46 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 47 |
+
color=alt.value('blue')
|
| 48 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 49 |
+
|
| 50 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 51 |
+
|
| 52 |
+
st.markdown("""
|
| 53 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 54 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 55 |
+
""")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Count the number of licenses for each License Type and sort by count
|
| 59 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 60 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 61 |
+
|
| 62 |
+
# Get the top 10 License Types
|
| 63 |
+
top_10_license_types = license_type_counts.head(10)
|
| 64 |
+
|
| 65 |
+
# Create the bar chart
|
| 66 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 67 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 68 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 69 |
+
color=alt.Color('License Type:N', legend=None),
|
| 70 |
+
tooltip=['License Type', 'Count']
|
| 71 |
+
).properties(
|
| 72 |
+
title='Top 10 License Types by Frequency'
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
# Show the chart in the Streamlit app
|
| 76 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 80 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 81 |
+
|
| 82 |
+
# Extract the year from Expiration Date
|
| 83 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 84 |
+
|
| 85 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 86 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 87 |
+
|
| 88 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 89 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 90 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 91 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 92 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 93 |
+
)
|
| 94 |
+
# NOTE: the .encode(...) call above is already closed; an extra ")" here was a SyntaxError.
|
| 95 |
+
|
| 96 |
+
# Add circle marks at data points
|
| 97 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 98 |
+
x=alt.X('Expiration Year:O'),
|
| 99 |
+
y=alt.Y('License Count:Q'),
|
| 100 |
+
tooltip=['Expiration Year', 'License Count']
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
# Combine the line and point marks
|
| 104 |
+
final_chart = line_chart + points
|
| 105 |
+
|
| 106 |
+
# Show the chart in the Streamlit app
|
| 107 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 108 |
+
|
.local/share/code-server/User/History/450b0aa/Km75.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and drop rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types.
|
| 38 |
+
Some license names were long, so I made sure that full text is visible when you hover on the bars.
|
| 39 |
+
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
|
| 40 |
+
|
| 41 |
+
# Second visualization: trend of licenses expiring over time
|
| 42 |
+
# Converting 'Expiration Date' to datetime
|
| 43 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 44 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 45 |
+
|
| 46 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 47 |
+
st.header("2. Trend of Licenses Expiring Over Time")
|
| 48 |
+
|
| 49 |
+
# Trend line
|
| 50 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 51 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 52 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 53 |
+
tooltip=['Expiration Year', 'License Count']
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 57 |
+
x=alt.X('Expiration Year:O'),
|
| 58 |
+
y=alt.Y('License Count:Q'),
|
| 59 |
+
tooltip=['Expiration Year', 'License Count']
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Combine line and points
|
| 63 |
+
final_chart = line_chart + points
|
| 64 |
+
|
| 65 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 66 |
+
|
| 67 |
+
# Write-Up for second plot
|
| 68 |
+
st.markdown("""
|
| 69 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 70 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 73 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 74 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 75 |
+
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/O3TC.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
#import the data
|
| 7 |
+
#url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
|
| 8 |
+
#df = pd.read_csv(url)
|
| 9 |
+
#df.sum().isnull()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# #Fill the missing data with Unknown
|
| 14 |
+
# df['County'].fillna('Not available', inplace=True)
|
| 15 |
+
# df['Rep Full Name'].fillna('Not available', inplace=True)
|
| 16 |
+
# df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
|
| 17 |
+
# df['Senator Full Name'].fillna('Unknown', inplace=True)
|
| 18 |
+
# df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
|
| 19 |
+
# df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
|
| 20 |
+
# df['Address'].fillna('Not available', inplace=True)
|
| 21 |
+
# df['Congressional Full Name'].fillna('Unknown', inplace=True)
|
| 22 |
+
|
| 23 |
+
# #Page Title
|
| 24 |
+
# st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
|
| 25 |
+
|
| 26 |
+
# st.subheader("Analyzing the Building Inventory Dataset")
|
| 27 |
+
# #Visualization 1
|
| 28 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
|
| 29 |
+
|
| 30 |
+
# df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
|
| 31 |
+
# df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
|
| 32 |
+
# df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
|
| 36 |
+
# x=alt.X('Year Constructed:Q', title='Year Constructed'),
|
| 37 |
+
# y=alt.Y('count()', title='Number of Buildings'),
|
| 38 |
+
# color=alt.Color('Bldg Status:N', title='Building Status'),
|
| 39 |
+
# tooltip=['Year Constructed', 'count()', 'Bldg Status']
|
| 40 |
+
# ).properties(
|
| 41 |
+
# width=700,
|
| 42 |
+
# height=400,
|
| 43 |
+
# title="Trend of Building Construction by Status"
|
| 44 |
+
# )
|
| 45 |
+
# st.altair_chart(line_chart, use_container_width=True)
|
| 46 |
+
# st.write("""
|
| 47 |
+
# This line chart highlights the trend in the number of buildings constructed over time,
|
| 48 |
+
# categorized by their building status (e.g., whether they are currently in use or not).
|
| 49 |
+
# The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
|
| 50 |
+
# The color encoding separates the buildings by their 'Bldg Status'
|
| 51 |
+
# I used a line plot with points to clearly indicate the number of buildings per year,
|
| 52 |
+
# which helps in identifying trends and peaks. Another reason is that line chart is ideal for
|
| 53 |
+
# visualizing time-series data. The colors are chosen to differentiate the building statuses
|
| 54 |
+
# effectively.
|
| 55 |
+
|
| 56 |
+
# If I had more time, I would consider adding labels to the data points for clarity and
|
| 57 |
+
# perhaps break down the data further by usage description or location for a more detailed analysis.
|
| 58 |
+
# Probably adding hover effects could display additional information, such as the exact count of
|
| 59 |
+
# buildings and their status, when hovering over each data point. This would allow users to gain
|
| 60 |
+
# deeper insights without cluttering the chart. I would also include filtering options so users
|
| 61 |
+
# could select specific building statuses or even a range of years to focus on, which would make
|
| 62 |
+
# the analysis more targeted.
|
| 63 |
+
# """)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# # Visualization 2
|
| 67 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
|
| 68 |
+
# county_agency_data = df.dropna(subset=['County', 'Agency Name'])
|
| 69 |
+
# county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
|
| 70 |
+
# county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
|
| 71 |
+
|
| 72 |
+
# county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
|
| 73 |
+
# x=alt.X('County:N', sort='-y', title="County"),
|
| 74 |
+
# y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
|
| 75 |
+
# color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
|
| 76 |
+
# tooltip=['County:N', 'Unique Agencies:Q']
|
| 77 |
+
# ).properties(
|
| 78 |
+
# width=700,
|
| 79 |
+
# height=400,
|
| 80 |
+
# title="Unique Agencies in Each County"
|
| 81 |
+
# )
|
| 82 |
+
# st.altair_chart(county_agency_chart, use_container_width=True)
|
| 83 |
+
|
| 84 |
+
# st.write("""
|
| 85 |
+
# This bar chart highlights the number of unique agencies operating in each county.
|
| 86 |
+
# The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
|
| 87 |
+
# I used a bar chart as it allows for easy comparison between counties based on the number of
|
| 88 |
+
# agencies, and sorting the counties by agency count makes it easier to identify areas with
|
| 89 |
+
# more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
|
| 90 |
+
# differs across counties.
|
| 91 |
+
|
| 92 |
+
# If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
|
| 93 |
+
# focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
|
| 94 |
+
# I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
|
| 95 |
+
# enabling them to analyze trends and compare the distribution of agencies by different time periods.
|
| 96 |
+
# I would also consider adding a search function so that users can look for specific counties
|
| 97 |
+
# or agency types.
|
| 98 |
+
# """)
|
| 99 |
+
# Load State GeoJSON for Map (example if supported)
|
| 100 |
+
# Visualization: Licenses by State
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Load Dataset
|
| 104 |
+
@st.cache
|
| 105 |
+
def load_data():
|
| 106 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 107 |
+
return pd.read_csv(url)
|
| 108 |
+
|
| 109 |
+
df = load_data()
|
| 110 |
+
|
| 111 |
+
# Handle Missing Values for Visualizations
|
| 112 |
+
# 1. Remove rows with missing `Original Issue Date`
|
| 113 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 114 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 115 |
+
|
| 116 |
+
# Title
|
| 117 |
+
st.title("Licenses Dataset Visualizations")
|
| 118 |
+
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 119 |
+
|
| 120 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 121 |
+
st.header("1. Distribution of Licenses by Type")
|
| 122 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 123 |
+
license_count.columns = ['License Type', 'Count']
|
| 124 |
+
|
| 125 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 126 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 127 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 128 |
+
color=alt.Color('License Type:N', legend=None)
|
| 129 |
+
).properties(title="Number of Licenses by Type")
|
| 130 |
+
|
| 131 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 132 |
+
|
| 133 |
+
st.markdown("""
|
| 134 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 135 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 136 |
+
""")
|
| 137 |
+
|
| 138 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 139 |
+
st.header("2. Trend of Licenses Over Time")
|
| 140 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 141 |
+
time_data.columns = ['Year', 'Count']
|
| 142 |
+
|
| 143 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 144 |
+
x=alt.X('Year:O', title='Year'),
|
| 145 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 146 |
+
color=alt.value('blue')
|
| 147 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 148 |
+
|
| 149 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 150 |
+
|
| 151 |
+
st.markdown("""
|
| 152 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 153 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 154 |
+
""")
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# Count the number of licenses for each License Type and sort by count
|
| 158 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 159 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 160 |
+
|
| 161 |
+
# Get the top 10 License Types
|
| 162 |
+
top_10_license_types = license_type_counts.head(10)
|
| 163 |
+
|
| 164 |
+
# Create the bar chart
|
| 165 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 166 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 167 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 168 |
+
color=alt.Color('License Type:N', legend=None),
|
| 169 |
+
tooltip=['License Type', 'Count']
|
| 170 |
+
).properties(
|
| 171 |
+
title='Top 10 License Types by Frequency'
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
# Show the chart in the Streamlit app
|
| 175 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 176 |
+
|
| 177 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 178 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 179 |
+
|
| 180 |
+
# Extract the month and year from Expiration Date
|
| 181 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 182 |
+
df['Expiration Month'] = df['Expiration Date'].dt.month
|
| 183 |
+
|
| 184 |
+
# Group by year and month and count the number of licenses expiring
|
| 185 |
+
expiration_counts = df.groupby(['Expiration Year', 'Expiration Month']).size().reset_index(name='License Count')
|
| 186 |
+
|
| 187 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 188 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 189 |
+
|
| 190 |
+
# Extract the year from Expiration Date
|
| 191 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 192 |
+
|
| 193 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 194 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 195 |
+
|
| 196 |
+
# Create the trend line (line chart)
|
| 197 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 198 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 199 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 200 |
+
tooltip=['Expiration Year', 'License Count']
|
| 201 |
+
).properties(
|
| 202 |
+
title='Trend Line for License Expirations Over Time'
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
+
# Show the chart in the Streamlit app
|
| 206 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 207 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 208 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 209 |
+
|
| 210 |
+
# Extract the year from Expiration Date
|
| 211 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 212 |
+
|
| 213 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 214 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 215 |
+
|
| 216 |
+
# Create the trend line (line chart)
|
| 217 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 218 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 219 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 220 |
+
tooltip=['Expiration Year', 'License Count']
|
| 221 |
+
).properties(
|
| 222 |
+
title='Trend Line for Licenses Expiring Over Time'
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
# Show the chart in the Streamlit app
|
| 226 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 231 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 232 |
+
|
| 233 |
+
# Extract the year from Expiration Date
|
| 234 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 235 |
+
|
| 236 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 237 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 238 |
+
|
| 239 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 240 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 241 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 242 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 243 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 244 |
+
).properties(
|
| 245 |
+
title='Trend Line for Licenses Expiring Over Time'
|
| 246 |
+
).interactive() # Make the chart interactive
|
| 247 |
+
|
| 248 |
+
# Add circle marks at data points
|
| 249 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=100).encode(
|
| 250 |
+
x=alt.X('Expiration Year:O'),
|
| 251 |
+
y=alt.Y('License Count:Q'),
|
| 252 |
+
tooltip=['Expiration Year', 'License Count']
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
# Combine the line and point marks
|
| 256 |
+
final_chart = line_chart + points
|
| 257 |
+
|
| 258 |
+
# Show the chart in the Streamlit app
|
| 259 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 260 |
+
|
.local/share/code-server/User/History/450b0aa/P3er.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache_data
def load_data():
    """Download the licenses dataset and return it as a DataFrame.

    Cached with ``st.cache_data`` instead of the deprecated ``st.cache``.
    The script assigns new columns on the returned frame, and
    ``st.cache_data`` hands every rerun its own copy, so those in-place
    edits never touch the cached value.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
# Title
|
| 18 |
+
st.title("Licenses Dataset Visualizations")
|
| 19 |
+
st.markdown("This app presents four visualizations of the licenses dataset.")
|
| 20 |
+
|
| 21 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 22 |
+
st.header("1. Distribution of Licenses by Type")
|
| 23 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 24 |
+
license_count.columns = ['License Type', 'Count']
|
| 25 |
+
|
| 26 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 27 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 28 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 29 |
+
color=alt.Color('License Type:N', legend=None)
|
| 30 |
+
).properties(title="Number of Licenses by Type")
|
| 31 |
+
|
| 32 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 33 |
+
|
| 34 |
+
st.markdown("""
|
| 35 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 36 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 37 |
+
""")
|
| 38 |
+
|
| 39 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 40 |
+
st.header("2. Trend of Licenses Over Time")
|
| 41 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 42 |
+
time_data.columns = ['Year', 'Count']
|
| 43 |
+
|
| 44 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 45 |
+
x=alt.X('Year:O', title='Year'),
|
| 46 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 47 |
+
color=alt.value('blue')
|
| 48 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 49 |
+
|
| 50 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 51 |
+
|
| 52 |
+
st.markdown("""
|
| 53 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 54 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 55 |
+
""")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Count the number of licenses for each License Type and sort by count
|
| 59 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 60 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 61 |
+
|
| 62 |
+
# Get the top 10 License Types
|
| 63 |
+
top_10_license_types = license_type_counts.head(10)
|
| 64 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 65 |
+
# Create the bar chart
|
| 66 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 67 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 68 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 69 |
+
color=alt.Color('License Type:N', legend=None),
|
| 70 |
+
tooltip=['License Type', 'Count']
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Show the chart in the Streamlit app
|
| 75 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 79 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 80 |
+
|
| 81 |
+
# Extract the year from Expiration Date
|
| 82 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 83 |
+
|
| 84 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 85 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 86 |
+
|
| 87 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 88 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 89 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 90 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 91 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# Add circle marks at data points
|
| 96 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 97 |
+
x=alt.X('Expiration Year:O'),
|
| 98 |
+
y=alt.Y('License Count:Q'),
|
| 99 |
+
tooltip=['Expiration Year', 'License Count']
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# Combine the line and point marks
|
| 103 |
+
final_chart = line_chart + points
|
| 104 |
+
|
| 105 |
+
# Show the chart in the Streamlit app
|
| 106 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 107 |
+
|
.local/share/code-server/User/History/450b0aa/Q5uV.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses_fall2022 CSV from the is445_data GitHub repository into a DataFrame."""
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and drop rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 69 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 70 |
+
|
| 71 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 72 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 73 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 74 |
+
If I had more time, I would like to break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it less cluttered.
|
| 75 |
+
""")
|
.local/share/code-server/User/History/450b0aa/QeUw.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses_fall2022 CSV from the is445_data GitHub repository into a DataFrame."""
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and drop rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset.
|
| 34 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered and less informative. To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset. Since some license names were long, I made them hoverable to ensure that the full text is visible when interacting with the chart.""")
|
| 35 |
+
|
| 36 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 37 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 38 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 39 |
+
|
| 40 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 41 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 42 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 43 |
+
|
| 44 |
+
# Create trend line (line chart) with circle marks at data points
|
| 45 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 46 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 47 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 48 |
+
tooltip=['Expiration Year', 'License Count']
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 52 |
+
x=alt.X('Expiration Year:O'),
|
| 53 |
+
y=alt.Y('License Count:Q'),
|
| 54 |
+
tooltip=['Expiration Year', 'License Count']
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# Combine line and points
|
| 58 |
+
final_chart = line_chart + points
|
| 59 |
+
|
| 60 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 61 |
+
|
| 62 |
+
# Markdown Write-Up for the trend line chart
|
| 63 |
+
st.markdown("""
|
| 64 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
|
| 65 |
+
|
| 66 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 67 |
+
|
| 68 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 69 |
+
""")
|
.local/share/code-server/User/History/450b0aa/RWSp.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses_fall2022 CSV from the is445_data GitHub repository into a DataFrame."""
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and drop rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
|
| 69 |
+
|
| 70 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 73 |
+
""")
|
.local/share/code-server/User/History/450b0aa/RuPD.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses_fall2022 CSV from the is445_data GitHub repository into a DataFrame."""
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and drop rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 31 |
+
|
| 32 |
+
# Markdown Write-Up for the bar chart
|
| 33 |
+
st.markdown("""
|
| 34 |
+
**Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
|
| 35 |
+
|
| 36 |
+
**Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
|
| 37 |
+
|
| 38 |
+
**Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
|
| 39 |
+
""")
|
| 40 |
+
|
| 41 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 42 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 43 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 44 |
+
|
| 45 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 46 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 47 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 48 |
+
|
| 49 |
+
# Create trend line (line chart) with circle marks at data points
|
| 50 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 51 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 52 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 53 |
+
tooltip=['Expiration Year', 'License Count']
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 57 |
+
x=alt.X('Expiration Year:O'),
|
| 58 |
+
y=alt.Y('License Count:Q'),
|
| 59 |
+
tooltip=['Expiration Year', 'License Count']
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Combine line and points
|
| 63 |
+
final_chart = line_chart + points
|
| 64 |
+
|
| 65 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 66 |
+
|
| 67 |
+
# Markdown Write-Up for the trend line chart
|
| 68 |
+
st.markdown("""
|
| 69 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
|
| 70 |
+
|
| 71 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 72 |
+
|
| 73 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 74 |
+
""")
|
.local/share/code-server/User/History/450b0aa/TdSt.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache_data
def load_data():
    """Download the licenses dataset and return it as a DataFrame.

    Cached with ``st.cache_data`` instead of the deprecated ``st.cache``.
    The script assigns new columns on the returned frame, and
    ``st.cache_data`` hands every rerun its own copy, so those in-place
    edits never touch the cached value.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
# Title
|
| 18 |
+
st.title("Licenses Dataset Visualizations")
|
| 19 |
+
st.markdown("This app presents four visualizations of the licenses dataset.")
|
| 20 |
+
|
| 21 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 22 |
+
st.header("1. Distribution of Licenses by Type")
|
| 23 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 24 |
+
license_count.columns = ['License Type', 'Count']
|
| 25 |
+
|
| 26 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 27 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 28 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 29 |
+
color=alt.Color('License Type:N', legend=None)
|
| 30 |
+
).properties(title="Number of Licenses by Type")
|
| 31 |
+
|
| 32 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 33 |
+
|
| 34 |
+
st.markdown("""
|
| 35 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 36 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 37 |
+
""")
|
| 38 |
+
|
| 39 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 40 |
+
st.header("2. Trend of Licenses Over Time")
|
| 41 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 42 |
+
time_data.columns = ['Year', 'Count']
|
| 43 |
+
|
| 44 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 45 |
+
x=alt.X('Year:O', title='Year'),
|
| 46 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 47 |
+
color=alt.value('blue')
|
| 48 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 49 |
+
|
| 50 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 51 |
+
|
| 52 |
+
st.markdown("""
|
| 53 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 54 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 55 |
+
""")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Count the number of licenses for each License Type and sort by count
|
| 59 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 60 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 61 |
+
|
| 62 |
+
# Get the top 10 License Types
|
| 63 |
+
top_10_license_types = license_type_counts.head(10)
|
| 64 |
+
|
| 65 |
+
# Create the bar chart
|
| 66 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 67 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 68 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 69 |
+
color=alt.Color('License Type:N', legend=None),
|
| 70 |
+
tooltip=['License Type', 'Count']
|
| 71 |
+
).properties(
|
| 72 |
+
title='Top 10 License Types by Frequency'
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
# Show the chart in the Streamlit app
|
| 76 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 80 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 81 |
+
|
| 82 |
+
# Extract the year from Expiration Date
|
| 83 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 84 |
+
|
| 85 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 86 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 87 |
+
|
| 88 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 89 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 90 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 91 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 92 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 93 |
+
).properties(
|
| 94 |
+
title='Trend Line for Licenses Expiring Over Time'
|
| 95 |
+
)
|
| 96 |
+
|
| 97 |
+
# Add circle marks at data points
|
| 98 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 99 |
+
x=alt.X('Expiration Year:O'),
|
| 100 |
+
y=alt.Y('License Count:Q'),
|
| 101 |
+
tooltip=['Expiration Year', 'License Count']
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
# Combine the line and point marks
|
| 105 |
+
final_chart = line_chart + points
|
| 106 |
+
|
| 107 |
+
# Show the chart in the Streamlit app
|
| 108 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 109 |
+
|
.local/share/code-server/User/History/450b0aa/Vrxk.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
st.header("2. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 69 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 70 |
+
|
| 71 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 72 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 73 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 74 |
+
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/W0T9.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
|
| 69 |
+
|
| 70 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 73 |
+
""")
|
.local/share/code-server/User/History/450b0aa/WCDW.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
|
| 8 |
+
def load_data():
|
| 9 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 10 |
+
return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Count the number of licenses for each License Type and sort by count
|
| 21 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 22 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 23 |
+
|
| 24 |
+
# Get the top 10 License Types
|
| 25 |
+
top_10_license_types = license_type_counts.head(10)
|
| 26 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 27 |
+
# Create the bar chart
|
| 28 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 29 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 30 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 31 |
+
color=alt.Color('License Type:N', legend=None),
|
| 32 |
+
tooltip=['License Type', 'Count']
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Show the chart in the Streamlit app
|
| 37 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 38 |
+
|
| 39 |
+
st.markdown("""
|
| 40 |
+
**Highlights**: This line chart shows .
|
| 41 |
+
**Design Choices**: Points are added to highlight.
|
| 42 |
+
""")
|
| 43 |
+
|
| 44 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 45 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 46 |
+
|
| 47 |
+
# Extract the year from Expiration Date
|
| 48 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 49 |
+
|
| 50 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 51 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 52 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 53 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 54 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 55 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 56 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 57 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# Add circle marks at data points
|
| 62 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 63 |
+
x=alt.X('Expiration Year:O'),
|
| 64 |
+
y=alt.Y('License Count:Q'),
|
| 65 |
+
tooltip=['Expiration Year', 'License Count']
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
# Combine the line and point marks
|
| 69 |
+
final_chart = line_chart + points
|
| 70 |
+
|
| 71 |
+
# Show the chart in the Streamlit app
|
| 72 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 73 |
+
|
.local/share/code-server/User/History/450b0aa/WLXq.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset.
|
| 34 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 35 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset. Since some license names were long, I made them hoverable to ensure that the full text is visible when interacting with the chart.""")
|
| 36 |
+
|
| 37 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 38 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 39 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 40 |
+
|
| 41 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 42 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 43 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 44 |
+
|
| 45 |
+
# Create trend line (line chart) with circle marks at data points
|
| 46 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 47 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 48 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 49 |
+
tooltip=['Expiration Year', 'License Count']
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 53 |
+
x=alt.X('Expiration Year:O'),
|
| 54 |
+
y=alt.Y('License Count:Q'),
|
| 55 |
+
tooltip=['Expiration Year', 'License Count']
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
# Combine line and points
|
| 59 |
+
final_chart = line_chart + points
|
| 60 |
+
|
| 61 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 62 |
+
|
| 63 |
+
# Markdown Write-Up for the trend line chart
|
| 64 |
+
st.markdown("""
|
| 65 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
|
| 66 |
+
|
| 67 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 68 |
+
|
| 69 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 70 |
+
""")
|
.local/share/code-server/User/History/450b0aa/YJb8.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting seasonal or yearly variations in license expirations.
|
| 69 |
+
|
| 70 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 73 |
+
""")
|
.local/share/code-server/User/History/450b0aa/YSbm.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, allowing for a clearer and more insightful visualization.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
|
| 39 |
+
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
|
| 40 |
+
|
| 41 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 42 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 43 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 44 |
+
|
| 45 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 46 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 47 |
+
st.header("2. Trend of Licenses Expiring Over Time")
|
| 48 |
+
|
| 49 |
+
# Create trend line (line chart) with circle marks at data points
|
| 50 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 51 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 52 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 53 |
+
tooltip=['Expiration Year', 'License Count']
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 57 |
+
x=alt.X('Expiration Year:O'),
|
| 58 |
+
y=alt.Y('License Count:Q'),
|
| 59 |
+
tooltip=['Expiration Year', 'License Count']
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Combine line and points
|
| 63 |
+
final_chart = line_chart + points
|
| 64 |
+
|
| 65 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 66 |
+
|
| 67 |
+
# Markdown Write-Up for the trend line chart
|
| 68 |
+
st.markdown("""
|
| 69 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 70 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 73 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 74 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 75 |
+
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/cWhv.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
#import the data
|
| 7 |
+
#url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
|
| 8 |
+
#df = pd.read_csv(url)
|
| 9 |
+
#df.sum().isnull()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# #Fill the missing data with Unknown
|
| 14 |
+
# df['County'].fillna('Not available', inplace=True)
|
| 15 |
+
# df['Rep Full Name'].fillna('Not available', inplace=True)
|
| 16 |
+
# df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
|
| 17 |
+
# df['Senator Full Name'].fillna('Unknown', inplace=True)
|
| 18 |
+
# df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
|
| 19 |
+
# df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
|
| 20 |
+
# df['Address'].fillna('Not available', inplace=True)
|
| 21 |
+
# df['Congressional Full Name'].fillna('Unknown', inplace=True)
|
| 22 |
+
|
| 23 |
+
# #Page Title
|
| 24 |
+
# st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
|
| 25 |
+
|
| 26 |
+
# st.subheader("Analyzing the Building Inventory Dataset")
|
| 27 |
+
# #Visualization 1
|
| 28 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
|
| 29 |
+
|
| 30 |
+
# df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
|
| 31 |
+
# df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
|
| 32 |
+
# df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
|
| 36 |
+
# x=alt.X('Year Constructed:Q', title='Year Constructed'),
|
| 37 |
+
# y=alt.Y('count()', title='Number of Buildings'),
|
| 38 |
+
# color=alt.Color('Bldg Status:N', title='Building Status'),
|
| 39 |
+
# tooltip=['Year Constructed', 'count()', 'Bldg Status']
|
| 40 |
+
# ).properties(
|
| 41 |
+
# width=700,
|
| 42 |
+
# height=400,
|
| 43 |
+
# title="Trend of Building Construction by Status"
|
| 44 |
+
# )
|
| 45 |
+
# st.altair_chart(line_chart, use_container_width=True)
|
| 46 |
+
# st.write("""
|
| 47 |
+
# This line chart highlights the trend in the number of buildings constructed over time,
|
| 48 |
+
# categorized by their building status (e.g., whether they are currently in use or not).
|
| 49 |
+
# The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
|
| 50 |
+
# The color encoding separates the buildings by their 'Bldg Status'
|
| 51 |
+
# I used a line plot with points to clearly indicate the number of buildings per year,
|
| 52 |
+
# which helps in identifying trends and peaks. Another reason is that a line chart is ideal for
|
| 53 |
+
# visualizing time-series data. The colors are chosen to differentiate the building statuses
|
| 54 |
+
# effectively.
|
| 55 |
+
|
| 56 |
+
# If I had more time, I would consider adding labels to the data points for clarity and
|
| 57 |
+
# perhaps break down the data further by usage description or location for a more detailed analysis.
|
| 58 |
+
# Probably adding hover effects could display additional information, such as the exact count of
|
| 59 |
+
# buildings and their status, when hovering over each data point. This would allow users to gain
|
| 60 |
+
# deeper insights without cluttering the chart. I would also include filtering options so users
|
| 61 |
+
# could select specific building statuses or even a range of years to focus on, which would make
|
| 62 |
+
# the analysis more targeted.
|
| 63 |
+
# """)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# # Visualization 2
|
| 67 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
|
| 68 |
+
# county_agency_data = df.dropna(subset=['County', 'Agency Name'])
|
| 69 |
+
# county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
|
| 70 |
+
# county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
|
| 71 |
+
|
| 72 |
+
# county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
|
| 73 |
+
# x=alt.X('County:N', sort='-y', title="County"),
|
| 74 |
+
# y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
|
| 75 |
+
# color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
|
| 76 |
+
# tooltip=['County:N', 'Unique Agencies:Q']
|
| 77 |
+
# ).properties(
|
| 78 |
+
# width=700,
|
| 79 |
+
# height=400,
|
| 80 |
+
# title="Unique Agencies in Each County"
|
| 81 |
+
# )
|
| 82 |
+
# st.altair_chart(county_agency_chart, use_container_width=True)
|
| 83 |
+
|
| 84 |
+
# st.write("""
|
| 85 |
+
# This bar chart highlights the number of unique agencies operating in each county.
|
| 86 |
+
# The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
|
| 87 |
+
# I used a bar chart as it allows for easy comparison between counties based on the number of
|
| 88 |
+
# agencies, and sorting the counties by agency count makes it easier to identify areas with
|
| 89 |
+
# more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
|
| 90 |
+
# differs across counties.
|
| 91 |
+
|
| 92 |
+
# If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
|
| 93 |
+
# focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
|
| 94 |
+
# I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
|
| 95 |
+
# enabling them to analyze trends and compare the distribution of agencies by different time periods.
|
| 96 |
+
# I would also consider adding a search function so that users can look for specific counties
|
| 97 |
+
# or agency types.
|
| 98 |
+
# """)
|
| 99 |
+
# Load State GeoJSON for Map (example if supported)
|
| 100 |
+
# Visualization: Licenses by State
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Load Dataset
|
| 104 |
+
@st.cache_data
def load_data():
    """Download the licenses CSV and return it as a pandas DataFrame.

    The file is read with ``pd.read_csv`` straight from the
    UIUC-iSchool-DataViz ``is445_data`` GitHub repository.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    # st.cache is deprecated in favour of st.cache_data. The script assigns
    # new columns onto the returned frame right after loading; st.cache_data
    # gives each call its own copy, so those in-place edits never touch the
    # cached object.
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 108 |
+
|
| 109 |
+
df = load_data()
|
| 110 |
+
|
| 111 |
+
# Handle Missing Values for Visualizations
|
| 112 |
+
# 1. Remove rows with missing `Original Issue Date`
|
| 113 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 114 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 115 |
+
|
| 116 |
+
# Title
|
| 117 |
+
st.title("Licenses Dataset Visualizations")
|
| 118 |
+
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 119 |
+
|
| 120 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 121 |
+
st.header("1. Distribution of Licenses by Type")
|
| 122 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 123 |
+
license_count.columns = ['License Type', 'Count']
|
| 124 |
+
|
| 125 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 126 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 127 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 128 |
+
color=alt.Color('License Type:N', legend=None)
|
| 129 |
+
).properties(title="Number of Licenses by Type")
|
| 130 |
+
|
| 131 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 132 |
+
|
| 133 |
+
st.markdown("""
|
| 134 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 135 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 136 |
+
""")
|
| 137 |
+
|
| 138 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 139 |
+
st.header("2. Trend of Licenses Over Time")
|
| 140 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 141 |
+
time_data.columns = ['Year', 'Count']
|
| 142 |
+
|
| 143 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 144 |
+
x=alt.X('Year:O', title='Year'),
|
| 145 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 146 |
+
color=alt.value('blue')
|
| 147 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 148 |
+
|
| 149 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 150 |
+
|
| 151 |
+
st.markdown("""
|
| 152 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 153 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 154 |
+
""")
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# Count the number of licenses for each License Type and sort by count
|
| 158 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 159 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 160 |
+
|
| 161 |
+
# Get the top 10 License Types
|
| 162 |
+
top_10_license_types = license_type_counts.head(10)
|
| 163 |
+
|
| 164 |
+
# Create the bar chart
|
| 165 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 166 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 167 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 168 |
+
color=alt.Color('License Type:N', legend=None),
|
| 169 |
+
tooltip=['License Type', 'Count']
|
| 170 |
+
).properties(
|
| 171 |
+
title='Top 10 License Types by Frequency'
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
# Show the chart in the Streamlit app
|
| 175 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 179 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 180 |
+
|
| 181 |
+
# Extract the year from Expiration Date
|
| 182 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 183 |
+
|
| 184 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 185 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 186 |
+
|
| 187 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 188 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 189 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 190 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 191 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 192 |
+
).properties(
|
| 193 |
+
title='Trend Line for Licenses Expiring Over Time'
|
| 194 |
+
).interactive() # Make the chart interactive
|
| 195 |
+
|
| 196 |
+
# Add circle marks at data points
|
| 197 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 198 |
+
x=alt.X('Expiration Year:O'),
|
| 199 |
+
y=alt.Y('License Count:Q'),
|
| 200 |
+
tooltip=['Expiration Year', 'License Count']
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
# Combine the line and point marks
|
| 204 |
+
final_chart = line_chart + points
|
| 205 |
+
|
| 206 |
+
# Show the chart in the Streamlit app
|
| 207 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 208 |
+
|
.local/share/code-server/User/History/450b0aa/e7iD.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache_data
def load_data():
    """Download the licenses CSV and return it as a pandas DataFrame.

    The file is read with ``pd.read_csv`` straight from the
    UIUC-iSchool-DataViz ``is445_data`` GitHub repository.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    # st.cache is deprecated in favour of st.cache_data. The script converts
    # and adds date columns on the returned frame in place; st.cache_data
    # returns a fresh copy per call, so the cached object stays unmodified.
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Count the number of licenses for each License Type and sort by count
|
| 21 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 22 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 23 |
+
|
| 24 |
+
# Get the top 10 License Types
|
| 25 |
+
top_10_license_types = license_type_counts.head(10)
|
| 26 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 27 |
+
# Create the bar chart
|
| 28 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 29 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 30 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 31 |
+
color=alt.Color('License Type:N', legend=None),
|
| 32 |
+
tooltip=['License Type', 'Count']
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Show the chart in the Streamlit app
|
| 37 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 38 |
+
|
| 39 |
+
st.markdown("""
|
| 40 |
+
**Highlights**: This line chart shows .
|
| 41 |
+
**Design Choices**: Points are added to highlight.
|
| 42 |
+
""")
|
| 43 |
+
|
| 44 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 45 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 46 |
+
|
| 47 |
+
# Extract the year from Expiration Date
|
| 48 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 49 |
+
|
| 50 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 51 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 52 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 53 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 54 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 55 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 56 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 57 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# Add circle marks at data points
|
| 62 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 63 |
+
x=alt.X('Expiration Year:O'),
|
| 64 |
+
y=alt.Y('License Count:Q'),
|
| 65 |
+
tooltip=['Expiration Year', 'License Count']
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
# Combine the line and point marks
|
| 69 |
+
final_chart = line_chart + points
|
| 70 |
+
|
| 71 |
+
# Show the chart in the Streamlit app
|
| 72 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 73 |
+
|
.local/share/code-server/User/History/450b0aa/entries.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"version":1,"resource":"vscode-remote://us.prairielearn.com/home/coder/app.py","entries":[{"id":"s0Hs.py","timestamp":1732610271002},{"id":"O3TC.py","timestamp":1732610434817},{"id":"BvGZ.py","timestamp":1732610476290},{"id":"cWhv.py","timestamp":1732610582621},{"id":"AIA9.py","timestamp":1732610651134},{"id":"TdSt.py","timestamp":1732610735299},{"id":"I3Tc.py","timestamp":1732610806873},{"id":"iwOA.py","timestamp":1732610839592},{"id":"P3er.py","timestamp":1732610855923},{"id":"HxIz.py","timestamp":1732610881797},{"id":"fdvg.py","timestamp":1732610922722},{"id":"e7iD.py","timestamp":1732611034843},{"id":"WCDW.py","timestamp":1732611365984},{"id":"v1EL.py","timestamp":1732611436229},{"id":"gwPw.py","source":"undoRedo.source","timestamp":1732611467020},{"id":"C2VD.py","source":"undoRedo.source","timestamp":1732611484102},{"id":"vTno.py","timestamp":1732611509771},{"id":"jtCb.py","timestamp":1732611541992},{"id":"2wHf.py","timestamp":1732611585052},{"id":"RuPD.py","timestamp":1732611595384},{"id":"Dqul.py","timestamp":1732611615454},{"id":"BA55.py","timestamp":1732611631889},{"id":"2sco.py","timestamp":1732611708324},{"id":"4TtO.py","timestamp":1732611993598},{"id":"QeUw.py","timestamp":1732612015814},{"id":"WLXq.py","timestamp":1732612053461},{"id":"hisc.py","timestamp":1732612075256},{"id":"zuML.py","timestamp":1732612088958},{"id":"YJb8.py","timestamp":1732612185652},{"id":"ksQB.py","timestamp":1732612338428},{"id":"RWSp.py","source":"undoRedo.source","timestamp":1732612367906},{"id":"zPF4.py","timestamp":1732612468393},{"id":"W0T9.py","timestamp":1732612478852},{"id":"8wPI.py","timestamp":1732612490534},{"id":"vzqC.py","timestamp":1732612520566},{"id":"0rFy.py","timestamp":1732612794232},{"id":"gJ7Y.py","timestamp":1732612838289},{"id":"Q5uV.py","timestamp":1732612850712},{"id":"8qMH.py","timestamp":1732612947711},{"id":"Vrxk.py","timestamp":1732612978297},{"id":"GB0z.py","timestamp":1732613153833},{"id":"fjsF.py","timestamp":1732613170235},{"id":"53ry.py","times
tamp":1732613225839},{"id":"YSbm.py","timestamp":1732613251281},{"id":"mE68.py","timestamp":1732613291160},{"id":"53Ql.py","timestamp":1732613323548},{"id":"rykv.py","timestamp":1732613389594},{"id":"qzJp.py","timestamp":1732613401918},{"id":"zoVr.py","timestamp":1732613418498},{"id":"Km75.py","timestamp":1732613439341}]}
|
.local/share/code-server/User/History/450b0aa/fdvg.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache_data
def load_data():
    """Download the licenses CSV and return it as a pandas DataFrame.

    The file is read with ``pd.read_csv`` straight from the
    UIUC-iSchool-DataViz ``is445_data`` GitHub repository.

    Returns:
        pandas.DataFrame: the parsed contents of ``licenses_fall2022.csv``.
    """
    # st.cache is deprecated in favour of st.cache_data. The script rewrites
    # date columns on the returned frame in place; st.cache_data returns a
    # fresh copy per call, so the cached object stays unmodified.
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
# Title
|
| 18 |
+
st.title("Licenses Dataset Visualizations")
|
| 19 |
+
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 20 |
+
|
| 21 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 22 |
+
st.header("1. Distribution of Licenses by Type")
|
| 23 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 24 |
+
license_count.columns = ['License Type', 'Count']
|
| 25 |
+
|
| 26 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 27 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 28 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 29 |
+
color=alt.Color('License Type:N', legend=None)
|
| 30 |
+
).properties(title="Number of Licenses by Type")
|
| 31 |
+
|
| 32 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 33 |
+
|
| 34 |
+
st.markdown("""
|
| 35 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 36 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 37 |
+
""")
|
| 38 |
+
|
| 39 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 40 |
+
st.header("2. Trend of Licenses Over Time")
|
| 41 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 42 |
+
time_data.columns = ['Year', 'Count']
|
| 43 |
+
|
| 44 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 45 |
+
x=alt.X('Year:O', title='Year'),
|
| 46 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 47 |
+
color=alt.value('blue')
|
| 48 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 49 |
+
|
| 50 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 51 |
+
|
| 52 |
+
st.markdown("""
|
| 53 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 54 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 55 |
+
""")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Count the number of licenses for each License Type and sort by count
|
| 59 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 60 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 61 |
+
|
| 62 |
+
# Get the top 10 License Types
|
| 63 |
+
top_10_license_types = license_type_counts.head(10)
|
| 64 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 65 |
+
# Create the bar chart
|
| 66 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 67 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 68 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 69 |
+
color=alt.Color('License Type:N', legend=None),
|
| 70 |
+
tooltip=['License Type', 'Count']
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Show the chart in the Streamlit app
|
| 75 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 79 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 80 |
+
|
| 81 |
+
# Extract the year from Expiration Date
|
| 82 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 83 |
+
|
| 84 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 85 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 86 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 87 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 88 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 89 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 90 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 91 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# Add circle marks at data points
|
| 96 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 97 |
+
x=alt.X('Expiration Year:O'),
|
| 98 |
+
y=alt.Y('License Count:Q'),
|
| 99 |
+
tooltip=['Expiration Year', 'License Count']
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# Combine the line and point marks
|
| 103 |
+
final_chart = line_chart + points
|
| 104 |
+
|
| 105 |
+
# Show the chart in the Streamlit app
|
| 106 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 107 |
+
|
.local/share/code-server/User/History/450b0aa/fjsF.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Fetch ``licenses_fall2022.csv`` from GitHub and return it as a DataFrame."""
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.
|
| 39 |
+
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
|
| 40 |
+
|
| 41 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 42 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 43 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 44 |
+
|
| 45 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 46 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 47 |
+
st.header("2. Trend of Licenses Expiring Over Time")
|
| 48 |
+
|
| 49 |
+
# Create trend line (line chart) with circle marks at data points
|
| 50 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 51 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 52 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 53 |
+
tooltip=['Expiration Year', 'License Count']
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 57 |
+
x=alt.X('Expiration Year:O'),
|
| 58 |
+
y=alt.Y('License Count:Q'),
|
| 59 |
+
tooltip=['Expiration Year', 'License Count']
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Combine line and points
|
| 63 |
+
final_chart = line_chart + points
|
| 64 |
+
|
| 65 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 66 |
+
|
| 67 |
+
# Markdown Write-Up for the trend line chart
|
| 68 |
+
st.markdown("""
|
| 69 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 70 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 73 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 74 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 75 |
+
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/gJ7Y.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses CSV from its GitHub URL into a pandas DataFrame."""
    csv_location = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    licenses = pd.read_csv(csv_location)
    return licenses
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 69 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 70 |
+
|
| 71 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 72 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 73 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 74 |
+
If I had more time, I would like to break down the expiration data further by month or quarter to detect more granular patterns a
|
| 75 |
+
""")
|
.local/share/code-server/User/History/450b0aa/gwPw.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
def load_data():
|
| 8 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 9 |
+
return pd.read_csv(url)
|
| 10 |
+
|
| 11 |
+
df = load_data()
|
| 12 |
+
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Count the number of licenses for each License Type and sort by count
|
| 20 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 21 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 22 |
+
|
| 23 |
+
# Get the top 10 License Types
|
| 24 |
+
top_10_license_types = license_type_counts.head(10)
|
| 25 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 26 |
+
# Create the bar chart
|
| 27 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 28 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 29 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 30 |
+
color=alt.Color('License Type:N', legend=None),
|
| 31 |
+
tooltip=['License Type', 'Count']
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# Show the chart in the Streamlit app
|
| 36 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 37 |
+
|
| 38 |
+
st.markdown("""
|
| 39 |
+
**Highlights**: This bar chart shows the 10 most frequent license types in the dataset.
|
| 40 |
+
**Design Choices**: A horizontal bar chart sorted by count is used, with tooltips showing each license type and its count.
|
| 41 |
+
""")
|
| 42 |
+
|
| 43 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 44 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 45 |
+
|
| 46 |
+
# Extract the year from Expiration Date
|
| 47 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 48 |
+
|
| 49 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 50 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 51 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 52 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 53 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 54 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 55 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 56 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Add circle marks at data points
|
| 61 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 62 |
+
x=alt.X('Expiration Year:O'),
|
| 63 |
+
y=alt.Y('License Count:Q'),
|
| 64 |
+
tooltip=['Expiration Year', 'License Count']
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# Combine the line and point marks
|
| 68 |
+
final_chart = line_chart + points
|
| 69 |
+
|
| 70 |
+
# Show the chart in the Streamlit app
|
| 71 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 72 |
+
|
.local/share/code-server/User/History/450b0aa/hisc.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset.
|
| 34 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 35 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 36 |
+
Since some license names were long, I made sure that the full text is visible when interacting with the chart.""")
|
| 37 |
+
|
| 38 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 39 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 40 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 41 |
+
|
| 42 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 43 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 44 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 45 |
+
|
| 46 |
+
# Create trend line (line chart) with circle marks at data points
|
| 47 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 48 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 49 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 50 |
+
tooltip=['Expiration Year', 'License Count']
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 54 |
+
x=alt.X('Expiration Year:O'),
|
| 55 |
+
y=alt.Y('License Count:Q'),
|
| 56 |
+
tooltip=['Expiration Year', 'License Count']
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
# Combine line and points
|
| 60 |
+
final_chart = line_chart + points
|
| 61 |
+
|
| 62 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 63 |
+
|
| 64 |
+
# Markdown Write-Up for the trend line chart
|
| 65 |
+
st.markdown("""
|
| 66 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
|
| 67 |
+
|
| 68 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 69 |
+
|
| 70 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 71 |
+
""")
|
.local/share/code-server/User/History/450b0aa/iwOA.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Load Dataset
|
| 7 |
+
@st.cache
|
| 8 |
+
def load_data():
|
| 9 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 10 |
+
return pd.read_csv(url)
|
| 11 |
+
|
| 12 |
+
df = load_data()
|
| 13 |
+
|
| 14 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 15 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 16 |
+
|
| 17 |
+
# Title
|
| 18 |
+
st.title("Licenses Dataset Visualizations")
|
| 19 |
+
st.markdown("This app presents three visualizations of the licenses dataset.")
|
| 20 |
+
|
| 21 |
+
# Visualization 1: Bar Chart (Distribution of Licenses by Type)
|
| 22 |
+
st.header("1. Distribution of Licenses by Type")
|
| 23 |
+
license_count = df['License Type'].value_counts().reset_index()
|
| 24 |
+
license_count.columns = ['License Type', 'Count']
|
| 25 |
+
|
| 26 |
+
bar_chart = alt.Chart(license_count).mark_bar().encode(
|
| 27 |
+
x=alt.X('Count:Q', title='Number of Licenses'),
|
| 28 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type'),
|
| 29 |
+
color=alt.Color('License Type:N', legend=None)
|
| 30 |
+
).properties(title="Number of Licenses by Type")
|
| 31 |
+
|
| 32 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 33 |
+
|
| 34 |
+
st.markdown("""
|
| 35 |
+
**Highlights**: This bar chart shows the distribution of licenses by type.
|
| 36 |
+
**Design Choices**: A horizontal bar chart is used for better readability of license types.
|
| 37 |
+
""")
|
| 38 |
+
|
| 39 |
+
# Visualization 2: Line Chart (Trend of Licenses Over Time)
|
| 40 |
+
st.header("2. Trend of Licenses Over Time")
|
| 41 |
+
time_data = df_time.groupby(df_time['Original Issue Date'].dt.year).size().reset_index(name='Count')
|
| 42 |
+
time_data.columns = ['Year', 'Count']
|
| 43 |
+
|
| 44 |
+
line_chart = alt.Chart(time_data).mark_line(point=True).encode(
|
| 45 |
+
x=alt.X('Year:O', title='Year'),
|
| 46 |
+
y=alt.Y('Count:Q', title='Number of Licenses'),
|
| 47 |
+
color=alt.value('blue')
|
| 48 |
+
).properties(title="Trend of Licenses Issued Over Years")
|
| 49 |
+
|
| 50 |
+
st.altair_chart(line_chart, use_container_width=True)
|
| 51 |
+
|
| 52 |
+
st.markdown("""
|
| 53 |
+
**Highlights**: This line chart shows the trend of license issuances over time.
|
| 54 |
+
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
|
| 55 |
+
""")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Count the number of licenses for each License Type and sort by count
|
| 59 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 60 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 61 |
+
|
| 62 |
+
# Get the top 10 License Types
|
| 63 |
+
top_10_license_types = license_type_counts.head(10)
|
| 64 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 65 |
+
# Create the bar chart
|
| 66 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 67 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 68 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 69 |
+
color=alt.Color('License Type:N', legend=None),
|
| 70 |
+
tooltip=['License Type', 'Count']
|
| 71 |
+
).properties(
|
| 72 |
+
title='Top 10 License Types by Frequency'
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
# Show the chart in the Streamlit app
|
| 76 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Convert the Expiration Date to datetime (if not already)
|
| 80 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 81 |
+
|
| 82 |
+
# Extract the year from Expiration Date
|
| 83 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 84 |
+
|
| 85 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 86 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 87 |
+
|
| 88 |
+
# Create the trend line (line chart) with interactive data points (circle marks)
|
| 89 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 90 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 91 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 92 |
+
tooltip=['Expiration Year', 'License Count'] # Tooltip for interactivity
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# Add circle marks at data points
|
| 97 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 98 |
+
x=alt.X('Expiration Year:O'),
|
| 99 |
+
y=alt.Y('License Count:Q'),
|
| 100 |
+
tooltip=['Expiration Year', 'License Count']
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
# Combine the line and point marks
|
| 104 |
+
final_chart = line_chart + points
|
| 105 |
+
|
| 106 |
+
# Show the chart in the Streamlit app
|
| 107 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 108 |
+
|
.local/share/code-server/User/History/450b0aa/jtCb.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("3. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Create bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 31 |
+
|
| 32 |
+
# Markdown Write-Up for the bar chart
|
| 33 |
+
st.markdown("""
|
| 34 |
+
**Highlights**: This bar chart highlights the top 10 most frequent license types in the dataset, allowing us to easily identify which types of licenses are the most prevalent.
|
| 35 |
+
|
| 36 |
+
**Design Choices**: The x-axis represents the count of each license type, while the y-axis shows the name of the license type. The bars are color-coded to differentiate between the license types, and tooltips are included for better interaction. The chart is sorted in descending order of the license count.
|
| 37 |
+
|
| 38 |
+
**Improvements**: If I had more time, I would add more granular details about the licenses or break down the data further by state or city for more localized insights.
|
| 39 |
+
""")
|
| 40 |
+
|
| 41 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 42 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 43 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 44 |
+
|
| 45 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 46 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 47 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 48 |
+
|
| 49 |
+
# Create trend line (line chart) with circle marks at data points
|
| 50 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 51 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 52 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 53 |
+
tooltip=['Expiration Year', 'License Count']
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 57 |
+
x=alt.X('Expiration Year:O'),
|
| 58 |
+
y=alt.Y('License Count:Q'),
|
| 59 |
+
tooltip=['Expiration Year', 'License Count']
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Combine line and points
|
| 63 |
+
final_chart = line_chart + points
|
| 64 |
+
|
| 65 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 66 |
+
|
| 67 |
+
# Markdown Write-Up for the trend line chart
|
| 68 |
+
st.markdown("""
|
| 69 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
|
| 70 |
+
|
| 71 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 72 |
+
|
| 73 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 74 |
+
""")
|
.local/share/code-server/User/History/450b0aa/ksQB.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency, making the visualization cluttered.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types, which allowed for a clearer and more insightful view of the most prevalent licenses in the dataset.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover on the bar chart.""")
|
| 39 |
+
|
| 40 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 41 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 42 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 43 |
+
|
| 44 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 45 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 46 |
+
st.header("4. Trend of Licenses Expiring Over Time")
|
| 47 |
+
|
| 48 |
+
# Create trend line (line chart) with circle marks at data points
|
| 49 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 50 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 51 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 52 |
+
tooltip=['Expiration Year', 'License Count']
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 56 |
+
x=alt.X('Expiration Year:O'),
|
| 57 |
+
y=alt.Y('License Count:Q'),
|
| 58 |
+
tooltip=['Expiration Year', 'License Count']
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
# Combine line and points
|
| 62 |
+
final_chart = line_chart + points
|
| 63 |
+
|
| 64 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 65 |
+
|
| 66 |
+
# Markdown Write-Up for the trend line chart
|
| 67 |
+
st.markdown("""
|
| 68 |
+
**Highlights**: This line chart shows the trend of licenses expiring over time, highlighting year-to-year variations in license expirations.
|
| 69 |
+
|
| 70 |
+
**Design Choices**: The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring. The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
**Improvements**: If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns.
|
| 73 |
+
""")
|
.local/share/code-server/User/History/450b0aa/mE68.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
|
| 7 |
+
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
|
| 8 |
+
return pd.read_csv(url)
|
| 9 |
+
|
| 10 |
+
df = load_data()
|
| 11 |
+
|
| 12 |
+
# Convert 'Original Issue Date' to datetime and dropping rows with missing data
|
| 13 |
+
df['Original Issue Date'] = pd.to_datetime(df['Original Issue Date'], errors='coerce')
|
| 14 |
+
df_time = df.dropna(subset=['Original Issue Date'])
|
| 15 |
+
|
| 16 |
+
# Count the number of licenses for each License Type and get top 10
|
| 17 |
+
license_type_counts = df['License Type'].value_counts().reset_index()
|
| 18 |
+
license_type_counts.columns = ['License Type', 'Count']
|
| 19 |
+
top_10_license_types = license_type_counts.head(10)
|
| 20 |
+
st.header("1. Top 10 License Types by Frequency")
|
| 21 |
+
|
| 22 |
+
# Bar chart for the top 10 license types
|
| 23 |
+
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
|
| 24 |
+
x=alt.X('Count:Q', title='License Count'),
|
| 25 |
+
y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
|
| 26 |
+
color=alt.Color('License Type:N', legend=None),
|
| 27 |
+
tooltip=['License Type', 'Count']
|
| 28 |
+
)
|
| 29 |
+
st.altair_chart(bar_chart, use_container_width=True)
|
| 30 |
+
|
| 31 |
+
# Write-Up for first plot
|
| 32 |
+
st.markdown("""
|
| 33 |
+
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
|
| 34 |
+
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.
|
| 35 |
+
|
| 36 |
+
I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
|
| 37 |
+
To improve this, I focused on the top 10 most frequent license types.
|
| 38 |
+
Since some license names were long, I made sure that the full text is visible when you hover over the bar chart.
|
| 39 |
+
If I had more time, I would like to find a way to make the license names more readable on the y-axis and include all license types without making the visualization too cluttered.""")
|
| 40 |
+
|
| 41 |
+
# Convert 'Expiration Date' to datetime and extract the year
|
| 42 |
+
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
|
| 43 |
+
df['Expiration Year'] = df['Expiration Date'].dt.year
|
| 44 |
+
|
| 45 |
+
# Group by Expiration Year and count the number of licenses expiring
|
| 46 |
+
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')
|
| 47 |
+
st.header("2. Trend of Licenses Expiring Over Time")
|
| 48 |
+
|
| 49 |
+
# Create trend line (line chart) with circle marks at data points
|
| 50 |
+
line_chart = alt.Chart(expiration_counts).mark_line().encode(
|
| 51 |
+
x=alt.X('Expiration Year:O', title='Expiration Year'),
|
| 52 |
+
y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
|
| 53 |
+
tooltip=['Expiration Year', 'License Count']
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
points = alt.Chart(expiration_counts).mark_point(shape='circle', filled=True, size=50).encode(
|
| 57 |
+
x=alt.X('Expiration Year:O'),
|
| 58 |
+
y=alt.Y('License Count:Q'),
|
| 59 |
+
tooltip=['Expiration Year', 'License Count']
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Combine line and points
|
| 63 |
+
final_chart = line_chart + points
|
| 64 |
+
|
| 65 |
+
st.altair_chart(final_chart, use_container_width=True)
|
| 66 |
+
|
| 67 |
+
# Markdown Write-Up for the trend line chart
|
| 68 |
+
st.markdown("""
|
| 69 |
+
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
|
| 70 |
+
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.
|
| 71 |
+
|
| 72 |
+
I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
|
| 73 |
+
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
|
| 74 |
+
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
|
| 75 |
+
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make all of it more organized.""")
|
.local/share/code-server/User/History/450b0aa/qzJp.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Read the licenses CSV from its hard-coded GitHub URL into a DataFrame."""
    return pd.read_csv(
        "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    )
|
| 9 |
+
|
| 10 |
+
# Load the licenses table that both charts in this script are built from.
df = load_data()

# Tally licenses per type. value_counts() orders the result from most to
# least frequent, so the first ten rows are the ten most common types.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
st.header("1. Top 10 License Types by Frequency")

# Horizontal bar chart: count on x, license type on y (sorted by descending
# count). The colour legend is hidden because the y-axis already names each bar.
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)
st.altair_chart(bar_chart, use_container_width=True)

# Write-Up for first plot
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
To improve this, I focused on the top 10 most frequent license types.
Some license names were long, so I made sure that full text is visible when you hover on the bars.
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
|
| 40 |
+
|
| 41 |
+
# Parse 'Expiration Date' (values that cannot be parsed become NaT) and keep
# the calendar year in its own column.
expiry_dates = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Date'] = expiry_dates
df['Expiration Year'] = expiry_dates.dt.year

# One row per expiration year with the number of licenses expiring in it.
expiration_counts = (
    df.groupby('Expiration Year')
    .size()
    .reset_index(name='License Count')
)
st.header("2. Trend of Licenses Expiring Over Time")

# Both layers read the same table and show the same hover fields.
hover_fields = ['Expiration Year', 'License Count']
yearly = alt.Chart(expiration_counts)

# Line layer: carries the axis titles.
line_chart = yearly.mark_line().encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=hover_fields,
)

# Point layer: filled circles drawn on top of the line at each year.
points = yearly.mark_point(shape='circle', filled=True, size=50).encode(
    x=alt.X('Expiration Year:O'),
    y=alt.Y('License Count:Q'),
    tooltip=hover_fields,
)

# Overlay the two layers and render the result at full container width.
final_chart = line_chart + points
st.altair_chart(final_chart, use_container_width=True)

# Write-Up for second plot
st.markdown("""
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/rykv.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
# Load the dataset
|
| 6 |
+
def load_data():
    """Fetch the licenses CSV from GitHub and return it as a pandas DataFrame."""
    csv_location = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    licenses = pd.read_csv(csv_location)
    return licenses
|
| 9 |
+
|
| 10 |
+
# Load the licenses table used by both charts below.
df = load_data()

# Count how many licenses exist per type; value_counts() returns the types
# ordered from most to least frequent, so head(10) yields the top ten.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']
top_10_license_types = license_type_counts.head(10)
st.header("1. Top 10 License Types by Frequency")

# Bar chart for the top 10 license types: bars are sorted by descending count
# and the legend is suppressed since the y-axis labels identify each bar.
bar_chart = alt.Chart(top_10_license_types).mark_bar().encode(
    x=alt.X('Count:Q', title='License Count'),
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count']
)
st.altair_chart(bar_chart, use_container_width=True)

# Write-Up for first plot
st.markdown("""
This bar chart highlights the top 10 most frequent license types in the dataset. The x-axis represents the count of each license type, while the y-axis shows the name of the license type.
The bars are color-coded to differentiate between the license types. This makes it easy for users to visually distinguish between each category. The chart is sorted in descending order of the license count.

I started by creating a bar chart for all license types, but the chart was difficult to read because many types had very low frequency.
To improve this, I focused on the top 10 most frequent license types.
Some license names were long, so I made sure that full text is visible when you hover on the bars.
If I had more time, I would like to find a way to make the column names more readable on the y-axis and include all license types without making the visualization too cluttered""")
|
| 40 |
+
|
| 41 |
+
# Turn 'Expiration Date' into datetimes (unparseable entries become NaT),
# then store the year component as a separate column.
parsed_expiration = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Date'] = parsed_expiration
df['Expiration Year'] = parsed_expiration.dt.year

# Number of licenses expiring in each year.
per_year = df.groupby('Expiration Year').size()
expiration_counts = per_year.reset_index(name='License Count')
st.header("2. Trend of Licenses Expiring Over Time")

# Fields shown in the hover tooltip of both layers.
tooltip_fields = ['Expiration Year', 'License Count']
counts_chart = alt.Chart(expiration_counts)

# Trend line; this layer supplies the axis titles.
line_chart = counts_chart.mark_line().encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=tooltip_fields,
)

# Circle marks at each yearly data point.
points = counts_chart.mark_point(shape='circle', filled=True, size=50).encode(
    x=alt.X('Expiration Year:O'),
    y=alt.Y('License Count:Q'),
    tooltip=tooltip_fields,
)

# Combine line and points into one layered chart and display it.
final_chart = line_chart + points
st.altair_chart(final_chart, use_container_width=True)

# Write-Up for second plot
st.markdown("""
This line chart shows the trend of licenses expiring over time, highlighting variations in license expirations. The x-axis represents the expiration year, while the y-axis shows the number of licenses expiring.
The line shows the overall trend, with circles marking individual data points for better visibility. Tooltips provide more details when hovering.

I chose a trend line because it clearly shows how the number of licenses expiring changes over time.
By plotting the data in this way, I can easily spot patterns or spikes in expiration rates.
The trend line gives a quick overview of the overall trend, while the circles on the data points highlight key years with significant changes.
If I had more time, I would break down the expiration data further by month or quarter to detect more granular patterns and find a way to make it all of it more organized""")
|
.local/share/code-server/User/History/450b0aa/s0Hs.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# put streamlit code here as needed
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import altair as alt
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
#import the data
|
| 7 |
+
#url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
|
| 8 |
+
#df = pd.read_csv(url)
|
| 9 |
+
#df.sum().isnull()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# #Fill the missing data with Unknown
|
| 14 |
+
# df['County'].fillna('Not available', inplace=True)
|
| 15 |
+
# df['Rep Full Name'].fillna('Not available', inplace=True)
|
| 16 |
+
# df['Year Constructed'].fillna(df['Year Constructed'].median(), inplace=True)
|
| 17 |
+
# df['Senator Full Name'].fillna('Unknown', inplace=True)
|
| 18 |
+
# df['Usage Description 2'].fillna(df['Usage Description 2'].mode()[0], inplace=True)
|
| 19 |
+
# df['Usage Description 3'].fillna(df['Usage Description 3'].mode()[0], inplace=True)
|
| 20 |
+
# df['Address'].fillna('Not available', inplace=True)
|
| 21 |
+
# df['Congressional Full Name'].fillna('Unknown', inplace=True)
|
| 22 |
+
|
| 23 |
+
# #Page Title
|
| 24 |
+
# st.markdown("<h1 style='text-align: center;'>Homework 5.1</h1>", unsafe_allow_html=True)
|
| 25 |
+
|
| 26 |
+
# st.subheader("Analyzing the Building Inventory Dataset")
|
| 27 |
+
# #Visualization 1
|
| 28 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 1</h4>", unsafe_allow_html=True)
|
| 29 |
+
|
| 30 |
+
# df_filtered = df.dropna(subset=['Bldg Status', 'Year Constructed'])
|
| 31 |
+
# df_filtered['Year Constructed'] = pd.to_numeric(df_filtered['Year Constructed'], errors='coerce')
|
| 32 |
+
# df_filtered = df_filtered[(df_filtered['Year Constructed'] >= 1600) & (df_filtered['Year Constructed'] <= 2100)]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# line_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
|
| 36 |
+
# x=alt.X('Year Constructed:Q', title='Year Constructed'),
|
| 37 |
+
# y=alt.Y('count()', title='Number of Buildings'),
|
| 38 |
+
# color=alt.Color('Bldg Status:N', title='Building Status'),
|
| 39 |
+
# tooltip=['Year Constructed', 'count()', 'Bldg Status']
|
| 40 |
+
# ).properties(
|
| 41 |
+
# width=700,
|
| 42 |
+
# height=400,
|
| 43 |
+
# title="Trend of Building Construction by Status"
|
| 44 |
+
# )
|
| 45 |
+
# st.altair_chart(line_chart, use_container_width=True)
|
| 46 |
+
# st.write("""
|
| 47 |
+
# This line chart highlights the trend in the number of buildings constructed over time,
|
| 48 |
+
# categorized by their building status (e.g., whether they are currently in use or not).
|
| 49 |
+
# The x-axis represents the 'Year Constructed' and the y-axis represents the count of buildings.
|
| 50 |
+
# The color encoding separates the buildings by their 'Bldg Status'
|
| 51 |
+
# I used a line plot with points to clearly indicate the number of buildings per year,
|
| 52 |
+
# which helps in identifying trends and peaks. Another reason is that line chart is ideal for
|
| 53 |
+
# visualizing time-series data. The colors are chosen to differentiate the building statuses
|
| 54 |
+
# effectively.
|
| 55 |
+
|
| 56 |
+
# If I had more time, I would consider adding labels to the data points for clarity and
|
| 57 |
+
# perhaps break down the data further by usage description or location for a more detailed analysis.
|
| 58 |
+
# Probably adding hover effects could display additional information, such as the exact count of
|
| 59 |
+
# buildings and their status, when hovering over each data point. This would allow users to gain
|
| 60 |
+
# deeper insights without cluttering the chart. I would also include filtering options so users
|
| 61 |
+
# could select specific building statuses or even a range of years to focus on, which would make
|
| 62 |
+
# the analysis more targeted.
|
| 63 |
+
# """)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# # Visualization 2
|
| 67 |
+
# st.markdown("<h4 style='text-decoration: underline;'>Visualization 2</h4>", unsafe_allow_html=True)
|
| 68 |
+
# county_agency_data = df.dropna(subset=['County', 'Agency Name'])
|
| 69 |
+
# county_agency_count = county_agency_data.groupby('County')['Agency Name'].nunique().reset_index()
|
| 70 |
+
# county_agency_count.rename(columns={'Agency Name': 'Unique Agencies'}, inplace=True)
|
| 71 |
+
|
| 72 |
+
# county_agency_chart = alt.Chart(county_agency_count).mark_bar().encode(
|
| 73 |
+
# x=alt.X('County:N', sort='-y', title="County"),
|
| 74 |
+
# y=alt.Y('Unique Agencies:Q', title="Number of Agencies"),
|
| 75 |
+
# color=alt.Color('Unique Agencies:Q', scale=alt.Scale(scheme='viridis')),
|
| 76 |
+
# tooltip=['County:N', 'Unique Agencies:Q']
|
| 77 |
+
# ).properties(
|
| 78 |
+
# width=700,
|
| 79 |
+
# height=400,
|
| 80 |
+
# title="Unique Agencies in Each County"
|
| 81 |
+
# )
|
| 82 |
+
# st.altair_chart(county_agency_chart, use_container_width=True)
|
| 83 |
+
|
| 84 |
+
# st.write("""
|
| 85 |
+
# This bar chart highlights the number of unique agencies operating in each county.
|
| 86 |
+
# The x-axis represents the 'County' and the y-axis represents the 'Number of Unique Agencies'
|
| 87 |
+
# I used a bar chart as it allows for easy comparison between counties based on the number of
|
| 88 |
+
# agencies, and sorting the counties by agency count makes it easier to identify areas with
|
| 89 |
+
# more agencies. The color scale uses a 'viridis' color scheme, to show how the number of agencies
|
| 90 |
+
# differs across counties.
|
| 91 |
+
|
| 92 |
+
# If I had more time, I would consider adding interactivity, like a filter for 'Building Status', so that users can
|
| 93 |
+
# focus on specific statuses, such as buildings that are still in progress or abandoned for each county.
|
| 94 |
+
# I would also implement a slider for 'Year Constructed' to allow users to focus on specific time periods,
|
| 95 |
+
# enabling them to analyze trends and compare the distribution of agencies by different time periods.
|
| 96 |
+
# I would also consider adding a search function so that users can look for specific counties
|
| 97 |
+
# or agency types.
|
| 98 |
+
# """)
|
| 99 |
+
# Load State GeoJSON for Map (example if supported)
|
| 100 |
+
# Visualization: Licenses by State
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Load Dataset
|
| 104 |
+
# st.cache is deprecated in favour of st.cache_data. The script also adds and
# rewrites columns on the returned frame; st.cache_data gives each caller its
# own copy, so those in-place edits never touch the cached value.
@st.cache_data
def load_data():
    """Download the licenses CSV from GitHub and return it as a DataFrame.

    The result is cached by Streamlit, so script reruns reuse the already
    downloaded data instead of fetching the file again.
    """
    url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv"
    return pd.read_csv(url)
|
| 108 |
+
|
| 109 |
+
# Fetch the licenses table that every chart below draws from.
df = load_data()

# Parse the issue dates (unparseable values become NaT) and build a second
# frame that excludes rows without a usable 'Original Issue Date'.
issue_dates = pd.to_datetime(df['Original Issue Date'], errors='coerce')
df['Original Issue Date'] = issue_dates
df_time = df.dropna(subset=['Original Issue Date'])

# Page heading and introductory text.
st.title("Licenses Dataset Visualizations")
st.markdown("This app presents three visualizations of the licenses dataset.")

# Visualization 1: horizontal bar chart of the number of licenses per type.
st.header("1. Distribution of Licenses by Type")
license_count = df['License Type'].value_counts().reset_index()
license_count.columns = ['License Type', 'Count']

# Bars are sorted by descending count; the legend is hidden because the
# y-axis already labels every bar.
type_bars = alt.Chart(license_count).mark_bar()
bar_chart = type_bars.encode(
    x=alt.X('Count:Q', title='Number of Licenses'),
    y=alt.Y('License Type:N', sort='-x', title='License Type'),
    color=alt.Color('License Type:N', legend=None),
).properties(title="Number of Licenses by Type")

st.altair_chart(bar_chart, use_container_width=True)

st.markdown("""
**Highlights**: This bar chart shows the distribution of licenses by type.
**Design Choices**: A horizontal bar chart is used for better readability of license types.
""")
|
| 137 |
+
|
| 138 |
+
# Visualization 2: line chart of how many licenses were issued in each year.
st.header("2. Trend of Licenses Over Time")

# Group the rows that have a valid issue date by the year of that date.
issue_year = df_time['Original Issue Date'].dt.year
time_data = df_time.groupby(issue_year).size().reset_index(name='Count')
time_data.columns = ['Year', 'Count']

# A single fixed colour is used for the whole series; point=True draws a
# marker on every yearly value.
year_axis = alt.X('Year:O', title='Year')
count_axis = alt.Y('Count:Q', title='Number of Licenses')
line_chart = (
    alt.Chart(time_data)
    .mark_line(point=True)
    .encode(x=year_axis, y=count_axis, color=alt.value('blue'))
    .properties(title="Trend of Licenses Issued Over Years")
)

st.altair_chart(line_chart, use_container_width=True)

st.markdown("""
**Highlights**: This line chart shows the trend of license issuances over time.
**Design Choices**: Points are added to highlight specific data points, and the color blue is chosen for simplicity.
""")
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# Frequency table of license types, most common first.
license_type_counts = df['License Type'].value_counts().reset_index()
license_type_counts.columns = ['License Type', 'Count']

# Keep only the ten most frequent types.
top_10_license_types = license_type_counts.iloc[:10]

# Horizontal bars sorted by descending count, with extra padding between the
# axis and its labels and a tooltip showing the type name and count.
top_types = alt.Chart(top_10_license_types).mark_bar()
bar_chart = top_types.encode(
    x=alt.X('Count:Q', title='License Count'),
    y=alt.Y('License Type:N', sort='-x', title='License Type', axis=alt.Axis(labelPadding=15)),
    color=alt.Color('License Type:N', legend=None),
    tooltip=['License Type', 'Count'],
).properties(title='Top 10 License Types by Frequency')

# Render the chart at the full width of its container.
st.altair_chart(bar_chart, use_container_width=True)
|
| 176 |
+
|
| 177 |
+
# Parse 'Expiration Date' once (unparseable values become NaT) and derive the
# expiration year from it.
df['Expiration Date'] = pd.to_datetime(df['Expiration Date'], errors='coerce')
df['Expiration Year'] = df['Expiration Date'].dt.year

# Number of licenses expiring in each year; this single table feeds all of
# the trend charts below.
expiration_counts = df.groupby('Expiration Year').size().reset_index(name='License Count')

# Shared line-chart definition; each rendering below only changes the title
# or adds interactivity. .properties() returns a new chart, so the base is
# never modified.
trend_base = alt.Chart(expiration_counts).mark_line().encode(
    x=alt.X('Expiration Year:O', title='Expiration Year'),
    y=alt.Y('License Count:Q', title='Number of Licenses Expiring'),
    tooltip=['Expiration Year', 'License Count']
)

# NOTE(review): the next two charts differ only in their title; confirm that
# both are meant to be shown.
line_chart = trend_base.properties(
    title='Trend Line for License Expirations Over Time'
)
st.altair_chart(line_chart, use_container_width=True)

line_chart = trend_base.properties(
    title='Trend Line for Licenses Expiring Over Time'
)
st.altair_chart(line_chart, use_container_width=True)

# Same trend line with .interactive() applied.
line_chart = trend_base.properties(
    title='Trend Line for Licenses Expiring Over Time'
).interactive()
st.altair_chart(line_chart, use_container_width=True)
|