ralate2 committed on
Commit
984393f
·
verified ·
1 Parent(s): b76d87f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -71
app.py CHANGED
@@ -2,87 +2,30 @@ import streamlit as st
2
  import pandas as pd
3
  import altair as alt
4
 
5
- # Load the dataset
6
  url = 'https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv'
7
  df = pd.read_csv(url)
8
 
9
- # Drop unnecessary columns to prevent serialization errors and reduce data size
10
  df.drop(columns=['Title', 'Prefix', 'Suffix', 'BusinessDBA', '_id', 'Delegated Controlled Substance Schedule', 'Case Number'], inplace=True, errors='ignore')
11
 
12
- # Convert date columns to datetime format, handling errors
13
  date_columns = ['Original Issue Date', 'Effective Date', 'Expiration Date', 'LastModifiedDate']
14
  for col in date_columns:
15
  df[col] = pd.to_datetime(df[col], errors='coerce')
16
 
17
- # Convert object columns to category for performance improvement
18
  for col in df.select_dtypes(include='object').columns:
19
  df[col] = df[col].astype('category')
20
 
21
- # Drop rows with missing values in essential columns to ensure Arrow compatibility
22
  df.dropna(subset=['License Type', 'License Status', 'Original Issue Date'], inplace=True)
23
 
24
- # Add Year columns for visualizations
25
  df['Original Issue Year'] = df['Original Issue Date'].dt.year
26
 
27
- # # Visualization 1: Bar chart of licenses by Status
28
- # st.subheader("Licenses by Status")
29
- # category_counts = df['License Status'].value_counts().reset_index()
30
- # category_counts.columns = ['License Status', 'Count']
31
-
32
- # if not category_counts.empty:
33
- # chart1 = alt.Chart(category_counts).mark_bar().encode(
34
- # x=alt.X('License Status', sort='-y', title='License Type'),
35
- # y=alt.Y('Count', title='Number of Licenses',scale=alt.Scale(domain=(0, 600), nice=False)),
36
- # color=alt.Color('License Status', legend=None)
37
- # ).properties(
38
- # width=600,
39
- # height=400,
40
- # title="Number of Licenses by Status"
41
- # )
42
- # st.altair_chart(chart1)
43
- # else:
44
- # st.write("No data available for the 'License Status' plot.")
45
-
46
- # # Visualization 1: Pie chart of licenses by Status
47
- # st.subheader("Licenses by Status")
48
- # category_counts = df['License Status'].value_counts().reset_index()
49
- # category_counts.columns = ['License Status', 'Count']
50
-
51
- # if not category_counts.empty:
52
- # chart1 = alt.Chart(category_counts).mark_arc().encode(
53
- # theta=alt.Theta(field="Count", type="quantitative", title="Number of Licenses"),
54
- # color=alt.Color(field="License Status", type="nominal", legend=None),
55
- # tooltip=['License Status', 'Count']
56
- # ).properties(
57
- # width=400,
58
- # height=400,
59
- # title="Licenses by Status"
60
- # )
61
- # st.altair_chart(chart1)
62
- # else:
63
- # st.write("No data available for the 'License Status' plot.")
64
-
65
- # # Visualization 1: Pie chart of licenses by Status with distinct colors and legend
66
- # st.subheader("Licenses by Status")
67
- # category_counts = df['License Status'].value_counts().reset_index()
68
- # category_counts.columns = ['License Status', 'Count']
69
-
70
- # if not category_counts.empty:
71
- # chart1 = alt.Chart(category_counts).mark_arc().encode(
72
- # theta=alt.Theta(field="Count", type="quantitative", title="Number of Licenses"),
73
- # color=alt.Color(field="License Status", type="nominal", legend=alt.Legend(title="License Status"),
74
- # scale=alt.Scale(scheme='pastel1')), # Distinct color scheme
75
- # tooltip=['License Status', 'Count']
76
- # ).properties(
77
- # width=400,
78
- # height=400,
79
- # title="Licenses by Status"
80
- # )
81
- # st.altair_chart(chart1)
82
- # else:
83
- # st.write("No data available for the 'License Status' plot.")
84
-
85
- st.subheader("Licenses by Status")
86
  category_counts = df['License Status'].value_counts().reset_index()
87
  category_counts.columns = ['License Status', 'Count']
88
 
@@ -95,22 +38,18 @@ if not category_counts_top5.empty:
95
  sort='-y'), # Sorting by count in descending order
96
  y=alt.Y(field="Count", type="quantitative", title="Number of Licenses"),
97
  color=alt.Color(field="License Status", type="nominal", legend=alt.Legend(title="License Status"),
98
- scale=alt.Scale(scheme='pastel1')), # Pastel color scheme
99
  tooltip=['License Status', 'Count']
100
  ).properties(
101
- width=600, # Increased width to accommodate long labels
102
  height=400,
103
  title="Licenses by Status (Top 5)"
104
- ).configure_axis(
105
- labelAngle=45, # Rotate labels for better visibility
106
- labelPadding=10 # Add padding to avoid clipping
107
  )
108
  st.altair_chart(chart1)
109
  else:
110
  st.write("No data available for the 'License Status' plot.")
111
 
112
 
113
-
114
  # Write-up for Visualization 1
115
  st.write("""
116
  **Licenses by Status**: This visualization highlights the distribution of licenses across different statuses.
 
2
  import pandas as pd
3
  import altair as alt
4
 
5
+ # Loading the dataset
6
  url = 'https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/licenses_fall2022.csv'
7
  df = pd.read_csv(url)
8
 
9
+ # Dropping unnecessary columns to prevent serialization errors and reduce data size
10
  df.drop(columns=['Title', 'Prefix', 'Suffix', 'BusinessDBA', '_id', 'Delegated Controlled Substance Schedule', 'Case Number'], inplace=True, errors='ignore')
11
 
12
+ # Converting date columns to datetime format, handling errors
13
  date_columns = ['Original Issue Date', 'Effective Date', 'Expiration Date', 'LastModifiedDate']
14
  for col in date_columns:
15
  df[col] = pd.to_datetime(df[col], errors='coerce')
16
 
17
+ # Converting object columns to category for performance improvement
18
  for col in df.select_dtypes(include='object').columns:
19
  df[col] = df[col].astype('category')
20
 
21
+ # Dropping rows with missing values in essential columns to ensure Arrow compatibility
22
  df.dropna(subset=['License Type', 'License Status', 'Original Issue Date'], inplace=True)
23
 
24
+ # Adding Year columns for visualizations
25
  df['Original Issue Year'] = df['Original Issue Date'].dt.year
26
 
27
+ # # Visualization 1: Bar chart of licenses by Status (Top 5)
28
+ #st.subheader("Licenses by Status")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  category_counts = df['License Status'].value_counts().reset_index()
30
  category_counts.columns = ['License Status', 'Count']
31
 
 
38
  sort='-y'), # Sorting by count in descending order
39
  y=alt.Y(field="Count", type="quantitative", title="Number of Licenses"),
40
  color=alt.Color(field="License Status", type="nominal", legend=alt.Legend(title="License Status"),
41
+ scale=alt.Scale(scheme='pastel1')),
42
  tooltip=['License Status', 'Count']
43
  ).properties(
44
+ width=500,
45
  height=400,
46
  title="Licenses by Status (Top 5)"
 
 
 
47
  )
48
  st.altair_chart(chart1)
49
  else:
50
  st.write("No data available for the 'License Status' plot.")
51
 
52
 
 
53
  # Write-up for Visualization 1
54
  st.write("""
55
  **Licenses by Status**: This visualization highlights the distribution of licenses across different statuses.