ralate2 committed on
Commit
8b4e4f2
·
verified ·
1 Parent(s): 3071a2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -51
app.py CHANGED
@@ -1,19 +1,17 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  import plotly.graph_objects as go
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
- #trial
7
- # Set Streamlit page config
8
- st.set_page_config(page_title="Legislative Visualizations", layout="wide")
9
 
 
 
10
  st.title("Legislative Bill Analysis Dashboard")
11
 
12
- # Upload dataset
13
- uploaded_file = st.file_uploader("Illinois_Entire_Data_Insights_Final_v2.csv", type=["csv", "xlsx"])
14
 
15
  if uploaded_file:
16
- # Load dataset
17
  if uploaded_file.name.endswith('.csv'):
18
  df = pd.read_csv(uploaded_file)
19
  else:
@@ -21,52 +19,126 @@ if uploaded_file:
21
 
22
  st.success("File uploaded and read successfully!")
23
 
24
- # Sankey Diagram
25
- st.header("🔗 Sankey Diagram: Intent → Stance → Beneficiaries")
26
- sankey_df = df[['intent_standardized', 'stance_standardized', 'intended_beneficiaries_standardized']].dropna()
27
 
28
- if not sankey_df.empty:
29
- labels = list(pd.unique(sankey_df['intent_standardized'].tolist() +
30
- sankey_df['stance_standardized'].tolist() +
31
- sankey_df['intended_beneficiaries_standardized'].tolist()))
32
- label_map = {label: i for i, label in enumerate(labels)}
33
 
34
- intent_stance = sankey_df.groupby(['intent_standardized', 'stance_standardized']).size().reset_index(name='count')
35
- stance_beneficiary = sankey_df.groupby(['stance_standardized', 'intended_beneficiaries_standardized']).size().reset_index(name='count')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- source = intent_stance['intent_standardized'].map(label_map).tolist() + stance_beneficiary['stance_standardized'].map(label_map).tolist()
38
- target = intent_stance['stance_standardized'].map(label_map).tolist() + stance_beneficiary['intended_beneficiaries_standardized'].map(label_map).tolist()
39
- value = intent_stance['count'].tolist() + stance_beneficiary['count'].tolist()
 
 
40
 
41
- fig_sankey = go.Figure(data=[go.Sankey(
42
- node=dict(pad=15, thickness=20, line=dict(color="black", width=0.5), label=labels),
43
- link=dict(source=source, target=target, value=value)
44
- )])
45
- fig_sankey.update_layout(title_text="Sankey: Intent → Stance → Beneficiary", font_size=12)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- st.plotly_chart(fig_sankey, use_container_width=True)
48
- else:
49
- st.warning("Sankey input columns contain only null values or are missing.")
50
-
51
- # Heatmap
52
- st.header("🧯 Heatmap: Category vs Policy Impact Area")
53
- heat_df = df[['category_&_subcategory_standardized', 'policy_impact_areas_standardized']].dropna()
54
-
55
- if not heat_df.empty:
56
- heat = heat_df.pivot_table(index='category_&_subcategory_standardized',
57
- columns='policy_impact_areas_standardized',
58
- aggfunc=len,
59
- fill_value=0)
60
-
61
- plt.figure(figsize=(14, 8))
62
- sns.heatmap(heat, cmap='coolwarm', annot=False)
63
- plt.title("Heatmap: Category vs Policy Impact Area")
64
- plt.xlabel("Policy Impact Area")
65
- plt.ylabel("Category")
66
- plt.tight_layout()
67
-
68
- st.pyplot(plt)
69
- else:
70
- st.warning("Heatmap input columns contain only null values or are missing.")
71
  else:
72
- st.info("Please upload a dataset file to view the visualizations.")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import plotly.express as px
4
  import plotly.graph_objects as go
 
 
 
 
 
5
 
6
+ # Streamlit page setup
7
+ st.set_page_config(page_title="Legislative Bill Analysis", layout="wide")
8
  st.title("Legislative Bill Analysis Dashboard")
9
 
10
+ # File uploader
11
+ uploaded_file = st.file_uploader("Upload Illinois_Entire_Data_Insights_Final_v2.csv", type=["csv", "xlsx"])
12
 
13
  if uploaded_file:
14
+ # Read file
15
  if uploaded_file.name.endswith('.csv'):
16
  df = pd.read_csv(uploaded_file)
17
  else:
 
19
 
20
  st.success("File uploaded and read successfully!")
21
 
22
+ # Preprocessing date and year
23
+ df['status_date'] = pd.to_datetime(df['status_date'], errors='coerce')
24
+ df['year'] = df['status_date'].dt.year
25
 
26
+ # ------------------------
27
+ # Visualization 1: Yearly Bills by Intent
28
+ # ------------------------
29
+ st.header(" Bills Over Time by Intent")
30
+ yearly_intent_counts = df.groupby(['year', 'intent_standardized']).size().reset_index(name='bill_count')
31
 
32
+ fig1 = px.bar(
33
+ yearly_intent_counts,
34
+ x='year',
35
+ y='bill_count',
36
+ color='intent_standardized',
37
+ title='Bills Over Time by Intent',
38
+ labels={'year': 'Year', 'bill_count': 'Number of Bills', 'intent_standardized': 'Intent'},
39
+ barmode='group',
40
+ height=500,
41
+ color_discrete_sequence=px.colors.qualitative.Set2
42
+ )
43
+ fig1.update_layout(
44
+ xaxis=dict(tickangle=0),
45
+ legend_title_text='Intent',
46
+ plot_bgcolor='white',
47
+ paper_bgcolor='white',
48
+ font=dict(color='black'),
49
+ title_font=dict(size=20)
50
+ )
51
+ st.plotly_chart(fig1, use_container_width=True)
52
 
53
+ # ------------------------
54
+ # Visualization 2: Animated Stance Distribution by Policy Area
55
+ # ------------------------
56
+ st.header("Stance Distribution Across Policy Areas (Animated by Year)")
57
+ grouped = df.groupby(['year', 'policy_impact_areas_standardized', 'stance_standardized']).size().reset_index(name='count')
58
 
59
+ fig2 = px.bar(
60
+ grouped,
61
+ x='count',
62
+ y='policy_impact_areas_standardized',
63
+ color='stance_standardized',
64
+ orientation='h',
65
+ animation_frame='year',
66
+ title='Stance Distribution Across Policy Areas (Animated by Year)',
67
+ labels={
68
+ 'count': 'Number of Bills',
69
+ 'policy_impact_areas_standardized': 'Policy Area',
70
+ 'stance_standardized': 'Stance'
71
+ },
72
+ height=600,
73
+ color_discrete_sequence=px.colors.qualitative.Set2
74
+ )
75
+ fig2.update_layout(
76
+ legend_title='Stance',
77
+ xaxis_title='Number of Bills',
78
+ yaxis_title='Policy Area',
79
+ plot_bgcolor='white',
80
+ paper_bgcolor='white',
81
+ font=dict(color='black'),
82
+ title_font=dict(size=20),
83
+ margin=dict(t=60, l=150)
84
+ )
85
+ st.plotly_chart(fig2, use_container_width=True)
86
+
87
+ # ------------------------
88
+ # Visualization 3: Sankey Diagram - Intent → Beneficiaries → Increasing Aspects
89
+ # ------------------------
90
+ st.header("🔗 Top Intent → Beneficiaries → Increasing Aspect Flows (Sankey)")
91
+
92
+ def shorten(text, max_len=35):
93
+ return text if len(text) <= max_len else text[:max_len] + "..."
94
+
95
+ sankey_data = df[['intent_standardized', 'intended_beneficiaries_standardized', 'increasing_aspects_standardized']].dropna()
96
+ path_counts = (
97
+ sankey_data.groupby(['intent_standardized', 'intended_beneficiaries_standardized', 'increasing_aspects_standardized'])
98
+ .size()
99
+ .reset_index(name='count')
100
+ .sort_values(by='count', ascending=False)
101
+ )
102
+
103
+ TOP_N = 15
104
+ filtered_paths = path_counts.head(TOP_N)
105
+ unique_labels = pd.unique(filtered_paths[['intent_standardized', 'intended_beneficiaries_standardized', 'increasing_aspects_standardized']].values.ravel())
106
+ short_labels = [shorten(label) for label in unique_labels]
107
+ label_to_index = {label: i for i, label in enumerate(unique_labels)}
108
+ label_to_short = dict(zip(unique_labels, short_labels))
109
+
110
+ sources = list(filtered_paths['intent_standardized'].map(label_to_index))
111
+ targets = list(filtered_paths['intended_beneficiaries_standardized'].map(label_to_index))
112
+ values = list(filtered_paths['count'])
113
+
114
+ sources += list(filtered_paths['intended_beneficiaries_standardized'].map(label_to_index))
115
+ targets += list(filtered_paths['increasing_aspects_standardized'].map(label_to_index))
116
+ values += list(filtered_paths['count'])
117
+
118
+ fig3 = go.Figure(data=[go.Sankey(
119
+ arrangement="snap",
120
+ node=dict(
121
+ pad=25,
122
+ thickness=20,
123
+ line=dict(color="black", width=0.3),
124
+ label=[label_to_short[label] for label in unique_labels],
125
+ color="lightsteelblue"
126
+ ),
127
+ link=dict(
128
+ source=sources,
129
+ target=targets,
130
+ value=values,
131
+ color="rgba(150,150,150,0.4)"
132
+ )
133
+ )])
134
+
135
+ fig3.update_layout(
136
+ title_text="Top Intent → Beneficiaries → Increasing Aspect Flows",
137
+ font_size=12,
138
+ height=600,
139
+ margin=dict(l=50, r=50, t=80, b=30)
140
+ )
141
+ st.plotly_chart(fig3, use_container_width=True)
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  else:
144
+ st.info(" Please upload a dataset file to view the visualizations.")