BulatF committed on
Commit
3837d93
·
verified ·
1 Parent(s): 8dfa16b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +308 -185
app.py CHANGED
@@ -1,195 +1,318 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
 
 
4
 
5
  def main():
6
- st.set_page_config(page_title="Enhanced CSV to Map", layout="wide")
7
- st.title("πŸ“ Enhanced CSV to Interactive Map")
8
- st.write("""
9
- Upload a CSV file, select the latitude and longitude columns, customize the map settings, and visualize the points on an interactive map.
10
- """)
11
-
12
- # File uploader
13
- uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
14
-
15
- if uploaded_file is not None:
16
- try:
17
- # Read the CSV file
18
- df = pd.read_csv(uploaded_file)
19
-
20
- st.success("File uploaded successfully!")
21
- st.write("### Preview of the Data")
22
- st.dataframe(df.head())
23
-
24
- # Identify numeric columns for latitude and longitude
25
- numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
26
-
27
- if len(numeric_columns) < 2:
28
- st.error("The uploaded CSV does not contain enough numeric columns for latitude and longitude.")
29
- return
30
-
31
- # Select latitude and longitude columns
32
- col1, col2 = st.columns(2)
33
-
34
- with col1:
35
- lat_col = st.selectbox("Select Latitude Column", options=numeric_columns, key="lat")
36
-
37
- with col2:
38
- lon_col = st.selectbox("Select Longitude Column", options=numeric_columns, key="lon")
39
-
40
- if lat_col and lon_col:
41
- # Validate latitude and longitude ranges
42
- if not df[lat_col].between(-90, 90).all():
43
- st.warning("Some latitude values are out of the valid range (-90 to 90).")
44
- if not df[lon_col].between(-180, 180).all():
45
- st.warning("Some longitude values are out of the valid range (-180 to 180).")
46
-
47
- # Drop rows with missing or invalid coordinates
48
- valid_df = df.dropna(subset=[lat_col, lon_col])
49
- valid_df = valid_df[
50
- (valid_df[lat_col].between(-90, 90)) &
51
- (valid_df[lon_col].between(-180, 180))
52
- ]
53
-
54
- if valid_df.empty:
55
- st.error("No valid data points to display on the map.")
56
- return
57
-
58
- st.write(f"### Displaying {len(valid_df)} Points on the Map")
59
-
60
- # Sidebar for Map Settings
61
- st.sidebar.header("πŸ—ΊοΈ Map Settings")
62
-
63
- # Bubble Size Selection
64
- size_options = valid_df.select_dtypes(include=['float64', 'int64']).columns.tolist()
65
- size_options.insert(0, None) # Option for no sizing
66
- size_col = st.sidebar.selectbox("Select a column for bubble size (optional)", options=size_options)
67
-
68
- # Hover Name Selection
69
- hover_options = [col for col in df.columns if col not in [lat_col, lon_col]]
70
- hover_options.insert(0, None) # Option for no hover name
71
- hover_col = st.sidebar.selectbox("Select a column for hover name (optional)", options=hover_options)
72
-
73
- # Color Customization
74
- color_mode = st.sidebar.radio("Select Color Mode", options=["Single Color", "Color Scale"], index=0)
75
- if color_mode == "Single Color":
76
- color = st.sidebar.color_picker("Pick a color for the points", "#FF5733")
77
- color_column = None
78
  else:
79
- # Allow user to select a column for color scale
80
- color_column_options = [col for col in df.columns if col not in [lat_col, lon_col]]
81
- color_column_options.insert(0, None)
82
- color_column = st.sidebar.selectbox("Select a column for color scale (optional)", options=color_column_options)
83
- color = None # Color will be determined by the color_column
84
-
85
- # Map Style Selection
86
- map_style_options = [
87
- "open-street-map",
88
- "carto-positron",
89
- "carto-darkmatter",
90
- "stamen-terrain",
91
- "stamen-toner",
92
- "stamen-watercolor",
93
- "white-bg",
94
- "basic",
95
- "light",
96
- "dark",
97
- "satellite",
98
- "satellite-streets",
99
- "outdoors",
100
- "traffic-day",
101
- "traffic-night"
102
- ]
103
- map_style = st.sidebar.selectbox("Select Map Style", options=map_style_options, index=0)
104
-
105
- # Default Bubble Size
106
- default_size = st.sidebar.slider("Default Bubble Size (when no size column selected)", min_value=5, max_value=20, value=10)
107
-
108
- # Map Height Adjustment
109
- map_height = st.sidebar.slider("Map Height (pixels)", min_value=400, max_value=1000, value=600)
110
-
111
- # Initial Zoom Level
112
- zoom_level = st.sidebar.slider("Initial Zoom Level", min_value=1, max_value=20, value=3)
113
-
114
- # Validate Color Scale Selection
115
- if color_mode == "Color Scale" and not color_column:
116
- st.sidebar.warning("Please select a column for color scale.")
117
- st.stop() # Prevent further execution until a column is selected
118
-
119
- # Determine color parameters based on color mode
120
- if color_mode == "Color Scale" and color_column:
121
- # Color Scale Mode with a selected color column
122
- color_param = color_column
123
- color_discrete_sequence = None
124
- color_continuous_scale = "Viridis"
125
- elif color_mode == "Color Scale" and not color_column:
126
- # Color Scale Mode without a selected color column (handled above)
127
- color_param = None
128
- color_discrete_sequence = "Viridis"
129
- color_continuous_scale = None
130
- elif color_mode == "Single Color":
131
- # Single Color Mode
132
- color_param = None
133
- color_discrete_sequence = [color]
134
- color_continuous_scale = None
135
-
136
- # Create the map based on size and color settings
137
- if size_col:
138
- fig = px.scatter_mapbox(
139
- valid_df,
140
- lat=lat_col,
141
- lon=lon_col,
142
- size=size_col,
143
- size_max=15,
144
- color=color_param,
145
- color_continuous_scale=color_continuous_scale,
146
- color_discrete_sequence=color_discrete_sequence,
147
- hover_name=hover_col if hover_col else None,
148
- hover_data=[col for col in valid_df.columns if col not in [lat_col, lon_col]],
149
- zoom=zoom_level,
150
- mapbox_style=map_style,
151
- height=map_height,
152
- title="Interactive Map"
153
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  else:
155
- fig = px.scatter_mapbox(
156
- valid_df,
157
- lat=lat_col,
158
- lon=lon_col,
159
- color=color_param,
160
- color_continuous_scale=color_continuous_scale,
161
- color_discrete_sequence=color_discrete_sequence,
162
- hover_name=hover_col if hover_col else None,
163
- hover_data=[col for col in valid_df.columns if col not in [lat_col, lon_col]],
164
- zoom=zoom_level,
165
- mapbox_style=map_style,
166
- height=map_height,
167
- title="Interactive Map"
168
- )
169
- if color_mode == "Single Color":
170
- # Update marker color directly for Single Color Mode
171
- fig.update_traces(marker=dict(color=color))
172
- else:
173
- # If no color is set, ensure default color scale
174
- pass
175
-
176
- # Apply default bubble size when no size column is selected
177
- if not size_col:
178
- fig.update_traces(marker=dict(size=default_size))
179
-
180
- # Update layout for better aesthetics
181
- fig.update_layout(
182
- margin={"r":0,"t":30,"l":0,"b":0},
183
- title_x=0.5
184
- )
185
-
186
- st.plotly_chart(fig, use_container_width=True)
187
-
188
- except Exception as e:
189
- st.error(f"An error occurred while processing the file: {e}")
190
-
191
- else:
192
- st.info("Awaiting CSV file upload.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  if __name__ == "__main__":
195
  main()
 
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
4
+ from geopy.geocoders import Nominatim
5
+ from geopy.extra.rate_limiter import RateLimiter
6
 
7
  def main():
8
+ st.set_page_config(page_title="Enhanced CSV Tool", layout="wide")
9
+ st.title("πŸ“ Enhanced CSV Tool: Mapping & Data Enrichment")
10
+
11
+ tabs = st.tabs(["Map Visualization", "Data Enrichment"])
12
+
13
+ ### Map Visualization Tab
14
+ with tabs[0]:
15
+ st.write("""
16
+ ## πŸ“ Interactive Map Visualization
17
+ Upload your CSV file, select latitude and longitude columns, customize map settings, and visualize the data points on an interactive map.
18
+ """)
19
+
20
+ uploaded_file_map = st.file_uploader("πŸ“‚ Upload CSV for Mapping", type=["csv"], key="map_upload")
21
+
22
+ if uploaded_file_map is not None:
23
+ try:
24
+ df_map = pd.read_csv(uploaded_file_map)
25
+ st.success("βœ… File uploaded successfully!")
26
+ st.write("### πŸ“Š Data Preview")
27
+ st.dataframe(df_map.head())
28
+
29
+ numeric_columns_map = df_map.select_dtypes(include=['float64', 'int64']).columns.tolist()
30
+
31
+ if len(numeric_columns_map) < 2:
32
+ st.error("❌ The uploaded CSV does not contain enough numeric columns for latitude and longitude.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  else:
34
+ col1_map, col2_map = st.columns(2)
35
+ with col1_map:
36
+ lat_col_map = st.selectbox("πŸ“ Select Latitude Column", options=numeric_columns_map, key="lat_map")
37
+ with col2_map:
38
+ lon_col_map = st.selectbox("🧭 Select Longitude Column", options=numeric_columns_map, key="lon_map")
39
+
40
+ if lat_col_map and lon_col_map:
41
+ if not df_map[lat_col_map].between(-90, 90).all():
42
+ st.warning("⚠️ Some latitude values are out of the valid range (-90 to 90).")
43
+ if not df_map[lon_col_map].between(-180, 180).all():
44
+ st.warning("⚠️ Some longitude values are out of the valid range (-180 to 180).")
45
+
46
+ valid_df_map = df_map.dropna(subset=[lat_col_map, lon_col_map])
47
+ valid_df_map = valid_df_map[
48
+ (valid_df_map[lat_col_map].between(-90, 90)) &
49
+ (valid_df_map[lon_col_map].between(-180, 180))
50
+ ]
51
+
52
+ if valid_df_map.empty:
53
+ st.error("❌ No valid data points to display on the map.")
54
+ else:
55
+ st.write(f"### πŸ“ Displaying {len(valid_df_map)} Points on the Map")
56
+
57
+ st.sidebar.header("πŸ—ΊοΈ Map Settings")
58
+
59
+ size_options_map = valid_df_map.select_dtypes(include=['float64', 'int64']).columns.tolist()
60
+ size_options_map.insert(0, None)
61
+ size_col_map = st.sidebar.selectbox("πŸ”Ή Select a column for bubble size (optional)", options=size_options_map, key="size_map")
62
+
63
+ hover_options_map = [col for col in df_map.columns if col not in [lat_col_map, lon_col_map]]
64
+ hover_options_map.insert(0, None)
65
+ hover_col_map = st.sidebar.selectbox("πŸ”Ή Select a column for hover name (optional)", options=hover_options_map, key="hover_map")
66
+
67
+ color_mode_map = st.sidebar.radio("🎨 Select Color Mode", options=["Single Color", "Color Scale"], index=0, key="color_mode_map")
68
+ if color_mode_map == "Single Color":
69
+ color_map = st.sidebar.color_picker("πŸ”Ή Pick a color for the points", "#FF5733", key="color_map")
70
+ color_column_map = None
71
+ else:
72
+ color_column_options_map = [col for col in df_map.columns if col not in [lat_col_map, lon_col_map]]
73
+ color_column_options_map.insert(0, None)
74
+ color_column_map = st.sidebar.selectbox("πŸ”Ή Select a column for color scale (optional)", options=color_column_options_map, key="color_column_map")
75
+ color_map = None
76
+
77
+ map_style_options = [
78
+ "open-street-map",
79
+ "carto-positron",
80
+ "carto-darkmatter",
81
+ "stamen-terrain",
82
+ "stamen-toner",
83
+ "stamen-watercolor",
84
+ "white-bg",
85
+ "basic",
86
+ "light",
87
+ "dark",
88
+ "satellite",
89
+ "satellite-streets",
90
+ "outdoors",
91
+ "traffic-day",
92
+ "traffic-night"
93
+ ]
94
+ map_style_map = st.sidebar.selectbox("🌐 Select Map Style", options=map_style_options, index=0, key="map_style_map")
95
+
96
+ default_size_map = st.sidebar.slider("πŸ”Ή Default Bubble Size", min_value=5, max_value=20, value=10, key="default_size_map")
97
+ map_height_map = st.sidebar.slider("πŸ”Ή Map Height (pixels)", min_value=400, max_value=1000, value=600, key="map_height_map")
98
+ zoom_level_map = st.sidebar.slider("πŸ”Ή Initial Zoom Level", min_value=1, max_value=20, value=3, key="zoom_level_map")
99
+
100
+ if color_mode_map == "Color Scale" and not color_column_map:
101
+ st.sidebar.warning("⚠️ Please select a column for color scale.")
102
+ st.stop()
103
+
104
+ if color_mode_map == "Color Scale" and color_column_map:
105
+ color_param_map = color_column_map
106
+ color_discrete_sequence_map = None
107
+ color_continuous_scale_map = "Viridis"
108
+ elif color_mode_map == "Color Scale" and not color_column_map:
109
+ color_param_map = None
110
+ color_discrete_sequence_map = "Viridis"
111
+ color_continuous_scale_map = None
112
+ elif color_mode_map == "Single Color":
113
+ color_param_map = None
114
+ color_discrete_sequence_map = [color_map]
115
+ color_continuous_scale_map = None
116
+
117
+ if size_col_map:
118
+ fig_map = px.scatter_mapbox(
119
+ valid_df_map,
120
+ lat=lat_col_map,
121
+ lon=lon_col_map,
122
+ size=size_col_map,
123
+ size_max=15,
124
+ color=color_param_map,
125
+ color_continuous_scale=color_continuous_scale_map,
126
+ color_discrete_sequence=color_discrete_sequence_map,
127
+ hover_name=hover_col_map if hover_col_map else None,
128
+ hover_data=[col for col in valid_df_map.columns if col not in [lat_col_map, lon_col_map]],
129
+ zoom=zoom_level_map,
130
+ mapbox_style=map_style_map,
131
+ height=map_height_map,
132
+ title="πŸ“ Interactive Map"
133
+ )
134
+ else:
135
+ fig_map = px.scatter_mapbox(
136
+ valid_df_map,
137
+ lat=lat_col_map,
138
+ lon=lon_col_map,
139
+ color=color_param_map,
140
+ color_continuous_scale=color_continuous_scale_map,
141
+ color_discrete_sequence=color_discrete_sequence_map,
142
+ hover_name=hover_col_map if hover_col_map else None,
143
+ hover_data=[col for col in valid_df_map.columns if col not in [lat_col_map, lon_col_map]],
144
+ zoom=zoom_level_map,
145
+ mapbox_style=map_style_map,
146
+ height=map_height_map,
147
+ title="πŸ“ Interactive Map"
148
+ )
149
+ if color_mode_map == "Single Color":
150
+ fig_map.update_traces(marker=dict(color=color_map))
151
+
152
+ if not size_col_map:
153
+ fig_map.update_traces(marker=dict(size=default_size_map))
154
+
155
+ fig_map.update_layout(
156
+ margin={"r":0,"t":30,"l":0,"b":0},
157
+ title_x=0.5
158
+ )
159
+
160
+ st.plotly_chart(fig_map, use_container_width=True)
161
+
162
+ except Exception as e:
163
+ st.error(f"❌ An error occurred while processing the file: {e}")
164
+ else:
165
+ st.info("πŸ“₯ Awaiting CSV file upload for mapping.")
166
+
167
+ ### Data Enrichment Tab
168
+ with tabs[1]:
169
+ st.write("""
170
+ ## πŸ” Data Enrichment with Geopy
171
+ Enrich your CSV data by retrieving additional geographic information based on latitude and longitude. Select the desired information to add to your dataset.
172
+ """)
173
+
174
+ uploaded_file_enrich = st.file_uploader("πŸ“‚ Upload CSV for Enrichment", type=["csv"], key="enrich_upload")
175
+
176
+ if uploaded_file_enrich is not None:
177
+ try:
178
+ df_enrich = pd.read_csv(uploaded_file_enrich)
179
+ st.success("βœ… File uploaded successfully!")
180
+ st.write("### πŸ“Š Data Preview")
181
+ st.dataframe(df_enrich.head())
182
+
183
+ numeric_columns_enrich = df_enrich.select_dtypes(include=['float64', 'int64']).columns.tolist()
184
+
185
+ if len(numeric_columns_enrich) < 2:
186
+ st.error("❌ The uploaded CSV does not contain enough numeric columns for latitude and longitude.")
187
  else:
188
+ col1_enrich, col2_enrich = st.columns(2)
189
+ with col1_enrich:
190
+ lat_col_enrich = st.selectbox("πŸ“ Select Latitude Column", options=numeric_columns_enrich, key="lat_enrich")
191
+ with col2_enrich:
192
+ lon_col_enrich = st.selectbox("🧭 Select Longitude Column", options=numeric_columns_enrich, key="lon_enrich")
193
+
194
+ if lat_col_enrich and lon_col_enrich:
195
+ if not df_enrich[lat_col_enrich].between(-90, 90).all():
196
+ st.warning("⚠️ Some latitude values are out of the valid range (-90 to 90).")
197
+ if not df_enrich[lon_col_enrich].between(-180, 180).all():
198
+ st.warning("⚠️ Some longitude values are out of the valid range (-180 to 180).")
199
+
200
+ valid_df_enrich = df_enrich.dropna(subset=[lat_col_enrich, lon_col_enrich])
201
+ valid_df_enrich = valid_df_enrich[
202
+ (valid_df_enrich[lat_col_enrich].between(-90, 90)) &
203
+ (valid_df_enrich[lon_col_enrich].between(-180, 180))
204
+ ]
205
+
206
+ if valid_df_enrich.empty:
207
+ st.error("❌ No valid data points to enrich.")
208
+ else:
209
+ st.write(f"### πŸ” Enriching {len(valid_df_enrich)} Points")
210
+
211
+ st.sidebar.header("πŸ“ Enrichment Settings")
212
+ address_fields = [
213
+ "City",
214
+ "County",
215
+ "Region",
216
+ "Postcode",
217
+ "Country",
218
+ "Neighbourhood",
219
+ "Road",
220
+ "Suburb",
221
+ "State District",
222
+ "State",
223
+ "Town",
224
+ "Village",
225
+ "ISO3166-1 Alpha-2",
226
+ "ISO3166-1 Alpha-3"
227
+ ]
228
+ selected_info = []
229
+ for field in address_fields:
230
+ if st.sidebar.checkbox(field):
231
+ selected_info.append(field)
232
+
233
+ if not selected_info:
234
+ st.sidebar.warning("⚠️ Please select at least one information to retrieve.")
235
+
236
+ enrich_button = st.button("πŸ”„ Enrich Data")
237
+
238
+ if enrich_button:
239
+ if not selected_info:
240
+ st.warning("⚠️ Please select at least one information to retrieve before enriching.")
241
+ else:
242
+ geolocator = Nominatim(user_agent="streamlit_app_enrichment")
243
+ reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1)
244
+
245
+ for info in selected_info:
246
+ if info not in df_enrich.columns:
247
+ df_enrich[info] = ""
248
+
249
+ progress_bar = st.progress(0)
250
+ status_text = st.empty()
251
+
252
+ for index, row in valid_df_enrich.iterrows():
253
+ try:
254
+ location = reverse((row[lat_col_enrich], row[lon_col_enrich]), exactly_one=True)
255
+ if location and location.raw and 'address' in location.raw:
256
+ address = location.raw['address']
257
+ for info in selected_info:
258
+ if info == "City":
259
+ df_enrich.at[index, "City"] = address.get('city', address.get('town', address.get('village', '')))
260
+ elif info == "County":
261
+ df_enrich.at[index, "County"] = address.get('county', '')
262
+ elif info == "Region":
263
+ df_enrich.at[index, "Region"] = address.get('state', '')
264
+ elif info == "Postcode":
265
+ df_enrich.at[index, "Postcode"] = address.get('postcode', '')
266
+ elif info == "Country":
267
+ df_enrich.at[index, "Country"] = address.get('country', '')
268
+ elif info == "Neighbourhood":
269
+ df_enrich.at[index, "Neighbourhood"] = address.get('neighbourhood', '')
270
+ elif info == "Road":
271
+ df_enrich.at[index, "Road"] = address.get('road', '')
272
+ elif info == "Suburb":
273
+ df_enrich.at[index, "Suburb"] = address.get('suburb', '')
274
+ elif info == "State District":
275
+ df_enrich.at[index, "State District"] = address.get('state_district', '')
276
+ elif info == "State":
277
+ df_enrich.at[index, "State"] = address.get('state', '')
278
+ elif info == "Town":
279
+ df_enrich.at[index, "Town"] = address.get('town', '')
280
+ elif info == "Village":
281
+ df_enrich.at[index, "Village"] = address.get('village', '')
282
+ elif info == "ISO3166-1 Alpha-2":
283
+ df_enrich.at[index, "ISO3166-1 Alpha-2"] = address.get('ISO3166-1_alpha-2', '')
284
+ elif info == "ISO3166-1 Alpha-3":
285
+ df_enrich.at[index, "ISO3166-1 Alpha-3"] = address.get('ISO3166-1_alpha-3', '')
286
+ else:
287
+ df_enrich.at[index, info] = address.get(info.lower(), '')
288
+ except Exception as e:
289
+ for info in selected_info:
290
+ df_enrich.at[index, info] = 'Error'
291
+
292
+ progress = (index + 1) / len(valid_df_enrich)
293
+ progress_bar.progress(min(progress, 1.0))
294
+ status_text.text(f"Processing {index + 1} of {len(valid_df_enrich)}")
295
+
296
+ progress_bar.empty()
297
+ status_text.empty()
298
+
299
+ st.success("βœ… Enrichment complete!")
300
+
301
+ st.write("### πŸ“ˆ Enriched Data")
302
+ st.dataframe(df_enrich.head())
303
+
304
+ csv_enriched = df_enrich.to_csv(index=False).encode('utf-8')
305
+ st.download_button(
306
+ label="πŸ“₯ Download Enriched CSV",
307
+ data=csv_enriched,
308
+ file_name='enriched_data.csv',
309
+ mime='text/csv'
310
+ )
311
+
312
+ except Exception as e:
313
+ st.error(f"❌ An error occurred while processing the file: {e}")
314
+ else:
315
+ st.info("πŸ“₯ Awaiting CSV file upload for enrichment.")
316
 
317
  if __name__ == "__main__":
318
  main()