Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| from geopy.geocoders import Nominatim | |
| from geopy.extra.rate_limiter import RateLimiter | |
# Nominatim address keys consulted for each enrichment field, in priority
# order: the first key present in the geocoder's address dict wins.
# Insertion order is also the order of the sidebar checkboxes.
_ADDRESS_KEYS = {
    "City": ("city", "town", "village"),
    "County": ("county",),
    "Region": ("state",),
    "Postcode": ("postcode",),
    "Country": ("country",),
    "Neighbourhood": ("neighbourhood",),
    "Road": ("road",),
    "Suburb": ("suburb",),
    "State District": ("state_district",),
    "State": ("state",),
    "Town": ("town",),
    "Village": ("village",),
    "ISO3166-1 Alpha-2": ("ISO3166-1_alpha-2",),
    "ISO3166-1 Alpha-3": ("ISO3166-1_alpha-3",),
}

# Basemap styles offered in the sidebar.
# NOTE(review): per the Plotly docs the Mapbox-hosted styles ("basic",
# "light", "dark", "satellite", ...) need an access token, and newer Plotly
# releases dropped the "stamen-*" styles -- confirm against the deployed
# Plotly version.
_MAP_STYLES = [
    "open-street-map",
    "carto-positron",
    "carto-darkmatter",
    "stamen-terrain",
    "stamen-toner",
    "stamen-watercolor",
    "white-bg",
    "basic",
    "light",
    "dark",
    "satellite",
    "satellite-streets",
    "outdoors",
    "traffic-day",
    "traffic-night",
]


def _numeric_columns(df: pd.DataFrame) -> list:
    """Return the names of the float64/int64 columns of *df*."""
    return df.select_dtypes(include=['float64', 'int64']).columns.tolist()


def _load_csv_with_preview(uploaded_file) -> pd.DataFrame:
    """Parse the uploaded CSV, show a success banner and a head() preview."""
    df = pd.read_csv(uploaded_file)
    st.success("β File uploaded successfully!")
    st.write("### π Data Preview")
    st.dataframe(df.head())
    return df


def _select_valid_coordinates(df: pd.DataFrame, key_suffix: str):
    """Ask for latitude/longitude columns and filter *df* to usable rows.

    Renders two select boxes (widget keys ``lat_<key_suffix>`` and
    ``lon_<key_suffix>``), warns when a chosen column holds values outside
    the valid range, and keeps only rows whose latitude is within [-90, 90]
    and longitude within [-180, 180].

    Returns:
        ``(lat_col, lon_col, valid_df)``, or ``None`` when the frame has
        fewer than two numeric columns (an error is shown) or no column is
        selected.
    """
    numeric_columns = _numeric_columns(df)
    if len(numeric_columns) < 2:
        st.error("β The uploaded CSV does not contain enough numeric columns for latitude and longitude.")
        return None

    left, right = st.columns(2)
    with left:
        lat_col = st.selectbox("π Select Latitude Column", options=numeric_columns, key=f"lat_{key_suffix}")
    with right:
        lon_col = st.selectbox("π§ Select Longitude Column", options=numeric_columns, key=f"lon_{key_suffix}")
    if not (lat_col and lon_col):
        return None

    # between() is False for NaN, so missing values both trigger the warning
    # and are removed by the mask below; no separate dropna() is required.
    lat_in_range = df[lat_col].between(-90, 90)
    lon_in_range = df[lon_col].between(-180, 180)
    if not lat_in_range.all():
        st.warning("β οΈ Some latitude values are out of the valid range (-90 to 90).")
    if not lon_in_range.all():
        st.warning("β οΈ Some longitude values are out of the valid range (-180 to 180).")
    return lat_col, lon_col, df[lat_in_range & lon_in_range]


def _render_map(df: pd.DataFrame, valid_df: pd.DataFrame, lat_col: str, lon_col: str) -> None:
    """Collect the sidebar map settings and draw the scatter map of *valid_df*."""
    st.sidebar.header("πΊοΈ Map Settings")

    size_col = st.sidebar.selectbox(
        "πΉ Select a column for bubble size (optional)",
        options=[None] + _numeric_columns(valid_df),
        key="size_map",
    )
    # Every column except the two coordinate columns may label or colour points.
    other_columns = [col for col in df.columns if col not in [lat_col, lon_col]]
    hover_col = st.sidebar.selectbox(
        "πΉ Select a column for hover name (optional)",
        options=[None] + other_columns,
        key="hover_map",
    )

    color_mode = st.sidebar.radio("π¨ Select Color Mode", options=["Single Color", "Color Scale"], index=0, key="color_mode_map")
    if color_mode == "Single Color":
        single_color = st.sidebar.color_picker("πΉ Pick a color for the points", "#FF5733", key="color_map")
        color_column = None
    else:
        color_column = st.sidebar.selectbox(
            "πΉ Select a column for color scale (optional)",
            options=[None] + other_columns,
            key="color_column_map",
        )
        single_color = None

    map_style = st.sidebar.selectbox("π Select Map Style", options=_MAP_STYLES, index=0, key="map_style_map")
    default_size = st.sidebar.slider("πΉ Default Bubble Size", min_value=5, max_value=20, value=10, key="default_size_map")
    map_height = st.sidebar.slider("πΉ Map Height (pixels)", min_value=400, max_value=1000, value=600, key="map_height_map")
    zoom_level = st.sidebar.slider("πΉ Initial Zoom Level", min_value=1, max_value=20, value=3, key="zoom_level_map")

    if color_mode == "Color Scale" and not color_column:
        st.sidebar.warning("β οΈ Please select a column for color scale.")
        # Return instead of st.stop(): stopping the script here would also
        # prevent the Data Enrichment tab from rendering.
        return

    scatter_kwargs = dict(
        lat=lat_col,
        lon=lon_col,
        color=color_column,
        color_continuous_scale="Viridis" if color_column else None,
        color_discrete_sequence=None if color_column else [single_color],
        hover_name=hover_col if hover_col else None,
        hover_data=other_columns,
        zoom=zoom_level,
        mapbox_style=map_style,
        height=map_height,
        title="π Interactive Map",
    )
    if size_col:
        # Data-driven bubble size; otherwise a uniform size is applied below.
        scatter_kwargs.update(size=size_col, size_max=15)
    fig = px.scatter_mapbox(valid_df, **scatter_kwargs)

    if color_mode == "Single Color":
        fig.update_traces(marker=dict(color=single_color))
    if not size_col:
        fig.update_traces(marker=dict(size=default_size))
    fig.update_layout(
        margin={"r": 0, "t": 30, "l": 0, "b": 0},
        title_x=0.5
    )
    st.plotly_chart(fig, use_container_width=True)


def _render_map_tab() -> None:
    """Render the "Map Visualization" tab: upload, column choice, map."""
    st.write("""
    ## π Interactive Map Visualization
    Upload your CSV file, select latitude and longitude columns, customize map settings, and visualize the data points on an interactive map.
    """)
    uploaded_file = st.file_uploader("π Upload CSV for Mapping", type=["csv"], key="map_upload")
    if uploaded_file is None:
        st.info("π₯ Awaiting CSV file upload for mapping.")
        return

    # Top-level UI boundary: any failure while parsing or plotting is shown
    # to the user instead of crashing the page.
    try:
        df = _load_csv_with_preview(uploaded_file)
        selection = _select_valid_coordinates(df, "map")
        if selection is None:
            return
        lat_col, lon_col, valid_df = selection
        if valid_df.empty:
            st.error("β No valid data points to display on the map.")
            return
        st.write(f"### π Displaying {len(valid_df)} Points on the Map")
        _render_map(df, valid_df, lat_col, lon_col)
    except Exception as e:
        st.error(f"β An error occurred while processing the file: {e}")


def _address_value(address: dict, field: str):
    """Return the first available Nominatim address entry for *field*, else ''."""
    return next((address[key] for key in _ADDRESS_KEYS[field] if key in address), '')


def _enrich_rows(df: pd.DataFrame, valid_df: pd.DataFrame, lat_col: str, lon_col: str, selected_info: list) -> None:
    """Reverse-geocode every row of *valid_df* and write the fields into *df*.

    *df* is modified in place: a column is created for each selected field
    that does not exist yet, and rows are addressed by the index labels of
    *valid_df*. A row whose lookup raises gets ``'Error'`` in every selected
    field; a row with no usable geocoder result is left unchanged.
    """
    geolocator = Nominatim(user_agent="streamlit_app_enrichment")
    # Space the calls at least one second apart.
    reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1)

    for info in selected_info:
        if info not in df.columns:
            df[info] = ""

    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(valid_df)
    # Count processed rows by position: index labels are not contiguous once
    # invalid rows were filtered out, so they cannot drive the progress bar.
    for position, (index, row) in enumerate(valid_df.iterrows(), start=1):
        try:
            location = reverse((row[lat_col], row[lon_col]), exactly_one=True)
            if location and location.raw and 'address' in location.raw:
                address = location.raw['address']
                for info in selected_info:
                    df.at[index, info] = _address_value(address, info)
        except Exception:
            # Best effort: flag the row and continue with the remaining ones.
            for info in selected_info:
                df.at[index, info] = 'Error'
        progress_bar.progress(position / total)
        status_text.text(f"Processing {position} of {total}")
    progress_bar.empty()
    status_text.empty()


def _render_enrichment_tab() -> None:
    """Render the "Data Enrichment" tab: upload, field choice, geocoding, download."""
    st.write("""
    ## π Data Enrichment with Geopy
    Enrich your CSV data by retrieving additional geographic information based on latitude and longitude. Select the desired information to add to your dataset.
    """)
    uploaded_file = st.file_uploader("π Upload CSV for Enrichment", type=["csv"], key="enrich_upload")
    if uploaded_file is None:
        st.info("π₯ Awaiting CSV file upload for enrichment.")
        return

    # Top-level UI boundary: report any failure to the user.
    try:
        df = _load_csv_with_preview(uploaded_file)
        selection = _select_valid_coordinates(df, "enrich")
        if selection is None:
            return
        lat_col, lon_col, valid_df = selection
        if valid_df.empty:
            st.error("β No valid data points to enrich.")
            return
        st.write(f"### π Enriching {len(valid_df)} Points")

        st.sidebar.header("π Enrichment Settings")
        # One checkbox per field; all of them must be rendered on every run.
        selected_info = [field for field in _ADDRESS_KEYS if st.sidebar.checkbox(field)]
        if not selected_info:
            st.sidebar.warning("β οΈ Please select at least one information to retrieve.")

        if not st.button("π Enrich Data"):
            return
        if not selected_info:
            st.warning("β οΈ Please select at least one information to retrieve before enriching.")
            return

        _enrich_rows(df, valid_df, lat_col, lon_col, selected_info)
        st.success("β Enrichment complete!")
        st.write("### π Enriched Data")
        st.dataframe(df.head())
        csv_enriched = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="π₯ Download Enriched CSV",
            data=csv_enriched,
            file_name='enriched_data.csv',
            mime='text/csv'
        )
    except Exception as e:
        st.error(f"β An error occurred while processing the file: {e}")


def main():
    """Build the Streamlit page with its two tabs.

    The first tab plots the points of an uploaded CSV on an interactive map;
    the second reverse-geocodes the CSV's coordinates with Nominatim and
    offers the enriched table as a download.

    NOTE(review): the emoji prefixes in the UI strings look mis-encoded;
    they are kept exactly as found -- confirm the intended characters.
    """
    st.set_page_config(page_title="Enhanced CSV Tool", layout="wide")
    st.title("π Enhanced CSV Tool: Mapping & Data Enrichment")
    tabs = st.tabs(["Map Visualization", "Data Enrichment"])

    ### Map Visualization Tab
    with tabs[0]:
        _render_map_tab()

    ### Data Enrichment Tab
    with tabs[1]:
        _render_enrichment_tab()
# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()