"""Streamlit app for mapping CSV coordinates and enriching them via geopy.

The app has two tabs:

* "Map Visualization" plots latitude/longitude columns of an uploaded CSV
  with Plotly Express.
* "Data Enrichment" reverse-geocodes each valid coordinate pair with
  Nominatim and appends the selected address fields to the data.
"""

import streamlit as st
import pandas as pd
import plotly.express as px
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Dtypes offered as latitude/longitude (and bubble size) candidates.
_NUMERIC_DTYPES = ['float64', 'int64']

_MAP_INTRO = """
## πŸ“ Interactive Map Visualization
Upload your CSV file, select latitude and longitude columns, customize map settings, and visualize the data points on an interactive map.
"""

_ENRICH_INTRO = """
## πŸ” Data Enrichment with Geopy
Enrich your CSV data by retrieving additional geographic information based on latitude and longitude. Select the desired information to add to your dataset.
"""

_MAP_STYLES = [
    "open-street-map", "carto-positron", "carto-darkmatter",
    "stamen-terrain", "stamen-toner", "stamen-watercolor",
    "white-bg", "basic", "light", "dark", "satellite",
    "satellite-streets", "outdoors", "traffic-day", "traffic-night",
]

# Checkbox labels shown in the sidebar, in display order.
_ADDRESS_FIELDS = [
    "City", "County", "Region", "Postcode", "Country",
    "Neighbourhood", "Road", "Suburb", "State District",
    "State", "Town", "Village", "ISO3166-1 Alpha-2", "ISO3166-1 Alpha-3",
]

# Output column label -> key looked up in the geocoder's address dict.
# "City" and "ISO3166-1 Alpha-2" have fallbacks and are handled in
# _address_value.
_ADDRESS_KEYS = {
    "County": "county",
    "Region": "state",
    "Postcode": "postcode",
    "Country": "country",
    "Neighbourhood": "neighbourhood",
    "Road": "road",
    "Suburb": "suburb",
    "State District": "state_district",
    "State": "state",
    "Town": "town",
    "Village": "village",
    "ISO3166-1 Alpha-3": "ISO3166-1_alpha-3",
}


def _address_value(address: dict, field: str) -> str:
    """Return the value for output column *field* from an address dict.

    Missing keys yield an empty string.
    """
    if field == "City":
        # Smaller settlements are reported as town/village instead of city.
        for key in ("city", "town", "village"):
            if key in address:
                return address[key]
        return ""
    if field == "ISO3166-1 Alpha-2":
        if "ISO3166-1_alpha-2" in address:
            return address["ISO3166-1_alpha-2"]
        # Nominatim reports the alpha-2 code as a lowercase `country_code`.
        return str(address.get("country_code", "")).upper()
    return address.get(_ADDRESS_KEYS.get(field, field.lower()), "")


def _select_valid_points(df: pd.DataFrame, key_suffix: str, empty_message: str):
    """Let the user pick lat/lon columns and keep only plottable rows.

    Args:
        df: The uploaded data.
        key_suffix: Suffix for the widget keys (``lat_<suffix>`` /
            ``lon_<suffix>``) so both tabs can host the same pickers.
        empty_message: Error text shown when no row has valid coordinates.

    Returns:
        ``(lat_col, lon_col, valid_df)``, or ``None`` when there is nothing
        to work with (an error has already been shown in that case).
    """
    numeric_columns = df.select_dtypes(include=_NUMERIC_DTYPES).columns.tolist()
    if len(numeric_columns) < 2:
        st.error("❌ The uploaded CSV does not contain enough numeric columns for latitude and longitude.")
        return None

    lat_slot, lon_slot = st.columns(2)
    with lat_slot:
        lat_col = st.selectbox("πŸ“ Select Latitude Column", options=numeric_columns, key=f"lat_{key_suffix}")
    with lon_slot:
        lon_col = st.selectbox("🧭 Select Longitude Column", options=numeric_columns, key=f"lon_{key_suffix}")
    if lat_col is None or lon_col is None:
        return None

    # Drop NaN before the range check: `between` is False for NaN, which
    # would otherwise report missing cells as "out of range".
    if not df[lat_col].dropna().between(-90, 90).all():
        st.warning("⚠️ Some latitude values are out of the valid range (-90 to 90).")
    if not df[lon_col].dropna().between(-180, 180).all():
        st.warning("⚠️ Some longitude values are out of the valid range (-180 to 180).")

    valid_df = df.dropna(subset=[lat_col, lon_col])
    in_range = valid_df[lat_col].between(-90, 90) & valid_df[lon_col].between(-180, 180)
    valid_df = valid_df[in_range]
    if valid_df.empty:
        st.error(empty_message)
        return None
    return lat_col, lon_col, valid_df


def _build_map(uploaded_file) -> None:
    """Read the uploaded CSV, collect map settings and draw the figure."""
    df = pd.read_csv(uploaded_file)
    st.success("βœ… File uploaded successfully!")
    st.write("### πŸ“Š Data Preview")
    st.dataframe(df.head())

    selection = _select_valid_points(df, "map", "❌ No valid data points to display on the map.")
    if selection is None:
        return
    lat_col, lon_col, valid_df = selection

    st.write(f"### πŸ“ Displaying {len(valid_df)} Points on the Map")
    st.sidebar.header("πŸ—ΊοΈ Map Settings")

    size_options = [None] + valid_df.select_dtypes(include=_NUMERIC_DTYPES).columns.tolist()
    size_col = st.sidebar.selectbox("πŸ”Ή Select a column for bubble size (optional)", options=size_options, key="size_map")

    other_columns = [col for col in df.columns if col not in (lat_col, lon_col)]
    hover_col = st.sidebar.selectbox("πŸ”Ή Select a column for hover name (optional)", options=[None] + other_columns, key="hover_map")

    color_mode = st.sidebar.radio("🎨 Select Color Mode", options=["Single Color", "Color Scale"], index=0, key="color_mode_map")
    if color_mode == "Single Color":
        single_color = st.sidebar.color_picker("πŸ”Ή Pick a color for the points", "#FF5733", key="color_map")
        color_column = None
    else:
        color_column = st.sidebar.selectbox("πŸ”Ή Select a column for color scale (optional)", options=[None] + other_columns, key="color_column_map")
        single_color = None

    map_style = st.sidebar.selectbox("🌐 Select Map Style", options=_MAP_STYLES, index=0, key="map_style_map")
    default_size = st.sidebar.slider("πŸ”Ή Default Bubble Size", min_value=5, max_value=20, value=10, key="default_size_map")
    map_height = st.sidebar.slider("πŸ”Ή Map Height (pixels)", min_value=400, max_value=1000, value=600, key="map_height_map")
    zoom_level = st.sidebar.slider("πŸ”Ή Initial Zoom Level", min_value=1, max_value=20, value=3, key="zoom_level_map")

    if color_mode == "Color Scale" and not color_column:
        st.sidebar.warning("⚠️ Please select a column for color scale.")
        # Return rather than st.stop(): stopping the script here would also
        # prevent the Data Enrichment tab from being rendered.
        return

    figure_kwargs = dict(
        lat=lat_col,
        lon=lon_col,
        color=color_column,
        color_continuous_scale="Viridis" if color_column else None,
        color_discrete_sequence=[single_color] if color_mode == "Single Color" else None,
        hover_name=hover_col if hover_col else None,
        hover_data=other_columns,
        zoom=zoom_level,
        mapbox_style=map_style,
        height=map_height,
        title="πŸ“ Interactive Map",
    )
    if size_col:
        figure_kwargs.update(size=size_col, size_max=15)
    fig = px.scatter_mapbox(valid_df, **figure_kwargs)

    if color_mode == "Single Color":
        fig.update_traces(marker=dict(color=single_color))
    if not size_col:
        fig.update_traces(marker=dict(size=default_size))
    fig.update_layout(margin={"r": 0, "t": 30, "l": 0, "b": 0}, title_x=0.5)
    st.plotly_chart(fig, use_container_width=True)


def _reverse_geocode_rows(df: pd.DataFrame, valid_df: pd.DataFrame, lat_col, lon_col, selected_info) -> int:
    """Reverse-geocode every row of *valid_df* and write results into *df*.

    Rows are matched by index label. Requests are rate limited to one per
    second. A row whose lookup raises gets ``'Error'`` in every selected
    column.

    Returns:
        The number of rows whose lookup raised an exception.
    """
    geolocator = Nominatim(user_agent="streamlit_app_enrichment")
    reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1)

    for info in selected_info:
        if info not in df.columns:
            df[info] = ""

    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(valid_df)
    failures = 0

    # Track the loop position separately: index labels are not contiguous
    # once invalid rows have been filtered out, so they cannot drive the
    # progress display.
    for position, (index, row) in enumerate(valid_df.iterrows(), start=1):
        try:
            location = reverse((row[lat_col], row[lon_col]), exactly_one=True)
        except Exception:
            # Best effort: one failed lookup must not abort the whole run.
            failures += 1
            for info in selected_info:
                df.at[index, info] = 'Error'
        else:
            if location and location.raw and 'address' in location.raw:
                address = location.raw['address']
                for info in selected_info:
                    df.at[index, info] = _address_value(address, info)

        progress_bar.progress(min(position / total, 1.0))
        status_text.text(f"Processing {position} of {total}")

    progress_bar.empty()
    status_text.empty()
    return failures


def _build_enrichment(uploaded_file) -> None:
    """Read the uploaded CSV, collect field choices and run the enrichment."""
    df = pd.read_csv(uploaded_file)
    st.success("βœ… File uploaded successfully!")
    st.write("### πŸ“Š Data Preview")
    st.dataframe(df.head())

    selection = _select_valid_points(df, "enrich", "❌ No valid data points to enrich.")
    if selection is None:
        return
    lat_col, lon_col, valid_df = selection

    st.write(f"### πŸ” Enriching {len(valid_df)} Points")
    st.sidebar.header("πŸ“ Enrichment Settings")

    selected_info = []
    for field in _ADDRESS_FIELDS:
        if st.sidebar.checkbox(field):
            selected_info.append(field)
    if not selected_info:
        st.sidebar.warning("⚠️ Please select at least one information to retrieve.")

    if not st.button("πŸ”„ Enrich Data"):
        return
    if not selected_info:
        st.warning("⚠️ Please select at least one information to retrieve before enriching.")
        return

    failures = _reverse_geocode_rows(df, valid_df, lat_col, lon_col, selected_info)
    if failures:
        st.warning(f"⚠️ {failures} of {len(valid_df)} points could not be geocoded and were marked 'Error'.")

    st.success("βœ… Enrichment complete!")
    st.write("### πŸ“ˆ Enriched Data")
    st.dataframe(df.head())

    csv_enriched = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="πŸ“₯ Download Enriched CSV",
        data=csv_enriched,
        file_name='enriched_data.csv',
        mime='text/csv',
    )


def _render_map_tab() -> None:
    """Render the "Map Visualization" tab."""
    st.write(_MAP_INTRO)
    uploaded_file = st.file_uploader("πŸ“‚ Upload CSV for Mapping", type=["csv"], key="map_upload")
    if uploaded_file is None:
        st.info("πŸ“₯ Awaiting CSV file upload for mapping.")
        return
    try:
        _build_map(uploaded_file)
    except Exception as e:
        # UI boundary: show the problem instead of crashing the app.
        st.error(f"❌ An error occurred while processing the file: {e}")


def _render_enrichment_tab() -> None:
    """Render the "Data Enrichment" tab."""
    st.write(_ENRICH_INTRO)
    uploaded_file = st.file_uploader("πŸ“‚ Upload CSV for Enrichment", type=["csv"], key="enrich_upload")
    if uploaded_file is None:
        st.info("πŸ“₯ Awaiting CSV file upload for enrichment.")
        return
    try:
        _build_enrichment(uploaded_file)
    except Exception as e:
        # UI boundary: show the problem instead of crashing the app.
        st.error(f"❌ An error occurred while processing the file: {e}")


def main():
    """Configure the page and render both tabs."""
    st.set_page_config(page_title="Enhanced CSV Tool", layout="wide")
    st.title("πŸ“ Enhanced CSV Tool: Mapping & Data Enrichment")

    map_tab, enrich_tab = st.tabs(["Map Visualization", "Data Enrichment"])

    ### Map Visualization Tab
    with map_tab:
        _render_map_tab()

    ### Data Enrichment Tab
    with enrich_tab:
        _render_enrichment_tab()


if __name__ == "__main__":
    main()