Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import gradio as gr | |
def find_postcode_column(df):
    """Return the label of the column that holds the most UK postcodes.

    Every column is cast to ``str``, stripped of surrounding whitespace and
    tested cell by cell against the UK Government postcode pattern. The
    whole cell has to be a postcode: the pattern is anchored at both ends,
    so free-text values that merely begin with a postcode are not counted.

    Args:
        df: The ``pandas.DataFrame`` to inspect.

    Returns:
        The label of the column with the highest number of matching cells
        (the first such column wins a tie), or ``None`` when no cell in any
        column matches.
    """
    # UK Gov postcode regex
    postcode_pattern = r"([Gg][Ii][Rr] 0[Aa]{2})|((([A-Za-z][0-9]{1,2})|(([A-Za-z][A-Ha-hJ-Yj-y][0-9]{1,2})|(([A-Za-z][0-9][A-Za-z])|([A-Za-z][A-Ha-hJ-Yj-y][0-9][A-Za-z]?))))\s?[0-9][A-Za-z]{2})"
    # Anchor both ends. The non-capturing group makes the anchors apply to
    # every top-level alternative of the pattern, not only the outermost ones.
    anchored_pattern = "^(?:" + postcode_pattern + ")$"

    max_count = 0
    postcode_column = None
    for column in df.columns:
        # Strip first so padded cells such as " SW1A 1AA " are still counted.
        values = df[column].astype(str).str.strip()
        matches = values.str.match(anchored_pattern, na=False)
        valid_count = matches.sum()  # number of cells that are valid postcodes
        # Strict ">" keeps the earliest column when several share the maximum.
        if valid_count > max_count:
            max_count = valid_count
            postcode_column = column
    return postcode_column
def get_lat_lon(postcodes_df, postcode_mapping):
    """Count each postcode in ``postcodes_df`` and attach its coordinates.

    The postcode column is located with ``find_postcode_column``. Postcodes
    on both sides are normalised (lower-cased, spaces removed) and the
    per-postcode counts are left-joined onto ``postcode_mapping``. Neither
    input frame is modified; all normalisation happens on local copies.

    Args:
        postcodes_df: DataFrame containing a column of UK postcodes.
        postcode_mapping: DataFrame with a ``postcode`` (or already renamed
            ``Postal code``) column plus ``latitude`` and ``longitude``
            columns.

    Returns:
        A list of dicts, one per distinct normalised postcode, holding
        ``'Postal code'``, ``'count'`` and the columns of the mapping.
        ``latitude`` / ``longitude`` are ``''`` for postcodes that are not
        present in the mapping.

    Raises:
        gr.Error: If no postcode column can be identified, or if any step
            of the processing fails (the original exception is chained).
    """
    try:
        # Attempt to identify the postcode column dynamically
        postcode_column = find_postcode_column(postcodes_df)
        # Compare against None explicitly: a valid column label can be
        # falsy (e.g. 0 for a header-less frame, or an empty string).
        if postcode_column is None:
            raise gr.Error("No valid postcode column found")

        # Normalise into a new Series so the caller's frame stays untouched
        normalized_postcodes = (
            postcodes_df[postcode_column].str.lower().str.replace(' ', '', regex=False)
        )
        postcode_counts = normalized_postcodes.value_counts().reset_index()
        postcode_counts.columns = ['Postal code', 'count']

        # rename() without inplace returns a new frame, so the (possibly
        # shared) mapping passed in by the caller is never mutated.
        mapping = postcode_mapping.rename(columns={'postcode': 'Postal code'})
        mapping['Postal code'] = (
            mapping['Postal code'].str.lower().str.replace(' ', '', regex=False)
        )

        # Left join keeps every counted postcode, matched or not
        result_df = pd.merge(postcode_counts, mapping, on='Postal code', how='left')

        # Blank out coordinates for postcodes missing from the mapping
        result_df['latitude'] = result_df['latitude'].fillna('')
        result_df['longitude'] = result_df['longitude'].fillna('')

        results = result_df.to_dict(orient='records')
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time
        raise
    except Exception as e:
        raise gr.Error("Error processing postal codes: " + str(e)) from e
    return results