Spaces:

Alealejandrooo
/

Postcodes

Sleeping

App Files Files Community

Alealejandrooo committed on Apr 21, 2024

Commit

da9a0aa

verified ·

1 Parent(s): 80e4fa3

Update process.py

Browse files

Files changed (1) hide show

process.py +28 -7

process.py CHANGED Viewed

@@ -1,16 +1,37 @@
 import pandas as pd
 import gradio as gr
-def get_lat_lon(postcodes_df, postcode_mapping):
     try:
         postcode_mapping.rename(columns={'postcode': 'Postal code'}, inplace=True)
         # Normalize postcodes to ensure matching and count occurrences
-        postcodes_df['Postal code'] = postcodes_df['Postal code'].str.lower().str.replace(' ', '')
-        postcode_counts = postcodes_df['Postal code'].value_counts().reset_index()
         postcode_counts.columns = ['Postal code', 'count']
         # Normalize the postcodes in the mapping DataFrame
@@ -25,8 +46,8 @@ def get_lat_lon(postcodes_df, postcode_mapping):
         # Optionally, convert the DataFrame to a dictionary if needed, or work directly with the DataFrame
         results = result_df.to_dict(orient='records')
-    except:
-        raise gr.Error('Make sure your file contains the postal codes under a column named "Postal code"')
     return results

 import pandas as pd
 import gradio as gr
def find_postcode_column(df):
    """Return the label of the column that contains the most UK postcodes.

    Every column is cast to ``str``, surrounding whitespace is stripped, and
    a cell is counted only when its *entire* content is a postcode.  Cells
    that merely start with a postcode-shaped prefix (for example a free-text
    address such as ``"SW1A 1AA London"``) are not counted, so an address
    column cannot out-score the real postcode column.

    Args:
        df: DataFrame whose columns are scanned.

    Returns:
        The label of the column with the highest number of valid postcodes
        (the first such column wins ties), or ``None`` when no column
        contains any valid postcode.
    """
    # UK Gov postcode regex
    postcode_pattern = r"([Gg][Ii][Rr] 0[Aa]{2})|((([A-Za-z][0-9]{1,2})|(([A-Za-z][A-Ha-hJ-Yj-y][0-9]{1,2})|(([A-Za-z][0-9][A-Za-z])|([A-Za-z][A-Ha-hJ-Yj-y][0-9][A-Za-z]?))))\s?[0-9][A-Za-z]{2})"
    # ``str.match`` anchors only at the start of the string.  Wrap the
    # pattern in a non-capturing group (so the top-level alternation is
    # anchored as a whole) and add an end anchor to require a full match.
    anchored_pattern = "(?:" + postcode_pattern + ")$"

    max_count = 0
    postcode_column = None
    for column in df.columns:
        # Strip padding so values like " SW1A 1AA " are still recognised.
        values = df[column].astype(str).str.strip()
        valid_count = int(values.str.match(anchored_pattern).sum())
        # Strict ">" keeps the first column on ties and leaves the result
        # as None when nothing matches anywhere.
        if valid_count > max_count:
            max_count = valid_count
            postcode_column = column
    return postcode_column
+def get_lat_lon(postcodes_df, postcode_mapping):
     try:
+        # Attempt to identify the postcode column dynamically
+        postcode_column = find_postcode_column(postcodes_df)
+        if not postcode_column:
+            raise ValueError("No valid postcode column found")
+        # Rename columns for consistency
         postcode_mapping.rename(columns={'postcode': 'Postal code'}, inplace=True)
         # Normalize postcodes to ensure matching and count occurrences
+        postcodes_df[postcode_column] = postcodes_df[postcode_column].str.lower().str.replace(' ', '')
+        postcode_counts = postcodes_df[postcode_column].value_counts().reset_index()
         postcode_counts.columns = ['Postal code', 'count']
         # Normalize the postcodes in the mapping DataFrame
         # Optionally, convert the DataFrame to a dictionary if needed, or work directly with the DataFrame
         results = result_df.to_dict(orient='records')
+    except Exception as e:
+        raise Exception("Error processing postal codes: " + str(e))
     return results