Spaces:
Sleeping
Sleeping
| import logging | |
| from io import BytesIO | |
def setup_logging():
    """Configure root logging for console output and return this module's logger.

    Messages at INFO level and above are emitted through a single
    ``StreamHandler``; no file handler is attached.

    Returns:
        The ``logging.Logger`` named after this module (``__name__``).
    """
    console_handler = logging.StreamHandler()  # console only, no file output
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[console_handler],
    )

    module_logger = logging.getLogger(__name__)
    return module_logger
def meters_to_miles(meters):
    """Return the given distance, expressed in meters, converted to miles.

    The conversion multiplies by a fixed factor of 0.000621371 miles per meter.
    """
    miles_per_meter = 0.000621371
    miles = meters * miles_per_meter
    return miles
def validate_excel_file(file_stream: BytesIO) -> tuple[bool, str]:
    """Validate that an uploaded file looks like an Excel file by its magic number.

    Only the first four bytes are inspected, so this confirms the container
    format (ZIP for xlsx, Compound File for xls), not that the content is a
    well-formed workbook.

    The stream is rewound to offset 0 before the header is read, so the check
    works even if the caller has already consumed part of the stream, and it
    is rewound again afterwards so the caller can process it from the start.

    Args:
        file_stream: Seekable binary stream holding the uploaded file.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is True when the header
        matches a known Excel container signature. Any exception raised while
        seeking or reading is reported as ``(False, "Validation error: ...")``
        instead of propagating.
    """
    excel_signatures = (
        b'\x50\x4B\x03\x04',  # ZIP archive (xlsx)
        b'\xD0\xCF\x11\xE0',  # Compound File (xls)
    )

    try:
        # Always inspect the real start of the file, not the current position.
        file_stream.seek(0)
        header = file_stream.read(4)
        file_stream.seek(0)  # Reset stream position for further processing
    except Exception as e:
        # Deliberately broad: callers expect a (False, message) result, not a raise.
        return False, f"Validation error: {str(e)}"

    if header in excel_signatures:
        return True, "Valid Excel file"
    return False, "Invalid file type: Not an Excel file"
def clean_address(address):
    """Clean and standardize an address string.

    Runs of whitespace are collapsed to single spaces (leading and trailing
    whitespace is dropped), then upper-case street-type abbreviations are
    expanded to their full words:

        ST -> STREET, AVE -> AVENUE, RD -> ROAD, BLVD -> BOULEVARD, DR -> DRIVE

    An abbreviation is expanded only when it stands alone as a whole word, so
    words that merely end in one ("WEST", "1ST", "FOREST") are left intact.
    A period directly after the abbreviation is removed. Matching is
    case-sensitive: mixed- or lower-case abbreviations such as "St" are not
    expanded.

    Args:
        address: The raw address. Any non-string value is treated as missing.

    Returns:
        The cleaned address, or ``""`` if ``address`` is not a string.
    """
    import re  # local import keeps this function self-contained

    if not isinstance(address, str):
        return ""

    # Remove extra whitespace
    cleaned = " ".join(address.split())

    expansions = {
        "ST": "STREET",
        "AVE": "AVENUE",
        "RD": "ROAD",
        "BLVD": "BOULEVARD",
        "DR": "DRIVE",
    }

    # \b on both sides restricts matches to whole words; the optional "\."
    # consumes a trailing period so "ST." becomes "STREET", not "STREET.".
    # NOTE(review): "ST"/"DR" can also mean "Saint"/"Doctor"; like the
    # original substring logic, this cannot tell those uses apart.
    pattern = r"\b(" + "|".join(expansions) + r")\b\.?"
    return re.sub(pattern, lambda match: expansions[match.group(1)], cleaned)
def handle_empty_values(df, required_columns):
    """Return a copy of ``df`` with missing values in required columns blanked.

    For each name in ``required_columns`` that exists in the frame, missing
    values are replaced with the empty string. Names not present in the frame
    are skipped, and columns not listed are left as they are. The input frame
    is not modified.
    """
    result = df.copy()  # work on a copy so the caller's frame stays untouched

    for column in required_columns:
        if column not in result.columns:
            continue
        filled = result[column].fillna("")
        result[column] = filled

    return result