Spaces:
Sleeping
Sleeping
File size: 7,932 Bytes
18935fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
#!/usr/bin/env python3
"""
Data Import Script for AI-Powered Shipment Route Optimization System
Created by: Zayeem Khateeb
This script helps you import your real data into the system.
"""
import pandas as pd
import sys
import os
from datetime import datetime
# Add src directory to path
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
from config.database import DatabaseConfig
class DataImporter:
def __init__(self):
self.db = DatabaseConfig()
def import_from_csv(self, csv_file_path, table_name):
"""Import data from CSV file to database table"""
try:
# Read CSV file
df = pd.read_csv(csv_file_path)
print(f"π Loaded {len(df)} records from {csv_file_path}")
# Convert to database format
if table_name == 'shipments':
df = self._prepare_shipments_data(df)
elif table_name == 'weather_data':
df = self._prepare_weather_data(df)
elif table_name == 'traffic_data':
df = self._prepare_traffic_data(df)
# Insert into database
self._insert_dataframe(df, table_name)
print(f"β
Successfully imported {len(df)} records to {table_name} table")
except Exception as e:
print(f"β Error importing data: {e}")
def _prepare_shipments_data(self, df):
"""Prepare shipments data for database insertion"""
# Ensure required columns exist
required_cols = ['tracking_number', 'origin_lat', 'origin_lng',
'destination_lat', 'destination_lng', 'scheduled_delivery']
for col in required_cols:
if col not in df.columns:
raise ValueError(f"Missing required column: {col}")
# Convert datetime columns
if 'scheduled_delivery' in df.columns:
df['scheduled_delivery'] = pd.to_datetime(df['scheduled_delivery'])
if 'actual_delivery' in df.columns:
df['actual_delivery'] = pd.to_datetime(df['actual_delivery'])
# Add created_at if not present
if 'created_at' not in df.columns:
df['created_at'] = datetime.now()
# Fill missing values
df['status'] = df.get('status', 'pending')
df['delay_minutes'] = df.get('delay_minutes', 0)
return df
def _prepare_weather_data(self, df):
"""Prepare weather data for database insertion"""
if 'timestamp' in df.columns:
df['timestamp'] = pd.to_datetime(df['timestamp'])
return df
def _prepare_traffic_data(self, df):
"""Prepare traffic data for database insertion"""
if 'timestamp' in df.columns:
df['timestamp'] = pd.to_datetime(df['timestamp'])
return df
def _insert_dataframe(self, df, table_name):
"""Insert DataFrame into database table"""
# This would use SQLAlchemy or pandas to_sql in production
# For now, we'll show the structure
print(f"π Data structure for {table_name}:")
print(df.head())
print(f"Columns: {list(df.columns)}")
def import_from_excel(self, excel_file_path, sheet_name='Sheet1', table_name='shipments'):
"""Import data from Excel file"""
try:
df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
print(f"π Loaded {len(df)} records from {excel_file_path}")
# Convert to CSV temporarily and use CSV import
temp_csv = f"temp_{table_name}.csv"
df.to_csv(temp_csv, index=False)
self.import_from_csv(temp_csv, table_name)
# Clean up temp file
os.remove(temp_csv)
except Exception as e:
print(f"β Error importing Excel data: {e}")
def validate_data_format(self, csv_file_path, expected_format='shipments'):
"""Validate that your data has the correct format"""
try:
df = pd.read_csv(csv_file_path)
if expected_format == 'shipments':
required_cols = ['tracking_number', 'origin_lat', 'origin_lng',
'destination_lat', 'destination_lng']
optional_cols = ['origin_address', 'destination_address',
'scheduled_delivery', 'actual_delivery', 'status', 'delay_minutes']
elif expected_format == 'weather':
required_cols = ['location_lat', 'location_lng', 'timestamp', 'temperature']
optional_cols = ['humidity', 'wind_speed', 'precipitation', 'weather_condition']
elif expected_format == 'traffic':
required_cols = ['route_start_lat', 'route_start_lng', 'route_end_lat',
'route_end_lng', 'timestamp']
optional_cols = ['travel_time_minutes', 'distance_km', 'traffic_level']
# Check required columns
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
print(f"β Missing required columns: {missing_cols}")
return False
# Show available columns
print(f"β
Data validation passed for {expected_format} format")
print(f"π Found columns: {list(df.columns)}")
print(f"π Data shape: {df.shape}")
print(f"π Sample data:")
print(df.head(3))
return True
except Exception as e:
print(f"β Error validating data: {e}")
return False
def main():
"""Main function to demonstrate data import"""
importer = DataImporter()
print("=" * 60)
print("π DATA IMPORT TOOL - AI Shipment Route Optimization")
print("Created by: Zayeem Khateeb")
print("=" * 60)
print()
# Example usage
print("π USAGE EXAMPLES:")
print()
print("1. Validate your data format:")
print(" python data_import.py validate shipments.csv")
print()
print("2. Import shipments from CSV:")
print(" python data_import.py import shipments.csv shipments")
print()
print("3. Import weather data:")
print(" python data_import.py import weather_data.csv weather_data")
print()
print("4. Import from Excel:")
print(" python data_import.py excel shipments.xlsx Sheet1 shipments")
print()
# Handle command line arguments
if len(sys.argv) > 1:
command = sys.argv[1]
if command == 'validate' and len(sys.argv) >= 3:
file_path = sys.argv[2]
data_type = sys.argv[3] if len(sys.argv) > 3 else 'shipments'
importer.validate_data_format(file_path, data_type)
elif command == 'import' and len(sys.argv) >= 4:
file_path = sys.argv[2]
table_name = sys.argv[3]
importer.import_from_csv(file_path, table_name)
elif command == 'excel' and len(sys.argv) >= 5:
file_path = sys.argv[2]
sheet_name = sys.argv[3]
table_name = sys.argv[4]
importer.import_from_excel(file_path, sheet_name, table_name)
else:
print("π‘ QUICK START:")
print("1. Prepare your data in CSV format (see DATA_INTEGRATION_GUIDE.md)")
print("2. Run: python data_import.py validate your_file.csv")
print("3. Run: python data_import.py import your_file.csv shipments")
if __name__ == "__main__":
main()
|