File size: 7,932 Bytes
18935fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env python3
"""

Data Import Script for AI-Powered Shipment Route Optimization System

Created by: Zayeem Khateeb



This script helps you import your real data into the system.

"""

import pandas as pd
import sys
import os
from datetime import datetime

# Add src directory to path
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))

from config.database import DatabaseConfig

class DataImporter:
    def __init__(self):
        self.db = DatabaseConfig()
        
    def import_from_csv(self, csv_file_path, table_name):
        """Import data from CSV file to database table"""
        try:
            # Read CSV file
            df = pd.read_csv(csv_file_path)
            print(f"πŸ“ Loaded {len(df)} records from {csv_file_path}")
            
            # Convert to database format
            if table_name == 'shipments':
                df = self._prepare_shipments_data(df)
            elif table_name == 'weather_data':
                df = self._prepare_weather_data(df)
            elif table_name == 'traffic_data':
                df = self._prepare_traffic_data(df)
            
            # Insert into database
            self._insert_dataframe(df, table_name)
            print(f"βœ… Successfully imported {len(df)} records to {table_name} table")
            
        except Exception as e:
            print(f"❌ Error importing data: {e}")
    
    def _prepare_shipments_data(self, df):
        """Prepare shipments data for database insertion"""
        # Ensure required columns exist
        required_cols = ['tracking_number', 'origin_lat', 'origin_lng', 
                        'destination_lat', 'destination_lng', 'scheduled_delivery']
        
        for col in required_cols:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")
        
        # Convert datetime columns
        if 'scheduled_delivery' in df.columns:
            df['scheduled_delivery'] = pd.to_datetime(df['scheduled_delivery'])
        if 'actual_delivery' in df.columns:
            df['actual_delivery'] = pd.to_datetime(df['actual_delivery'])
        
        # Add created_at if not present
        if 'created_at' not in df.columns:
            df['created_at'] = datetime.now()
        
        # Fill missing values
        df['status'] = df.get('status', 'pending')
        df['delay_minutes'] = df.get('delay_minutes', 0)
        
        return df
    
    def _prepare_weather_data(self, df):
        """Prepare weather data for database insertion"""
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df
    
    def _prepare_traffic_data(self, df):
        """Prepare traffic data for database insertion"""
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df
    
    def _insert_dataframe(self, df, table_name):
        """Insert DataFrame into database table"""
        # This would use SQLAlchemy or pandas to_sql in production
        # For now, we'll show the structure
        print(f"πŸ“Š Data structure for {table_name}:")
        print(df.head())
        print(f"Columns: {list(df.columns)}")
    
    def import_from_excel(self, excel_file_path, sheet_name='Sheet1', table_name='shipments'):
        """Import data from Excel file"""
        try:
            df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
            print(f"πŸ“ Loaded {len(df)} records from {excel_file_path}")
            
            # Convert to CSV temporarily and use CSV import
            temp_csv = f"temp_{table_name}.csv"
            df.to_csv(temp_csv, index=False)
            self.import_from_csv(temp_csv, table_name)
            
            # Clean up temp file
            os.remove(temp_csv)
            
        except Exception as e:
            print(f"❌ Error importing Excel data: {e}")
    
    def validate_data_format(self, csv_file_path, expected_format='shipments'):
        """Validate that your data has the correct format"""
        try:
            df = pd.read_csv(csv_file_path)
            
            if expected_format == 'shipments':
                required_cols = ['tracking_number', 'origin_lat', 'origin_lng', 
                               'destination_lat', 'destination_lng']
                optional_cols = ['origin_address', 'destination_address', 
                               'scheduled_delivery', 'actual_delivery', 'status', 'delay_minutes']
            
            elif expected_format == 'weather':
                required_cols = ['location_lat', 'location_lng', 'timestamp', 'temperature']
                optional_cols = ['humidity', 'wind_speed', 'precipitation', 'weather_condition']
            
            elif expected_format == 'traffic':
                required_cols = ['route_start_lat', 'route_start_lng', 'route_end_lat', 
                               'route_end_lng', 'timestamp']
                optional_cols = ['travel_time_minutes', 'distance_km', 'traffic_level']
            
            # Check required columns
            missing_cols = [col for col in required_cols if col not in df.columns]
            if missing_cols:
                print(f"❌ Missing required columns: {missing_cols}")
                return False
            
            # Show available columns
            print(f"βœ… Data validation passed for {expected_format} format")
            print(f"πŸ“Š Found columns: {list(df.columns)}")
            print(f"πŸ“ˆ Data shape: {df.shape}")
            print(f"πŸ” Sample data:")
            print(df.head(3))
            
            return True
            
        except Exception as e:
            print(f"❌ Error validating data: {e}")
            return False

def main():
    """Main function to demonstrate data import"""
    importer = DataImporter()
    
    print("=" * 60)
    print("πŸš› DATA IMPORT TOOL - AI Shipment Route Optimization")
    print("Created by: Zayeem Khateeb")
    print("=" * 60)
    print()
    
    # Example usage
    print("πŸ“‹ USAGE EXAMPLES:")
    print()
    print("1. Validate your data format:")
    print("   python data_import.py validate shipments.csv")
    print()
    print("2. Import shipments from CSV:")
    print("   python data_import.py import shipments.csv shipments")
    print()
    print("3. Import weather data:")
    print("   python data_import.py import weather_data.csv weather_data")
    print()
    print("4. Import from Excel:")
    print("   python data_import.py excel shipments.xlsx Sheet1 shipments")
    print()
    
    # Handle command line arguments
    if len(sys.argv) > 1:
        command = sys.argv[1]
        
        if command == 'validate' and len(sys.argv) >= 3:
            file_path = sys.argv[2]
            data_type = sys.argv[3] if len(sys.argv) > 3 else 'shipments'
            importer.validate_data_format(file_path, data_type)
            
        elif command == 'import' and len(sys.argv) >= 4:
            file_path = sys.argv[2]
            table_name = sys.argv[3]
            importer.import_from_csv(file_path, table_name)
            
        elif command == 'excel' and len(sys.argv) >= 5:
            file_path = sys.argv[2]
            sheet_name = sys.argv[3]
            table_name = sys.argv[4]
            importer.import_from_excel(file_path, sheet_name, table_name)
    
    else:
        print("πŸ’‘ QUICK START:")
        print("1. Prepare your data in CSV format (see DATA_INTEGRATION_GUIDE.md)")
        print("2. Run: python data_import.py validate your_file.csv")
        print("3. Run: python data_import.py import your_file.csv shipments")

if __name__ == "__main__":
    main()