"""Flask application serving flight-delay predictions and dashboard data."""

import math
import os
import warnings
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from flask import (
    Flask,
    jsonify,
    render_template,
    request,
    send_from_directory,
)

from utils.preprocess import FlightDataPreprocessor

app = Flask(__name__, static_folder='static', static_url_path='/static')

# Location of the serialized classifier loaded by load_model_and_data().
MODEL_PATH = 'model/flight_delay_model.pkl'

# Airports treated as southern hemisphere (simplified list). Every other
# origin is treated as northern hemisphere.
_SOUTHERN_AIRPORTS = frozenset({
    'SYD', 'MEL', 'BNE', 'ADL', 'PER', 'CBR', 'HBA', 'DRW', 'CNS', 'OOL',
})

# Airport coordinates as (latitude, longitude) in degrees - simplified dataset.
_AIRPORT_COORDS = {
    # Major US airports
    'JFK': (40.64, -73.78), 'LAX': (33.94, -118.41), 'ORD': (41.98, -87.90),
    'SFO': (37.62, -122.38), 'BOS': (42.36, -71.01), 'ATL': (33.64, -84.43),
    'DFW': (32.90, -97.04), 'DEN': (39.86, -104.67), 'SEA': (47.45, -122.31),
    'MSP': (44.88, -93.22), 'DTW': (42.21, -83.35), 'PHL': (39.87, -75.25),
    'CLT': (35.21, -80.95), 'LGA': (40.77, -73.87), 'BWI': (39.18, -76.67),
    'SLC': (40.79, -111.98), 'DCA': (38.85, -77.04), 'MCO': (28.43, -81.31),
    'TPA': (27.98, -82.53), 'MDW': (41.78, -87.75), 'FLL': (26.07, -80.15),
    'RDU': (35.87, -78.78), 'SAN': (32.73, -117.17), 'AUS': (30.19, -97.67),
    'LAS': (36.08, -115.15), 'PHX': (33.45, -112.33), 'PDX': (45.59, -122.60),
    # Major Indian airports
    'DEL': (28.57, 77.21), 'BOM': (19.09, 72.87), 'BLR': (12.97, 77.59),
    'HYD': (17.25, 78.43), 'MAA': (12.99, 80.18), 'CCU': (22.66, 88.45),
    'COK': (10.15, 76.41), 'TRV': (8.48, 76.92), 'AMD': (23.08, 72.63),
    'PNQ': (18.58, 73.92), 'GOI': (15.38, 73.83), 'IXC': (30.70, 76.72),
    'JLR': (23.25, 79.93), 'IDR': (22.80, 75.90), 'VGA': (20.30, 75.70),
    'NAG': (21.10, 79.07), 'RPR': (21.50, 81.73), 'BHU': (21.75, 72.15),
    'JDH': (26.28, 73.02),
    # Major international airports
    'SYD': (-33.94, 151.18), 'MEL': (-37.81, 144.96), 'BNE': (-27.39, 153.13),
    'ADL': (-34.95, 138.53), 'PER': (-31.94, 115.97), 'LHR': (51.47, -0.46),
    'CDG': (49.01, 2.55), 'NRT': (35.77, 140.39), 'ICN': (37.46, 126.44),
    'DXB': (25.25, 55.36), 'SIN': (1.36, 103.99), 'BKK': (13.69, 100.75),
    'HKG': (22.31, 114.19), 'KUL': (3.13, 101.70), 'CAI': (30.12, 31.40),
}

# City-name fragments (lower case) mapped to a representative IATA code.
_AIRPORT_NAME_TO_CODE = {
    'delhi': 'DEL', 'mumbai': 'BOM', 'bangalore': 'BLR', 'hyderabad': 'HYD',
    'chennai': 'MAA', 'kolkata': 'CCU', 'kochi': 'COK', 'trivandrum': 'TRV',
    'ahmedabad': 'AMD', 'pune': 'PNQ', 'goa': 'GOI', 'chandigarh': 'IXC',
    'new york': 'JFK', 'los angeles': 'LAX', 'chicago': 'ORD',
    'san francisco': 'SFO', 'boston': 'BOS', 'atlanta': 'ATL',
    'dallas': 'DFW', 'denver': 'DEN', 'seattle': 'SEA', 'minneapolis': 'MSP',
    'detroit': 'DTW', 'philadelphia': 'PHL', 'charlotte': 'CLT',
    'london': 'LHR', 'paris': 'CDG', 'tokyo': 'NRT', 'seoul': 'ICN',
    'dubai': 'DXB', 'singapore': 'SIN', 'bangkok': 'BKK',
}

_EARTH_RADIUS_MILES = 3959
_DEFAULT_DISTANCE_MILES = 1000
_MIN_DISTANCE_MILES = 100
_MAX_DISTANCE_MILES = 8000
_DEFAULT_AIRPORT_CODE = 'JFK'

# Season names ordered so that index 0 is the northern-hemisphere season of
# December-February; the southern hemisphere is the same cycle shifted by two.
_SEASONS = ('Winter', 'Spring', 'Summer', 'Fall')


@app.route('/static/images/background.png')
def serve_background():
    """Serve the dashboard background image, or an empty 404 if unavailable."""
    try:
        return send_from_directory('static/images', 'background.png')
    except Exception:
        # Best effort: any failure to serve the file is reported as "not found".
        return '', 404


# Global state populated by load_model_and_data().
model = None
data = None
airlines_mapping = {}
airports_mapping = {}


def load_model_and_data():
    """Populate the module-level model, data and name mappings.

    Data comes from ``FlightDataPreprocessor`` and the model from
    ``MODEL_PATH``. Any failure is logged to stdout and leaves the affected
    globals at their "unavailable" values (``None`` / empty dict); the
    function never raises, so the server can still start and serve fallback
    predictions.
    """
    global model, data, airlines_mapping, airports_mapping

    try:
        preprocessor = FlightDataPreprocessor()

        if preprocessor.load_data():
            data = preprocessor.preprocess_flights_data()
            airlines_mapping = preprocessor.clean_airlines_data()
            airports_mapping = preprocessor.clean_airports_data()
            print("Data loaded successfully!")
            print(f"Loaded {len(data)} flight records")
            print(f"Airlines: {len(airlines_mapping)}")
            print(f"Airports: {len(airports_mapping)}")
        else:
            print("Failed to load data")
            data = None
            airlines_mapping = {}
            airports_mapping = {}

        if os.path.exists(MODEL_PATH):
            # SECURITY NOTE: joblib.load unpickles the file and can execute
            # arbitrary code. Only load model files from a trusted source.
            model = joblib.load(MODEL_PATH)
            print("Model loaded successfully!")
        else:
            print("Model file not found")
            model = None

    except Exception as e:
        # Startup boundary: degrade to "nothing loaded" instead of crashing.
        print(f"Error loading model/data: {e}")
        model = None
        data = None
        airlines_mapping = {}
        airports_mapping = {}


def calculate_season(date_str, origin_airport):
    """Return the season name for a flight date at the origin airport.

    Args:
        date_str: Date in ``YYYY-MM-DD`` format.
        origin_airport: IATA code of the origin. Codes in the southern
            hemisphere list get the reversed seasons; everything else is
            treated as northern hemisphere.

    Returns:
        One of ``'Winter'``, ``'Spring'``, ``'Summer'``, ``'Fall'``.
        ``'Spring'`` is returned when the date cannot be parsed.
    """
    try:
        month = datetime.strptime(date_str, '%Y-%m-%d').month
    except (TypeError, ValueError):
        return 'Spring'  # Default fallback

    # Dec/Jan/Feb -> 0, Mar-May -> 1, Jun-Aug -> 2, Sep-Nov -> 3
    season_index = (month % 12) // 3

    if isinstance(origin_airport, str) and origin_airport in _SOUTHERN_AIRPORTS:
        # Southern hemisphere seasons are offset by half a year.
        season_index = (season_index + 2) % 4

    return _SEASONS[season_index]


def calculate_distance(origin, destination):
    """Return the approximate great-circle distance in miles between airports.

    Both arguments are free-form airport names or codes; they are resolved
    with ``extract_airport_code``. The result is clamped to the range
    100-8000 miles. When either airport has no known coordinates the default
    of 1000 miles is returned.
    """
    origin_code = extract_airport_code(origin)
    dest_code = extract_airport_code(destination)

    try:
        lat1, lon1 = _AIRPORT_COORDS[origin_code]
        lat2, lon2 = _AIRPORT_COORDS[dest_code]
    except KeyError:
        # Fallback distance if coordinates not found
        return _DEFAULT_DISTANCE_MILES

    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = (math.sin(dlat / 2) ** 2
         + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2)
    # Rounding can push sqrt(a) marginally above 1 for near-antipodal points,
    # which would make asin raise; clamp to keep the result defined.
    c = 2 * math.asin(min(1.0, math.sqrt(a)))

    distance = c * _EARTH_RADIUS_MILES
    return max(_MIN_DISTANCE_MILES, min(_MAX_DISTANCE_MILES, distance))


def extract_airport_code(airport_name):
    """Resolve a free-form airport name to a 3-letter code.

    Resolution order:
      1. A 3-letter code in the last pair of parentheses, e.g.
         ``"Indira Gandhi International Airport (DEL)"``.
      2. The whole input when it is just three letters (any case).
      3. A standalone all-uppercase 3-letter token, e.g. ``"Heathrow LHR"``.
         Ordinary words such as "New", "Los" or "San" are not accepted, so
         city names reach the mapping in step 4.
      4. A known city-name fragment (case-insensitive substring match).
      5. The first three characters, upper-cased.

    Returns ``'JFK'`` when ``airport_name`` is not a string.
    """
    if not isinstance(airport_name, str):
        return _DEFAULT_AIRPORT_CODE  # Default fallback

    # Try to extract code from parentheses
    if '(' in airport_name and ')' in airport_name:
        code = airport_name.split('(')[-1].split(')')[0].strip()
        if len(code) == 3 and code.isalpha():  # Standard IATA code shape
            return code.upper()

    stripped = airport_name.strip()
    if len(stripped) == 3 and stripped.isalpha():
        return stripped.upper()

    for word in airport_name.split():
        if len(word) == 3 and word.isalpha() and word.isupper():
            return word

    # Check if airport name contains any known city names
    name_lower = airport_name.lower()
    for key, code in _AIRPORT_NAME_TO_CODE.items():
        if key in name_lower:
            return code

    # Return first 3 letters as fallback
    return airport_name[:3].upper()


@app.route('/')
def index():
    """Render the dashboard landing page."""
    return render_template('index.html')


@app.route('/api/test', methods=['GET', 'POST'])
def test_endpoint():
    """Debug endpoint.

    GET reports whether the model/data are loaded. POST runs the distance and
    season helpers on the submitted (or sample) flight and echoes the result.
    Errors are reported as ``{'error': ...}`` JSON.
    """
    try:
        if request.method == 'GET':
            return jsonify({
                'status': 'Test endpoint working',
                'model_loaded': model is not None,
                'data_loaded': data is not None,
                'airlines_mapping': len(airlines_mapping) if airlines_mapping else 0,
                'airports_mapping': len(airports_mapping) if airports_mapping else 0
            })

        # Test prediction with sample data
        airline = request.form.get('airline', 'Air India')
        origin = request.form.get('origin', 'Indira Gandhi International Airport (DEL)')
        destination = request.form.get('destination', 'Chatrapati Shivaji International Airport (BOM)')
        departure_hour = int(request.form.get('departure_hour', 10))
        flight_date = request.form.get('flight_date', '2024-12-15')

        print(f"Test prediction: {airline}, {origin}, {destination}, {departure_hour}, {flight_date}")

        # Test distance calculation
        distance = calculate_distance(origin, destination)
        print(f"Calculated distance: {distance}")

        # Test season calculation
        origin_code = extract_airport_code(origin)
        season = calculate_season(flight_date, origin_code)
        print(f"Calculated season: {season}")

        return jsonify({
            'status': 'Test successful',
            'airline': airline,
            'origin': origin,
            'destination': destination,
            'distance': distance,
            'season': season,
            'origin_code': origin_code
        })
    except Exception as e:
        print(f"Test endpoint error: {e}")
        return jsonify({'error': f'Test failed: {str(e)}'})


@app.route('/api/predict', methods=['POST'])
def predict():
    """Predict whether a flight will be delayed.

    Expects form fields ``airline``, ``origin``, ``destination``,
    ``departure_hour`` (0-23) and ``flight_date`` (``YYYY-MM-DD``). Season and
    distance are derived from those inputs. The loaded model is used when
    both model and data are available; otherwise, or if the model call fails,
    the rule-based fallback is used.

    Returns JSON with the prediction details, or ``{'error': ...}`` on
    invalid input or failure.
    """
    try:
        print("=== PREDICTION REQUEST STARTED ===")

        # Get form data
        airline = request.form.get('airline')
        origin = request.form.get('origin')
        destination = request.form.get('destination')
        departure_hour = request.form.get('departure_hour')
        flight_date = request.form.get('flight_date')

        print(f"Form data received: airline={airline}, origin={origin}, destination={destination}, hour={departure_hour}, date={flight_date}")

        # Validate required fields
        if not all([airline, origin, destination, departure_hour, flight_date]):
            error_msg = 'All fields are required'
            print(f"Validation error: {error_msg}")
            return jsonify({'error': error_msg})

        # Convert departure_hour to int and make sure it is a real hour
        try:
            departure_hour = int(departure_hour)
            if not 0 <= departure_hour <= 23:
                raise ValueError(departure_hour)
        except ValueError:
            error_msg = 'Invalid departure hour'
            print(f"Departure hour error: {error_msg}")
            return jsonify({'error': error_msg})

        # Parse the date up front so a malformed date is reported clearly
        # instead of silently producing a default season first.
        try:
            date_obj = datetime.strptime(flight_date, '%Y-%m-%d')
        except ValueError:
            error_msg = 'Invalid flight date'
            print(f"Flight date error: {error_msg}")
            return jsonify({'error': error_msg})

        # Extract origin airport code for season calculation
        origin_code = extract_airport_code(origin)
        print(f"Extracted origin code: {origin_code}")

        # Calculate season automatically based on date and origin
        season = calculate_season(flight_date, origin_code)
        print(f"Calculated season: {season}")

        # Calculate distance automatically between airports
        distance = calculate_distance(origin, destination)
        print(f"Calculated distance: {distance}")

        # Extract day of week and month from date
        day_of_week = date_obj.weekday() + 1  # Monday=1
        month = date_obj.month
        print(f"Date parsed: day_of_week={day_of_week}, month={month}")

        if model is not None and data is not None:
            print("Using model for prediction")
            try:
                # NOTE(review): assumes the model was trained on exactly these
                # four features in this order - confirm against training code.
                features = np.array([[departure_hour, day_of_week, month, distance]])
                print(f"Features array: {features}")

                prediction = model.predict(features)[0]
                probability = model.predict_proba(features)[0][1] * 100
                print(f"Model prediction: {prediction}, probability: {probability}")

                # Add some randomness for demo. The jitter is applied after the
                # class prediction, so status and probability can disagree
                # near the decision boundary.
                probability = min(95, max(5, probability + np.random.normal(0, 5)))
            except Exception as model_error:
                print(f"Model prediction error: {model_error}")
                prediction, probability = fallback_prediction_logic(departure_hour, distance, season)
        else:
            print("Using fallback prediction logic")
            prediction, probability = fallback_prediction_logic(departure_hour, distance, season)

        result = {
            'prediction': int(prediction),
            # float() guarantees a JSON-serializable value whatever numeric
            # type the model or the fallback produced.
            'probability': round(float(probability), 1),
            'status': 'Delayed' if prediction == 1 else 'On Time',
            'departure_hour': departure_hour,
            'flight_date': flight_date,
            'season': season,
            'day_of_week': day_of_week,
            'month': month,
            'airline': airline,
            'origin': origin,
            'destination': destination,
            'distance': round(distance, 1)
        }

        print(f"Final result: {result}")
        print("=== PREDICTION REQUEST COMPLETED ===")
        return jsonify(result)

    except Exception as e:
        print(f"Prediction error: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'Prediction failed: {str(e)}'})


def fallback_prediction_logic(departure_hour, distance, season):
    """Rule-based delay estimate used when the model is not available.

    Starts from a 20% base delay rate, adjusts for departure hour, distance
    and season, adds a random jitter of up to +/-10 points and clamps the
    result to 5-90.

    Returns:
        ``(prediction, probability)`` where ``prediction`` is 1 (delayed) when
        the probability exceeds 40, else 0. ``(0, 25)`` is returned if the
        inputs cannot be compared numerically.
    """
    try:
        probability = 20  # Base 20% delay rate

        # Time-based adjustments
        if departure_hour in (6, 7, 8, 20, 21, 22):  # Peak hours
            probability += 25
        elif departure_hour in (0, 1, 2, 3, 4, 5):  # Late night/early morning
            probability -= 10

        # Distance-based adjustments
        if distance > 2000:  # Long flights
            probability += 15
        elif distance < 300:  # Short flights
            probability += 5

        # Season-based adjustments
        if season in ('Winter', 'Summer'):  # Higher delay seasons
            probability += 10

        # randint's upper bound is exclusive; 11 makes the jitter a symmetric
        # -10..+10 instead of -10..+9.
        probability += np.random.randint(-10, 11)
        probability = max(5, min(90, probability))

        prediction = 1 if probability > 40 else 0
        return prediction, probability

    except TypeError:
        return 0, 25  # Safe fallback


@app.route('/api/insights')
def insights():
    """Return headline delay metrics computed from the loaded flight data."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        delay_rate = data['IS_DELAYED'].mean() * 100
        total_flights = len(data)

        # Average delay over flights that actually arrived late.
        late_arrivals = data.loc[data['ARRIVAL_DELAY'] > 0, 'ARRIVAL_DELAY']
        avg_delay = late_arrivals.mean() if len(late_arrivals) > 0 else 0

        on_time_flights = len(data[data['IS_DELAYED'] == 0])

        delays_by_hour = data.groupby('DEPARTURE_HOUR')['IS_DELAYED'].mean() * 100
        delays_by_season = data.groupby('SEASON')['IS_DELAYED'].mean() * 100

        # Feature importance (simplified, static values)
        feature_importance = {
            'DEPARTURE_HOUR': 0.25,
            'DISTANCE': 0.20,
            'MONTH': 0.18,
            'DAY_OF_WEEK': 0.15,
            'AIRLINE': 0.12,
            'ORIGIN': 0.10
        }

        return jsonify({
            'delay_rate': round(delay_rate, 1),
            'total_flights': total_flights,
            'on_time_flights': on_time_flights,
            'avg_delay': round(avg_delay, 1),
            'delays_by_hour': delays_by_hour.to_dict(),
            'delays_by_season': delays_by_season.to_dict(),
            'feature_importance': feature_importance
        })
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/airlines')
def get_airlines():
    """Return the sorted list of airlines, preferring full names over codes."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        if 'AIRLINE_NAME' in data.columns:
            airlines = sorted(data['AIRLINE_NAME'].dropna().unique().tolist())
        elif airlines_mapping:
            # Use airlines_mapping as fallback
            airlines = sorted(set(airlines_mapping.values()))
        else:
            airlines = sorted(data['AIRLINE'].dropna().unique().tolist())

        return jsonify({'airlines': airlines})
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/airports')
def get_airports():
    """Return the sorted list of airports, preferring full names over codes."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        # Collect origin and destination airport names where available.
        names = set()
        for column in ('ORIGIN_AIRPORT_NAME', 'DESTINATION_AIRPORT_NAME'):
            if column in data.columns:
                names.update(data[column].dropna().unique().tolist())
        airports = sorted(names)

        # Fallback to airports_mapping if names not available
        if not airports and airports_mapping:
            airports = sorted(set(airports_mapping.values()))

        # Final fallback to codes if nothing else works
        if not airports:
            origin_codes = data['ORIGIN_AIRPORT'].dropna().unique().tolist()
            dest_codes = data['DESTINATION_AIRPORT'].dropna().unique().tolist()
            airports = sorted(set(origin_codes + dest_codes))

        return jsonify({'airports': airports})
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/chart-data/hourly')
def chart_data_hourly():
    """Return the delay rate (%) for each hour of the day with 12h labels."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        delays_by_hour = data.groupby('DEPARTURE_HOUR')['IS_DELAYED'].mean() * 100

        # Readable labels: 0 -> "12:00 AM", 13 -> "1:00 PM", ...
        hour_labels = [
            f"{hour % 12 or 12}:00 {'AM' if hour < 12 else 'PM'}"
            for hour in range(24)
        ]

        return jsonify({
            'labels': hour_labels,
            # Hours with no flights are reported as 0.
            'data': [delays_by_hour.get(hour, 0) for hour in range(24)]
        })
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/chart-data/seasonal')
def chart_data_seasonal():
    """Return the delay rate (%) per season in a fixed Spring..Winter order."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        delays_by_season = data.groupby('SEASON')['IS_DELAYED'].mean() * 100

        # Fixed order keeps the chart stable; seasons missing from the data
        # are reported as 0.0.
        season_order = ['Spring', 'Summer', 'Fall', 'Winter']
        seasonal_data = [
            float(delays_by_season.get(season, 0.0)) for season in season_order
        ]

        return jsonify({
            'labels': season_order,
            'data': seasonal_data
        })
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/chart-data/feature-importance')
def chart_data_feature_importance():
    """Return demo feature-importance values with a small random jitter.

    The base values are static; each is scaled by a normal factor
    (sigma 5%) and clamped to 0.05-0.30.
    """
    try:
        base_importance = {
            'Departure Hour': 0.25,
            'Flight Distance': 0.20,
            'Month of Year': 0.18,
            'Day of Week': 0.15,
            'Airline': 0.12,
            'Origin Airport': 0.10
        }

        # A local generator is used so this endpoint does not reseed the
        # process-wide NumPy RNG (which other endpoints draw from), and so
        # requests arriving within the same second do not get identical
        # jitter.
        rng = np.random.default_rng()
        dynamic_importance = {
            feature: float(max(0.05, min(0.30, importance * (1 + rng.normal(0, 0.05)))))
            for feature, importance in base_importance.items()
        }

        return jsonify({
            'labels': list(dynamic_importance.keys()),
            'data': list(dynamic_importance.values())
        })
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/chart-data/prediction-trends')
def chart_data_prediction_trends():
    """Return ten simulated daily prediction values (random, demo only)."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        # Simulated values; nothing here is derived from the loaded data.
        day_count = 10
        predictions = [int(np.random.randint(15, 85)) for _ in range(day_count)]
        labels = [f'Day {day}' for day in range(1, day_count + 1)]

        return jsonify({
            'labels': labels,
            'data': predictions
        })
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/chart-data/airline-performance')
def chart_data_airline_performance():
    """Return the ten airlines with the highest delay rate (%)."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        group_column = 'AIRLINE_NAME' if 'AIRLINE_NAME' in data.columns else 'AIRLINE'
        delay_rates = (data.groupby(group_column)['IS_DELAYED'].mean() * 100).to_dict()

        # Translate airline codes to full names where the mapping knows them;
        # keys that are not in the mapping are kept unchanged.
        if airlines_mapping:
            delay_rates = {
                airlines_mapping.get(code, code): rate
                for code, rate in delay_rates.items()
            }

        # Worst ten performers, highest delay rate first.
        worst_ten = sorted(
            delay_rates.items(), key=lambda item: item[1], reverse=True
        )[:10]

        # Plain Python types avoid JSON serialization issues.
        return jsonify({
            'labels': [str(name) for name, _ in worst_ten],
            'data': [float(rate) for _, rate in worst_ten]
        })
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/chart-data/airport-performance')
def chart_data_airport_performance():
    """Return flight counts and delay rates (%) for the ten busiest origins."""
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        if 'ORIGIN_AIRPORT_NAME' in data.columns:
            airport_column = 'ORIGIN_AIRPORT_NAME'
        else:
            airport_column = 'ORIGIN_AIRPORT'

        delay_rates = (data.groupby(airport_column)['IS_DELAYED'].mean() * 100).to_dict()
        flight_counts = data[airport_column].value_counts().head(10).to_dict()

        # Translate airport codes to full names where the mapping knows them;
        # keys that are not in the mapping are kept unchanged.
        if airports_mapping:
            delay_rates = {
                airports_mapping.get(code, code): rate
                for code, rate in delay_rates.items()
            }
            flight_counts = {
                airports_mapping.get(code, code): count
                for code, count in flight_counts.items()
            }

        # Top 10 airports by flight volume, busiest first.
        busiest = sorted(
            flight_counts.items(), key=lambda item: item[1], reverse=True
        )[:10]

        # Plain Python types avoid JSON serialization issues.
        top_airports_dict = {str(name): int(count) for name, count in busiest}
        delay_rates_dict = {
            str(name): float(delay_rates[name])
            for name, _ in busiest
            if name in delay_rates
        }

        return jsonify({
            'delay_rates': delay_rates_dict,
            'top_airports': top_airports_dict
        })
    except Exception as e:
        return jsonify({'error': str(e)})


@app.route('/api/flight-visualization', methods=['POST'])
def flight_visualization():
    """Return simulated per-phase confidence and risk factors for a flight.

    Form fields: ``airline``, ``origin``, ``destination``, ``probability``
    (percent, default 50) and ``season``. All phase and risk values are
    randomly generated for display; only ``probability`` influences them.
    """
    try:
        airline = request.form.get('airline', 'Unknown Airline')
        origin = request.form.get('origin', 'Unknown Origin')
        destination = request.form.get('destination', 'Unknown Destination')
        probability = float(request.form.get('probability', 50))
        season = request.form.get('season', 'Spring')

        # float() accepts "nan"/"inf"; those would be emitted as non-standard
        # JSON that browsers cannot parse, so reject them explicitly.
        if not np.isfinite(probability):
            return jsonify({'error': 'Invalid probability'})

        flight_phases = ['Pre-Flight', 'Boarding', 'Takeoff', 'Cruise',
                         'Descent', 'Landing', 'Post-Flight']

        # Confidence per phase: the first three phases use the base value,
        # later phases get random noise. Values are clamped to 0-100%.
        base_confidence = probability / 100
        phase_confidences = []
        for phase_index in range(len(flight_phases)):
            noise = np.random.normal(0, 0.1) if phase_index > 2 else 0
            confidence = max(0, min(1, base_confidence + noise))
            phase_confidences.append(round(confidence * 100, 1))

        # Simulated risk factors for the flight
        risk_factors = {
            'Weather Risk': round(np.random.uniform(10, 40), 1),
            'Traffic Congestion': round(np.random.uniform(5, 25), 1),
            'Airport Delay History': round(np.random.uniform(15, 35), 1),
            'Airline Performance': round(np.random.uniform(8, 20), 1),
            'Time of Day': round(np.random.uniform(5, 30), 1)
        }

        return jsonify({
            'flight_phases': flight_phases,
            'phase_confidences': phase_confidences,
            'risk_factors': list(risk_factors.keys()),
            'risk_values': list(risk_factors.values()),
            'overall_probability': probability,
            'season': season,
            'airline': airline,
            'origin': origin,
            'destination': destination
        })
    except Exception as e:
        return jsonify({'error': str(e)})


if __name__ == '__main__':
    # NOTE: model and data are only loaded when the file is run directly;
    # under a WSGI server that imports `app`, the data-backed endpoints will
    # report "Data not available" unless load_model_and_data() is called.
    load_model_and_data()
    port = int(os.environ.get("PORT", 7860))
    app.run(host='0.0.0.0', port=port)