# Zayeemk's picture
# Update app.py
# b03b328 verified
from flask import Flask, render_template, request, jsonify
import pandas as pd
import numpy as np
from datetime import datetime
import joblib
import warnings
import plotly.graph_objects as go
import plotly.express as px
from utils.preprocess import FlightDataPreprocessor
import os
# Flask application object; files in the 'static' folder are exposed under the /static URL prefix.
app = Flask(__name__, static_folder='static', static_url_path='/static')
# Dedicated route for the page background image.
@app.route('/static/images/background.png')
def serve_background():
    """Serve ``static/images/background.png``.

    Returns:
        The file response, or an empty body with HTTP status 404 when the
        file cannot be sent for any reason.
    """
    try:
        from flask import send_from_directory
        return send_from_directory('static/images', 'background.png')
    except Exception:
        # Best-effort: any failure is reported to the client as "not found".
        # ``Exception`` (rather than a bare ``except``) lets SystemExit and
        # KeyboardInterrupt propagate so the process can still be stopped.
        return '', 404
# Global variables
# Shared module state; load_model_and_data() assigns all four.
model = None  # object returned by joblib.load, or None when unavailable
data = None  # result of FlightDataPreprocessor.preprocess_flights_data(), or None
airlines_mapping = {}  # result of FlightDataPreprocessor.clean_airlines_data()
airports_mapping = {}  # result of FlightDataPreprocessor.clean_airports_data()
def load_model_and_data():
    """Populate the module-level ``model``, ``data`` and mapping globals.

    The dataset and the model are loaded in two independent steps so that a
    failure in one does not discard the result of the other (for example, a
    corrupt model file no longer wipes a dataset that loaded correctly).

    This function never raises: every failure is printed and leaves the
    affected globals at ``None`` / ``{}``.
    """
    global model, data, airlines_mapping, airports_mapping

    # --- Dataset and lookup tables -------------------------------------
    try:
        # Initialize preprocessor
        preprocessor = FlightDataPreprocessor()
        if preprocessor.load_data():
            data = preprocessor.preprocess_flights_data()
            airlines_mapping = preprocessor.clean_airlines_data()
            airports_mapping = preprocessor.clean_airports_data()
            print("Data loaded successfully!")
            print(f"Loaded {len(data)} flight records")
            print(f"Airlines: {len(airlines_mapping)}")
            print(f"Airports: {len(airports_mapping)}")
        else:
            print("Failed to load data")
            data = None
            airlines_mapping = {}
            airports_mapping = {}
    except Exception as e:
        print(f"Error loading model/data: {e}")
        data = None
        airlines_mapping = {}
        airports_mapping = {}

    # --- Model ---------------------------------------------------------
    model_path = 'model/flight_delay_model.pkl'
    try:
        if os.path.exists(model_path):
            # NOTE(security): joblib.load unpickles the file, which can run
            # arbitrary code. Only ship model files from a trusted source.
            model = joblib.load(model_path)
            print("Model loaded successfully!")
        else:
            print("Model file not found")
            model = None
    except Exception as e:
        print(f"Error loading model/data: {e}")
        model = None
# Origin codes treated as southern hemisphere (simplified list); any other
# code is handled as northern hemisphere.
_SOUTHERN_HEMISPHERE_AIRPORTS = (
    'SYD', 'MEL', 'BNE', 'ADL', 'PER', 'CBR', 'HBA', 'DRW', 'CNS', 'OOL',
)

# Northern-hemisphere season for each month, indexed by ``month - 1``.
_NORTHERN_SEASON_BY_MONTH = (
    'Winter', 'Winter',            # Jan, Feb
    'Spring', 'Spring', 'Spring',  # Mar - May
    'Summer', 'Summer', 'Summer',  # Jun - Aug
    'Fall', 'Fall', 'Fall',        # Sep - Nov
    'Winter',                      # Dec
)

# Southern-hemisphere seasons are the northern ones shifted by half a year.
_OPPOSITE_SEASON = {
    'Winter': 'Summer',
    'Spring': 'Fall',
    'Summer': 'Winter',
    'Fall': 'Spring',
}


def calculate_season(date_str, origin_airport):
    """Return the season for a flight date at the origin airport.

    Args:
        date_str: Flight date formatted as ``YYYY-MM-DD``.
        origin_airport: Airport code of the origin. Codes listed in
            ``_SOUTHERN_HEMISPHERE_AIRPORTS`` get reversed seasons; every
            other value is treated as northern hemisphere.

    Returns:
        One of ``'Winter'``, ``'Spring'``, ``'Summer'`` or ``'Fall'``.
        ``'Spring'`` is returned when the date cannot be parsed.
    """
    try:
        month = datetime.strptime(date_str, '%Y-%m-%d').month
    except (TypeError, ValueError):
        # Not a string, or not in the expected format: default fallback.
        return 'Spring'

    season = _NORTHERN_SEASON_BY_MONTH[month - 1]
    if origin_airport in _SOUTHERN_HEMISPHERE_AIRPORTS:
        return _OPPOSITE_SEASON[season]
    return season
# Airport coordinates as (latitude, longitude) in degrees - simplified dataset.
_AIRPORT_COORDS = {
    # Major US airports
    'JFK': (40.64, -73.78), 'LAX': (33.94, -118.41), 'ORD': (41.98, -87.90),
    'SFO': (37.62, -122.38), 'BOS': (42.36, -71.01), 'ATL': (33.64, -84.43),
    'DFW': (32.90, -97.04), 'DEN': (39.86, -104.67), 'SEA': (47.45, -122.31),
    'MSP': (44.88, -93.22), 'DTW': (42.21, -83.35), 'PHL': (39.87, -75.25),
    'CLT': (35.21, -80.95), 'LGA': (40.77, -73.87), 'BWI': (39.18, -76.67),
    'SLC': (40.79, -111.98), 'DCA': (38.85, -77.04), 'MCO': (28.43, -81.31),
    'TPA': (27.98, -82.53), 'MDW': (41.78, -87.75), 'FLL': (26.07, -80.15),
    'RDU': (35.87, -78.78), 'SAN': (32.73, -117.17), 'AUS': (30.19, -97.67),
    'LAS': (36.08, -115.15), 'PHX': (33.45, -112.33), 'PDX': (45.59, -122.60),
    # Major Indian airports
    'DEL': (28.57, 77.21), 'BOM': (19.09, 72.87), 'BLR': (12.97, 77.59),
    'HYD': (17.25, 78.43), 'MAA': (12.99, 80.18), 'CCU': (22.66, 88.45),
    'COK': (10.15, 76.41), 'TRV': (8.48, 76.92), 'AMD': (23.08, 72.63),
    'PNQ': (18.58, 73.92), 'GOI': (15.38, 73.83), 'IXC': (30.70, 76.72),
    'JLR': (23.25, 79.93), 'IDR': (22.80, 75.90), 'VGA': (20.30, 75.70),
    'NAG': (21.10, 79.07), 'RPR': (21.50, 81.73), 'BHU': (21.75, 72.15),
    'JDH': (26.28, 73.02),
    # Major international airports
    'SYD': (-33.94, 151.18), 'MEL': (-37.81, 144.96), 'BNE': (-27.39, 153.13),
    'ADL': (-34.95, 138.53), 'PER': (-31.94, 115.97), 'LHR': (51.47, -0.46),
    'CDG': (49.01, 2.55), 'NRT': (35.77, 140.39), 'ICN': (37.46, 126.44),
    'DXB': (25.25, 55.36), 'SIN': (1.36, 103.99), 'BKK': (13.69, 100.75),
    'HKG': (22.31, 114.19), 'KUL': (3.13, 101.70), 'CAI': (30.12, 31.40),
}


def calculate_distance(origin, destination):
    """Return the approximate great-circle distance in miles between airports.

    Args:
        origin: Origin airport, as a code or a full name; it is passed
            through ``extract_airport_code``.
        destination: Destination airport, in the same form as ``origin``.

    Returns:
        The haversine distance clamped to the range 100-8000 miles (so an
        identical origin and destination yields 100). Returns the default of
        1000 miles when either airport is missing from ``_AIRPORT_COORDS``
        or when any error occurs.
    """
    import math  # kept local, as in the original implementation

    try:
        # Extract airport codes from full names
        origin_code = extract_airport_code(origin)
        dest_code = extract_airport_code(destination)

        if origin_code not in _AIRPORT_COORDS or dest_code not in _AIRPORT_COORDS:
            # Fallback distance if coordinates not found
            return 1000

        lat1, lon1 = map(math.radians, _AIRPORT_COORDS[origin_code])
        lat2, lon2 = map(math.radians, _AIRPORT_COORDS[dest_code])

        # Haversine formula
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = (math.sin(dlat / 2) ** 2
             + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2)
        # Floating-point rounding can nudge ``a`` just outside [0, 1] for
        # (near-)antipodal points, which would make sqrt/asin raise.
        a = min(1.0, max(0.0, a))
        c = 2 * math.asin(math.sqrt(a))

        earth_radius_miles = 3959
        distance = c * earth_radius_miles
        return max(100, min(8000, distance))  # Clamp between 100 and 8000 miles
    except Exception as e:
        print(f"Error calculating distance: {e}")
        return 1000  # Fallback distance
# City-name fragments (lower case) mapped to the airport code used for them.
# Insertion order matters: the first fragment found in the name wins.
_CITY_TO_AIRPORT_CODE = {
    'delhi': 'DEL', 'mumbai': 'BOM', 'bangalore': 'BLR', 'hyderabad': 'HYD',
    'chennai': 'MAA', 'kolkata': 'CCU', 'kochi': 'COK', 'trivandrum': 'TRV',
    'ahmedabad': 'AMD', 'pune': 'PNQ', 'goa': 'GOI', 'chandigarh': 'IXC',
    'new york': 'JFK', 'los angeles': 'LAX', 'chicago': 'ORD', 'san francisco': 'SFO',
    'boston': 'BOS', 'atlanta': 'ATL', 'dallas': 'DFW', 'denver': 'DEN',
    'seattle': 'SEA', 'minneapolis': 'MSP', 'detroit': 'DTW', 'philadelphia': 'PHL',
    'charlotte': 'CLT', 'london': 'LHR', 'paris': 'CDG', 'tokyo': 'NRT',
    'seoul': 'ICN', 'dubai': 'DXB', 'singapore': 'SIN', 'bangkok': 'BKK',
}


def extract_airport_code(airport_name):
    """Extract a 3-letter airport code from a code or a full airport name.

    The lookup tries, in order:

    1. a 3-character code in the last pair of parentheses, e.g. ``"... (DEL)"``;
    2. the whole input being a single 3-letter word (any case), e.g. ``"jfk"``;
    3. an all-caps 3-letter word inside otherwise mixed-case text, e.g.
       ``"Chicago ORD"``;
    4. a known city name contained in the input, e.g. ``"New York"`` -> ``JFK``;
    5. the first 3-letter alphabetic word;
    6. the first three characters of the input.

    City names are checked before the generic 3-letter-word heuristic so that
    ordinary words such as "New", "Los" or "San" are not mistaken for codes.

    Args:
        airport_name: Airport code or descriptive airport name.

    Returns:
        The upper-cased code. ``'JFK'`` is returned when ``airport_name`` is
        not a string; an empty string yields an empty result.
    """
    if not isinstance(airport_name, str):
        return 'JFK'  # Default fallback

    # 1. Code in parentheses
    if '(' in airport_name and ')' in airport_name:
        code = airport_name.split('(')[-1].split(')')[0].strip()
        if len(code) == 3:  # Standard IATA code length
            return code.upper()

    words = airport_name.split()
    code_like_words = [w for w in words if len(w) == 3 and w.isalpha()]

    # 2. The input is nothing but a 3-letter token
    if len(words) == 1 and code_like_words:
        return words[0].upper()

    # 3. An all-caps token only stands out when the text is not all caps
    if not airport_name.isupper():
        for word in code_like_words:
            if word.isupper():
                return word

    # 4. Known city names
    name_lower = airport_name.lower()
    for city, code in _CITY_TO_AIRPORT_CODE.items():
        if city in name_lower:
            return code

    # 5. Any 3-letter word
    if code_like_words:
        return code_like_words[0].upper()

    # 6. Return first 3 letters as fallback
    return airport_name[:3].upper()
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/api/test', methods=['GET', 'POST'])
def test_endpoint():
    """Diagnostic endpoint.

    GET reports whether the model, dataset and mappings are loaded.
    POST runs the distance and season helpers on the posted form fields
    (each with a sample default) and echoes the results.
    Any exception is reported as ``{'error': 'Test failed: ...'}``.
    """
    try:
        if request.method == 'GET':
            status_report = {
                'status': 'Test endpoint working',
                'model_loaded': model is not None,
                'data_loaded': data is not None,
                'airlines_mapping': len(airlines_mapping) if airlines_mapping else 0,
                'airports_mapping': len(airports_mapping) if airports_mapping else 0
            }
            return jsonify(status_report)

        # POST: exercise the helpers with the submitted (or sample) values.
        form = request.form
        airline = form.get('airline', 'Air India')
        origin = form.get('origin', 'Indira Gandhi International Airport (DEL)')
        destination = form.get('destination', 'Chatrapati Shivaji International Airport (BOM)')
        departure_hour = int(form.get('departure_hour', 10))
        flight_date = form.get('flight_date', '2024-12-15')
        print(f"Test prediction: {airline}, {origin}, {destination}, {departure_hour}, {flight_date}")

        # Distance helper
        distance = calculate_distance(origin, destination)
        print(f"Calculated distance: {distance}")

        # Season helper
        origin_code = extract_airport_code(origin)
        season = calculate_season(flight_date, origin_code)
        print(f"Calculated season: {season}")

        summary = {
            'status': 'Test successful',
            'airline': airline,
            'origin': origin,
            'destination': destination,
            'distance': distance,
            'season': season,
            'origin_code': origin_code
        }
        return jsonify(summary)
    except Exception as e:
        print(f"Test endpoint error: {e}")
        return jsonify({'error': f'Test failed: {str(e)}'})
@app.route('/api/predict', methods=['POST'])
def predict():
    """Predict whether a flight will be delayed from posted form fields.

    Expected form fields: ``airline``, ``origin``, ``destination``,
    ``departure_hour`` (integer from 0 to 23) and ``flight_date``
    (``YYYY-MM-DD``). Season and distance are derived from those inputs.

    When both the model and the dataset are loaded, the model is used and a
    small random jitter is added to its probability; otherwise, or when the
    model call fails, ``fallback_prediction_logic`` supplies the answer.

    Returns:
        JSON with the prediction details, or ``{'error': ...}`` when a field
        is missing or invalid, or when an unexpected error occurs.
    """
    try:
        print("=== PREDICTION REQUEST STARTED ===")
        # Get form data
        airline = request.form.get('airline')
        origin = request.form.get('origin')
        destination = request.form.get('destination')
        departure_hour = request.form.get('departure_hour')
        flight_date = request.form.get('flight_date')
        print(f"Form data received: airline={airline}, origin={origin}, destination={destination}, hour={departure_hour}, date={flight_date}")

        # Validate required fields
        if not all([airline, origin, destination, departure_hour, flight_date]):
            error_msg = 'All fields are required'
            print(f"Validation error: {error_msg}")
            return jsonify({'error': error_msg})

        # Convert departure_hour to int and make sure it is a real hour of
        # the day, so out-of-range values never reach the model.
        try:
            departure_hour = int(departure_hour)
        except ValueError:
            departure_hour = None
        if departure_hour is None or not 0 <= departure_hour <= 23:
            error_msg = 'Invalid departure hour'
            print(f"Departure hour error: {error_msg}")
            return jsonify({'error': error_msg})

        # Extract origin airport code for season calculation
        origin_code = extract_airport_code(origin)
        print(f"Extracted origin code: {origin_code}")

        # Calculate season automatically based on date and origin
        season = calculate_season(flight_date, origin_code)
        print(f"Calculated season: {season}")

        # Calculate distance automatically between airports
        distance = calculate_distance(origin, destination)
        print(f"Calculated distance: {distance}")

        # Extract day of week and month from date; a malformed date raises
        # ValueError here and is reported by the outer handler.
        date_obj = datetime.strptime(flight_date, '%Y-%m-%d')
        day_of_week = date_obj.weekday() + 1  # Monday=1
        month = date_obj.month
        print(f"Date parsed: day_of_week={day_of_week}, month={month}")

        # Enhanced prediction logic
        if model is not None and data is not None:
            print("Using model for prediction")
            try:
                # Create feature array
                features = np.array([[departure_hour, day_of_week, month, distance]])
                print(f"Features array: {features}")

                # Make prediction
                prediction = model.predict(features)[0]
                probability = model.predict_proba(features)[0][1] * 100
                print(f"Model prediction: {prediction}, probability: {probability}")

                # Add some randomness for demo, keeping the result in 5-95
                probability = min(95, max(5, probability + np.random.normal(0, 5)))
            except Exception as model_error:
                print(f"Model prediction error: {model_error}")
                # Fallback logic
                prediction, probability = fallback_prediction_logic(departure_hour, distance, season)
        else:
            print("Using fallback prediction logic")
            # Fallback logic based on patterns
            prediction, probability = fallback_prediction_logic(departure_hour, distance, season)

        result = {
            'prediction': int(prediction),
            'probability': round(probability, 1),
            'status': 'Delayed' if prediction == 1 else 'On Time',
            'departure_hour': departure_hour,
            'flight_date': flight_date,
            'season': season,
            'day_of_week': day_of_week,
            'month': month,
            'airline': airline,
            'origin': origin,
            'destination': destination,
            'distance': round(distance, 1)
        }
        print(f"Final result: {result}")
        print("=== PREDICTION REQUEST COMPLETED ===")
        return jsonify(result)
    except Exception as e:
        print(f"Prediction error: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'Prediction failed: {str(e)}'})
def fallback_prediction_logic(departure_hour, distance, season):
    """Heuristic delay estimate used when the model is not available.

    Starts from a 20% base delay rate and adjusts it for departure hour,
    distance and season, then adds random jitter from
    ``np.random.randint(-10, 10)`` (upper bound exclusive) and clamps the
    result to the range 5-90.

    Args:
        departure_hour: Hour of departure as an integer.
        distance: Flight distance in miles.
        season: Season name; ``'Winter'`` and ``'Summer'`` raise the estimate.

    Returns:
        ``(prediction, probability)`` where ``prediction`` is 1 when
        ``probability`` exceeds 40 and 0 otherwise. Returns ``(0, 25)`` when
        the inputs cannot be compared (for example ``distance`` is ``None``).
    """
    try:
        probability = 20  # Base 20% delay rate

        # Time-based adjustments
        if departure_hour in (6, 7, 8, 20, 21, 22):  # Peak hours
            probability += 25
        elif departure_hour in (0, 1, 2, 3, 4, 5):  # Late night/early morning
            probability -= 10

        # Distance-based adjustments
        if distance > 2000:  # Long flights
            probability += 15
        elif distance < 300:  # Short flights
            probability += 5

        # Season-based adjustments
        if season in ('Winter', 'Summer'):  # Higher delay seasons
            probability += 10

        # Add some randomness; int() keeps the value a plain Python int so
        # it stays JSON-serialisable whatever scalar type NumPy returns.
        probability += int(np.random.randint(-10, 10))
        probability = max(5, min(90, probability))

        prediction = 1 if probability > 40 else 0
        return prediction, probability
    except (TypeError, ValueError):
        return 0, 25  # Safe fallback
@app.route('/api/insights')
def insights():
    """Return headline delay statistics computed from the loaded dataset.

    Responds with ``{'error': 'Data not available'}`` when no dataset is
    loaded, and with ``{'error': <message>}`` if any computation fails.
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        # Headline metrics
        delay_rate = data['IS_DELAYED'].mean() * 100
        total_flights = len(data)
        late_arrivals = data[data['ARRIVAL_DELAY'] > 0]
        if len(late_arrivals) > 0:
            avg_delay = late_arrivals['ARRIVAL_DELAY'].mean()
        else:
            avg_delay = 0
        on_time_flights = len(data[data['IS_DELAYED'] == 0])

        # Percentage of delayed flights per departure hour and per season
        delays_by_hour = data.groupby('DEPARTURE_HOUR')['IS_DELAYED'].mean() * 100
        delays_by_season = data.groupby('SEASON')['IS_DELAYED'].mean() * 100

        # Fixed (simplified) feature-importance figures
        feature_importance = {
            'DEPARTURE_HOUR': 0.25,
            'DISTANCE': 0.20,
            'MONTH': 0.18,
            'DAY_OF_WEEK': 0.15,
            'AIRLINE': 0.12,
            'ORIGIN': 0.10
        }

        payload = {
            'delay_rate': round(delay_rate, 1),
            'total_flights': total_flights,
            'on_time_flights': on_time_flights,
            'avg_delay': round(avg_delay, 1),
            'delays_by_hour': delays_by_hour.to_dict(),
            'delays_by_season': delays_by_season.to_dict(),
            'feature_importance': feature_importance
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/airlines')
def get_airlines():
    """Return the sorted list of airlines known to the dataset.

    Preference order: the ``AIRLINE_NAME`` column, then the values of
    ``airlines_mapping``, then the raw ``AIRLINE`` column.
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        if 'AIRLINE_NAME' in data.columns:
            # Full names are available directly in the dataset.
            names = data['AIRLINE_NAME'].dropna().unique().tolist()
            airlines = sorted(names)
        elif airlines_mapping:
            # Fall back to the names held in the mapping.
            airlines = sorted(set(airlines_mapping.values()))
        else:
            # Last resort: whatever the AIRLINE column contains.
            raw_values = data['AIRLINE'].dropna().unique().tolist()
            airlines = sorted(raw_values)

        return jsonify({'airlines': airlines})
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/airports')
def get_airports():
    """Return the sorted list of airports known to the dataset.

    Names from the ``ORIGIN_AIRPORT_NAME`` / ``DESTINATION_AIRPORT_NAME``
    columns are preferred; if none are found the values of
    ``airports_mapping`` are used, and finally the raw airport columns.
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        # Collect full names from whichever name columns exist.
        collected = []
        for column in ('ORIGIN_AIRPORT_NAME', 'DESTINATION_AIRPORT_NAME'):
            if column in data.columns:
                collected.extend(data[column].dropna().unique().tolist())

        # Remove duplicates and sort
        airports = sorted(set(collected))

        # Fallback to airports_mapping if names not available
        if not airports and airports_mapping:
            airports = sorted(set(airports_mapping.values()))

        # Final fallback to codes if nothing else works
        if not airports:
            origin_codes = sorted(data['ORIGIN_AIRPORT'].dropna().unique().tolist())
            dest_codes = sorted(data['DESTINATION_AIRPORT'].dropna().unique().tolist())
            airports = sorted(set(origin_codes + dest_codes))

        return jsonify({'airports': airports})
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/chart-data/hourly')
def chart_data_hourly():
    """Return delay percentage per departure hour with 12-hour clock labels.

    Hours absent from the dataset are reported as 0.
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        delays_by_hour = data.groupby('DEPARTURE_HOUR')['IS_DELAYED'].mean() * 100

        # Build labels '12:00 AM', '1:00 AM', ..., '12:00 PM', ..., '11:00 PM'.
        hour_labels = []
        for hour in range(24):
            meridiem = 'AM' if hour < 12 else 'PM'
            clock_hour = hour % 12 or 12  # 0 and 12 both display as 12
            hour_labels.append(f'{clock_hour}:00 {meridiem}')

        hourly_values = [delays_by_hour.get(hour, 0) for hour in range(24)]
        return jsonify({
            'labels': hour_labels,
            'data': hourly_values
        })
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/chart-data/seasonal')
def chart_data_seasonal():
    """Return delay percentage per season in a fixed Spring-to-Winter order.

    Seasons absent from the dataset are reported as 0.0.
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        delays_by_season = data.groupby('SEASON')['IS_DELAYED'].mean() * 100

        # Fixed order keeps the chart stable regardless of the data.
        season_order = ['Spring', 'Summer', 'Fall', 'Winter']
        seasonal_data = [
            float(delays_by_season[name]) if name in delays_by_season else 0.0
            for name in season_order
        ]

        return jsonify({
            'labels': season_order,
            'data': seasonal_data
        })
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/chart-data/feature-importance')
def chart_data_feature_importance():
    """Return demo feature-importance values with small random variation.

    Each fixed base importance is scaled by ``1 + N(0, 0.05)`` and clamped
    to the range 0.05-0.30. The random generator is seeded from the current
    timestamp (whole seconds), so requests within the same second return the
    same values.
    """
    try:
        # Dynamic feature importance with readable labels
        base_importance = {
            'Departure Hour': 0.25,
            'Flight Distance': 0.20,
            'Month of Year': 0.18,
            'Day of Week': 0.15,
            'Airline': 0.12,
            'Origin Airport': 0.10
        }

        # Use a private generator instead of reseeding the process-wide
        # NumPy RNG, which would also alter the randomness of every other
        # endpoint. RandomState with the same seed yields the same numbers.
        rng = np.random.RandomState(int(datetime.now().timestamp()))

        dynamic_importance = {
            feature: max(0.05, min(0.30, importance * (1 + rng.normal(0, 0.05))))
            for feature, importance in base_importance.items()
        }

        return jsonify({
            'labels': list(dynamic_importance.keys()),
            'data': list(dynamic_importance.values())
        })
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/chart-data/prediction-trends')
def chart_data_prediction_trends():
    """Return ten simulated prediction values labelled 'Day 1' to 'Day 10'.

    The values are random integers from ``np.random.randint(15, 85)``; they
    are not derived from the dataset, which only has to be loaded.
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        day_count = 10
        # One random sample per day, drawn in day order.
        predictions = [np.random.randint(15, 85) for _ in range(day_count)]
        labels = [f'Day {day}' for day in range(1, day_count + 1)]

        return jsonify({
            'labels': labels,
            'data': predictions
        })
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/chart-data/airline-performance')
def chart_data_airline_performance():
    """Return the ten airlines with the highest delay percentage.

    Rates are grouped by ``AIRLINE_NAME`` when that column exists, otherwise
    by ``AIRLINE``; keys are translated through ``airlines_mapping`` when it
    is non-empty (unknown keys are kept as they are).
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        # Delay percentage per airline
        group_column = 'AIRLINE_NAME' if 'AIRLINE_NAME' in data.columns else 'AIRLINE'
        airline_delay_rates = data.groupby(group_column)['IS_DELAYED'].mean() * 100

        # Translate codes to full names where the mapping knows them.
        if airlines_mapping:
            airline_delay_rates = {
                airlines_mapping.get(code, code): float(rate)
                for code, rate in airline_delay_rates.items()
            }

        # Highest delay rate first, limited to ten entries for the chart.
        ranked = sorted(airline_delay_rates.items(), key=lambda pair: pair[1], reverse=True)
        top_ten = ranked[:10]

        # Plain Python types avoid JSON serialization issues.
        labels = [str(name) for name, _ in top_ten]
        values = [float(rate) for _, rate in top_ten]

        return jsonify({
            'labels': labels,
            'data': values
        })
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/chart-data/airport-performance')
def chart_data_airport_performance():
    """Return flight counts and delay percentages for the busiest origins.

    The ten origin airports with the most flights are selected; both the
    counts and the delay rates are keyed by airport name (or code), after
    translation through ``airports_mapping`` when it is non-empty.
    """
    try:
        if data is None:
            return jsonify({'error': 'Data not available'})

        # Prefer the name column; otherwise work with the raw origin column.
        if 'ORIGIN_AIRPORT_NAME' in data.columns:
            origin_column = 'ORIGIN_AIRPORT_NAME'
        else:
            origin_column = 'ORIGIN_AIRPORT'
        airport_delay_rates = data.groupby(origin_column)['IS_DELAYED'].mean() * 100
        top_airports = data[origin_column].value_counts().head(10)

        # Translate codes to full names where the mapping knows them.
        if airports_mapping:
            airport_delay_rates = {
                airports_mapping.get(code, code): float(rate)
                for code, rate in airport_delay_rates.items()
            }
            top_airports = {
                airports_mapping.get(code, code): int(count)
                for code, count in top_airports.items()
            }

        # Busiest airports first, at most ten.
        by_volume = sorted(top_airports.items(), key=lambda pair: pair[1], reverse=True)
        top_10_airports = dict(by_volume[:10])

        # Keep delay rates only for those airports.
        filtered_delay_rates = {
            airport: airport_delay_rates[airport]
            for airport in top_10_airports
            if airport in airport_delay_rates
        }

        # Plain Python types avoid JSON serialization issues.
        delay_rates_dict = {str(name): float(rate) for name, rate in filtered_delay_rates.items()}
        top_airports_dict = {str(name): int(count) for name, count in top_10_airports.items()}

        return jsonify({
            'delay_rates': delay_rates_dict,
            'top_airports': top_airports_dict
        })
    except Exception as e:
        return jsonify({'error': str(e)})
@app.route('/api/flight-visualization', methods=['POST'])
def flight_visualization():
    """Return simulated per-phase confidence and risk data for one flight.

    Reads ``airline``, ``origin``, ``destination``, ``probability`` and
    ``season`` from the posted form (each with a default). The phase
    confidences and risk values are randomly generated.
    """
    try:
        form = request.form
        airline = form.get('airline', 'Unknown Airline')
        origin = form.get('origin', 'Unknown Origin')
        destination = form.get('destination', 'Unknown Destination')
        probability = float(form.get('probability', 50))
        season = form.get('season', 'Spring')

        flight_phases = ['Pre-Flight', 'Boarding', 'Takeoff', 'Cruise', 'Descent', 'Landing', 'Post-Flight']

        # The first three phases report the base confidence unchanged;
        # later phases get Gaussian noise before clamping to [0, 1].
        base_confidence = probability / 100
        phase_confidences = []
        for position in range(len(flight_phases)):
            noise = np.random.normal(0, 0.1) if position > 2 else 0
            clamped = max(0, min(1, base_confidence + noise))
            phase_confidences.append(round(clamped * 100, 1))

        # Randomly generated risk factors, drawn in the order listed.
        risk_ranges = [
            ('Weather Risk', 10, 40),
            ('Traffic Congestion', 5, 25),
            ('Airport Delay History', 15, 35),
            ('Airline Performance', 8, 20),
            ('Time of Day', 5, 30),
        ]
        risk_factors = {}
        for label, low, high in risk_ranges:
            risk_factors[label] = round(np.random.uniform(low, high), 1)

        return jsonify({
            'flight_phases': flight_phases,
            'phase_confidences': phase_confidences,
            'risk_factors': list(risk_factors.keys()),
            'risk_values': list(risk_factors.values()),
            'overall_probability': probability,
            'season': season,
            'airline': airline,
            'origin': origin,
            'destination': destination
        })
    except Exception as e:
        return jsonify({'error': str(e)})
if __name__ == '__main__':
    # Load the dataset and model, then serve on all interfaces.
    load_model_and_data()
    # The PORT environment variable overrides the default port 7860.
    listen_port = int(os.getenv("PORT", 7860))
    app.run(host='0.0.0.0', port=listen_port)