Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files- AirFlights_HistBoost_model.pkl +3 -0
- flightprice.py +163 -0
- requirements.txt +0 -0
- x_test.parquet +3 -0
- y_test.parquet +3 -0
AirFlights_HistBoost_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cf6829db3eaa9a0d9836b2dd9147a18fb022feee15ed4114dbfe5689ad4c7c1
|
| 3 |
+
size 1407938
|
flightprice.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import joblib
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import seaborn as sns
|
| 7 |
+
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
|
| 8 |
+
|
| 9 |
+
# Page configuration: sets the page title to "Flight Price Predictor" and
# selects the "wide" layout.
st.set_page_config(page_title="Flight Price Predictor", layout="wide")
|
| 11 |
+
|
| 12 |
+
# --- 1. Helper Functions ---
|
| 13 |
+
@st.cache_data
def load_data():
    """Read the test feature and target tables from their parquet files.

    Returns:
        A ``(x_test, y_test)`` tuple holding the contents of
        ``x_test.parquet`` and ``y_test.parquet`` (paths are relative, so
        they resolve against the current working directory). The rest of the
        app uses the features to fill the dropdowns and both tables for the
        evaluation page.
    """
    # Features are read first, then the target, one file per table.
    features, target = (
        pd.read_parquet(path) for path in ('x_test.parquet', 'y_test.parquet')
    )
    return features, target
|
| 19 |
+
|
| 20 |
+
@st.cache_resource
def load_model():
    """Deserialize and return the model stored in the bundled pickle file.

    The file name suggests a HistGradientBoosting estimator; that is not
    checked here.

    NOTE(review): ``joblib.load`` unpickles the file, and unpickling can run
    arbitrary code -- only a trusted artifact should be shipped at this path.
    """
    model_file = 'AirFlights_HistBoost_model.pkl'
    return joblib.load(model_file)
|
| 24 |
+
|
| 25 |
+
# Load the test tables and the model once at startup. Any failure is shown
# in the page and the script run is stopped, since nothing below can work
# without these objects.
try:
    x_test, y_test = load_data()
    model = load_model()
    # The metrics and plots need a 1-D target: use the first column when the
    # target is a DataFrame, otherwise take the object as it is.
    y_test_series = y_test.iloc[:, 0] if isinstance(y_test, pd.DataFrame) else y_test
except Exception as e:
    st.error(f"Error loading files: {e}")
    st.stop()
|
| 37 |
+
|
| 38 |
+
# --- 2. Sidebar Navigation ---
# The page branches further down compare `page` against the plain names
# "Predict Price" and "Model Evaluation". The radio therefore has to RETURN
# those plain names; returning the decorated labels makes every comparison
# fail and leaves the app with an empty main area. The decoration is applied
# for display only, through `format_func`.
# NOTE(review): the second label's prefix was the literal text "qh", which
# looks like a garbled icon; the chart emoji is a stand-in -- confirm.
PAGE_LABELS = {
    "Predict Price": "✈️ Predict Price",
    "Model Evaluation": "📊 Model Evaluation",
}

st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", list(PAGE_LABELS), format_func=PAGE_LABELS.get)
|
| 41 |
+
|
| 42 |
+
# --- PAGE 1: PREDICT PRICE ---
if page == "Predict Price":
    st.title("Flight Price Prediction")
    st.markdown("Enter the flight details below to get an estimated price.")

    # All inputs are grouped in one form and laid out in three columns.
    with st.form("prediction_form"):
        left, middle, right = st.columns(3)

        # Dropdown options are the distinct values found in x_test, so every
        # selectable value is one that occurs in the test features.
        airline = left.selectbox("Airline", sorted(x_test['Airline'].unique()))
        source = left.selectbox("Source", sorted(x_test['Source'].unique()))
        destination = left.selectbox("Destination", sorted(x_test['Destination'].unique()))

        # Time-related categorical features (options left unsorted).
        month = middle.selectbox("Month", x_test['Month'].unique())
        # NOTE(review): unclear whether 'Day' is a weekday or a day of month.
        day = middle.selectbox("Day", x_test['Day'].unique())
        dept_quarter = middle.selectbox("Departure Time of Day", x_test['Dept_Day_Quarter'].unique())

        # Numeric features.
        stops = right.number_input("Total Stops", min_value=0, max_value=4, step=1, value=0)
        duration = right.number_input("Duration (minutes)", min_value=30, max_value=3000, step=15, value=120)

        submitted = st.form_submit_button("Predict Price")

    if submitted:
        # One-row frame keyed by feature name.
        form_values = {
            'Airline': [airline],
            'Source': [source],
            'Destination': [destination],
            'Total_Stops': [stops],
            'Duration_minutes': [duration],
            'Day': [day],
            'Month': [month],
            'Dept_Day_Quarter': [dept_quarter],
        }
        # Selecting by x_test.columns puts the columns in the same order as
        # the test features (and raises KeyError if x_test has a column that
        # is not listed above).
        input_data = pd.DataFrame(form_values)[x_test.columns]

        # expm1 is the inverse of log1p -- presumably the model was trained
        # on log1p(price); confirm against the training code.
        real_price = np.expm1(model.predict(input_data)[0])

        st.success(f"Estimated Ticket Price: ₹ {real_price:,.2f}")

        # Optional view of the exact row that was sent to the model.
        with st.expander("See processed input"):
            st.write(input_data)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# --- PAGE 2: MODEL EVALUATION ---
|
| 103 |
+
elif page == "Model Evaluation":
|
| 104 |
+
st.title("Model Performance Report")
|
| 105 |
+
st.write("Evaluating the model on `x_test.parquet` and `y_test.parquet`.")
|
| 106 |
+
|
| 107 |
+
if st.button("Run Evaluation"):
|
| 108 |
+
with st.spinner("Calculating predictions..."):
|
| 109 |
+
# 1. Predict on Test Set
|
| 110 |
+
y_pred_log = model.predict(x_test)
|
| 111 |
+
|
| 112 |
+
# 2. Convert to Real Prices
|
| 113 |
+
y_pred_real = np.expm1(y_pred_log)
|
| 114 |
+
y_test_real = np.expm1(y_test_series)
|
| 115 |
+
|
| 116 |
+
# 3. Metrics
|
| 117 |
+
r2 = r2_score(y_test_series, y_pred_log) # R2 on Log scale (Model Metric)
|
| 118 |
+
r2_real = r2_score(y_test_real, y_pred_real) # R2 on Real scale (Business Metric)
|
| 119 |
+
mae = mean_absolute_error(y_test_real, y_pred_real)
|
| 120 |
+
rmse = np.sqrt(mean_squared_error(y_test_real, y_pred_real))
|
| 121 |
+
|
| 122 |
+
# --- Display Metrics ---
|
| 123 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 124 |
+
col1.metric("R2 Score (Log)", f"{r2:.4f}")
|
| 125 |
+
col2.metric("R2 Score (Real)", f"{r2_real:.4f}")
|
| 126 |
+
col3.metric("MAE (Error)", f"₹ {mae:.0f}")
|
| 127 |
+
col4.metric("RMSE (Error)", f"₹ {rmse:.0f}")
|
| 128 |
+
|
| 129 |
+
st.markdown("---")
|
| 130 |
+
|
| 131 |
+
# --- Graphs ---
|
| 132 |
+
tab1, tab2 = st.tabs(["Actual vs Predicted", "Residuals Distribution"])
|
| 133 |
+
|
| 134 |
+
with tab1:
|
| 135 |
+
st.subheader("Actual Prices vs Predicted Prices")
|
| 136 |
+
fig, ax = plt.subplots(figsize=(10, 6))
|
| 137 |
+
sns.scatterplot(x=y_test_real, y=y_pred_real, alpha=0.5, color="blue", ax=ax)
|
| 138 |
+
# Perfect prediction line
|
| 139 |
+
min_val = min(y_test_real.min(), y_pred_real.min())
|
| 140 |
+
max_val = max(y_test_real.max(), y_pred_real.max())
|
| 141 |
+
ax.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2, label="Perfect Prediction")
|
| 142 |
+
ax.set_xlabel("Actual Price")
|
| 143 |
+
ax.set_ylabel("Predicted Price")
|
| 144 |
+
ax.legend()
|
| 145 |
+
st.pyplot(fig)
|
| 146 |
+
|
| 147 |
+
with tab2:
|
| 148 |
+
st.subheader("Residuals (Error) Distribution")
|
| 149 |
+
residuals = y_test_real - y_pred_real
|
| 150 |
+
fig, ax = plt.subplots(figsize=(10, 6))
|
| 151 |
+
sns.histplot(residuals, kde=True, color="purple", ax=ax)
|
| 152 |
+
ax.set_xlabel("Error (Actual - Predicted)")
|
| 153 |
+
ax.set_title("Are the errors centered around 0?")
|
| 154 |
+
st.pyplot(fig)
|
| 155 |
+
|
| 156 |
+
# --- Data Table ---
|
| 157 |
+
st.markdown("---")
|
| 158 |
+
st.subheader("Detailed Test Data & Predictions")
|
| 159 |
+
results_df = x_test.copy()
|
| 160 |
+
results_df['Actual_Price'] = y_test_real
|
| 161 |
+
results_df['Predicted_Price'] = y_pred_real
|
| 162 |
+
results_df['Difference'] = results_df['Actual_Price'] - results_df['Predicted_Price']
|
| 163 |
+
st.dataframe(results_df.head(100))
|
requirements.txt
ADDED
|
Binary file (3.23 kB). View file
|
|
|
x_test.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14b2378d0cc0c08968a3ca37404afcb40de80a12b698a41e4cb128c0037aa2e6
|
| 3 |
+
size 25918
|
y_test.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a8ed09322ef92c2d46b031a9f18c0202b5bef664652be589e4c5da5414c8c0f
|
| 3 |
+
size 22385
|