Spaces:

mimo1972
/

Fight_Price_Prediction

Sleeping

App Files Files Community

mimo1972 committed on Dec 31, 2025

Commit

40ecbe3

verified ·

1 Parent(s): a5dcfcb

Upload 5 files

Browse files

Files changed (5) hide show

AirFlights_HistBoost_model.pkl +3 -0
flightprice.py +163 -0
requirements.txt +0 -0
x_test.parquet +3 -0
y_test.parquet +3 -0

AirFlights_HistBoost_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf6829db3eaa9a0d9836b2dd9147a18fb022feee15ed4114dbfe5689ad4c7c1
+size 1407938

flightprice.py ADDED Viewed

	@@ -0,0 +1,163 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
+# Set Page Config
+# Sets the page title to "Flight Price Predictor" and selects the "wide"
+# layout for the app.
+st.set_page_config(page_title="Flight Price Predictor", layout="wide")
+# --- 1. Helper Functions ---
+@st.cache_data
+def load_data():
+    """Read the held-out feature and target tables from disk.
+
+    The features populate the form dropdowns and both tables feed the
+    evaluation page. File names are relative to the working directory.
+
+    Returns:
+        tuple: ``(x_test, y_test)`` as read from ``x_test.parquet`` and
+        ``y_test.parquet``.
+    """
+    features, target = (
+        pd.read_parquet(parquet_file)
+        for parquet_file in ('x_test.parquet', 'y_test.parquet')
+    )
+    return features, target
+@st.cache_resource
+def load_model():
+    """Deserialize the fitted estimator stored in the model pickle.
+
+    Returns:
+        The object persisted in ``AirFlights_HistBoost_model.pkl``
+        (looked up relative to the working directory).
+    """
+    estimator = joblib.load('AirFlights_HistBoost_model.pkl')
+    return estimator
+# Load Data and Model
+try:
+    x_test, y_test = load_data()
+    model = load_model()
+    # Downstream metrics and plots want a 1-D target: use the first column
+    # when the target arrived as a DataFrame, otherwise keep it untouched.
+    y_test_series = y_test.iloc[:, 0] if isinstance(y_test, pd.DataFrame) else y_test
+except Exception as e:
+    # Nothing below can work without the data and model, so report the
+    # problem in the UI and halt this script run.
+    st.error(f"Error loading files: {e}")
+    st.stop()
+# --- 2. Sidebar Navigation ---
+# st.sidebar.radio returns the selected option exactly as listed, so the page
+# branches below must compare against these same strings. Keeping the labels
+# in constants guarantees the widget and the branches cannot drift apart
+# (previously the branches tested shorter strings and no page ever rendered).
+# NOTE(review): the "qh" prefix looks like a mangled emoji - confirm the
+# intended label before changing it.
+PAGE_PREDICT = "✈️ Predict Price"
+PAGE_EVALUATION = "qh Model Evaluation"
+st.sidebar.title("Navigation")
+page = st.sidebar.radio("Go to", [PAGE_PREDICT, PAGE_EVALUATION])
+# --- PAGE 1: PREDICT PRICE ---
+if page == PAGE_PREDICT:
+    st.title("Flight Price Prediction")
+    st.markdown("Enter the flight details below to get an estimated price.")
+    # Create a form for user input
+    with st.form("prediction_form"):
+        col1, col2, col3 = st.columns(3)
+        # Dropdown options are taken from the values present in x_test so the
+        # user can only pick categories that occur in the test features.
+        with col1:
+            airline = st.selectbox("Airline", sorted(x_test['Airline'].unique()))
+            source = st.selectbox("Source", sorted(x_test['Source'].unique()))
+            destination = st.selectbox("Destination", sorted(x_test['Destination'].unique()))
+        with col2:
+            # Categorical Time Features
+            month = st.selectbox("Month", x_test['Month'].unique())
+            day = st.selectbox("Day", x_test['Day'].unique()) # e.g. Weekday or Day of Month
+            dept_quarter = st.selectbox("Departure Time of Day", x_test['Dept_Day_Quarter'].unique())
+        with col3:
+            # Numerical Features
+            stops = st.number_input("Total Stops", min_value=0, max_value=4, step=1, value=0)
+            duration = st.number_input("Duration (minutes)", min_value=30, max_value=3000, step=15, value=120)
+        submitted = st.form_submit_button("Predict Price")
+    if submitted:
+        # 1. Prepare Input Data (a single-row frame keyed by feature name)
+        input_data = pd.DataFrame({
+            'Airline': [airline],
+            'Source': [source],
+            'Destination': [destination],
+            'Total_Stops': [stops],
+            'Duration_minutes': [duration],
+            'Day': [day],
+            'Month': [month],
+            'Dept_Day_Quarter': [dept_quarter]
+        })
+        # Ensure columns are in the exact same order as x_test
+        input_data = input_data[x_test.columns]
+        # 2. Predict (Model returns Log Price)
+        log_prediction = model.predict(input_data)[0]
+        # 3. Inverse Transform (Log -> Real Price); expm1 undoes a log1p target
+        real_price = np.expm1(log_prediction)
+        # 4. Display Result
+        st.success(f"Estimated Ticket Price: ₹ {real_price:,.2f}")
+        # Debug info (optional)
+        with st.expander("See processed input"):
+            st.write(input_data)
+# --- PAGE 2: MODEL EVALUATION ---
+elif page == PAGE_EVALUATION:
+    st.title("Model Performance Report")
+    st.write("Evaluating the model on `x_test.parquet` and `y_test.parquet`.")
+    if st.button("Run Evaluation"):
+        with st.spinner("Calculating predictions..."):
+            # 1. Predict on Test Set
+            y_pred_log = model.predict(x_test)
+            # Work with a plain array for the target: predictions are
+            # positional, so every comparison below (metrics, residuals,
+            # results table) must pair rows by position, not by index label.
+            y_test_log = np.asarray(y_test_series)
+            # 2. Convert to Real Prices
+            y_pred_real = np.expm1(y_pred_log)
+            y_test_real = np.expm1(y_test_log)
+            # 3. Metrics
+            r2 = r2_score(y_test_log, y_pred_log) # R2 on Log scale (Model Metric)
+            r2_real = r2_score(y_test_real, y_pred_real) # R2 on Real scale (Business Metric)
+            mae = mean_absolute_error(y_test_real, y_pred_real)
+            rmse = np.sqrt(mean_squared_error(y_test_real, y_pred_real))
+        # --- Display Metrics ---
+        col1, col2, col3, col4 = st.columns(4)
+        col1.metric("R2 Score (Log)", f"{r2:.4f}")
+        col2.metric("R2 Score (Real)", f"{r2_real:.4f}")
+        col3.metric("MAE (Error)", f"₹ {mae:.0f}")
+        col4.metric("RMSE (Error)", f"₹ {rmse:.0f}")
+        st.markdown("---")
+        # --- Graphs ---
+        tab1, tab2 = st.tabs(["Actual vs Predicted", "Residuals Distribution"])
+        with tab1:
+            st.subheader("Actual Prices vs Predicted Prices")
+            fig, ax = plt.subplots(figsize=(10, 6))
+            sns.scatterplot(x=y_test_real, y=y_pred_real, alpha=0.5, color="blue", ax=ax)
+            # Perfect prediction line (y = x across the combined value range)
+            min_val = min(y_test_real.min(), y_pred_real.min())
+            max_val = max(y_test_real.max(), y_pred_real.max())
+            ax.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2, label="Perfect Prediction")
+            ax.set_xlabel("Actual Price")
+            ax.set_ylabel("Predicted Price")
+            ax.legend()
+            st.pyplot(fig)
+            # pyplot keeps every figure it creates alive until closed; release
+            # it so repeated reruns of the script do not accumulate figures.
+            plt.close(fig)
+        with tab2:
+            st.subheader("Residuals (Error) Distribution")
+            residuals = y_test_real - y_pred_real
+            fig, ax = plt.subplots(figsize=(10, 6))
+            sns.histplot(residuals, kde=True, color="purple", ax=ax)
+            ax.set_xlabel("Error (Actual - Predicted)")
+            ax.set_title("Are the errors centered around 0?")
+            st.pyplot(fig)
+            plt.close(fig)
+        # --- Data Table ---
+        st.markdown("---")
+        st.subheader("Detailed Test Data & Predictions")
+        results_df = x_test.copy()
+        # Both columns are arrays, so they are attached row-by-row in order
+        results_df['Actual_Price'] = y_test_real
+        results_df['Predicted_Price'] = y_pred_real
+        results_df['Difference'] = results_df['Actual_Price'] - results_df['Predicted_Price']
+        st.dataframe(results_df.head(100))

requirements.txt ADDED Viewed

Binary file (3.23 kB). View file

x_test.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14b2378d0cc0c08968a3ca37404afcb40de80a12b698a41e4cb128c0037aa2e6
+size 25918

y_test.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a8ed09322ef92c2d46b031a9f18c0202b5bef664652be589e4c5da5414c8c0f
+size 22385