mimo1972 committed on
Commit
40ecbe3
·
verified ·
1 Parent(s): a5dcfcb

Upload 5 files

Browse files
AirFlights_HistBoost_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf6829db3eaa9a0d9836b2dd9147a18fb022feee15ed4114dbfe5689ad4c7c1
3
+ size 1407938
flightprice.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
8
+
9
+ # Set Page Config
10
+ st.set_page_config(page_title="Flight Price Predictor", layout="wide")
11
+
12
+ # --- 1. Helper Functions ---
13
@st.cache_data
def load_data():
    """Read the held-out test split from disk.

    The feature frame supplies the dropdown choices shown in the UI, and
    both frames are used on the evaluation page.

    Returns:
        A ``(x_test, y_test)`` tuple holding the contents of
        ``x_test.parquet`` and ``y_test.parquet``; both paths are relative,
        so they are resolved against the current working directory.
    """
    parquet_files = ('x_test.parquet', 'y_test.parquet')
    # Order matters: features first, target second.
    return tuple(pd.read_parquet(path) for path in parquet_files)
19
+
20
@st.cache_resource
def load_model():
    """Load the serialized estimator from ``AirFlights_HistBoost_model.pkl``.

    The file name suggests a HistGradientBoosting model; the loaded object
    is returned exactly as ``joblib.load`` produces it.

    NOTE: joblib files are pickle-based, so only load artifacts that come
    from a trusted source.
    """
    model_path = 'AirFlights_HistBoost_model.pkl'
    return joblib.load(model_path)
24
+
25
# Load the test split and the model up front; if anything fails, show the
# error in the app and stop this script run instead of continuing half-loaded.
try:
    x_test, y_test = load_data()
    model = load_model()
    # The target must be one-dimensional: take the first column when it was
    # read as a DataFrame, otherwise use the object unchanged.
    y_test_series = y_test.iloc[:, 0] if isinstance(y_test, pd.DataFrame) else y_test
except Exception as exc:
    st.error(f"Error loading files: {exc}")
    st.stop()
37
+
38
# --- 2. Sidebar Navigation ---
# st.sidebar.radio returns the selected option verbatim, so each label is
# defined once and reused in the dispatch below. Comparing against a
# differently spelled literal would leave both pages blank.
PAGE_PREDICT = "✈️ Predict Price"
PAGE_EVALUATE = "📊 Model Evaluation"

st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", [PAGE_PREDICT, PAGE_EVALUATE])

# --- PAGE 1: PREDICT PRICE ---
if page == PAGE_PREDICT:
    st.title("Flight Price Prediction")
    st.markdown("Enter the flight details below to get an estimated price.")

    # Create a form for user input
    with st.form("prediction_form"):
        col1, col2, col3 = st.columns(3)

        # Dropdown choices are taken from the values present in x_test so the
        # user can only submit categories that appear in the data.
        with col1:
            airline = st.selectbox("Airline", sorted(x_test['Airline'].unique()))
            source = st.selectbox("Source", sorted(x_test['Source'].unique()))
            destination = st.selectbox("Destination", sorted(x_test['Destination'].unique()))

        with col2:
            # Categorical time features
            month = st.selectbox("Month", x_test['Month'].unique())
            day = st.selectbox("Day", x_test['Day'].unique())  # e.g. Weekday or Day of Month
            dept_quarter = st.selectbox("Departure Time of Day", x_test['Dept_Day_Quarter'].unique())

        with col3:
            # Numerical features
            stops = st.number_input("Total Stops", min_value=0, max_value=4, step=1, value=0)
            duration = st.number_input("Duration (minutes)", min_value=30, max_value=3000, step=15, value=120)

        submitted = st.form_submit_button("Predict Price")

    if submitted:
        # 1. Build a single-row frame from the form values.
        input_data = pd.DataFrame({
            'Airline': [airline],
            'Source': [source],
            'Destination': [destination],
            'Total_Stops': [stops],
            'Duration_minutes': [duration],
            'Day': [day],
            'Month': [month],
            'Dept_Day_Quarter': [dept_quarter],
        })

        # Reorder the columns to match x_test exactly.
        input_data = input_data[x_test.columns]

        # 2. Predict (the model output is treated as a log1p-scaled price).
        log_prediction = model.predict(input_data)[0]

        # 3. Inverse transform (log -> real price).
        real_price = np.expm1(log_prediction)

        # 4. Display result.
        st.success(f"Estimated Ticket Price: ₹ {real_price:,.2f}")

        # Debug info (optional)
        with st.expander("See processed input"):
            st.write(input_data)

# --- PAGE 2: MODEL EVALUATION ---
elif page == PAGE_EVALUATE:
    st.title("Model Performance Report")
    st.write("Evaluating the model on `x_test.parquet` and `y_test.parquet`.")

    if st.button("Run Evaluation"):
        with st.spinner("Calculating predictions..."):
            # 1. Predict on the test set (log scale).
            y_pred_log = model.predict(x_test)

            # 2. Convert predictions and targets back to real prices.
            y_pred_real = np.expm1(y_pred_log)
            y_test_real = np.expm1(y_test_series)

            # 3. Metrics
            r2 = r2_score(y_test_series, y_pred_log)  # R2 on log scale (model metric)
            r2_real = r2_score(y_test_real, y_pred_real)  # R2 on real scale (business metric)
            mae = mean_absolute_error(y_test_real, y_pred_real)
            rmse = np.sqrt(mean_squared_error(y_test_real, y_pred_real))

        # --- Display Metrics ---
        col1, col2, col3, col4 = st.columns(4)
        col1.metric("R2 Score (Log)", f"{r2:.4f}")
        col2.metric("R2 Score (Real)", f"{r2_real:.4f}")
        col3.metric("MAE (Error)", f"₹ {mae:.0f}")
        col4.metric("RMSE (Error)", f"₹ {rmse:.0f}")

        st.markdown("---")

        # --- Graphs ---
        tab1, tab2 = st.tabs(["Actual vs Predicted", "Residuals Distribution"])

        with tab1:
            st.subheader("Actual Prices vs Predicted Prices")
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.scatterplot(x=y_test_real, y=y_pred_real, alpha=0.5, color="blue", ax=ax)
            # Perfect prediction line
            min_val = min(y_test_real.min(), y_pred_real.min())
            max_val = max(y_test_real.max(), y_pred_real.max())
            ax.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2, label="Perfect Prediction")
            ax.set_xlabel("Actual Price")
            ax.set_ylabel("Predicted Price")
            ax.legend()
            st.pyplot(fig)
            # Release the figure; pyplot otherwise keeps it alive across reruns.
            plt.close(fig)

        with tab2:
            st.subheader("Residuals (Error) Distribution")
            residuals = y_test_real - y_pred_real
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.histplot(residuals, kde=True, color="purple", ax=ax)
            ax.set_xlabel("Error (Actual - Predicted)")
            ax.set_title("Are the errors centered around 0?")
            st.pyplot(fig)
            # Release the figure; pyplot otherwise keeps it alive across reruns.
            plt.close(fig)

        # --- Data Table ---
        st.markdown("---")
        st.subheader("Detailed Test Data & Predictions")
        results_df = x_test.copy()
        results_df['Actual_Price'] = y_test_real
        results_df['Predicted_Price'] = y_pred_real
        results_df['Difference'] = results_df['Actual_Price'] - results_df['Predicted_Price']
        # Only the first 100 rows are shown.
        st.dataframe(results_df.head(100))
requirements.txt ADDED
Binary file (3.23 kB). View file
 
x_test.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14b2378d0cc0c08968a3ca37404afcb40de80a12b698a41e4cb128c0037aa2e6
3
+ size 25918
y_test.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a8ed09322ef92c2d46b031a9f18c0202b5bef664652be589e4c5da5414c8c0f
3
+ size 22385