Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,7 @@ import torch
|
|
| 14 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 15 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 16 |
import re
|
|
|
|
| 17 |
|
| 18 |
# Dataset loading function with caching
|
| 19 |
@st.cache_data
|
|
@@ -113,19 +114,43 @@ def load_model_and_encodings():
|
|
| 113 |
st.error(f"Error loading model: {str(e)}")
|
| 114 |
raise e
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
def predict_price(model, encoders, categorical_features, user_input):
    """Encode one vehicle description and return the model's predicted price.

    Args:
        model: Fitted estimator exposing ``predict(DataFrame)``.
        encoders: Mapping of lower-cased feature name to a fitted encoder
            exposing ``transform([value])``.
        categorical_features: Names of the categorical features; compared
            case-insensitively against the keys of ``user_input``.
        user_input: Mapping of feature name to raw value.

    Returns:
        The first element of ``model.predict`` for the single-row frame.
    """
    # Lower-case once so this check agrees with the (already lower-cased)
    # encoder lookup below instead of depending on the caller's casing.
    categorical_names = {str(name).lower() for name in categorical_features}

    encoded_features = {}
    for feature, value in user_input.items():
        key = feature.lower()
        if key in encoders:
            try:
                encoded_features[feature] = encoders[key].transform([value])[0]
            except ValueError:
                # The encoder never saw this label (e.g. the caller's
                # 'unknown' placeholder). Fall back to the same code used
                # for categorical features that have no encoder.
                encoded_features[feature] = 0
        elif key in categorical_names:
            # If it's a categorical feature but not in encoders, set to 0 (unknown)
            encoded_features[feature] = 0
        else:
            # For numerical features, use the value as is
            encoded_features[feature] = value

    input_data = pd.DataFrame([encoded_features])
    predicted_price = model.predict(input_data)
    return predicted_price[0]
|
| 131 |
|
|
@@ -201,6 +226,7 @@ if image is not None:
|
|
| 201 |
user_input = {
|
| 202 |
'make': match['Make'],
|
| 203 |
'model': match['Model'],
|
|
|
|
| 204 |
'condition': match.get('Condition', 'unknown'),
|
| 205 |
'fuel': match.get('Fuel', 'unknown'),
|
| 206 |
'title_status': match.get('Title_status', 'unknown'),
|
|
@@ -209,7 +235,6 @@ if image is not None:
|
|
| 209 |
'size': match.get('Size', 'unknown'),
|
| 210 |
'type': match.get('Type', 'unknown'),
|
| 211 |
'paint_color': match.get('Paint_color', 'unknown'),
|
| 212 |
-
'year': year
|
| 213 |
}
|
| 214 |
|
| 215 |
price = predict_price(model, label_encoders, categorical_features, user_input)
|
|
|
|
| 14 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 15 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 16 |
import re
|
| 17 |
+
from datetime import datetime
|
| 18 |
|
| 19 |
# Dataset loading function with caching
|
| 20 |
@st.cache_data
|
|
|
|
| 114 |
st.error(f"Error loading model: {str(e)}")
|
| 115 |
raise e
|
| 116 |
|
| 117 |
+
def calculate_age(year, current_year=None):
    """Return the vehicle age in years for a given model year.

    Args:
        year: Model year of the vehicle (numeric).
        current_year: Reference year to measure against. Defaults to the
            current calendar year, so existing one-argument calls behave as
            before; pass it explicitly for reproducible results.

    Returns:
        ``current_year - year``. The result is negative when ``year`` lies
        after the reference year.
    """
    if current_year is None:
        current_year = datetime.now().year
    return current_year - year


def predict_price(model, encoders, categorical_features, user_input):
    """Build the model's feature row from ``user_input`` and predict a price.

    Feature names are capitalised (``'make'`` -> ``'Make'``) to form column
    names. Age-based features are derived from the ``year`` entry, and the
    frame is laid out in the ``expected_columns`` order before prediction.

    Args:
        model: Fitted estimator exposing ``predict(DataFrame)``.
        encoders: Mapping of lower-cased feature name to a fitted encoder
            exposing ``transform([value])``.
        categorical_features: Names of the categorical features; compared
            case-insensitively against the keys of ``user_input``.
        user_input: Mapping of feature name to raw value. Must contain a
            ``year`` entry. ``odometer`` / ``mileage_per_year`` entries, when
            supplied, are used instead of the built-in estimates.

    Returns:
        The first element of ``model.predict`` for the single-row frame.

    Raises:
        KeyError: If ``user_input`` has no ``year`` entry.
    """
    # NOTE(review): presumably the column order used when the model was
    # trained -- confirm against the training pipeline.
    expected_columns = ['Make', 'Model', 'Year', 'Condition', 'Fuel', 'Odometer', 'Title_status', 'Transmission', 'Drive', 'Size', 'Type', 'Paint_color', 'Age', 'Age_squared', 'Mileage_per_year']

    # Lower-case once so this check agrees with the (already lower-cased)
    # encoder lookup below instead of depending on the caller's casing.
    categorical_names = {str(name).lower() for name in categorical_features}

    encoded_features = {}
    for feature, value in user_input.items():
        key = feature.lower()
        column = feature.capitalize()
        if key in encoders:
            try:
                encoded_features[column] = encoders[key].transform([value])[0]
            except ValueError:
                # The encoder never saw this label (e.g. the caller's
                # 'unknown' placeholder). Fall back to the same code used
                # for categorical features that have no encoder.
                encoded_features[column] = 0
        elif key in categorical_names:
            # If it's a categorical feature but not in encoders, set to 0 (unknown)
            encoded_features[column] = 0
        else:
            # For numerical features, use the value as is
            encoded_features[column] = value

    if 'Year' not in encoded_features:
        raise KeyError("user_input must contain a 'year' entry to derive the age features")

    # Calculate additional features
    age = calculate_age(encoded_features['Year'])
    encoded_features['Age'] = age
    encoded_features['Age_squared'] = age ** 2

    # Assume average mileage per year (you may want to adjust this).
    # setdefault keeps a value the caller supplied instead of overwriting it.
    avg_mileage_per_year = 12000
    encoded_features.setdefault('Mileage_per_year', avg_mileage_per_year)

    # Estimate the odometer reading only when none was supplied. A model year
    # in the future would otherwise produce a negative mileage, so the age is
    # floored at zero for this estimate.
    encoded_features.setdefault('Odometer', max(age, 0) * avg_mileage_per_year)

    # Put the expected columns first, in their declared order, filling any
    # that are absent with 0. Columns outside that list are kept and appended
    # so no caller-supplied feature is dropped.
    extra_columns = [name for name in encoded_features if name not in expected_columns]
    input_data = pd.DataFrame([encoded_features]).reindex(
        columns=expected_columns + extra_columns, fill_value=0
    )

    predicted_price = model.predict(input_data)
    return predicted_price[0]
|
| 156 |
|
|
|
|
| 226 |
user_input = {
|
| 227 |
'make': match['Make'],
|
| 228 |
'model': match['Model'],
|
| 229 |
+
'year': year,
|
| 230 |
'condition': match.get('Condition', 'unknown'),
|
| 231 |
'fuel': match.get('Fuel', 'unknown'),
|
| 232 |
'title_status': match.get('Title_status', 'unknown'),
|
|
|
|
| 235 |
'size': match.get('Size', 'unknown'),
|
| 236 |
'type': match.get('Type', 'unknown'),
|
| 237 |
'paint_color': match.get('Paint_color', 'unknown'),
|
|
|
|
| 238 |
}
|
| 239 |
|
| 240 |
price = predict_price(model, label_encoders, categorical_features, user_input)
|