Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,9 +11,6 @@ from sklearn.preprocessing import LabelEncoder
|
|
| 11 |
from huggingface_hub import hf_hub_download
|
| 12 |
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
|
| 13 |
import torch
|
| 14 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 15 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
| 16 |
-
import re
|
| 17 |
from datetime import datetime
|
| 18 |
|
| 19 |
# Dataset loading function with caching
|
|
@@ -22,7 +19,7 @@ def load_datasets():
|
|
| 22 |
try:
|
| 23 |
with st.spinner('Loading dataset...'):
|
| 24 |
original_data = pd.read_csv('CTP_Model1.csv', low_memory=False)
|
| 25 |
-
original_data.columns = original_data.columns.str.strip().str.
|
| 26 |
return original_data
|
| 27 |
except Exception as e:
|
| 28 |
st.error(f"Error loading dataset: {str(e)}")
|
|
@@ -62,7 +59,7 @@ def classify_image(image):
|
|
| 62 |
|
| 63 |
def find_closest_match(df, brand, model):
|
| 64 |
# Combine brand and model names from the dataset
|
| 65 |
-
df['full_name'] = df['
|
| 66 |
|
| 67 |
# Create a list of all car names
|
| 68 |
car_names = df['full_name'].tolist()
|
|
@@ -85,7 +82,7 @@ def find_closest_match(df, brand, model):
|
|
| 85 |
return df.iloc[most_similar_index]
|
| 86 |
|
| 87 |
def get_car_overview(car_data):
|
| 88 |
-
prompt = f"Provide an overview of the following car:\nYear: {car_data['
|
| 89 |
response = openai.ChatCompletion.create(
|
| 90 |
model="gpt-3.5-turbo",
|
| 91 |
messages=[{"role": "user", "content": prompt}]
|
|
@@ -123,30 +120,31 @@ def predict_price(model, encoders, categorical_features, user_input):
|
|
| 123 |
current_year = datetime.now().year
|
| 124 |
|
| 125 |
for feature, value in user_input.items():
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
| 129 |
# If it's a categorical feature but not in encoders, set to 0 (unknown)
|
| 130 |
-
encoded_features[
|
| 131 |
else:
|
| 132 |
# For numerical features, use the value as is
|
| 133 |
-
encoded_features[
|
| 134 |
|
| 135 |
# Calculate additional features
|
| 136 |
-
encoded_features['
|
| 137 |
-
encoded_features['
|
| 138 |
|
| 139 |
# Assume average mileage per year (you may want to adjust this)
|
| 140 |
avg_mileage_per_year = 12000
|
| 141 |
-
encoded_features['
|
| 142 |
|
| 143 |
# Assume odometer reading (you may want to adjust this)
|
| 144 |
-
encoded_features['
|
| 145 |
|
| 146 |
input_data = pd.DataFrame([encoded_features])
|
| 147 |
|
| 148 |
# Ensure all expected columns are present
|
| 149 |
-
expected_columns = ['
|
| 150 |
for col in expected_columns:
|
| 151 |
if col not in input_data.columns:
|
| 152 |
input_data[col] = 0 # or some default value
|
|
@@ -204,10 +202,10 @@ if image is not None:
|
|
| 204 |
match = find_closest_match(df, brand, model_name)
|
| 205 |
if match is not None:
|
| 206 |
st.write("Closest Match Found:")
|
| 207 |
-
st.write(f"Make: {match['
|
| 208 |
-
st.write(f"Model: {match['
|
| 209 |
-
st.write(f"Year: {match['
|
| 210 |
-
st.write(f"Price: ${match['
|
| 211 |
|
| 212 |
# Get additional information using GPT-3.5-turbo
|
| 213 |
overview = get_car_overview(match)
|
|
@@ -224,17 +222,17 @@ if image is not None:
|
|
| 224 |
|
| 225 |
for year in years:
|
| 226 |
user_input = {
|
| 227 |
-
'make': match['
|
| 228 |
-
'model': match['
|
| 229 |
'year': year,
|
| 230 |
-
'condition': match.get('
|
| 231 |
-
'fuel': match.get('
|
| 232 |
-
'title_status': match.get('
|
| 233 |
-
'transmission': match.get('
|
| 234 |
-
'drive': match.get('
|
| 235 |
-
'size': match.get('
|
| 236 |
-
'type': match.get('
|
| 237 |
-
'paint_color': match.get('
|
| 238 |
}
|
| 239 |
|
| 240 |
price = predict_price(model, label_encoders, categorical_features, user_input)
|
|
@@ -243,7 +241,7 @@ if image is not None:
|
|
| 243 |
# Plotting the results
|
| 244 |
plt.figure(figsize=(10, 5))
|
| 245 |
plt.plot(years, predicted_prices, marker='o')
|
| 246 |
-
plt.title(f"Predicted Price of {match['
|
| 247 |
plt.xlabel("Year")
|
| 248 |
plt.ylabel("Predicted Price ($)")
|
| 249 |
plt.grid()
|
|
|
|
| 11 |
from huggingface_hub import hf_hub_download
|
| 12 |
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
|
| 13 |
import torch
|
|
|
|
|
|
|
|
|
|
| 14 |
from datetime import datetime
|
| 15 |
|
| 16 |
# Dataset loading function with caching
|
|
|
|
| 19 |
try:
|
| 20 |
with st.spinner('Loading dataset...'):
|
| 21 |
original_data = pd.read_csv('CTP_Model1.csv', low_memory=False)
|
| 22 |
+
original_data.columns = original_data.columns.str.strip().str.lower()
|
| 23 |
return original_data
|
| 24 |
except Exception as e:
|
| 25 |
st.error(f"Error loading dataset: {str(e)}")
|
|
|
|
| 59 |
|
| 60 |
def find_closest_match(df, brand, model):
|
| 61 |
# Combine brand and model names from the dataset
|
| 62 |
+
df['full_name'] = df['make'] + ' ' + df['model']
|
| 63 |
|
| 64 |
# Create a list of all car names
|
| 65 |
car_names = df['full_name'].tolist()
|
|
|
|
| 82 |
return df.iloc[most_similar_index]
|
| 83 |
|
| 84 |
def get_car_overview(car_data):
|
| 85 |
+
prompt = f"Provide an overview of the following car:\nYear: {car_data['year']}\nMake: {car_data['make']}\nModel: {car_data['model']}\nTrim: {car_data['trim']}\nPrice: ${car_data['price']}\nCondition: {car_data['condition']}\n"
|
| 86 |
response = openai.ChatCompletion.create(
|
| 87 |
model="gpt-3.5-turbo",
|
| 88 |
messages=[{"role": "user", "content": prompt}]
|
|
|
|
| 120 |
current_year = datetime.now().year
|
| 121 |
|
| 122 |
for feature, value in user_input.items():
|
| 123 |
+
feature_lower = feature.lower()
|
| 124 |
+
if feature_lower in encoders:
|
| 125 |
+
encoded_features[feature_lower] = encoders[feature_lower].transform([value])[0]
|
| 126 |
+
elif feature_lower in categorical_features:
|
| 127 |
# If it's a categorical feature but not in encoders, set to 0 (unknown)
|
| 128 |
+
encoded_features[feature_lower] = 0
|
| 129 |
else:
|
| 130 |
# For numerical features, use the value as is
|
| 131 |
+
encoded_features[feature_lower] = value
|
| 132 |
|
| 133 |
# Calculate additional features
|
| 134 |
+
encoded_features['age'] = calculate_age(encoded_features['year'])
|
| 135 |
+
encoded_features['age_squared'] = encoded_features['age'] ** 2
|
| 136 |
|
| 137 |
# Assume average mileage per year (you may want to adjust this)
|
| 138 |
avg_mileage_per_year = 12000
|
| 139 |
+
encoded_features['mileage_per_year'] = avg_mileage_per_year
|
| 140 |
|
| 141 |
# Assume odometer reading (you may want to adjust this)
|
| 142 |
+
encoded_features['odometer'] = encoded_features['age'] * avg_mileage_per_year
|
| 143 |
|
| 144 |
input_data = pd.DataFrame([encoded_features])
|
| 145 |
|
| 146 |
# Ensure all expected columns are present
|
| 147 |
+
expected_columns = ['make', 'model', 'year', 'condition', 'fuel', 'odometer', 'title_status', 'transmission', 'drive', 'size', 'type', 'paint_color', 'age', 'age_squared', 'mileage_per_year']
|
| 148 |
for col in expected_columns:
|
| 149 |
if col not in input_data.columns:
|
| 150 |
input_data[col] = 0 # or some default value
|
|
|
|
| 202 |
match = find_closest_match(df, brand, model_name)
|
| 203 |
if match is not None:
|
| 204 |
st.write("Closest Match Found:")
|
| 205 |
+
st.write(f"Make: {match['make']}")
|
| 206 |
+
st.write(f"Model: {match['model']}")
|
| 207 |
+
st.write(f"Year: {match['year']}")
|
| 208 |
+
st.write(f"Price: ${match['price']}")
|
| 209 |
|
| 210 |
# Get additional information using GPT-3.5-turbo
|
| 211 |
overview = get_car_overview(match)
|
|
|
|
| 222 |
|
| 223 |
for year in years:
|
| 224 |
user_input = {
|
| 225 |
+
'make': match['make'].lower(),
|
| 226 |
+
'model': match['model'].lower(),
|
| 227 |
'year': year,
|
| 228 |
+
'condition': match.get('condition', 'unknown').lower(),
|
| 229 |
+
'fuel': match.get('fuel', 'unknown').lower(),
|
| 230 |
+
'title_status': match.get('title_status', 'unknown').lower(),
|
| 231 |
+
'transmission': match.get('transmission', 'unknown').lower(),
|
| 232 |
+
'drive': match.get('drive', 'unknown').lower(),
|
| 233 |
+
'size': match.get('size', 'unknown').lower(),
|
| 234 |
+
'type': match.get('type', 'unknown').lower(),
|
| 235 |
+
'paint_color': match.get('paint_color', 'unknown').lower(),
|
| 236 |
}
|
| 237 |
|
| 238 |
price = predict_price(model, label_encoders, categorical_features, user_input)
|
|
|
|
| 241 |
# Plotting the results
|
| 242 |
plt.figure(figsize=(10, 5))
|
| 243 |
plt.plot(years, predicted_prices, marker='o')
|
| 244 |
+
plt.title(f"Predicted Price of {match['make']} {match['model']} Over Time")
|
| 245 |
plt.xlabel("Year")
|
| 246 |
plt.ylabel("Predicted Price ($)")
|
| 247 |
plt.grid()
|