Upload bmw (1).py
Browse files- bmw (1).py +283 -0
bmw (1).py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""bmw.ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated by Colab.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/1Z-qv95gP_ABNRU7BiTH6jASxVMgk2BQZ
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import numpy as np
|
| 12 |
+
import matplotlib.pyplot as plt
|
| 13 |
+
import seaborn as sns
|
| 14 |
+
|
| 15 |
+
# Load the BMW sales dataset and take a first look at it.
# The bare expressions below are notebook display cells kept from the Colab
# export; they have no effect when the file runs as a plain script.
dataset = pd.read_csv('/content/BMW_Car_Sales_Classification.csv')
dataset.head()

dataset['Model'].unique()

dataset.isnull().sum()

dataset.info()

from sklearn.preprocessing import LabelEncoder

# Encode every categorical column with its OWN LabelEncoder and keep the
# fitted encoders. Re-fitting one shared encoder column after column throws
# away all mappings but the last, so codes could not be reused on new rows
# or turned back into labels.
categorical_columns = [
    'Model',
    'Region',
    'Color',
    'Fuel_Type',
    'Transmission',
    'Sales_Classification',
]
column_encoders = {}
for column in categorical_columns:
    column_encoders[column] = LabelEncoder()
    dataset[column] = column_encoders[column].fit_transform(dataset[column])

# Backward compatibility: `le` used to end up fitted on Sales_Classification.
le = column_encoders['Sales_Classification']

dataset.head(5)

# Features are all columns except the last one; the last column is the
# target (presumably Sales_Classification -- confirm against the CSV layout).
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

x

y

dataset['Model'].describe()

# Exploratory plots on the encoded frame.
sns.barplot(x='Model', y='Year', data=dataset)
plt.show()

sns.countplot(x='Model', data=dataset)
plt.show()

# `sns.distplot` is deprecated; histplot with a KDE overlay on a density
# scale is the documented replacement.
sns.histplot(dataset['Year'], kde=True, stat='density')
plt.show()
|
| 58 |
+
|
| 59 |
+
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Fit the scaler on the training split only, then apply the same statistics
# to the test split so no test information leaks into preprocessing.
x_train_scaled = sc.fit_transform(x_train)
x_test_scaled = sc.transform(x_test)

# Wrap the scaled arrays back into DataFrames. The original row index is
# preserved so the features stay label-aligned with y_train / y_test;
# without it the frames get a fresh 0..n-1 index that no longer matches
# the targets.
x_train = pd.DataFrame(x_train_scaled, columns=x_train.columns, index=x_train.index)
x_test = pd.DataFrame(x_test_scaled, columns=x_test.columns, index=x_test.index)

x_train

x_test
|
| 74 |
+
|
| 75 |
+
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# --- Logistic regression ---------------------------------------------------
lg = LogisticRegression()
lg.fit(x_train, y_train)

# Train / test accuracy in percent (notebook display expression).
lg.score(x_train, y_train) * 100, lg.score(x_test, y_test) * 100

y_pred_lg = lg.predict(x_test)

y_pred_lg

# --- Decision tree ---------------------------------------------------------
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)

dt.score(x_train, y_train) * 100, dt.score(x_test, y_test) * 100

y_pred = dt.predict(x_test)

y_pred

# --- Random forest (5 trees) -----------------------------------------------
rg = RandomForestClassifier(n_estimators=5)
rg.fit(x_train, y_train)

rg.score(x_train, y_train) * 100, rg.score(x_test, y_test) * 100

y_pred_rg = rg.predict(x_test)
y_pred_rg

# Store the score under its own name. Assigning it to `accuracy_score`
# replaces the imported function with a float, so running this section a
# second time fails with "TypeError: 'float' object is not callable".
rg_accuracy = accuracy_score(y_test, y_pred_rg)
rg_accuracy

cr = classification_report(y_test, y_pred_rg)
print(cr)

cf = confusion_matrix(y_test, y_pred_rg)
cf

# fmt='d' prints the integer counts as-is instead of scientific notation.
sns.heatmap(cf, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()
|
| 119 |
+
|
| 120 |
+
# Reload the raw CSV so the categorical columns are available as strings.
dataset_original = pd.read_csv('/content/BMW_Car_Sales_Classification.csv')

# One fitted encoder per categorical feature column. LabelEncoder assigns
# codes from the sorted unique values, so fitting on the same CSV yields the
# same codes that were used to train the models.
label_encoders = {}
for column in dataset_original.select_dtypes(include='object').columns:
    if column == 'Sales_Classification':
        continue  # the target is handled separately below
    feature_encoder = LabelEncoder()
    dataset_original[column] = feature_encoder.fit_transform(dataset_original[column])
    label_encoders[column] = feature_encoder

# Reuse the scaler that was fitted on the RAW training features. At this
# point `x_train` already holds standardised values, so fitting a fresh
# StandardScaler on it learns mean ~0 / std ~1 and would leave the raw
# sample below effectively unscaled, feeding the model values far outside
# the range it was trained on.
scaler = sc

new_data = pd.DataFrame([{
    "Model": "X5",
    "Year": 2023,
    "Region": "North America",
    "Color": "Black",
    "Fuel_Type": "Diesel",
    "Transmission": "Automatic",
    "Engine_Size_L": 3.0,
    "Mileage_KM": 25000,
    "Price_USD": 55000,
    "Sales_Volume": 120
}])

# Encode the sample's categorical values with the matching fitted encoders.
for column in new_data.select_dtypes(include='object').columns:
    new_data[column] = label_encoders[column].transform(new_data[column])

# Put the columns in the exact order the models were trained on.
new_data = new_data[x_train.columns]

new_data_scaled = scaler.transform(new_data)

# print() instead of the notebook-only display() so this also runs as a script.
print(new_data_scaled)

# Convert the scaled numpy array back to a pandas DataFrame with the correct column names
new_data_scaled_df = pd.DataFrame(new_data_scaled, columns=x_train.columns)

# Make prediction using the trained RandomForestClassifier model (rg)
predicted_class = rg.predict(new_data_scaled_df)

# Map the numeric prediction back to its class label. The target column of
# `dataset_original` was left unencoded above, so it can be used directly
# instead of reading the CSV a third time.
le_sales = LabelEncoder()
le_sales.fit(dataset_original['Sales_Classification'])
predicted_class_label = le_sales.inverse_transform(predicted_class)

# Display the predicted class label
print(f"Predicted Sales Classification: {predicted_class_label[0]}")

pd.DataFrame(predicted_class_label, columns=['Predicted_Sales_Classification'])
|
| 170 |
+
|
| 171 |
+
# Publish the trained model to the Hugging Face Hub.
#
# This replaces the notebook shell cells (`!git ...`, `!hf auth login`,
# `!pip install ...`), which are not valid Python in a .py file, and the
# three pasted copies of the upload code whose unterminated triple-quoted
# README string made the file a SyntaxError. Uploading through HfApi needs
# no local git clone, git-lfs or interactive login prompt.
import os

from google.colab import userdata
from huggingface_hub import HfApi, login

# Write token stored in the Colab "Secrets" panel under the name HF_WRITE.
HF_WRITE = userdata.get('HF_WRITE')
login(token=HF_WRITE)

username = "RRPATEL228"  # Replace with your HF username
model_name = "car_price_model"  # Choose a model repo name
repo_id = f"{username}/{model_name}"

# Create the model repo if it does not exist yet. The repository is
# identified by `repo_id` ("<user>/<name>").
api = HfApi()
repo_url = api.create_repo(repo_id=repo_id, exist_ok=True, repo_type="model")

README_TEXT = """
# My Tabular Model

This is a model trained on tabular data for car price sales classification.

To load the model:

```python
import joblib
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="RRPATEL228/car_price_model", filename="random_forest_model.joblib")
model = joblib.load(path)
```
"""

# Assuming you have saved your model as 'random_forest_model.joblib' in the 'car_price_model' directory
model_path = os.path.join("car_price_model", "random_forest_model.joblib")
if os.path.exists(model_path):
    api.upload_file(
        path_or_fileobj=model_path,
        path_in_repo="random_forest_model.joblib",
        repo_id=repo_id,
        repo_type="model",
    )
    # Upload the README only together with the model, so the usage
    # instructions never point at a file that is not there.
    api.upload_file(
        path_or_fileobj=README_TEXT.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=repo_id,
        repo_type="model",
    )
    print(f"Successfully uploaded {model_path} to {repo_id}")
else:
    # Best effort, as before: report the missing file instead of raising.
    print(f"Error: Model file not found at {model_path}. Please ensure the model saving cell was run successfully.")

# If you had a model architecture file (e.g., a Python script defining the model class),
# upload it the same way with api.upload_file(...).
|
| 283 |
+
|