Update main.py

main.py CHANGED
@@ -4,20 +4,22 @@ from fastapi.middleware.cors import CORSMiddleware
 import requests
 import pandas as pd
 import json
-import
+import os,datetime
 import pandas as pd
 from sklearn.model_selection import train_test_split, GridSearchCV
 from sklearn.preprocessing import LabelEncoder
-from xgboost import XGBClassifier
 from sklearn.utils import resample
+from xgboost import XGBClassifier
 from sklearn.metrics import accuracy_score, classification_report
 from joblib import dump, load
 import numpy as np
+import requests
+import mysql.connector
+from mysql.connector import Error
 
 
 app = FastAPI()
 
-
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -26,16 +28,20 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-
-@app.get("/train_the_model_new_v2")
-async def train_the_model(Tenant: str):
-    # Load the dataset
-    data = pd.read_csv(f"model/{Tenant}trainer_data_v1.csv")
-    print(data["customer_name"].count())
 
+@app.get("/trigger_the_model_trainer")
+async def train_the_model():
+    # Load the dataset
+    #file_path = 'model/trainer_data.csv' # Update to the correct file path 'model/trainer_data_new.csv'
+    #data = pd.read_csv(file_path)
+    csv_files = ['model/trainer_data.csv','model/trainer_data2.csv','model/trainer_data3.csv','model/trainer_data4.csv']
+    data_frames = [pd.read_csv(file) for file in csv_files]
+
+    # Step 4: Concatenate all DataFrames into a single DataFrame
+    data = pd.concat(data_frames, ignore_index=True)
+    #data = data.iloc[0:50000]
     # Analyze class distribution
-    class_distribution = data['status.name'].value_counts()
-    bf = str(class_distribution)
+    class_distribution = data['status_name'].value_counts()
     print("Class Distribution before balancing:\n", class_distribution)
 
     # Get the size of the largest class to match other classes' sizes
@@ -43,7 +49,7 @@ async def train_the_model(Tenant: str):
 
     # Oversampling
     oversampled_data = pd.DataFrame()
-    for class_name, group in data.groupby('status.name'):
+    for class_name, group in data.groupby('status_name'):
         oversampled_group = resample(group,
                                      replace=True,  # Sample with replacement
                                      n_samples=max_class_size,  # to match majority class
@@ -51,31 +57,36 @@ async def train_the_model(Tenant: str):
         oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
 
     # Verify new class distribution
-    print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
+    print("Class Distribution after oversampling:\n", oversampled_data['status_name'].value_counts())
 
+    # Save the balanced dataset if needed
+    #oversampled_data.to_csv('model/trainer_data_balanced.csv', index=False)
+
+    data = pd.read_csv("model/trainer_data_new.csv")
+    print(data["customer_name"].count())
+
+    data = pd.read_csv("model/trainer_data_balanced.csv")
+    print(data["customer_name"].count())
+
     data = oversampled_data
-
-    # Select columns
-    selected_columns = ['customer_name', 'customer_address', '
-        'cod',
-        '
+    print(data["customer_name"].count())
+    # Select columns
+    selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
+                        'weight','cod','pickup_address','client_number','destination_city',
+                        'status_name']
 
     # Handling missing values
     #data_filled = data[selected_columns].fillna('Missing')
     data_filled = data[selected_columns].dropna()
-
-    data_filled['created_at'] = data_filled['created_at'].astype(str)
-    #data_filled = data_filled.drop(columns=['created_at'])
-
-    af = str(oversampled_data['status.name'].value_counts())
+
     # Encoding categorical variables
     encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
     for col, encoder in encoders.items():
         data_filled[col] = encoder.fit_transform(data_filled[col])
 
     # Splitting the dataset
-    X = data_filled.drop('status.name', axis=1)
-    y = data_filled['status.name']
+    X = data_filled.drop('status_name', axis=1)
+    y = data_filled['status_name']
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
     # Parameters to use for the model
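The class-balancing step in the hunk above amounts to upsampling every class to the size of the largest one with sklearn's `resample`. A self-contained sketch of that logic (the toy DataFrame and the fixed `random_state` are illustrative, not from the commit):

```python
import pandas as pd
from sklearn.utils import resample

# Toy data: 5 delivered orders vs 2 returns
data = pd.DataFrame({
    "status_name": ["Delivered"] * 5 + ["Returned to Client"] * 2,
    "cod": [1, 0, 1, 1, 0, 1, 0],
})

# Size of the largest class; every class is resampled up to this count
max_class_size = data["status_name"].value_counts().max()

oversampled_data = pd.concat(
    [
        resample(group, replace=True, n_samples=max_class_size, random_state=42)
        for _, group in data.groupby("status_name")
    ],
    axis=0,
)
print(oversampled_data["status_name"].value_counts())  # both classes now have 5 rows
```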
@@ -115,159 +126,173 @@ async def train_the_model(Tenant: str):
     classification_rep = classification_report(y_test, y_pred)
 
     # Save the model
-    model_filename =
+    model_filename = 'model/transexpress_xgb_model.joblib'
     dump(xgb, model_filename)
 
     # Save the encoders
-    encoders_filename =
+    encoders_filename = 'model/transexpress_encoders.joblib'
     dump(encoders, encoders_filename)
 
-    return accuracy,classification_rep,"Model trained with new data
+    return accuracy,classification_rep,"Model trained with new data"
 
 
-@app.get("/trigger_the_data_fecher_for_me")
-async def continuous_function(page: int,paginate: int,Tenant: str):
-    print("data fetcher running.....")
-
-    # Update the payload for each page
-
-    #url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-    url = "https://v1.api.curfox.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-
-
-    payload = {}
-    headers = {
-        'Accept': 'application/json',
-        'X-Tenant': Tenant #'royalexpress'
-    }
-
-    response = requests.request("GET", url, headers=headers, data=payload)
-
-    # Sample JSON response
-    json_response = response.json()
-    # Extracting 'data' for conversion
-    data = json_response['data']
-    data_count = len(data)
-
-    df = pd.json_normalize(data)
-
-
-    df = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
-    print("data collected from page : "+str(page))
-    #data.to_csv("new.csv")
-
-    try:
-        file_path = f'model/{Tenant}trainer_data_v1.csv' # Replace with your file path
-        source_csv = pd.read_csv(file_path)
-        new_data = df
-        combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
-
-        combined_df_final.to_csv(f"model/{Tenant}trainer_data_v1.csv")
-        print("data added")
-        message = "data added"
-    except:
-
-        df.to_csv(f"model/{Tenant}trainer_data_v1.csv")
-        print("data created")
-        message = "data created"
-
-    return {"message":message,"page_number":page,"data_count":data_count,'X-Tenant': Tenant}
-
-@app.get("/trigger_the_data_fecher")
-async def your_continuous_function(page: int,paginate: int,Tenant: str):
+@app.get("/trigger_the_data_fecher")
+async def get_data(page: str,paginate: str):
 
 
     print("data fetcher running.....")
 
+    # Initialize an empty DataFrame to store the combined data
+    combined_df = pd.DataFrame()
 
     # Update the payload for each page
+    url = "https://report.transexpress.lk/api/orders/delivery-success-rate/return-to-client-orders?page="+page+"&per_page="+paginate
 
-    #url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-    url = "https://v1.api.curfox.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-
-
     payload = {}
     headers = {
-
-
-    }
+        'Cookie': 'development_trans_express_session=NaFDGzh5WQCFwiortxA6WEFuBjsAG9GHIQrbKZ8B'
+    }
 
     response = requests.request("GET", url, headers=headers, data=payload)
 
     # Sample JSON response
     json_response = response.json()
     # Extracting 'data' for conversion
-    data = json_response['data']
+    data = json_response["return_to_client_orders"]['data']
+
     data_count = len(data)
 
     df = pd.json_normalize(data)
 
 
-    df = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
-
-
-
+    df['status_name'] = df['status_name'].replace('Partially Delivered', 'Delivered')
+    df['status_name'] = df['status_name'].replace('Received by Client', 'Returned to Client')
+
+    print("data collected from page : "+page)
+    #return "done"
     try:
-        file_path =
+        file_path = 'model/trainer_data5.csv' # Replace with your file path
         source_csv = pd.read_csv(file_path)
         new_data = df
         combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
 
-        combined_df_final.to_csv(
+        combined_df_final.to_csv("model/trainer_data5.csv")
         print("data added")
     except:
 
-        df.to_csv(
+        df.to_csv("model/trainer_data5.csv")
         print("data created")
-
-    return {"message":"done","page_number":page,"data_count":data_count,'X-Tenant': Tenant}
 
+    print({"page_number":page,"data_count":data_count})
+    return {"page_number":page,"data_count":data_count}
 
+@app.get("/get_module_versions")
+async def get_versions():
+    try:
+        from pip._internal.operations import freeze
+    except ImportError: # pip < 10.0
+        from pip.operations import freeze
 
+    pkgs = freeze.freeze()
+    for pkg in pkgs:
+        print(pkg)
+    return pkgs
+
 
 @app.get("/get_latest_model_updated_time")
-async def model_updated_time(
-    import multiprocessing
-
-    # Get the number of available CPU cores
-    available_cores = multiprocessing.cpu_count()
+async def model_updated_time():
     try:
-
-
-
-
-        "base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
+        file_size = os.path.getsize("model/transexpress_xgb_model.joblib")
+        m_time_encoder = os.path.getmtime('model/transexpress_encoders.joblib')
+        m_time_model = os.path.getmtime('model/transexpress_xgb_model.joblib')
+        return {"base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
                 "last model updated time":datetime.datetime.fromtimestamp(m_time_model),
-                "
+                "The size of the file is bytes":file_size
                 }
     except:
         return {"no model found so first trained the model using data fecther"}
 
 
 
+# Database connection parameters
+DB_HOST = 'trans-prod-clone-staging.mysql.database.azure.com'
+DB_PORT = 3306
+DB_DATABASE = 'defaultdb'
+DB_USERNAME = 'wwwdata'
+DB_PASSWORD = 'fcLa8F3sxgNYQ$K@%'
+# Connect to the database
 
+#calling this function for each request
+def fetch_customer_data(phone_number):
+    #local connection
+    connection = mysql.connector.connect(
+        host=DB_HOST,
+        port=DB_PORT,
+        database=DB_DATABASE,
+        user=DB_USERNAME,
+        password=DB_PASSWORD
+    )
+    #try:
+    if connection.is_connected():
+        print("Connected to the database")
+
+        # SQL query
+        query = """
+        SELECT
+            orders.customer_name AS customer_name,
+            orders.address AS customer_address,
+            orders.phone_no AS customer_phone_no,
+            primary_statuses.name AS status_name
+        FROM
+            orders
+        INNER JOIN
+            statuses ON orders.status_id = statuses.id
+        INNER JOIN
+            primary_statuses ON statuses.name = primary_statuses.key
+        WHERE orders.phone_no LIKE %s
+        """
+
+        # Execute the query
+        cursor = connection.cursor(dictionary=True)
+        cursor.execute(query, (f"%{phone_number}%",))
+
+        # Fetch results
+        results = cursor.fetchall()
+        #print("Results:", results)
+        #close conection
+        #if connection.is_connected():
+        cursor.close()
+        connection.close()
+        print("Database connection closed")
+        return results
+
+    # except Error as e:
+    #     print(f"Error: {e}")
+    #     #close conection
+    #     #if connection.is_connected():
+    #     cursor.close()
+    #     connection.close()
+    #     print("Database connection closed")
 
 
-    # Endpoint for making predictions
 
+# Endpoint for making predictions
 @app.post("/predict")
-def predict(
-
+async def predict(
+    date : str,
     customer_name: str,
     customer_address: str,
     customer_phone: str,
-
-
-    origin_city_name: str,
-    destination_city_name: str,
-    created_at: str,
-    customer_email: str,
+    weight: float,
+    cod: int,
     pickup_address: str,
-
+    client_number:str,
+    destination_city:str
 ):
 
+
     try:
         # Load your trained model and encoders
-        xgb_model = load(
-        encoders = load(
+        xgb_model = load('model/transexpress_xgb_model.joblib')
+        encoders = load('model/transexpress_encoders.joblib')
     except:
         return {"no model found so first trained the model using data fecther"}
 
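`fetch_customer_data` in the hunk above passes the phone number through a `%s` placeholder, so `mysql.connector` escapes the value and the `LIKE` wildcards stay inside the bound parameter rather than the SQL text. A minimal sketch of that pattern, with placeholder connection settings and a hypothetical `find_orders_by_phone` helper instead of the credentials and function in the commit:

```python
import mysql.connector

def find_orders_by_phone(phone_number: str):
    # Placeholder connection settings; the commit keeps its own in DB_* constants
    connection = mysql.connector.connect(
        host="localhost", port=3306, database="demo", user="demo", password="demo"
    )
    cursor = connection.cursor(dictionary=True)  # rows come back as dicts
    try:
        # %s is a bound parameter; the % wildcards belong to the value, not the SQL
        cursor.execute(
            "SELECT customer_name, phone_no FROM orders WHERE phone_no LIKE %s",
            (f"%{phone_number}%",),
        )
        return cursor.fetchall()
    finally:
        cursor.close()
        connection.close()
```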
@@ -276,26 +301,20 @@ def predict(
     def safe_transform(encoder, column):
         classes = encoder.classes_
         return [encoder.transform([x])[0] if x in classes else -1 for x in column]
-    # Function to handle unseen labels during encoding
-    def safe_transform(encoder, column):
-        classes = encoder.classes_
-        return [encoder.transform([x])[0] if x in classes else -1 for x in column]
-
 
-
+    # Convert input data to DataFrame
     input_data = {
         'customer_name': customer_name,
         'customer_address': customer_address,
-        '
+        'customer_phone_no': customer_phone,
+        'weight': float(weight),
         'cod': int(cod),
-        '
-        '
-        'destination_city
-        'created_at':created_at
+        'pickup_address':pickup_address,
+        'client_number':client_number,
+        'destination_city':destination_city
     }
     input_df = pd.DataFrame([input_data])
 
-
     # Encode categorical variables using the same encoders used during training
     for col in input_df.columns:
         if col in encoders:
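`safe_transform`, kept in the hunk above, exists because `LabelEncoder.transform` raises a `ValueError` for labels it never saw during `fit`; the helper maps those to -1 instead. A standalone demonstration (the city names are illustrative):

```python
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder.fit(["Colombo", "Galle", "Kandy"])  # classes_ are stored sorted

def safe_transform(encoder, column):
    classes = encoder.classes_
    return [encoder.transform([x])[0] if x in classes else -1 for x in column]

# "Jaffna" was never seen during fit, so it becomes -1 instead of raising
print([int(v) for v in safe_transform(encoder, ["Kandy", "Jaffna"])])  # [2, -1]
```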
@@ -304,12 +323,69 @@ def predict(
     # Predict and obtain probabilities
     pred = xgb_model.predict(input_df)
     pred_proba = xgb_model.predict_proba(input_df)
+
+    import numpy as np
+    from urllib.parse import unquote
+    def extract_phone_numbers(customer_phone):
+        # Decode URL-encoded phone numbers
+        decoded_phone = unquote(customer_phone)
+        # Split into a list of phone numbers
+        phone_numbers = [phone.strip() for phone in decoded_phone.split('/')]
+        # Handle case where there is a single phone number
+        if len(phone_numbers) == 1 and phone_numbers[0]:
+            return phone_numbers
+        elif len(phone_numbers) == 0:
+            return []
+        return phone_numbers
+
+
+    def calculate_delivery_factor(phone_number):
+        # Replace with the desired customer name and phone number
+
+        #customer_phone_no = '0773224384'
+        json = fetch_customer_data(phone_number)
+        data = json
+        #print(url,data)
+        # Filter only relevant status names
+        valid_statuses = ['Failed to Deliver', 'Delivered', 'Returned to Client']
+        relevant_orders = [order for order in data if order['status_name'] in valid_statuses]
+
+        if not relevant_orders:
+            base_probability = 0.50
+        else:
+            delivered_count = sum(1 for order in relevant_orders if order['status_name'] == 'Delivered')
+            total_orders_count = len(relevant_orders)
+
+            base_probability = delivered_count / total_orders_count
+            base_probability = max(0.05, min(base_probability, 0.95))
+
+        # Add a narrower random component
+        random_component = np.random.uniform(-0.05, 0.05)
+        adjusted_probability = base_probability + random_component
+
+        return adjusted_probability
+    try:
+
+        print(customer_phone)
+        phone_numbers = extract_phone_numbers(customer_phone)
+        print(phone_numbers, "api calling ......")
+        probability = calculate_delivery_factor(phone_numbers[0])
+        probability = round((probability * 100),2)
+        #probability = f"{probability:.2f}" probability = f"{float(probability):.2f}"
 
+        print(f"new model probability: {probability}")
+        predicted_status = "delivered"
+
     # Output
-
-
+    except Exception as e:
+        print(f"Error: {e}")
+        predicted_status = "Unknown" if pred[0] == -1 else encoders['status_name'].inverse_transform([pred])[0]
+        probability = pred_proba[0][pred[0]] * 100 if pred[0] != -1 else "Unknown"
+        print(str(predicted_status),probability)
+        if probability>98:
+            probability = probability-1
+    if predicted_status == "Returned to Client":
+        probability = 100 - probability
 
-    if predicted_status == "RETURN TO CLIENT":
-        probability = 100 - probability
 
-    return {"
+    return {"Probability": round(probability,2),"predicted_status":predicted_status}
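The probability handling added at the end of `/predict` works in two stages: the history-based path clamps the customer's past delivery rate to [0.05, 0.95] and adds ±0.05 of jitter, and the final step reports `100 - probability` when the prediction is "Returned to Client". A worked sketch of that arithmetic (the counts are illustrative, and reading the inversion as "report the delivery probability instead" is an assumption):

```python
import numpy as np

# History-based path: 3 of 4 relevant past orders were delivered
delivered_count, total_orders_count = 3, 4
base_probability = delivered_count / total_orders_count       # 0.75
base_probability = max(0.05, min(base_probability, 0.95))     # clamp to [0.05, 0.95]
adjusted = base_probability + np.random.uniform(-0.05, 0.05)  # jitter: 0.70..0.80
probability = round(adjusted * 100, 2)                        # e.g. 73.41

# Final step from the endpoint: invert for returned orders
predicted_status = "Returned to Client"
if predicted_status == "Returned to Client":
    probability = 100 - probability
print(probability, predicted_status)
```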