curfox_model_trainer

Sleeping

App Files Files Community

Arafath10 committed on Mar 4, 2024

Commit

167a0c6

verified ·

1 Parent(s): 1380647

Update main.py

Browse files

Files changed (1) hide show

main.py +112 -6

main.py CHANGED Viewed

@@ -5,6 +5,12 @@ import requests
 import pandas as pd
 import json
 import httpx
 app = FastAPI()
 app.add_middleware(
@@ -15,8 +21,6 @@ app.add_middleware(
     allow_headers=["*"],
 )
-global page
-page = 1
 # Declare the continuous function as an async function.
 #async def your_continuous_function():
@@ -27,12 +31,114 @@ page = 1
 #async def startup_event():
     # Start the continuous function as a background task.
     #asyncio.create_task(your_continuous_function())
-@app.get("/trigger_the_data_fecher_every_60s")
-async def your_continuous_function():
-    global page
-    page+=1
     return "model trained with new page : "+str(page)+" data"
 @app.get("/test_api")

 import pandas as pd
 import json
 import httpx
+import pandas as pd
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.preprocessing import LabelEncoder
+from xgboost import XGBClassifier
+from sklearn.metrics import accuracy_score, classification_report
 app = FastAPI()
 app.add_middleware(
     allow_headers=["*"],
 )
 # Declare the continuous function as an async function.
 #async def your_continuous_function():
 #async def startup_event():
     # Start the continuous function as a background task.
     #asyncio.create_task(your_continuous_function())
+from joblib import dump
+def train_the_model(data):
+    """Train an XGBoost classifier that predicts ``status.name`` and save it.
+
+    The function keeps a fixed set of order columns, fills gaps with the
+    literal ``'Missing'``, label-encodes every non-numeric column (including
+    the target), tunes an ``XGBClassifier`` with a 10-fold grid search on an
+    80 % training split, prints accuracy and a classification report for the
+    remaining 20 %, and writes the model and the fitted encoders to
+    ``xgb_model.joblib`` and ``encoders.joblib`` in the working directory.
+
+    Args:
+        data: pandas DataFrame containing at least the columns listed in
+            ``selected_columns`` below.
+
+    Raises:
+        ValueError: if ``data`` is ``None`` or has no rows.
+        KeyError: if any of the selected columns is absent from ``data``.
+    """
+    # Fail early with a clear message instead of a late error from sklearn.
+    if data is None or len(data) == 0:
+        raise ValueError("train_the_model received no rows to train on")
+    # Select columns
+    selected_columns = ['customer_name', 'customer_address', 'customer_phone',
+                        'customer_email', 'cod', 'weight',
+                        'origin_city.name', 'destination_city.name', 'status.name']
+    # Handling missing values (fillna returns a new frame, the input is untouched)
+    data_filled = data[selected_columns].fillna('Missing')
+    # Encoding categorical variables.
+    # Anything that is not numeric is encoded; values are cast to str first
+    # because a numeric column that had gaps now mixes numbers with 'Missing',
+    # and LabelEncoder rejects mixed str/number input.
+    # NOTE(review): code that reuses the saved encoders should apply the same
+    # fillna('Missing') + str cast before calling transform - confirm there.
+    encoders = {}
+    for col in selected_columns:
+        if pd.api.types.is_numeric_dtype(data_filled[col]):
+            continue
+        encoder = LabelEncoder()
+        data_filled[col] = encoder.fit_transform(data_filled[col].astype(str))
+        encoders[col] = encoder
+    # Splitting the dataset
+    X = data_filled.drop('status.name', axis=1)
+    y = data_filled['status.name']
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    # Setup the hyperparameter grid to search
+    param_grid = {
+        'max_depth': [3, 4, 5],
+        'learning_rate': [0.01, 0.1, 0.4],
+        'n_estimators': [100, 200, 300],
+        'subsample': [0.8, 0.9, 1],
+        'colsample_bytree': [0.3, 0.7]
+    }
+    # Initialize the classifier
+    xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
+    # Setup GridSearchCV
+    grid_search = GridSearchCV(xgb, param_grid, cv=10, n_jobs=-1, scoring='accuracy')
+    # Fit the grid search to the data
+    grid_search.fit(X_train, y_train)
+    print("Best parameters:", grid_search.best_params_)
+    # GridSearchCV refits the winning configuration on the whole training
+    # split by default, so that estimator is reused instead of training again.
+    best_xgb = grid_search.best_estimator_
+    # Predict on the test set
+    y_pred = best_xgb.predict(X_test)
+    # Evaluate the model and print the results
+    accuracy = accuracy_score(y_test, y_pred)
+    print("Accuracy:", accuracy)
+    print("Classification Report:\n", classification_report(y_test, y_pred))
+    # Save the model
+    model_filename = 'xgb_model.joblib'
+    dump(best_xgb, model_filename)
+    # Save the encoders
+    encoders_filename = 'encoders.joblib'
+    dump(encoders, encoders_filename)
+    print(f"Model saved as {model_filename}")
+    print(f"Encoders saved as {encoders_filename}")
+def _fetch_order_page(page, timeout=60):
+    """Download one page of the order list and return it as a flat DataFrame."""
+    url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list"
+    # Same query as before (sort=id&paginate=500&page=<page>), built by requests.
+    params = {'sort': 'id', 'paginate': 500, 'page': page}
+    headers = {
+                    'Accept': 'application/json',
+                    'X-Tenant': 'royalexpress'
+                  }
+    # A timeout keeps a stalled upstream from hanging the worker forever.
+    response = requests.get(url, headers=headers, params=params, timeout=timeout)
+    # Surface HTTP errors here rather than as a confusing parse/KeyError later.
+    response.raise_for_status()
+    # Extracting 'data' for conversion; a missing/empty payload yields an empty frame
+    records = response.json().get('data') or []
+    return pd.json_normalize(records)
+@app.get("/trigger_the_data_fecher_every_30min")
+async def your_continuous_function(page: int):
+    """Fetch one page of orders and retrain the model on its finished orders.
+
+    Only rows whose ``status.name`` is ``'RETURN TO CLIENT'`` or
+    ``'DELIVERED'`` are passed to ``train_the_model``.
+
+    Args:
+        page: page number forwarded to the order-list API.
+
+    Returns:
+        A status string: the usual "model trained ..." message, or a
+        "no ... on page" message when the page holds nothing to train on.
+    """
+    import asyncio
+    print("data fetcher running.....")
+    # The HTTP call and the grid search are both blocking; running them in the
+    # default executor keeps the event loop free to serve other requests.
+    loop = asyncio.get_running_loop()
+    df = await loop.run_in_executor(None, _fetch_order_page, page)
+    print("data collected from page : "+str(page))
+    # An empty page normalizes to a frame without columns, so check first.
+    if 'status.name' not in df.columns:
+        return "no order data found on page : "+str(page)
+    data = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
+    if data.empty:
+        return "no delivered or returned orders on page : "+str(page)
+    #data.to_csv("new.csv")
+    await loop.run_in_executor(None, train_the_model, data)
     return "model trained with new page : "+str(page)+" data"
 @app.get("/test_api")