Update main.py
main.py
CHANGED
@@ -17,6 +17,7 @@ import numpy as np
 
 app = FastAPI()
 
+
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -26,14 +27,15 @@ app.add_middleware(
 )
 
 
-@app.get("/
+@app.get("/train_the_model_new_v2")
 async def train_the_model(Tenant: str):
     # Load the dataset
-    data = pd.read_csv(f"model/{Tenant}
+    data = pd.read_csv(f"model/{Tenant}trainer_data_v1.csv")
     print(data["customer_name"].count())
 
     # Analyze class distribution
     class_distribution = data['status.name'].value_counts()
+    bf = str(class_distribution)
     print("Class Distribution before balancing:\n", class_distribution)
 
     # Get the size of the largest class to match other classes' sizes
@@ -53,16 +55,19 @@ async def train_the_model(Tenant: str):
 
     data = oversampled_data
 
-
     # Select columns 'customer_email'
     selected_columns = ['customer_name', 'customer_address', 'customer_phone',
                         'cod', 'weight', 'origin_city.name',
-                        'destination_city.name','
+                        'destination_city.name','status.name','created_at']
 
     # Handling missing values
     #data_filled = data[selected_columns].fillna('Missing')
     data_filled = data[selected_columns].dropna()
-
+    data_filled['customer_phone'] = data_filled['customer_phone'].astype(str)
+    data_filled['created_at'] = data_filled['created_at'].astype(str)
+    #data_filled = data_filled.drop(columns=['created_at'])
+
+    af = str(oversampled_data['status.name'].value_counts())
     # Encoding categorical variables
     encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
     for col, encoder in encoders.items():
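Note on the hunk above: the balancing code itself sits in the elided lines (old 40-52), so only its inputs and output (class_distribution, oversampled_data) are visible here. A minimal pandas sketch of what upsampling every class to the largest class size could look like, reusing the names from the diff; this is an assumption about the hidden lines, not a copy of them:

import pandas as pd

# Hypothetical sketch: 'data' and the 'status.name' column come from the diff;
# the resampling strategy (sample with replacement up to the majority size)
# is assumed.
class_distribution = data['status.name'].value_counts()
max_size = class_distribution.max()
oversampled_data = pd.concat(
    [group.sample(max_size, replace=True, random_state=42)
     for _, group in data.groupby('status.name')],
    ignore_index=True,
)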
@@ -117,9 +122,58 @@ async def train_the_model(Tenant: str):
     encoders_filename = f'model/{Tenant}_curfox_encoders.joblib'
     dump(encoders, encoders_filename)
 
-    return accuracy,classification_rep,"Model trained with new data for :",model_filename
+    return accuracy,classification_rep,"Model trained with new data for :",model_filename,str(af),str(bf)
+
+
+@app.get("/trigger_the_data_fecher_for_me")
+async def continuous_function(page: int,paginate: int,Tenant: str):
+    print("data fetcher running.....")
+
+
+    # Update the payload for each page
 
+    #url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
+    url = "https://v1.api.curfox.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
+
+
+    payload = {}
+    headers = {
+        'Accept': 'application/json',
+        'X-Tenant': Tenant #'royalexpress'
+    }
+
+    response = requests.request("GET", url, headers=headers, data=payload)
+
+    # Sample JSON response
+    json_response = response.json()
+    # Extracting 'data' for conversion
+    data = json_response['data']
+    data_count = len(data)
 
+    df = pd.json_normalize(data)
+
+
+    df = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
+    print("data collected from page : "+str(page))
+    #data.to_csv("new.csv")
+
+    try:
+        file_path = f'model/{Tenant}trainer_data_v1.csv' # Replace with your file path
+        source_csv = pd.read_csv(file_path)
+        new_data = df
+        combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
+
+        combined_df_final.to_csv(f"model/{Tenant}trainer_data_v1.csv")
+        print("data added")
+        message = "data added"
+    except:
+
+        df.to_csv(f"model/{Tenant}trainer_data_v1.csv")
+        print("data created")
+        message = "data created"
+
+    return {"message":message,"page_number":page,"data_count":data_count,'X-Tenant': Tenant}
+
 @app.get("/trigger_the_data_fecher")
 async def your_continuous_function(page: int,paginate: int,Tenant: str):
     print("data fetcher running.....")
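The hunk above adds a second fetcher endpoint that pages through the order-list API and appends the rows to {Tenant}trainer_data_v1.csv. A minimal client call, assuming a local uvicorn server on port 8000 (host, port, and the sample values are assumptions; 'royalexpress' is taken from the comment in the diff):

import requests

# Route and query parameter names come from the diff; everything else is assumed.
resp = requests.get(
    "http://localhost:8000/trigger_the_data_fecher_for_me",
    params={"page": 1, "paginate": 100, "Tenant": "royalexpress"},
)
print(resp.json())  # {"message": ..., "page_number": 1, "data_count": ..., "X-Tenant": ...}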
@@ -153,17 +207,18 @@ async def your_continuous_function(page: int,paginate: int,Tenant: str):
     #data.to_csv("new.csv")
 
     try:
-        file_path = f'model/{Tenant}
+        file_path = f'model/{Tenant}trainer_data_.csv' # Replace with your file path
         source_csv = pd.read_csv(file_path)
         new_data = df
         combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
 
-        combined_df_final.to_csv(f"model/{Tenant}
+        combined_df_final.to_csv(f"model/{Tenant}trainer_data_.csv")
         print("data added")
     except:
 
-        df.to_csv(f"model/{Tenant}
+        df.to_csv(f"model/{Tenant}trainer_data_.csv")
         print("data created")
+
     return {"message":"done","page_number":page,"data_count":data_count,'X-Tenant': Tenant}
 
 
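Both fetcher routes wrap the CSV append in a bare except:, which also swallows errors unrelated to a missing file (for example a malformed CSV). A narrower sketch of the same append-or-create logic; file_path and df are the variables from the route above:

import pandas as pd

try:
    source_csv = pd.read_csv(file_path)
    combined_df_final = pd.concat([source_csv, df], ignore_index=True)
    combined_df_final.to_csv(file_path, index=False)
    message = "data added"
except FileNotFoundError:
    # Only the missing-file case creates a fresh CSV; other errors surface.
    df.to_csv(file_path, index=False)
    message = "data created"

Passing index=False also stops pandas from writing an unnamed index column that would otherwise accumulate on every append/rewrite cycle.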
@@ -171,12 +226,19 @@ async def your_continuous_function(page: int,paginate: int,Tenant: str):
 
 @app.get("/get_latest_model_updated_time")
 async def model_updated_time(Tenant: str):
+    import multiprocessing
+
+    # Get the number of available CPU cores
+    available_cores = multiprocessing.cpu_count()
     try:
         m_time_encoder = os.path.getmtime(f'model/{Tenant}_curfox_encoders.joblib')
         m_time_model = os.path.getmtime(f'model/{Tenant}_curfox_xgb_model.joblib')
-        return {
+        return {
+        "Tenant":Tenant,
         "base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
-        "last model updated time":datetime.datetime.fromtimestamp(m_time_model)
+        "last model updated time":datetime.datetime.fromtimestamp(m_time_model),
+        "Number of available CPU cores": available_cores
+        }
     except:
         return {"no model found so first trained the model using data fecther"}
 
@@ -185,20 +247,21 @@ async def model_updated_time(Tenant: str):
 
 
 # Endpoint for making predictions
+
 @app.post("/predict")
 def predict(
     Tenant: str,
     customer_name: str,
     customer_address: str,
     customer_phone: str,
-    customer_email: str,
     cod:str,
     weight: str,
-    pickup_address: str,
     origin_city_name: str,
     destination_city_name: str,
-
-
+    created_at: str,
+    customer_email: str,
+    pickup_address: str,
+    origin_country: str
 ):
 
     try:
@@ -219,13 +282,13 @@ def predict(
         return [encoder.transform([x])[0] if x in classes else -1 for x in column]
 
 
-
+
     input_data = {
         'customer_name': customer_name,
         'customer_address': customer_address,
-        'customer_phone':
-        'cod':
-        'weight':
+        'customer_phone': customer_phone, #'customer_email': customer_email,
+        'cod': int(cod),
+        'weight': int(weight),
         'origin_city.name':origin_city_name,
         'destination_city.name':destination_city_name,
         'created_at':created_at
@@ -249,4 +312,4 @@ def predict(
     if predicted_status == "RETURN TO CLIENT":
         probability = 100 - probability
 
-    return {"Probability": round(probability,2),"
+    return {"predicted_status":predicted_status,"Probability": round(probability,2),"Tenant_new":Tenant}
|