cmasukume commited on
Commit
045d34f
·
verified ·
1 Parent(s): f93d476

Upload 21 files

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official slim Python image from Docker Hub.
# NOTE(review): the original contained two FROM lines (python:3.9-slim
# followed by python:3.8-slim), which made the first stage dead weight;
# a single base image is kept here.
FROM python:3.9-slim

# Set the working directory in the container.
WORKDIR /app

# Copy only the requirements file first so the dependency-install layer
# is cached until requirements.txt actually changes.
COPY requirements.txt .

# Install the dependencies without keeping the pip cache.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container.
COPY . .

# Expose the port the app runs on.
EXPOSE 5000

# Define the command to run the application.
CMD ["python", "app.py"]
Example of Fraudulent & Acceptable Transaction.txt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Example of Fraudulent Transaction Request
2
+ {
3
+ "Time": 5000,
4
+ "V1": -2.3122265423263,
5
+ "V2": 1.95199201064142,
6
+ "V3": -1.60985073229792,
7
+ "V4": 3.9979055875468,
8
+ "V5": -0.522187864667764,
9
+ "V6": -1.42654531915544,
10
+ "V7": -2.53738730624579,
11
+ "V8": 1.39165724822242,
12
+ "V9": -2.77008927779529,
13
+ "V10": -2.77227214467803,
14
+ "V11": 3.20203320709994,
15
+ "V12": -2.89990738824397,
16
+ "V13": -0.595221881324185,
17
+ "V14": -4.28925378244238,
18
+ "V15": -1.72471928638227,
19
+ "V16": -1.92786884505502,
20
+ "V17": -1.32909757135474,
21
+ "V18": 0.393608591647132,
22
+ "V19": 0.233809776745693,
23
+ "V20": -0.108300452035545,
24
+ "V21": 0.00527359678253453,
25
+ "V22": -0.190320518742841,
26
+ "V23": -1.17557533186321,
27
+ "V24": 0.647376034602038,
28
+ "V25": -0.221928844458407,
29
+ "V26": 0.0627228487293033,
30
+ "V27": 0.0614576285006353,
31
+ "V28": -0.142382874811225,
32
+ "Amount": 0.0
33
+ }
34
+
35
+
36
+ Example Prediction Request
37
+
38
+ {
39
+ "Time": 1000,
40
+ "V1": -1.3598071336738,
41
+ "V2": -0.0727811733098497,
42
+ "V3": 2.53634673796914,
43
+ "V4": 1.37815522427443,
44
+ "V5": -0.338320769942518,
45
+ "V6": 0.462387777762292,
46
+ "V7": 0.239598554061257,
47
+ "V8": 0.0986979012610507,
48
+ "V9": 0.363786969611213,
49
+ "V10": 0.0907941719789316,
50
+ "V11": -0.551599533260813,
51
+ "V12": -0.617800855762348,
52
+ "V13": -0.991389847235408,
53
+ "V14": -0.311169353699879,
54
+ "V15": 1.46817697209427,
55
+ "V16": -0.470400525259478,
56
+ "V17": 0.207971241929242,
57
+ "V18": 0.0257905801985591,
58
+ "V19": 0.403992960255733,
59
+ "V20": 0.251412098239705,
60
+ "V21": -0.018306777944153,
61
+ "V22": 0.277837575558899,
62
+ "V23": -0.110473910188767,
63
+ "V24": 0.0669280749146731,
64
+ "V25": 0.128539358273528,
65
+ "V26": -0.189114843888824,
66
+ "V27": 0.133558376740387,
67
+ "V28": -0.0210530534538215,
68
+ "Amount": 149.62
69
+ }
Fraud_Detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Procfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ web: python app.py
2
+
README.md CHANGED
@@ -1,13 +1 @@
1
- ---
2
- title: Fraud Detection
3
- emoji: 🏆
4
- colorFrom: gray
5
- colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: 1.39.0
8
- app_file: app.py
9
- pinned: false
10
- short_description: fraud detection model and app
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Fraud-Detection
 
 
 
 
 
 
 
 
 
 
 
 
config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "subscription_id": "5ac6f187-b9ed-4088-8b2f-091417aa1f41",
3
+ "resource_group": "resource_fraud_detection",
4
+ "workspace_name": "Fraud_Detection"
5
+ }
fraud_detection.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""Fraud Detection.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1JTnx_TGE4NuRxerkz1nbP9jrCP59prdN

Trains a logistic-regression fraud classifier on the credit card dataset
and pickles the fitted model to model.pkl.
"""

# Import all the libraries needed for this model.
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Read the dataset holding the credit card transactions.
credit_card_data = pd.read_csv('/content/creditcard.csv')

# Display the first and last 5 rows of the dataset.
credit_card_data.head()
credit_card_data.tail()

# Information about the dataset: datatypes used and whether it contains
# null values or not.
credit_card_data.info()

# Number of missing values in each column.
credit_card_data.isnull().sum()

# Distribution between acceptable and fraudulent transactions.
credit_card_data['Class'].value_counts()

"""The counts above show the dataset is highly imbalanced.

0 ---> acceptable transaction

1 ---> fraudulent transaction
"""

# Data separation for analysis.
acceptable = credit_card_data[credit_card_data.Class == 0]
fraudulent = credit_card_data[credit_card_data.Class == 1]

print(acceptable.shape)
print(fraudulent.shape)

# Statistical measures per class.
acceptable.Amount.describe()
fraudulent.Amount.describe()

# Per-class mean of every feature for comparison.
credit_card_data.groupby('Class').mean()

"""Data Sampling"""

# Before sampling, compute the percentage of acceptable and fraudulent
# transactions in the dataset.
classes = credit_card_data['Class'].value_counts()
total = credit_card_data['Class'].count()
acceptable_percent = classes[0] / total * 100
fraudulent_percent = classes[1] / total * 100
print(acceptable_percent)
print(fraudulent_percent)

labels = ['Acceptable', 'Fraudulent']
count = credit_card_data.value_counts(credit_card_data['Class'])
count.plot(kind="bar", rot=0)
plt.title("Labels")
plt.ylabel("Count")
plt.xticks(range(2), labels)
plt.show()

"""Create a balanced sample containing as many acceptable transactions
as there are fraudulent ones (under-sampling the majority class)."""

# len(fraudulent) instead of the hard-coded 492 so the script keeps
# working if the dataset changes size.
acceptable_sample = acceptable.sample(n=len(fraudulent))

"""Concatenate the sample with the existing fraudulent dataframe."""

# axis = 0 stacks rows; axis = 1 would join columns.
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# View the first / last 5 rows of the balanced dataset.
new_dataset.head()
new_dataset.tail()

new_dataset['Class'].value_counts()
new_dataset.groupby('Class').mean()

"""Separate the features from the label."""

# x represents the features (content of the dataset), y the class.
x = new_dataset.drop(columns='Class', axis=1)
y = new_dataset['Class']

# The data except the class column.
print(x)

# The classes.
print(y)

"""Split the dataset into training and testing sets (80% / 20%)."""

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=2)

# Original data vs. training vs. test split sizes.
print(x.shape, x_train.shape, x_test.shape)

"""Training the Model.

Logistic Regression Model
"""

model = LogisticRegression()

# Train the logistic regression model using the training data.
model.fit(x_train, y_train)

# Accuracy on the training data (accuracy_score takes y_true first).
# If the accuracy is above 80% the model is considered usable.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(y_train, x_train_prediction)
print('Accuracy on the Training data : ', training_data_accuracy * 100)

# Accuracy on the held-out test data.
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(y_test, x_test_prediction)
print('Accuracy on the Test data : ', test_data_accuracy * 100)

# Persist the fitted model for the serving apps.
pickle.dump(model, open('model.pkl', 'wb'))
mlflow.db ADDED
Binary file (225 kB). View file
 
mlflowtest.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn  # required: mlflow.sklearn.log_model is called below
import mlflow.pyfunc
import requests
import json
import os

# Configure the tracking store *before* touching any experiment.  The
# original called set_experiment first, so the experiment was created
# against the default store instead of this one.
mlflow_tracking_uri = "file:///C:/Fraud_Detection/mlruns"
os.environ['MLFLOW_TRACKING_URI'] = mlflow_tracking_uri
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Create or select the desired experiment.
experiment = mlflow.set_experiment("fraud_detection")
print("Experiment ID:", experiment.experiment_id)
print("Experiment Name:", experiment.name)

# Read the dataset (raw string so Windows backslashes are not treated
# as escape sequences).
credit_card_data = pd.read_csv(r'C:\Fraud_Detection\creditcard.csv')

# Display basic information.
print(credit_card_data.head())
print(credit_card_data.tail())
credit_card_data.info()
print(credit_card_data.isnull().sum())
print(credit_card_data['Class'].value_counts())

# Separate acceptable (0) and fraudulent (1) transactions.
acceptable = credit_card_data[credit_card_data.Class == 0]
fraudulent = credit_card_data[credit_card_data.Class == 1]

print(acceptable.shape)
print(fraudulent.shape)

# Statistical measures.
print(acceptable.Amount.describe())
print(fraudulent.Amount.describe())

print(credit_card_data.groupby('Class').mean())

# Data sampling: under-sample the majority class to balance the set.
acceptable_sample = acceptable.sample(n=492)
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# Split features/label, then training and test sets.
x = new_dataset.drop(columns='Class', axis=1)
y = new_dataset['Class']

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=2)
print(x.shape, x_train.shape, x_test.shape)

# Training the model.
model = LogisticRegression(max_iter=1000000)
model.fit(x_train, y_train)

# Accuracy on training and test data.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(x_train_prediction, y_train)
print('Accuracy on the Training data : ', training_data_accuracy * 100)

x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)
print('Accuracy on the Test data : ', test_data_accuracy * 100)

# Manually start and end the MLflow run; try/finally guarantees the
# run is closed even if logging fails.
run = mlflow.start_run()
try:
    mlflow.sklearn.log_model(model, "model")
    mlflow.log_metric("training_data_accuracy", training_data_accuracy)
    mlflow.log_metric("test_data_accuracy", test_data_accuracy)
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/model"
finally:
    mlflow.end_run()

print(f"Model logged to MLflow with run_id: {run_id}")

# Reload the logged model and smoke-test a few predictions.
loaded_model = mlflow.pyfunc.load_model(model_uri)
sample_input = x_test.iloc[:5].values.tolist()
predictions = loaded_model.predict(sample_input)
print("Predictions:", predictions)

# Alternatively, serve the model over HTTP with Flask.
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/')
def index():
    return "Welcome to the model serving endpoint!"

# Define prediction route.  Expects JSON of the form
# {"data": [[...feature row...], ...]}.
@app.route('/predict', methods=['POST'])
def predict():
    data = request.json['data']
    predictions = loaded_model.predict(data)
    return jsonify(predictions.tolist())

# Run the Flask app.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)

# activate a virtual environment ---C:\Fraud_Detection\Scripts\activate
# to display the mlflow dashboard ---mlflow ui --backend-store-uri file:///C:/Fraud_Detection/mlruns
# cancel ---ctrl c
+ # cancel ---ctrl c
model.pkl ADDED
Binary file (1.21 kB). View file
 
pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Users\masukumec\AppData\Local\Programs\Python\Python312
2
+ include-system-site-packages = false
3
+ version = 3.12.4
4
+ executable = C:\Users\masukumec\AppData\Local\Programs\Python\Python312\python.exe
5
+ command = C:\Users\masukumec\AppData\Local\Programs\Python\Python312\python.exe -m venv C:\Fraud_Detection
register.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from azureml.core import Workspace, Model

# Path to the locally trained model artifact (raw string literal so the
# Windows backslashes are taken verbatim).
model_path = r"C:\Fraud_Detection\model.pkl"

# Connect to the Azure ML workspace described by config.json.
ws = Workspace.from_config()

# Register the model.  NOTE(review): Azure ML model names may not
# contain spaces, so the original name "Fraud Detection" would be
# rejected by the service; a hyphen is used instead.
model = Model.register(
    model_path=model_path,
    model_name="Fraud-Detection",
    workspace=ws
)
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flask==1.1.2
2
+ scikit-learn
3
+ scipy
4
+ numpy
5
+ pandas
6
+ matplotlib
7
+ seaborn
8
+ schedule
9
+ jupyter
10
+ mlflow
11
+ requests
12
+ jinja2==3.0.3
13
+ streamlit
14
+
run.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# run.py
#
# Entry point that starts the Flask application defined in app.py.

from app import app

if __name__ == '__main__':
    # debug=True enables the auto-reloader and the interactive debugger;
    # not suitable for production use.
    app.run(debug=True)
scheduler.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import subprocess
import schedule
import time

def job():
    """Run one retraining cycle by invoking train_log.py in the venv.

    subprocess.run with an argument list avoids shell string parsing
    (the original used os.system with a single shell command string).
    """
    subprocess.run([
        'c:/Fraud_Detection/Scripts/python.exe',
        'c:/Fraud_Detection/train_log.py',
        '--data', 'c:/Fraud_Detection/creditcard.csv',
    ], check=False)

# Schedule the retraining job every 30 days.
schedule.every(30).days.do(job)

# Busy-wait loop that fires pending jobs once per second.
while True:
    schedule.run_pending()
    time.sleep(1)
+
score.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from azureml.core import Workspace, Model, Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
# Missing in the original: CondaDependencies was used below without
# being imported, which raised a NameError before deployment started.
from azureml.core.conda_dependencies import CondaDependencies

# Initialize the workspace from config.json.
ws = Workspace.from_config()

# Load the registered model by name.
model = Model(ws, name="model.pkl")  # Replace with your model's registered name

# Define the environment (if not using the YAML method).
env = Environment(name="fraud_detection_env")
deps = CondaDependencies.create(
    pip_packages=["azureml-core", "scikit-learn", "joblib", "numpy"])
env.python.conda_dependencies = deps

# Inference configuration: entry script plus environment.
inference_config = InferenceConfig(entry_script="score.py", environment=env)

# Deployment configuration for Azure Container Instances.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Deploy the model as a web service.
service = Model.deploy(workspace=ws,
                       name="fraud-detection-service",
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config)

service.wait_for_deployment(show_output=True)

print(f"Service deployed at: {service.scoring_uri}")
simulate_data.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import numpy as np
import os

def simulate_monthly_data(base_data_path, output_dir, months=12):
    """Generate `months` drifted copies of a base credit-card CSV.

    Each monthly file is the base data plus Gaussian noise on the
    feature columns, simulating gradual data drift.  The 'Class' label
    column (when present) is left untouched: the original added noise
    to every column, which corrupted the 0/1 labels into floats.

    Args:
        base_data_path: Path to the base CSV file.
        output_dir: Directory the monthly CSVs are written to
            (created if it does not exist).
        months: Number of monthly files to generate.
    """
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(output_dir, exist_ok=True)

    base_data = pd.read_csv(base_data_path)
    # Drift only the numeric feature columns, never the label.
    feature_cols = [c for c in base_data.columns if c != 'Class']
    for month in range(1, months + 1):
        new_data = base_data.copy()
        drift_factor = np.random.normal(
            0, 0.1, (len(new_data), len(feature_cols)))
        new_data[feature_cols] = new_data[feature_cols] + drift_factor
        new_data_path = os.path.join(
            output_dir, f'creditcard_month_{month}.csv')
        new_data.to_csv(new_data_path, index=False)
        print(f"Simulated data for month {month} saved to {new_data_path}")

if __name__ == "__main__":
    simulate_monthly_data('C:/Fraud_Detection/creditcard.csv',
                          'C:/Fraud_Detection/simulated_data')

#'C:/Fraud_Detection/creditcard.csv',
slapp.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import pickle
import pandas as pd

# Feature names in the column order the model was trained on.
FEATURE_NAMES = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']

# Load the saved model once at startup; a load failure is reported but
# the page still renders.
try:
    model = pickle.load(open('model.pkl', 'rb'))
except Exception as e:
    st.error(f"Error loading model: {e}")
    model = None

# Streamlit app
st.title("Fraud Detection API")
st.write("Enter the transaction details to check if it's acceptable or fraudulent.")

# One numeric input per feature — identical widgets to listing all 30
# by hand, but generated in a loop to avoid the copy-paste block.
inputs = {name: st.number_input(name) for name in FEATURE_NAMES}

# Prepare a button for prediction.
if st.button('Predict'):
    if model is None:
        # Without this guard, predict() below would raise an
        # AttributeError whenever the model failed to load.
        st.error('Error: model is not loaded.')
    else:
        try:
            # Build a single-row DataFrame from the input widgets.
            transaction_data = pd.DataFrame(
                {name: [value] for name, value in inputs.items()})

            # Perform prediction using the loaded model.
            prediction = model.predict(transaction_data)

            # Prepare response: class 0 is acceptable, anything else
            # is flagged as fraudulent.
            if prediction[0] == 0:
                st.success('Prediction: Acceptable transaction')
            else:
                st.error('Prediction: Fraudulent transaction')

        except Exception as e:
            st.error(f'Error: {str(e)}')
train_and_log.bat ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM Start the MLflow tracking server in its own window.
start "MLflow Server" cmd /c "mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000"

REM Wait a few seconds to give the server time to come up.
timeout /t 10 /nobreak

REM Run the training script with the virtualenv's Python.
c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"

REM Force-terminate the MLflow server window.  /F added: without it,
REM taskkill only sends a close request, which the console window can
REM ignore, leaving the server running.
taskkill /F /FI "WINDOWTITLE eq MLflow Server*"

echo Training process completed.
pause
train_log.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import os
import time

import mlflow
import mlflow.sklearn
import pandas as pd
import schedule
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def main(data_path):
    """Train, evaluate, log and register a fraud-detection model.

    Args:
        data_path: Path to the creditcard CSV (must contain a 'Class'
            label column).
    """
    print(f"Reading data from: {data_path}")
    # Point MLflow at the local tracking server.
    mlflow.set_tracking_uri("http://127.0.0.1:5000")  # Replace with your MLflow tracking server URI

    # Load data; a read failure aborts this cycle without crashing the
    # scheduler loop.
    try:
        data = pd.read_csv(data_path)
    except Exception as e:
        print(f"Error reading the data file: {e}")
        return

    # Preprocess and split data.
    X = data.drop(columns='Class')
    y = data['Class']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Scale features; fit on the training split only to avoid leakage.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train model.
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Evaluate model.
    train_accuracy = accuracy_score(y_train, model.predict(X_train))
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    # Log the run and register the resulting model with MLflow.
    with mlflow.start_run() as run:
        mlflow.log_param('random_state', 42)
        mlflow.log_metric('train_accuracy', train_accuracy)
        mlflow.log_metric('test_accuracy', test_accuracy)
        mlflow.sklearn.log_model(model, 'model')

        mlflow.register_model(
            model_uri=f"runs:/{run.info.run_id}/model",
            name="LogisticRegressionModel"
        )

    print(f"Train Accuracy: {train_accuracy}")
    print(f"Test Accuracy: {test_accuracy}")


def job():
    """Parse the CLI arguments and run one training cycle."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True,
                        help="Path to the training data")
    args = parser.parse_args()

    # Print args.data for debugging.
    print(f"Data path provided: {args.data}")

    # Skip the cycle (with a message) if the file is missing.
    if not os.path.isfile(args.data):
        print(f"Error: The file {args.data} does not exist.")
    else:
        main(args.data)


if __name__ == "__main__":
    # Train once immediately.  The original only *scheduled* the job,
    # so the first training would not have happened until 30 days after
    # startup (and train_and_log.bat would have blocked forever).
    job()

    # Then retrain every 30 days.
    schedule.every(30).days.do(job)
    while True:
        schedule.run_pending()
        time.sleep(1)

# run:
# mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000
# then run:
# c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"