Spaces:
Sleeping
Sleeping
Upload 21 files
Browse files- .gitattributes +1 -35
- Dockerfile.yml +26 -0
- Example of Fraudulent & Acceptable Transaction.txt +69 -0
- Fraud_Detection.ipynb +0 -0
- Procfile +2 -0
- README.md +1 -13
- config.json +5 -0
- fraud_detection.py +143 -0
- mlflow.db +0 -0
- mlflowtest.py +115 -0
- model.pkl +0 -0
- pyvenv.cfg +5 -0
- register.py +13 -0
- requirements.txt +14 -0
- run.py +6 -0
- scheduler.py +14 -0
- score.py +31 -0
- simulate_data.py +22 -0
- slapp.py +96 -0
- train_and_log.bat +15 -0
- train_log.py +85 -0
.gitattributes
CHANGED
|
@@ -1,35 +1 @@
|
|
| 1 |
-
*.
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile.yml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use the official Python image from Docker Hub.
# NOTE(review): the original contained two FROM lines (python:3.9-slim, then
# python:3.8-slim). A second FROM starts a new build stage and discards the
# first, so the image was silently built on 3.8. Kept a single base image.
FROM python:3.9-slim

# Set the working directory in the container.
WORKDIR /app

# Copy the requirements file first so the dependency layer is cached
# independently of application-code changes.
COPY requirements.txt .

# Install the dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container.
COPY . .

# Expose the port the app runs on.
EXPOSE 5000

# Define the command to run the application.
# NOTE(review): assumes an app.py exists at the image root — the upload only
# shows run.py/slapp.py; confirm the entry-point file name.
CMD ["python", "app.py"]
Example of Fraudulent & Acceptable Transaction.txt
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Example of Fraudulent Transaction Request
|
| 2 |
+
{
|
| 3 |
+
"Time": 5000,
|
| 4 |
+
"V1": -2.3122265423263,
|
| 5 |
+
"V2": 1.95199201064142,
|
| 6 |
+
"V3": -1.60985073229792,
|
| 7 |
+
"V4": 3.9979055875468,
|
| 8 |
+
"V5": -0.522187864667764,
|
| 9 |
+
"V6": -1.42654531915544,
|
| 10 |
+
"V7": -2.53738730624579,
|
| 11 |
+
"V8": 1.39165724822242,
|
| 12 |
+
"V9": -2.77008927779529,
|
| 13 |
+
"V10": -2.77227214467803,
|
| 14 |
+
"V11": 3.20203320709994,
|
| 15 |
+
"V12": -2.89990738824397,
|
| 16 |
+
"V13": -0.595221881324185,
|
| 17 |
+
"V14": -4.28925378244238,
|
| 18 |
+
"V15": -1.72471928638227,
|
| 19 |
+
"V16": -1.92786884505502,
|
| 20 |
+
"V17": -1.32909757135474,
|
| 21 |
+
"V18": 0.393608591647132,
|
| 22 |
+
"V19": 0.233809776745693,
|
| 23 |
+
"V20": -0.108300452035545,
|
| 24 |
+
"V21": 0.00527359678253453,
|
| 25 |
+
"V22": -0.190320518742841,
|
| 26 |
+
"V23": -1.17557533186321,
|
| 27 |
+
"V24": 0.647376034602038,
|
| 28 |
+
"V25": -0.221928844458407,
|
| 29 |
+
"V26": 0.0627228487293033,
|
| 30 |
+
"V27": 0.0614576285006353,
|
| 31 |
+
"V28": -0.142382874811225,
|
| 32 |
+
"Amount": 0.0
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
Example of Acceptable Transaction Request
|
| 37 |
+
|
| 38 |
+
{
|
| 39 |
+
"Time": 1000,
|
| 40 |
+
"V1": -1.3598071336738,
|
| 41 |
+
"V2": -0.0727811733098497,
|
| 42 |
+
"V3": 2.53634673796914,
|
| 43 |
+
"V4": 1.37815522427443,
|
| 44 |
+
"V5": -0.338320769942518,
|
| 45 |
+
"V6": 0.462387777762292,
|
| 46 |
+
"V7": 0.239598554061257,
|
| 47 |
+
"V8": 0.0986979012610507,
|
| 48 |
+
"V9": 0.363786969611213,
|
| 49 |
+
"V10": 0.0907941719789316,
|
| 50 |
+
"V11": -0.551599533260813,
|
| 51 |
+
"V12": -0.617800855762348,
|
| 52 |
+
"V13": -0.991389847235408,
|
| 53 |
+
"V14": -0.311169353699879,
|
| 54 |
+
"V15": 1.46817697209427,
|
| 55 |
+
"V16": -0.470400525259478,
|
| 56 |
+
"V17": 0.207971241929242,
|
| 57 |
+
"V18": 0.0257905801985591,
|
| 58 |
+
"V19": 0.403992960255733,
|
| 59 |
+
"V20": 0.251412098239705,
|
| 60 |
+
"V21": -0.018306777944153,
|
| 61 |
+
"V22": 0.277837575558899,
|
| 62 |
+
"V23": -0.110473910188767,
|
| 63 |
+
"V24": 0.0669280749146731,
|
| 64 |
+
"V25": 0.128539358273528,
|
| 65 |
+
"V26": -0.189114843888824,
|
| 66 |
+
"V27": 0.133558376740387,
|
| 67 |
+
"V28": -0.0210530534538215,
|
| 68 |
+
"Amount": 149.62
|
| 69 |
+
}
|
Fraud_Detection.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Procfile
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
web: python app.py
|
| 2 |
+
|
README.md
CHANGED
|
@@ -1,13 +1 @@
|
|
| 1 |
-
|
| 2 |
-
title: Fraud Detection
|
| 3 |
-
emoji: 🏆
|
| 4 |
-
colorFrom: gray
|
| 5 |
-
colorTo: indigo
|
| 6 |
-
sdk: streamlit
|
| 7 |
-
sdk_version: 1.39.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
short_description: fraud detection model and app
|
| 11 |
-
---
|
| 12 |
-
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
# Fraud-Detection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"subscription_id": "5ac6f187-b9ed-4088-8b2f-091417aa1f41",
|
| 3 |
+
"resource_group": "resource_fraud_detection",
|
| 4 |
+
"workspace_name": "Fraud_Detection"
|
| 5 |
+
}
|
fraud_detection.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -*- coding: utf-8 -*-
"""Fraud Detection.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1JTnx_TGE4NuRxerkz1nbP9jrCP59prdN

Dependency imports.
"""

# Libraries for data handling, plotting, splitting, modelling, and scoring.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the credit-card transaction dataset.
credit_card_data = pd.read_csv('/content/creditcard.csv')

# First and last five rows (notebook-style inspection; no-ops in a script).
credit_card_data.head()
credit_card_data.tail()

# Column dtypes and per-column null counts.
credit_card_data.info()
credit_card_data.isnull().sum()

# Class distribution between acceptable and fraudulent transactions.
credit_card_data['Class'].value_counts()

"""The counts above show the dataset is highly imbalanced.

0 ---> represents an acceptable transaction

1 ---> represents a fraudulent transaction
"""

# Separate the data by class for analysis.
acceptable = credit_card_data[credit_card_data.Class == 0]
fraudulent = credit_card_data[credit_card_data.Class == 1]

print(acceptable.shape)
print(fraudulent.shape)

# Statistical measures of the transaction amounts per class.
acceptable.Amount.describe()

fraudulent.Amount.describe()

# Per-class feature means.
credit_card_data.groupby('Class').mean()

"""Data sampling"""

# Before sampling: percentage of acceptable vs fraudulent transactions.
classes = credit_card_data['Class'].value_counts()
acceptable_percent = classes[0] / credit_card_data['Class'].count() * 100
fraudulent_percent = classes[1] / credit_card_data['Class'].count() * 100
print(acceptable_percent)
print(fraudulent_percent)

labels = ['Acceptable', 'Fraudulent']
count = credit_card_data.value_counts(credit_card_data['Class'])
count.plot(kind="bar", rot=0)
plt.title("Labels")
plt.ylabel("Count")
plt.xticks(range(2), labels)
plt.show()

"""Create a sample dataset with a balanced distribution of both classes."""

acceptable_sample = acceptable.sample(n=492)

"""Concatenate the sample with the existing fraudulent dataframe."""

# axis=0 stacks rows; axis=1 would join columns.
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# Inspect the balanced dataset.
new_dataset.head()

new_dataset.tail()

new_dataset['Class'].value_counts()

new_dataset.groupby('Class').mean()

"""Separate features from labels."""

# x holds the feature columns; y holds the class labels.
x = new_dataset.drop(columns='Class', axis=1)
y = new_dataset['Class']

# Features without the class column.
print(x)

# The class labels.
print(y)

"""Split the dataset into training and testing sets (80% / 20%)."""

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=2)

# Shapes of the original, training, and test splits.
print(x.shape, x_train.shape, x_test.shape)

"""Train the model.

Logistic regression model.
"""

model = LogisticRegression()

# Fit the logistic regression model on the training data.
model.fit(x_train, y_train)

# Accuracy on the training data (above 80% is considered good here).
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(x_train_prediction, y_train)

print('Accuracy on the Training data : ', training_data_accuracy * 100)

# Accuracy on the held-out test data.
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)

print('Accuracy on the Test data : ', test_data_accuracy * 100)

# Persist the trained model to disk.
import pickle
pickle.dump(model, open('model.pkl', 'wb'))
mlflow.db
ADDED
|
Binary file (225 kB). View file
|
|
|
mlflowtest.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import requests
import json
import os
import mlflow.pyfunc

# Point MLflow at the local tracking store *before* creating the experiment.
# The original called set_experiment first, so the experiment was registered
# in the default store rather than the intended one.
mlflow_tracking_uri = "file:///C:/Fraud_Detection/mlruns"
os.environ['MLFLOW_TRACKING_URI'] = mlflow_tracking_uri.replace('\\', '/')
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Create or select the experiment.
experiment = mlflow.set_experiment("fraud_detection")
print("Experiment ID:", experiment.experiment_id)
print("Experiment Name:", experiment.name)

# Read the dataset. Raw string: the original non-raw literal contained the
# invalid escape sequences '\F' and '\c'; the resulting path is identical.
credit_card_data = pd.read_csv(r'C:\Fraud_Detection\creditcard.csv')

# Display basic information.
print(credit_card_data.head())
print(credit_card_data.tail())
credit_card_data.info()
print(credit_card_data.isnull().sum())
print(credit_card_data['Class'].value_counts())

# Split by class: 0 = acceptable, 1 = fraudulent.
acceptable = credit_card_data[credit_card_data.Class == 0]
fraudulent = credit_card_data[credit_card_data.Class == 1]

print(acceptable.shape)
print(fraudulent.shape)

# Statistical measures of the transaction amounts.
print(acceptable.Amount.describe())
print(fraudulent.Amount.describe())

print(credit_card_data.groupby('Class').mean())

# Undersample the majority class to balance the dataset.
acceptable_sample = acceptable.sample(n=492)
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# Features/labels and an 80/20 stratified split.
x = new_dataset.drop(columns='Class', axis=1)
y = new_dataset['Class']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=2)
print(x.shape, x_train.shape, x_test.shape)

# Train the model.
model = LogisticRegression(max_iter=1000000)
model.fit(x_train, y_train)

# Accuracy on the training data.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(x_train_prediction, y_train)
print('Accuracy on the Training data : ', training_data_accuracy * 100)

# Accuracy on the held-out test data.
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)
print('Accuracy on the Test data : ', test_data_accuracy * 100)

# Manually start/end the run so run_id stays available afterwards.
run = mlflow.start_run()
try:
    mlflow.sklearn.log_model(model, "model")
    mlflow.log_metric("training_data_accuracy", training_data_accuracy)
    mlflow.log_metric("test_data_accuracy", test_data_accuracy)
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/model"
finally:
    mlflow.end_run()

print(f"Model logged to MLflow with run_id: {run_id}")

# Reload the logged model through the generic pyfunc interface.
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Example of making predictions using the loaded model.
sample_input = x_test.iloc[:5].values.tolist()
predictions = loaded_model.predict(sample_input)
print("Predictions:", predictions)

# Serve the model through a small Flask app.
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/')
def index():
    return "Welcome to the model serving endpoint!"

# Prediction route: expects JSON of the form {"data": [[...], ...]}.
@app.route('/predict', methods=['POST'])
def predict():
    data = request.json['data']
    predictions = loaded_model.predict(data)
    return jsonify(predictions.tolist())

# Run the Flask app.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)

# activate a virtual environment ---C:\Fraud_Detection\Scripts\activate
# to display the mlflow dashboard ---mlflow ui --backend-store-uri file:///C:/Fraud_Detection/mlruns
# cancel ---ctrl c
model.pkl
ADDED
|
Binary file (1.21 kB). View file
|
|
|
pyvenv.cfg
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
home = C:\Users\masukumec\AppData\Local\Programs\Python\Python312
|
| 2 |
+
include-system-site-packages = false
|
| 3 |
+
version = 3.12.4
|
| 4 |
+
executable = C:\Users\masukumec\AppData\Local\Programs\Python\Python312\python.exe
|
| 5 |
+
command = C:\Users\masukumec\AppData\Local\Programs\Python\Python312\python.exe -m venv C:\Fraud_Detection
|
register.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Register the trained fraud-detection model with an Azure ML workspace."""

from azureml.core import Workspace, Model

# Path to the serialized model (raw string keeps the backslashes literal).
model_path = r"C:\Fraud_Detection\model.pkl"

# Connect to the Azure ML workspace described by the local config.json.
ws = Workspace.from_config()

# Register the model under the given name.
model = Model.register(
    model_path=model_path,
    model_name="Fraud Detection",
    workspace=ws,
)
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask==1.1.2
|
| 2 |
+
scikit-learn
|
| 3 |
+
scipy
|
| 4 |
+
numpy
|
| 5 |
+
pandas
|
| 6 |
+
matplotlib
|
| 7 |
+
seaborn
|
| 8 |
+
schedule
|
| 9 |
+
jupyter
|
| 10 |
+
mlflow
|
| 11 |
+
requests
|
| 12 |
+
jinja2==3.0.3
|
| 13 |
+
streamlit
|
| 14 |
+
|
run.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# run.py — development entry point for the Flask application.

from app import app

if __name__ == '__main__':
    # Debug mode enables the interactive debugger and auto-reloader.
    app.run(debug=True)
scheduler.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Periodically retrain the fraud-detection model via an OS-level call."""

import os
import schedule
import time


def job():
    """Run one training cycle using the virtualenv's interpreter."""
    os.system('c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"')


# Retrain every 30 days.
schedule.every(30).days.do(job)

# Busy-wait loop that fires pending jobs once per second.
while True:
    schedule.run_pending()
    time.sleep(1)
score.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Deploy the registered fraud-detection model as an Azure ACI web service."""

from azureml.core import Workspace, Model, Environment
# Was missing in the original: CondaDependencies.create below raised NameError.
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice

# Initialize the workspace from the local config.json.
ws = Workspace.from_config()

# Load the registered model by name.
model = Model(ws, name="model.pkl")

# Define the software environment for inference.
env = Environment(name="fraud_detection_env")
deps = CondaDependencies.create(pip_packages=["azureml-core", "scikit-learn", "joblib", "numpy"])
env.python.conda_dependencies = deps

# Inference configuration: entry script plus environment.
inference_config = InferenceConfig(entry_script="score.py", environment=env)

# ACI deployment sizing.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Deploy the model as a web service.
service = Model.deploy(workspace=ws,
                       name="fraud-detection-service",
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config)

service.wait_for_deployment(show_output=True)

print(f"Service deployed at: {service.scoring_uri}")
simulate_data.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
import numpy as np
import os

def simulate_monthly_data(base_data_path, output_dir, months=12):
    """Write `months` drifted copies of a base dataset to `output_dir`.

    Each monthly file is the base data plus Gaussian noise (mean 0, std 0.1)
    on the feature columns. The 'Class' label column, when present, is left
    untouched: the original added drift to every column, which corrupted the
    0/1 labels (and the Time column) into meaningless floats.

    Args:
        base_data_path: Path to the CSV file with the base transactions.
        output_dir: Directory receiving creditcard_month_<m>.csv files.
        months: Number of monthly files to generate (default 12).
    """
    # Create the output directory if it doesn't exist.
    os.makedirs(output_dir, exist_ok=True)

    base_data = pd.read_csv(base_data_path)
    # Drift only the non-label columns.
    feature_cols = [c for c in base_data.columns if c != 'Class']
    for month in range(1, months + 1):
        new_data = base_data.copy()
        drift_factor = np.random.normal(0, 0.1, new_data[feature_cols].shape)
        new_data[feature_cols] += drift_factor
        new_data_path = os.path.join(output_dir, f'creditcard_month_{month}.csv')
        new_data.to_csv(new_data_path, index=False)
        print(f"Simulated data for month {month} saved to {new_data_path}")

if __name__ == "__main__":
    simulate_monthly_data('C:/Fraud_Detection/creditcard.csv', 'C:/Fraud_Detection/simulated_data')

#'C:/Fraud_Detection/creditcard.csv',
slapp.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit front-end for the pickled fraud-detection classifier."""

import streamlit as st
import pickle
import pandas as pd

# Load the trained classifier from disk; surface any failure in the UI.
try:
    model = pickle.load(open('model.pkl', 'rb'))
except Exception as e:
    st.error(f"Error loading model: {e}")
    model = None

# Page header.
st.title("Fraud Detection API")
st.write("Enter the transaction details to check if it's acceptable or fraudulent.")

# One numeric input per model feature, in the order the model was trained on:
# Time, V1..V28, Amount.
feature_names = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']
inputs = {name: st.number_input(name) for name in feature_names}

# Run a prediction when the button is pressed.
if st.button('Predict'):
    try:
        # Single-row DataFrame matching the training feature layout.
        transaction_data = pd.DataFrame({name: [value] for name, value in inputs.items()})

        # Classify the transaction with the loaded model.
        prediction = model.predict(transaction_data)

        # Report the result: 0 = acceptable, anything else = fraudulent.
        if prediction[0] == 0:
            st.success('Prediction: Acceptable transaction')
        else:
            st.error('Prediction: Fraudulent transaction')

    except Exception as e:
        st.error(f'Error: {str(e)}')
train_and_log.bat
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@echo off
REM Launch a local MLflow tracking server in its own window:
REM SQLite backend store, artifacts under ./mlruns, listening on port 5000.
start "MLflow Server" cmd /c "mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000"

REM Give the server a few seconds to come up before training starts.
timeout /t 10 /nobreak

REM Run the training script against the server using the venv interpreter.
c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"

REM Shut down the MLflow server window started above (matched by title).
taskkill /FI "WINDOWTITLE eq MLflow Server*"

echo Training process completed.
pause
train_log.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse
import os
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import schedule
import time

def main(data_path):
    """Train, evaluate, log, and register a logistic-regression model.

    Args:
        data_path: Path to the creditcard CSV with a 'Class' label column.
    """
    print(f"Reading data from: {data_path}")
    # Point MLflow at the local tracking server (see train_and_log.bat).
    mlflow.set_tracking_uri("http://127.0.0.1:5000")

    # Load data; bail out with a message rather than crashing the scheduler.
    try:
        data = pd.read_csv(data_path)
    except Exception as e:
        print(f"Error reading the data file: {e}")
        return

    # Split features/labels and hold out 20% for testing.
    X = data.drop(columns='Class')
    y = data['Class']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize features; fit on the training split only to avoid leakage.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train the model.
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Evaluate on both splits.
    train_accuracy = accuracy_score(y_train, model.predict(X_train))
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    # Log metrics/model with MLflow and register a new model version.
    with mlflow.start_run() as run:
        mlflow.log_param('random_state', 42)
        mlflow.log_metric('train_accuracy', train_accuracy)
        mlflow.log_metric('test_accuracy', test_accuracy)
        mlflow.sklearn.log_model(model, 'model')

        mlflow.register_model(
            model_uri=f"runs:/{run.info.run_id}/model",
            name="LogisticRegressionModel"
        )

    print(f"Train Accuracy: {train_accuracy}")
    print(f"Test Accuracy: {test_accuracy}")

def job():
    """Parse the --data argument, validate the path, and run one cycle."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, help="Path to the training data", required=True)
    args = parser.parse_args()

    # Print args.data for debugging.
    print(f"Data path provided: {args.data}")

    # Check that the file exists before training.
    if not os.path.isfile(args.data):
        print(f"Error: The file {args.data} does not exist.")
    else:
        main(args.data)

if __name__ == "__main__":
    # Run one training cycle immediately. The original only *scheduled* the
    # job, so the first run would not have happened until 30 days after
    # startup — train_and_log.bat clearly expects training to occur now.
    job()

    # Then re-run the job every 30 days.
    schedule.every(30).days.do(job)

    # Scheduling loop.
    while True:
        schedule.run_pending()
        time.sleep(1)

# run:
# mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000
# then run:
# c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"