cmasukume commited on
Commit
045d34f
·
verified ·
1 Parent(s): f93d476

Upload 21 files

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official slim Python image from Docker Hub.
# NOTE(review): the original contained two FROM lines (python:3.9-slim
# followed by python:3.8-slim), which made the first stage dead weight;
# a single base image is kept here.
FROM python:3.9-slim

# Set the working directory in the container.
WORKDIR /app

# Copy only the requirements file first so the dependency-install layer
# is cached until requirements.txt actually changes.
COPY requirements.txt .

# Install the dependencies without keeping the pip cache.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container.
COPY . .

# Expose the port the app runs on.
EXPOSE 5000

# Define the command to run the application.
CMD ["python", "app.py"]
Example of Fraudulent & Acceptable Transaction.txt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Example of Fraudulent Transaction Request
2
+ {
3
+ "Time": 5000,
4
+ "V1": -2.3122265423263,
5
+ "V2": 1.95199201064142,
6
+ "V3": -1.60985073229792,
7
+ "V4": 3.9979055875468,
8
+ "V5": -0.522187864667764,
9
+ "V6": -1.42654531915544,
10
+ "V7": -2.53738730624579,
11
+ "V8": 1.39165724822242,
12
+ "V9": -2.77008927779529,
13
+ "V10": -2.77227214467803,
14
+ "V11": 3.20203320709994,
15
+ "V12": -2.89990738824397,
16
+ "V13": -0.595221881324185,
17
+ "V14": -4.28925378244238,
18
+ "V15": -1.72471928638227,
19
+ "V16": -1.92786884505502,
20
+ "V17": -1.32909757135474,
21
+ "V18": 0.393608591647132,
22
+ "V19": 0.233809776745693,
23
+ "V20": -0.108300452035545,
24
+ "V21": 0.00527359678253453,
25
+ "V22": -0.190320518742841,
26
+ "V23": -1.17557533186321,
27
+ "V24": 0.647376034602038,
28
+ "V25": -0.221928844458407,
29
+ "V26": 0.0627228487293033,
30
+ "V27": 0.0614576285006353,
31
+ "V28": -0.142382874811225,
32
+ "Amount": 0.0
33
+ }
34
+
35
+
36
+ Example Prediction Request
37
+
38
+ {
39
+ "Time": 1000,
40
+ "V1": -1.3598071336738,
41
+ "V2": -0.0727811733098497,
42
+ "V3": 2.53634673796914,
43
+ "V4": 1.37815522427443,
44
+ "V5": -0.338320769942518,
45
+ "V6": 0.462387777762292,
46
+ "V7": 0.239598554061257,
47
+ "V8": 0.0986979012610507,
48
+ "V9": 0.363786969611213,
49
+ "V10": 0.0907941719789316,
50
+ "V11": -0.551599533260813,
51
+ "V12": -0.617800855762348,
52
+ "V13": -0.991389847235408,
53
+ "V14": -0.311169353699879,
54
+ "V15": 1.46817697209427,
55
+ "V16": -0.470400525259478,
56
+ "V17": 0.207971241929242,
57
+ "V18": 0.0257905801985591,
58
+ "V19": 0.403992960255733,
59
+ "V20": 0.251412098239705,
60
+ "V21": -0.018306777944153,
61
+ "V22": 0.277837575558899,
62
+ "V23": -0.110473910188767,
63
+ "V24": 0.0669280749146731,
64
+ "V25": 0.128539358273528,
65
+ "V26": -0.189114843888824,
66
+ "V27": 0.133558376740387,
67
+ "V28": -0.0210530534538215,
68
+ "Amount": 149.62
69
+ }
Fraud_Detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Procfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ web: python app.py
2
+
README.md CHANGED
@@ -1,13 +1 @@
1
- ---
2
- title: Fraud Detection
3
- emoji: 🏆
4
- colorFrom: gray
5
- colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: 1.39.0
8
- app_file: app.py
9
- pinned: false
10
- short_description: fraud detection model and app
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Fraud-Detection
 
 
 
 
 
 
 
 
 
 
 
 
config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "subscription_id": "5ac6f187-b9ed-4088-8b2f-091417aa1f41",
3
+ "resource_group": "resource_fraud_detection",
4
+ "workspace_name": "Fraud_Detection"
5
+ }
fraud_detection.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""Fraud Detection.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1JTnx_TGE4NuRxerkz1nbP9jrCP59prdN

Trains a logistic-regression fraud classifier on the credit card dataset
and pickles the fitted model to model.pkl.
"""

# Import all the libraries needed for this model.
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Read the dataset holding the credit card transactions.
credit_card_data = pd.read_csv('/content/creditcard.csv')

# Display the first and last 5 rows of the dataset.
credit_card_data.head()
credit_card_data.tail()

# Information about the dataset: datatypes used and whether it contains
# null values or not.
credit_card_data.info()

# Number of missing values in each column.
credit_card_data.isnull().sum()

# Distribution between acceptable and fraudulent transactions.
credit_card_data['Class'].value_counts()

"""The counts above show the dataset is highly imbalanced.

0 ---> acceptable transaction

1 ---> fraudulent transaction
"""

# Data separation for analysis.
acceptable = credit_card_data[credit_card_data.Class == 0]
fraudulent = credit_card_data[credit_card_data.Class == 1]

print(acceptable.shape)
print(fraudulent.shape)

# Statistical measures per class.
acceptable.Amount.describe()
fraudulent.Amount.describe()

# Per-class mean of every feature for comparison.
credit_card_data.groupby('Class').mean()

"""Data Sampling"""

# Before sampling, compute the percentage of acceptable and fraudulent
# transactions in the dataset.
classes = credit_card_data['Class'].value_counts()
total = credit_card_data['Class'].count()
acceptable_percent = classes[0] / total * 100
fraudulent_percent = classes[1] / total * 100
print(acceptable_percent)
print(fraudulent_percent)

labels = ['Acceptable', 'Fraudulent']
count = credit_card_data.value_counts(credit_card_data['Class'])
count.plot(kind="bar", rot=0)
plt.title("Labels")
plt.ylabel("Count")
plt.xticks(range(2), labels)
plt.show()

"""Create a balanced sample containing as many acceptable transactions
as there are fraudulent ones (under-sampling the majority class)."""

# len(fraudulent) instead of the hard-coded 492 so the script keeps
# working if the dataset changes size.
acceptable_sample = acceptable.sample(n=len(fraudulent))

"""Concatenate the sample with the existing fraudulent dataframe."""

# axis = 0 stacks rows; axis = 1 would join columns.
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# View the first / last 5 rows of the balanced dataset.
new_dataset.head()
new_dataset.tail()

new_dataset['Class'].value_counts()
new_dataset.groupby('Class').mean()

"""Separate the features from the label."""

# x represents the features (content of the dataset), y the class.
x = new_dataset.drop(columns='Class', axis=1)
y = new_dataset['Class']

# The data except the class column.
print(x)

# The classes.
print(y)

"""Split the dataset into training and testing sets (80% / 20%)."""

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=2)

# Original data vs. training vs. test split sizes.
print(x.shape, x_train.shape, x_test.shape)

"""Training the Model.

Logistic Regression Model
"""

model = LogisticRegression()

# Train the logistic regression model using the training data.
model.fit(x_train, y_train)

# Accuracy on the training data (accuracy_score takes y_true first).
# If the accuracy is above 80% the model is considered usable.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(y_train, x_train_prediction)
print('Accuracy on the Training data : ', training_data_accuracy * 100)

# Accuracy on the held-out test data.
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(y_test, x_test_prediction)
print('Accuracy on the Test data : ', test_data_accuracy * 100)

# Persist the fitted model for the serving apps.
pickle.dump(model, open('model.pkl', 'wb'))
mlflow.db ADDED
Binary file (225 kB). View file
 
mlflowtest.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn  # required: mlflow.sklearn.log_model is called below
import mlflow.pyfunc
import requests
import json
import os

# Configure the tracking store *before* touching any experiment.  The
# original called set_experiment first, so the experiment was created
# against the default store instead of this one.
mlflow_tracking_uri = "file:///C:/Fraud_Detection/mlruns"
os.environ['MLFLOW_TRACKING_URI'] = mlflow_tracking_uri
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Create or select the desired experiment.
experiment = mlflow.set_experiment("fraud_detection")
print("Experiment ID:", experiment.experiment_id)
print("Experiment Name:", experiment.name)

# Read the dataset (raw string so Windows backslashes are not treated
# as escape sequences).
credit_card_data = pd.read_csv(r'C:\Fraud_Detection\creditcard.csv')

# Display basic information.
print(credit_card_data.head())
print(credit_card_data.tail())
credit_card_data.info()
print(credit_card_data.isnull().sum())
print(credit_card_data['Class'].value_counts())

# Separate acceptable (0) and fraudulent (1) transactions.
acceptable = credit_card_data[credit_card_data.Class == 0]
fraudulent = credit_card_data[credit_card_data.Class == 1]

print(acceptable.shape)
print(fraudulent.shape)

# Statistical measures.
print(acceptable.Amount.describe())
print(fraudulent.Amount.describe())

print(credit_card_data.groupby('Class').mean())

# Data sampling: under-sample the majority class to balance the set.
acceptable_sample = acceptable.sample(n=492)
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# Split features/label, then training and test sets.
x = new_dataset.drop(columns='Class', axis=1)
y = new_dataset['Class']

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=2)
print(x.shape, x_train.shape, x_test.shape)

# Training the model.
model = LogisticRegression(max_iter=1000000)
model.fit(x_train, y_train)

# Accuracy on training and test data.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(x_train_prediction, y_train)
print('Accuracy on the Training data : ', training_data_accuracy * 100)

x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)
print('Accuracy on the Test data : ', test_data_accuracy * 100)

# Manually start and end the MLflow run; try/finally guarantees the
# run is closed even if logging fails.
run = mlflow.start_run()
try:
    mlflow.sklearn.log_model(model, "model")
    mlflow.log_metric("training_data_accuracy", training_data_accuracy)
    mlflow.log_metric("test_data_accuracy", test_data_accuracy)
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/model"
finally:
    mlflow.end_run()

print(f"Model logged to MLflow with run_id: {run_id}")

# Reload the logged model and smoke-test a few predictions.
loaded_model = mlflow.pyfunc.load_model(model_uri)
sample_input = x_test.iloc[:5].values.tolist()
predictions = loaded_model.predict(sample_input)
print("Predictions:", predictions)

# Alternatively, serve the model over HTTP with Flask.
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/')
def index():
    return "Welcome to the model serving endpoint!"

# Define prediction route.  Expects JSON of the form
# {"data": [[...feature row...], ...]}.
@app.route('/predict', methods=['POST'])
def predict():
    data = request.json['data']
    predictions = loaded_model.predict(data)
    return jsonify(predictions.tolist())

# Run the Flask app.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)

# activate a virtual environment ---C:\Fraud_Detection\Scripts\activate
# to display the mlflow dashboard ---mlflow ui --backend-store-uri file:///C:/Fraud_Detection/mlruns
# cancel ---ctrl c
+ # cancel ---ctrl c
model.pkl ADDED
Binary file (1.21 kB). View file
 
pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Users\masukumec\AppData\Local\Programs\Python\Python312
2
+ include-system-site-packages = false
3
+ version = 3.12.4
4
+ executable = C:\Users\masukumec\AppData\Local\Programs\Python\Python312\python.exe
5
+ command = C:\Users\masukumec\AppData\Local\Programs\Python\Python312\python.exe -m venv C:\Fraud_Detection
register.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from azureml.core import Workspace, Model

# Path to the locally trained model artifact (raw string literal so the
# Windows backslashes are taken verbatim).
model_path = r"C:\Fraud_Detection\model.pkl"

# Connect to the Azure ML workspace described by config.json.
ws = Workspace.from_config()

# Register the model.  NOTE(review): Azure ML model names may not
# contain spaces, so the original name "Fraud Detection" would be
# rejected by the service; a hyphen is used instead.
model = Model.register(
    model_path=model_path,
    model_name="Fraud-Detection",
    workspace=ws
)
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flask==1.1.2
2
+ scikit-learn
3
+ scipy
4
+ numpy
5
+ pandas
6
+ matplotlib
7
+ seaborn
8
+ schedule
9
+ jupyter
10
+ mlflow
11
+ requests
12
+ jinja2==3.0.3
13
+ streamlit
14
+
run.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# run.py
#
# Entry point that starts the Flask application defined in app.py.

from app import app

if __name__ == '__main__':
    # debug=True enables the auto-reloader and the interactive debugger;
    # not suitable for production use.
    app.run(debug=True)
scheduler.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import subprocess
import schedule
import time

def job():
    """Run one retraining cycle by invoking train_log.py in the venv.

    subprocess.run with an argument list avoids shell string parsing
    (the original used os.system with a single shell command string).
    """
    subprocess.run([
        'c:/Fraud_Detection/Scripts/python.exe',
        'c:/Fraud_Detection/train_log.py',
        '--data', 'c:/Fraud_Detection/creditcard.csv',
    ], check=False)

# Schedule the retraining job every 30 days.
schedule.every(30).days.do(job)

# Busy-wait loop that fires pending jobs once per second.
while True:
    schedule.run_pending()
    time.sleep(1)
+
score.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from azureml.core import Workspace, Model, Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
# Missing in the original: CondaDependencies was used below without
# being imported, which raised a NameError before deployment started.
from azureml.core.conda_dependencies import CondaDependencies

# Initialize the workspace from config.json.
ws = Workspace.from_config()

# Load the registered model by name.
model = Model(ws, name="model.pkl")  # Replace with your model's registered name

# Define the environment (if not using the YAML method).
env = Environment(name="fraud_detection_env")
deps = CondaDependencies.create(
    pip_packages=["azureml-core", "scikit-learn", "joblib", "numpy"])
env.python.conda_dependencies = deps

# Inference configuration: entry script plus environment.
inference_config = InferenceConfig(entry_script="score.py", environment=env)

# Deployment configuration for Azure Container Instances.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Deploy the model as a web service.
service = Model.deploy(workspace=ws,
                       name="fraud-detection-service",
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config)

service.wait_for_deployment(show_output=True)

print(f"Service deployed at: {service.scoring_uri}")
simulate_data.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import numpy as np
import os

def simulate_monthly_data(base_data_path, output_dir, months=12):
    """Generate `months` drifted copies of a base credit-card CSV.

    Each monthly file is the base data plus Gaussian noise on the
    feature columns, simulating gradual data drift.  The 'Class' label
    column (when present) is left untouched: the original added noise
    to every column, which corrupted the 0/1 labels into floats.

    Args:
        base_data_path: Path to the base CSV file.
        output_dir: Directory the monthly CSVs are written to
            (created if it does not exist).
        months: Number of monthly files to generate.
    """
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(output_dir, exist_ok=True)

    base_data = pd.read_csv(base_data_path)
    # Drift only the numeric feature columns, never the label.
    feature_cols = [c for c in base_data.columns if c != 'Class']
    for month in range(1, months + 1):
        new_data = base_data.copy()
        drift_factor = np.random.normal(
            0, 0.1, (len(new_data), len(feature_cols)))
        new_data[feature_cols] = new_data[feature_cols] + drift_factor
        new_data_path = os.path.join(
            output_dir, f'creditcard_month_{month}.csv')
        new_data.to_csv(new_data_path, index=False)
        print(f"Simulated data for month {month} saved to {new_data_path}")

if __name__ == "__main__":
    simulate_monthly_data('C:/Fraud_Detection/creditcard.csv',
                          'C:/Fraud_Detection/simulated_data')

#'C:/Fraud_Detection/creditcard.csv',
slapp.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import pickle
import pandas as pd

# Feature names in the column order the model was trained on.
FEATURE_NAMES = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']

# Load the saved model once at startup; a load failure is reported but
# the page still renders.
try:
    model = pickle.load(open('model.pkl', 'rb'))
except Exception as e:
    st.error(f"Error loading model: {e}")
    model = None

# Streamlit app
st.title("Fraud Detection API")
st.write("Enter the transaction details to check if it's acceptable or fraudulent.")

# One numeric input per feature — identical widgets to listing all 30
# by hand, but generated in a loop to avoid the copy-paste block.
inputs = {name: st.number_input(name) for name in FEATURE_NAMES}

# Prepare a button for prediction.
if st.button('Predict'):
    if model is None:
        # Without this guard, predict() below would raise an
        # AttributeError whenever the model failed to load.
        st.error('Error: model is not loaded.')
    else:
        try:
            # Build a single-row DataFrame from the input widgets.
            transaction_data = pd.DataFrame(
                {name: [value] for name, value in inputs.items()})

            # Perform prediction using the loaded model.
            prediction = model.predict(transaction_data)

            # Prepare response: class 0 is acceptable, anything else
            # is flagged as fraudulent.
            if prediction[0] == 0:
                st.success('Prediction: Acceptable transaction')
            else:
                st.error('Prediction: Fraudulent transaction')

        except Exception as e:
            st.error(f'Error: {str(e)}')
train_and_log.bat ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM Start the MLflow tracking server in its own window.
start "MLflow Server" cmd /c "mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000"

REM Wait a few seconds to give the server time to come up.
timeout /t 10 /nobreak

REM Run the training script with the virtualenv's Python.
c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"

REM Force-terminate the MLflow server window.  /F added: without it,
REM taskkill only sends a close request, which the console window can
REM ignore, leaving the server running.
taskkill /F /FI "WINDOWTITLE eq MLflow Server*"

echo Training process completed.
pause
train_log.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import os
import time

import mlflow
import mlflow.sklearn
import pandas as pd
import schedule
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def main(data_path):
    """Train, evaluate, log and register a fraud-detection model.

    Args:
        data_path: Path to the creditcard CSV (must contain a 'Class'
            label column).
    """
    print(f"Reading data from: {data_path}")
    # Point MLflow at the local tracking server.
    mlflow.set_tracking_uri("http://127.0.0.1:5000")  # Replace with your MLflow tracking server URI

    # Load data; a read failure aborts this cycle without crashing the
    # scheduler loop.
    try:
        data = pd.read_csv(data_path)
    except Exception as e:
        print(f"Error reading the data file: {e}")
        return

    # Preprocess and split data.
    X = data.drop(columns='Class')
    y = data['Class']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Scale features; fit on the training split only to avoid leakage.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train model.
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Evaluate model.
    train_accuracy = accuracy_score(y_train, model.predict(X_train))
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    # Log the run and register the resulting model with MLflow.
    with mlflow.start_run() as run:
        mlflow.log_param('random_state', 42)
        mlflow.log_metric('train_accuracy', train_accuracy)
        mlflow.log_metric('test_accuracy', test_accuracy)
        mlflow.sklearn.log_model(model, 'model')

        mlflow.register_model(
            model_uri=f"runs:/{run.info.run_id}/model",
            name="LogisticRegressionModel"
        )

    print(f"Train Accuracy: {train_accuracy}")
    print(f"Test Accuracy: {test_accuracy}")


def job():
    """Parse the CLI arguments and run one training cycle."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True,
                        help="Path to the training data")
    args = parser.parse_args()

    # Print args.data for debugging.
    print(f"Data path provided: {args.data}")

    # Skip the cycle (with a message) if the file is missing.
    if not os.path.isfile(args.data):
        print(f"Error: The file {args.data} does not exist.")
    else:
        main(args.data)


if __name__ == "__main__":
    # Train once immediately.  The original only *scheduled* the job,
    # so the first training would not have happened until 30 days after
    # startup (and train_and_log.bat would have blocked forever).
    job()

    # Then retrain every 30 days.
    schedule.every(30).days.do(job)
    while True:
        schedule.run_pending()
        time.sleep(1)

# run:
# mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000
# then run:
# c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"