Fraud_Detection / mlflowtest.py
cmasukume's picture
Upload 21 files
045d34f verified
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import requests
import json
import os
import mlflow.pyfunc
# Point MLflow at the local file store *before* selecting an experiment.
# set_experiment() looks up (or creates) the experiment in whichever tracking
# store is active when it is called, so the URI has to be configured first;
# otherwise the experiment ends up in the default store and the run logged
# later targets a store that does not contain it.
mlflow_tracking_uri = "file:///C:/Fraud_Detection/mlruns"
# Also export the URI so tools that read the environment use the same store.
os.environ['MLFLOW_TRACKING_URI'] = mlflow_tracking_uri
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Create or select the experiment that the run is logged under
experiment = mlflow.set_experiment("fraud_detection")
print("Experiment ID:", experiment.experiment_id)
print("Experiment Name:", experiment.name)
# Read the dataset. The Windows path is written as a raw string so every
# backslash is taken literally; without the r-prefix the path only works
# because "\F" and "\c" happen not to be escape sequences, and Python warns
# about such invalid escapes.
credit_card_data = pd.read_csv(r'C:\Fraud_Detection\creditcard.csv')

# Display basic information: first/last rows, column summary,
# missing values per column and the label distribution
print(credit_card_data.head())
print(credit_card_data.tail())
credit_card_data.info()
print(credit_card_data.isnull().sum())
print(credit_card_data['Class'].value_counts())
# Split the rows by label: Class == 0 goes to `acceptable`,
# Class == 1 goes to `fraudulent`
class_labels = credit_card_data['Class']
acceptable = credit_card_data[class_labels == 0]
fraudulent = credit_card_data[class_labels == 1]

# Report the size of each subset
for subset in (acceptable, fraudulent):
    print(subset.shape)

# Summary statistics of the transaction amount, per subset
for subset in (acceptable, fraudulent):
    print(subset['Amount'].describe())

# Per-class mean of every column
per_class_means = credit_card_data.groupby('Class').mean()
print(per_class_means)
# Data sampling: under-sample the acceptable rows so both classes contribute
# the same number of rows. The sample size follows the number of fraudulent
# rows instead of a hard-coded count (capped at the rows available), and the
# draw is seeded so repeated runs build the same balanced dataset.
sample_size = min(len(acceptable), len(fraudulent))
acceptable_sample = acceptable.sample(n=sample_size, random_state=2)
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# Splitting dataset into features (x) and target (y)
x = new_dataset.drop(columns='Class')
y = new_dataset['Class']

# Stratify on the label so train and test keep the same class proportions
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=2
)
print(x.shape, x_train.shape, x_test.shape)
# Training the model
model = LogisticRegression(max_iter=1000000)
model.fit(x_train, y_train)

# Accuracy on the training data.
# accuracy_score takes (y_true, y_pred): the ground-truth labels go first.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(y_train, x_train_prediction)
print('Accuracy on the Training data : ', training_data_accuracy * 100)

# Accuracy on the held-out test data
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(y_test, x_test_prediction)
print('Accuracy on the Test data : ', test_data_accuracy * 100)
# Log the model and its metrics inside a managed MLflow run. Using the run as
# a context manager ends it automatically on exit and, unlike a bare
# try/finally with end_run(), records the run as failed when logging raises.
with mlflow.start_run() as run:
    # Capture the identifiers first so they are available to the code below
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/model"

    mlflow.sklearn.log_model(model, "model")
    mlflow.log_metric("training_data_accuracy", training_data_accuracy)
    mlflow.log_metric("test_data_accuracy", test_data_accuracy)

print(f"Model logged to MLflow with run_id: {run_id}")
# Reload the logged model through the generic pyfunc interface
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Example prediction: score the first five test rows, passed as a nested list
first_test_rows = x_test.iloc[:5]
sample_input = first_test_rows.values.tolist()
predictions = loaded_model.predict(sample_input)
print("Predictions:", predictions)
# Alternatively, serve the loaded model over HTTP with a small Flask application
from flask import Flask, request, jsonify
app = Flask(__name__)
@app.route('/')
def index():
    """Return the plain-text greeting served at the root URL."""
    greeting = "Welcome to the model serving endpoint!"
    return greeting
# Define prediction route
@app.route('/predict', methods=['POST'])
def predict():
    """Score the rows sent by the client and return the predictions as JSON.

    Expects a JSON request body of the form ``{"data": ...}``; the value
    stored under ``data`` is passed to ``loaded_model.predict`` unchanged.

    Returns:
        A JSON array with the predictions on success, or a JSON object with
        an ``error`` message and HTTP status 400 when the body is not a JSON
        object containing a ``data`` key.
    """
    # silent=True yields None for a missing or unparsable JSON body instead of
    # raising, so every malformed request is answered by the check below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'data' not in payload:
        error_body = {"error": "Request body must be a JSON object with a 'data' field."}
        return jsonify(error_body), 400

    predictions = loaded_model.predict(payload['data'])
    return jsonify(predictions.tolist())
# Start the Flask app only when this file is executed directly
if __name__ == '__main__':
    app.run(port=5000, host='0.0.0.0')
# Activate the virtual environment: C:\Fraud_Detection\Scripts\activate
# Display the MLflow dashboard: mlflow ui --backend-store-uri file:///C:/Fraud_Detection/mlruns
# Cancel (stop the running command): Ctrl+C