Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
| from matplotlib import pyplot as plt | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.metrics import accuracy_score | |
| import mlflow | |
| import requests | |
| import json | |
| import os | |
| import mlflow.pyfunc | |
# Configure the MLflow tracking store BEFORE selecting the experiment.
# set_experiment() looks up / creates the experiment in whichever store is
# active at call time, so calling it first would register "fraud_detection"
# in the default location instead of the directory configured here.
mlflow_tracking_uri = "file:///C:/Fraud_Detection/mlruns"
# Exported as well so that child processes / the mlflow CLI pick up the same store.
os.environ['MLFLOW_TRACKING_URI'] = mlflow_tracking_uri
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Create or select the desired experiment in the store configured above.
experiment = mlflow.set_experiment("fraud_detection")
print("Experiment ID:", experiment.experiment_id)
print("Experiment Name:", experiment.name)
# Read the dataset. A raw string keeps the backslashes of the Windows path
# literal; in a normal string "\F" and "\c" are invalid escape sequences,
# which Python warns about and plans to reject in a future version.
credit_card_data = pd.read_csv(r'C:\Fraud_Detection\creditcard.csv')

# Display basic information: first/last rows, column dtypes, missing-value
# counts per column, and the number of rows per value of 'Class'.
print(credit_card_data.head())
print(credit_card_data.tail())
credit_card_data.info()
print(credit_card_data.isnull().sum())
print(credit_card_data['Class'].value_counts())
# Split the rows by label: Class == 0 goes to `acceptable`, Class == 1 to
# `fraudulent`.
class_labels = credit_card_data['Class']
acceptable = credit_card_data[class_labels == 0]
fraudulent = credit_card_data[class_labels == 1]

# Report the size of each subset.
for subset in (acceptable, fraudulent):
    print(subset.shape)

# Summary statistics of the 'Amount' column for each subset, followed by
# the per-class mean of every column.
for subset in (acceptable, fraudulent):
    print(subset['Amount'].describe())
print(credit_card_data.groupby('Class').mean())
# Data sampling: undersample the Class == 0 rows so both classes contribute
# the same number of rows. The size is derived from the data rather than
# hard-coded (the previous literal 492 was presumably this dataset's fraud
# count), and capped so sample() never asks for more rows than exist.
# random_state makes the draw repeatable, matching the seeded split below.
sample_size = min(len(acceptable), len(fraudulent))
acceptable_sample = acceptable.sample(n=sample_size, random_state=2)
new_dataset = pd.concat([acceptable_sample, fraudulent], axis=0)

# Split into features (every column except 'Class') and the target.
x = new_dataset.drop(columns='Class')
y = new_dataset['Class']

# Hold out 20% for testing; stratify keeps the class ratio equal in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=2
)
print(x.shape, x_train.shape, x_test.shape)
# Train a logistic-regression classifier. The very large max_iter gives the
# solver ample room to converge.
model = LogisticRegression(max_iter=1000000)
model.fit(x_train, y_train)

# Accuracy on the training data. accuracy_score is documented as
# (y_true, y_pred), so the ground truth goes first.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(y_train, x_train_prediction)
print('Accuracy on the Training data : ', training_data_accuracy * 100)

# Accuracy on the held-out test data.
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(y_test, x_test_prediction)
print('Accuracy on the Test data : ', test_data_accuracy * 100)
# Log the model and its metrics inside an MLflow run. The context manager
# always closes the run and, unlike an unconditional end_run() in a
# `finally`, records it as FAILED when logging raises instead of FINISHED.
with mlflow.start_run() as run:
    mlflow.sklearn.log_model(model, "model")
    mlflow.log_metric("training_data_accuracy", training_data_accuracy)
    mlflow.log_metric("test_data_accuracy", test_data_accuracy)
    run_id = run.info.run_id
    # URI under which the model logged above can be loaded again.
    model_uri = f"runs:/{run_id}/model"

print(f"Model logged to MLflow with run_id: {run_id}")
# Load the logged model back through the generic pyfunc interface.
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Example prediction on the first five test rows. The rows are passed as a
# DataFrame (not a list of lists) so the column names the estimator was
# fitted with are preserved.
sample_input = x_test.iloc[:5]
predictions = loaded_model.predict(sample_input)
print("Predictions:", predictions)
# Alternatively, serve the loaded model over HTTP with a small Flask app.
from flask import Flask, request, jsonify

app = Flask(__name__)


# The view functions must be registered with the app; without a route
# decorator Flask never calls them and every request returns 404.
@app.route('/')
def index():
    """Return a plain-text greeting confirming the service is reachable."""
    return "Welcome to the model serving endpoint!"


# Prediction route. NOTE(review): the '/predict' path and POST method are
# chosen here because no route was registered before; confirm with clients.
@app.route('/predict', methods=['POST'])
def predict():
    """Run the loaded model on the rows supplied in the request body.

    Expects a JSON object with a 'data' field, which is passed to
    ``loaded_model.predict`` unchanged. Returns the predictions as a JSON
    list, or a 400 response with an error message when the body is not a
    JSON object or lacks 'data'.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'data' not in payload:
        return jsonify({"error": "Request body must be JSON with a 'data' field."}), 400
    predictions = loaded_model.predict(payload['data'])
    return jsonify(predictions.tolist())


# Run the Flask app only when this file is executed as a script.
if __name__ == '__main__':
    # NOTE(review): 0.0.0.0 listens on all network interfaces; restrict to
    # 127.0.0.1 if the endpoint should only be reachable locally.
    app.run(host='0.0.0.0', port=5000)
# Activate the virtual environment: C:\Fraud_Detection\Scripts\activate
# Display the MLflow dashboard: mlflow ui --backend-store-uri file:///C:/Fraud_Detection/mlruns
# Stop the dashboard or server: Ctrl+C