Spaces:

cmasukume
/

Fraud_Detection

Sleeping

App Files Files Community

Fraud_Detection / train_log.py

cmasukume

Upload 21 files

045d34f verified over 1 year ago

raw

history blame contribute delete

2.76 kB

	import argparse
	import os
	import mlflow
	import mlflow.sklearn
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import accuracy_score
	from sklearn.preprocessing import StandardScaler
	import schedule
	import time

	def main(data_path, tracking_uri="http://127.0.0.1:5000"):
	    """Train a logistic-regression model on a CSV file and log it to MLflow.

	    The CSV is read with pandas; the ``Class`` column is used as the target
	    and every other column as a feature. The rows are split 80/20, the
	    features are standardized with a scaler fitted on the training split
	    only, and a ``LogisticRegression`` model is fitted. Train/test accuracy,
	    the hyper-parameters and the model are logged to a new MLflow run, and
	    the model is registered under the name ``LogisticRegressionModel``.

	    Args:
	        data_path: Path to the training CSV file.
	        tracking_uri: URI of the MLflow tracking server. Defaults to the
	            local server address this script has always used.

	    Returns:
	        None. If the file cannot be read, contains no rows, or lacks the
	        ``Class`` column, an error is printed and nothing is trained or
	        logged.
	    """
	    label_column = 'Class'
	    random_state = 42
	    test_size = 0.2
	    max_iter = 1000

	    print(f"Reading data from: {data_path}")

	    # Load data. The catch is deliberately broad: any read/parse failure is
	    # reported and the run is skipped instead of propagating to the caller.
	    try:
	        data = pd.read_csv(data_path)
	    except Exception as e:
	        print(f"Error reading the data file: {e}")
	        return

	    # Validate before touching MLflow or scikit-learn so that a malformed
	    # file produces a readable message rather than a KeyError / ValueError.
	    if label_column not in data.columns:
	        print(f"Error: The data file {data_path} has no '{label_column}' column.")
	        return
	    if data.empty:
	        print(f"Error: The data file {data_path} contains no rows.")
	        return

	    # Preprocess and split data
	    X = data.drop(columns=label_column)
	    y = data[label_column]
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=test_size, random_state=random_state
	    )

	    # Scale data (the scaler is fitted on the training split only)
	    scaler = StandardScaler()
	    X_train = scaler.fit_transform(X_train)
	    X_test = scaler.transform(X_test)

	    # Train model
	    model = LogisticRegression(max_iter=max_iter)
	    model.fit(X_train, y_train)

	    # Evaluate model
	    # NOTE(review): plain accuracy can be misleading if the labels are
	    # heavily imbalanced - confirm this is the metric you want to track.
	    train_accuracy = accuracy_score(y_train, model.predict(X_train))
	    test_accuracy = accuracy_score(y_test, model.predict(X_test))

	    # Point MLflow at the tracking server only once there is something to log
	    mlflow.set_tracking_uri(tracking_uri)

	    # Log model with MLflow
	    with mlflow.start_run() as run:
	        mlflow.log_param('random_state', random_state)
	        mlflow.log_param('test_size', test_size)
	        mlflow.log_param('max_iter', max_iter)
	        mlflow.log_metric('train_accuracy', train_accuracy)
	        mlflow.log_metric('test_accuracy', test_accuracy)
	        # NOTE(review): only the classifier is logged; the fitted scaler is
	        # not persisted, so consumers of the registered model must
	        # standardize their inputs themselves - confirm this is intended.
	        mlflow.sklearn.log_model(model, 'model')

	        # Register the model
	        mlflow.register_model(
	            model_uri=f"runs:/{run.info.run_id}/model",
	            name="LogisticRegressionModel"
	        )

	    print(f"Train Accuracy: {train_accuracy}")
	    print(f"Test Accuracy: {test_accuracy}")

	def job():
	    """Read ``--data`` from the command line and train on that file.

	    The required ``--data`` option is parsed from ``sys.argv``. The path is
	    echoed for debugging; if it names an existing file, training is run on
	    it, otherwise an error message is printed and nothing else happens.
	    """
	    cli = argparse.ArgumentParser()
	    cli.add_argument('--data', type=str, help="Path to the training data", required=True)
	    data_file = cli.parse_args().data

	    # Echo the path for debugging
	    print(f"Data path provided: {data_file}")

	    # Train only when the file is actually there
	    if os.path.isfile(data_file):
	        main(data_file)
	        return

	    print(f"Error: The file {data_file} does not exist.")

	if __name__ == "__main__":
	    # Train once right away. `schedule.every(30).days` first fires 30 days
	    # after it is registered, so without this call launching the script
	    # would train nothing (and would not even report a missing --data
	    # argument) until a month later.
	    job()

	    # Then re-run the job every 30 days
	    schedule.every(30).days.do(job)

	    # Run the scheduling loop; run_pending() executes the job once it is due
	    while True:
	        schedule.run_pending()
	        time.sleep(1)

	# Usage:
	# 1. Start the MLflow tracking server:
	#    mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000
	# 2. Then, in a separate terminal, run this script with the path to the training data:
	#    c:/Fraud_Detection/Scripts/python.exe c:/Fraud_Detection/train_log.py --data "c:/Fraud_Detection/creditcard.csv"