diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.github/workflows/deploy-hf-space.yaml b/.github/workflows/deploy-hf-space.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..77953e207cf708554fa586bec0142615a3109f86 --- /dev/null +++ b/.gitignore @@ -0,0 +1,83 @@ +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +automlenv/ + +.pytest_cache/ +.coverage +htmlcov/ +*.cover +.hypothesis/ + +*.log + +.vscode/ +.idea/ +*.swp +*.swo +*~ + +artifacts/ +!artifacts/.gitkeep +mlflow/mlruns/ +mlruns/ +mlartifacts/ + +.dvc/cache/ +.dvc/tmp/ + +data/ +!data/.gitkeep +!data/sample_data.csv + +*.h5 +*.pkl +*.joblib +*.model +*.onnx +*.pb + +.ipynb_checkpoints/ +*.ipynb_checkpoints + +.DS_Store +Thumbs.db +*.bak +*.tmp + +node_modules/ +package-lock.json + +.mypy_cache/ +.dmypy.json +dmypy.json +.pyre/ +.pytype/ + +models/production/ +!models/.gitkeep \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/dependencies.py b/app/dependencies.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/middleware.py b/app/middleware.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/routers/__init__.py b/app/routers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/routers/health.py b/app/routers/health.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/routers/predict.py b/app/routers/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/routers/train.py b/app/routers/train.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/schemas.py b/app/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/config/automl_config.yaml b/config/automl_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4100350301c926c1694272e25e29b19f970d879 --- /dev/null +++ b/config/automl_config.yaml @@ -0,0 +1,44 @@ +automl_library: autogluon + +autogluon: + time_limit: 600 + presets: medium_quality + eval_metric: null + verbosity: 2 + num_bag_folds: 5 + num_stack_levels: 1 + +flaml: + time_budget: 600 + metric: auto + task: classification + estimator_list: + - lgbm + - xgboost + - rf + - extra_tree + n_jobs: -1 + verbose: 1 + early_stop: true + +pycaret: + session_id: 42 + n_select: 5 + fold: 5 + verbose: false + optimize: Accuracy + use_gpu: false + tuning: + enabled: true + n_iter: 10 + optimize: Accuracy + ensemble: + enabled: false + method: Bagging + n_estimators: 10 + +common: + target_column: target + problem_type: auto + cv_folds: 5 + random_state: 42 \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a3f75eaf938421d8b6eb6868f12a58eeb3d4677 --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,53 @@ +artifacts_root: artifacts + +data_ingestion: + root_dir: artifacts/data_ingestion + source_url: null + local_data_file: artifacts/data_ingestion/data.csv + unzip_dir: artifacts/data_ingestion + +data_validation: + root_dir: artifacts/data_validation + data_dir: artifacts/data_ingestion/data.csv + status_file: artifacts/data_validation/status.txt + schema_file: config/schema.yaml + +data_transformation: + root_dir: artifacts/data_transformation + data_path: artifacts/data_ingestion/data.csv + train_path: artifacts/data_transformation/train.csv + test_path: artifacts/data_transformation/test.csv + test_size: 0.2 + random_state: 42 + +feature_engineering: + root_dir: artifacts/feature_engineering + train_path: artifacts/data_transformation/train.csv + test_path: artifacts/data_transformation/test.csv + output_train_path: 
artifacts/feature_engineering/train_features.csv + output_test_path: artifacts/feature_engineering/test_features.csv + +model_trainer: + root_dir: artifacts/model_trainer + train_data_path: artifacts/feature_engineering/train_features.csv + test_data_path: artifacts/feature_engineering/test_features.csv + model_path: artifacts/model_trainer/model + target_column: target + +model_evaluation: + root_dir: artifacts/model_evaluation + model_path: artifacts/model_trainer/model + test_data_path: artifacts/feature_engineering/test_features.csv + metrics_file: artifacts/model_evaluation/metrics.json + target_column: target + +model_pusher: + root_dir: artifacts/model_pusher + model_path: artifacts/model_trainer/model + model_registry_path: models/production + +mlflow: + tracking_uri: http://localhost:5000 + experiment_name: automl_experiment + run_name: null + registry_uri: null \ No newline at end of file diff --git a/config/deployment_config.yaml b/config/deployment_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52a0452bdbc9a71479b46b6771b6de161f537dbf --- /dev/null +++ b/config/deployment_config.yaml @@ -0,0 +1,42 @@ +api: + host: 0.0.0.0 + port: 8000 + workers: 4 + reload: false + log_level: info + title: AutoML MLOps API + version: 1.0.0 + +model: + path: models/production + cache_enabled: true + cache_ttl: 3600 + prediction_timeout: 30 + +docker: + image_name: automl-mlops + registry: ghcr.io + tag: latest + platform: linux/amd64 + +huggingface: + space_name: null + space_sdk: docker + space_hardware: cpu-basic + private: false + +health: + liveness_path: /health + readiness_path: /readiness + startup_timeout: 60 + +metrics: + enabled: true + path: /metrics + include_trace_exemplar: true + +logging: + level: INFO + format: json + rotation: 100 MB + retention: 7 days \ No newline at end of file diff --git a/config/monitoring_config.yaml b/config/monitoring_config.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..152d51468d093350faab30eaa9555883e7a054c3 --- /dev/null +++ b/config/monitoring_config.yaml @@ -0,0 +1,44 @@ +data_drift: + reference_data_path: artifacts/data/processed/reference.csv + current_data_path: artifacts/data/processed/current.csv + report_path: artifacts/reports/drift_reports + threshold: 0.1 + columns: null + +model_monitoring: + predictions_path: artifacts/logs/predictions + performance_threshold: 0.05 + window_size: 1000 + metrics: + - accuracy + - precision + - recall + - f1 + +evidently: + reference_window: 7 + detection_window: 1 + confidence_level: 0.95 + stattest: ks + stattest_threshold: 0.05 + +prometheus: + host: localhost + port: 9090 + scrape_interval: 15s + evaluation_interval: 15s + +grafana: + host: localhost + port: 3000 + admin_user: admin + admin_password: admin + +loki: + host: localhost + port: 3100 + +promtail: + host: localhost + port: 9080 + log_path: artifacts/logs \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/monitoring/__init__.py b/monitoring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/monitoring/dashboards/evidently_dashboard.ipynb b/monitoring/dashboards/evidently_dashboard.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..21c26798c798c324d4d4d68038534735678f3528 --- /dev/null +++ b/monitoring/dashboards/evidently_dashboard.ipynb @@ -0,0 +1,10 @@ +{ + "cells": [], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/monitoring/dashboards/generate_reports.py b/monitoring/dashboards/generate_reports.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/monitoring/data_drift/__init__.py b/monitoring/data_drift/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/monitoring/data_drift/drift_detector.py b/monitoring/data_drift/drift_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/monitoring/data_drift/evidently_monitor.py b/monitoring/data_drift/evidently_monitor.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/monitoring/model_monitoring/__init__.py b/monitoring/model_monitoring/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/monitoring/model_monitoring/performance_tracker.py b/monitoring/model_monitoring/performance_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/monitoring/model_monitoring/prediction_logger.py b/monitoring/model_monitoring/prediction_logger.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/notebooks/01_data_exploration.ipynb b/notebooks/01_data_exploration.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..21c26798c798c324d4d4d68038534735678f3528 --- /dev/null +++ b/notebooks/01_data_exploration.ipynb @@ -0,0 +1,10 @@ +{ + "cells": [], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/02_automl_experiments.ipynb b/notebooks/02_automl_experiments.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..21c26798c798c324d4d4d68038534735678f3528 --- /dev/null +++ b/notebooks/02_automl_experiments.ipynb @@ -0,0 +1,10 @@ +{ + "cells": [], + "metadata": { + "language_info": { + "name": "python" + 
} + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_model_analysis.ipynb b/notebooks/03_model_analysis.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..21c26798c798c324d4d4d68038534735678f3528 --- /dev/null +++ b/notebooks/03_model_analysis.ipynb @@ -0,0 +1,10 @@ +{ + "cells": [], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/observability/grafana/dashboards/api_metrics.json b/observability/grafana/dashboards/api_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/observability/grafana/dashboards/model_metrics.json b/observability/grafana/dashboards/model_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/observability/grafana/dashboards/system_metrics.json b/observability/grafana/dashboards/system_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/observability/grafana/provisioning/dashboards.yaml b/observability/grafana/provisioning/dashboards.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/observability/grafana/provisioning/datasources.yaml b/observability/grafana/provisioning/datasources.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/observability/loki/loki-config.yaml b/observability/loki/loki-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/observability/prometheus/alerts.yml b/observability/prometheus/alerts.yml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/observability/prometheus/prometheus.yml b/observability/prometheus/prometheus.yml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/observability/promtail/promtail-config.yaml b/observability/promtail/promtail-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000000000000000000000000000000000000..d40deb28228f3b09a9f44af6dcc464ac3ba5280f --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,19 @@ +-r requirements.txt + +pytest +pytest-asyncio +pytest-cov +pytest-mock + +black +flake8 +isort +mypy + +pre-commit + +jupyter +ipykernel +notebook + +locust \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4f559deec958e0280b581b7f323d5330953824f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,30 @@ +fastapi +uvicorn[standard] +pydantic +pydantic-settings + +pandas +numpy +scikit-learn + +autogluon.tabular +flaml +pycaret + +mlflow +dvc + +evidently + +pyarrow +fastparquet + +pyyaml +python-box +ensure +python-multipart +prometheus-client +python-json-logger + +httpx +requests \ No newline at end of file diff --git a/scripts/evaluate.py b/scripts/evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/scripts/init_db.py b/scripts/init_db.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/scripts/serve.py b/scripts/serve.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/scripts/setup_env.sh b/scripts/setup_env.sh new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/scripts/train.py b/scripts/train.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/__init__.py b/src/mlpipeline/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/automl/__init__.py b/src/mlpipeline/automl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/automl/autogluon_trainer.py b/src/mlpipeline/automl/autogluon_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/automl/automl_factory.py b/src/mlpipeline/automl/automl_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/automl/flaml_trainer.py b/src/mlpipeline/automl/flaml_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/automl/pycaret_trainer.py b/src/mlpipeline/automl/pycaret_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/__init__.py b/src/mlpipeline/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/automl_trainer.py b/src/mlpipeline/components/automl_trainer.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/data_ingestion.py b/src/mlpipeline/components/data_ingestion.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/data_transformation.py b/src/mlpipeline/components/data_transformation.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/data_validation.py b/src/mlpipeline/components/data_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/feature_engineering.py b/src/mlpipeline/components/feature_engineering.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/model_evaluation.py b/src/mlpipeline/components/model_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/components/model_pusher.py b/src/mlpipeline/components/model_pusher.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/config/__init__.py b/src/mlpipeline/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/config/configuration.py b/src/mlpipeline/config/configuration.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mlpipeline/constants.py b/src/mlpipeline/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..5c2fa3298d2b91b321d4b2eb127d6a6d6517cbca --- /dev/null +++ 
from pathlib import Path

# YAML configuration files; all paths are relative, so they resolve against
# the process working directory.
CONFIG_FILE_PATH = Path("config") / "config.yaml"
AUTOML_CONFIG_FILE_PATH = Path("config") / "automl_config.yaml"
MONITORING_CONFIG_FILE_PATH = Path("config") / "monitoring_config.yaml"
DEPLOYMENT_CONFIG_FILE_PATH = Path("config") / "deployment_config.yaml"

# Artifact tree: every sub-directory hangs off ARTIFACTS_DIR.
ARTIFACTS_DIR = Path("artifacts")
LOGS_DIR = ARTIFACTS_DIR / "logs"
MODELS_DIR = ARTIFACTS_DIR / "models"
DATA_DIR = ARTIFACTS_DIR / "data"
REPORTS_DIR = ARTIFACTS_DIR / "reports"

# MLflow endpoints; the registry URI is left unset (None).
MLFLOW_TRACKING_URI = "http://localhost:5000"
MLFLOW_REGISTRY_URI = None
import sys
from types import ModuleType
from typing import Optional


class MLPipelineException(Exception):
    """Base error for the pipeline that records where a failure was caught.

    Args:
        error_message: Human-readable description of the failure.
        error_detail: Object exposing ``exc_info()`` -- in practice the
            ``sys`` module, passed from inside an ``except`` block. When it is
            omitted, or no exception is currently being handled, the location
            falls back to ``"Unknown"`` / ``0``.

    Attributes set on the instance: ``error_message``, ``file_name`` and
    ``line_number``.
    """

    # The annotation is ModuleType rather than ``Optional[sys]``: ``sys`` is a
    # module object, not a type, and older ``typing`` versions reject it with
    # a TypeError as soon as the class body is executed.
    def __init__(self, error_message: str, error_detail: Optional[ModuleType] = None):
        super().__init__(error_message)
        self.error_message = error_message

        # Fallback location, overwritten below when a traceback is available.
        self.file_name = "Unknown"
        self.line_number = 0

        if error_detail:
            _, _, exc_tb = error_detail.exc_info()
            if exc_tb:
                # First traceback entry: the frame in which the exception is
                # being handled, and the line in that frame that led to it.
                self.file_name = exc_tb.tb_frame.f_code.co_filename
                self.line_number = exc_tb.tb_lineno

    def __str__(self) -> str:
        """Return the message prefixed with the recorded file and line."""
        return f"Error in {self.file_name} at line {self.line_number}: {self.error_message}"


class DataIngestionException(MLPipelineException):
    """Failure raised by the data-ingestion stage."""


class DataValidationException(MLPipelineException):
    """Failure raised by the data-validation stage."""


class DataTransformationException(MLPipelineException):
    """Failure raised by the data-transformation stage."""


class ModelTrainingException(MLPipelineException):
    """Failure raised by the model-training stage."""


class ModelEvaluationException(MLPipelineException):
    """Failure raised by the model-evaluation stage."""


class ConfigurationException(MLPipelineException):
    """Failure raised while loading or interpreting configuration."""
def get_logger(name: str, level: int = logging.INFO, json_format: bool = False) -> logging.Logger:
    """Return the logger called *name*, wired to the run's log file and stdout.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        level: Level set on the logger; applied on every call, including
            calls for a logger that is already configured.
        json_format: Use ``jsonlogger.JsonFormatter`` instead of the plain
            text formatter. Only honoured the first time a given name is
            configured, because handlers are attached once.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Already configured: attaching handlers again would duplicate every record.
    if logger.handlers:
        return logger

    if json_format:
        formatter = jsonlogger.JsonFormatter(
            '%(asctime)s %(name)s %(levelname)s %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
    else:
        formatter = logging.Formatter(
            '[%(asctime)s] %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )

    handlers = (
        # Explicit UTF-8 so non-ASCII messages do not depend on the
        # platform's locale encoding.
        logging.FileHandler(LOG_FILE_PATH, encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    )
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
@ensure_annotations
def read_yaml(path: Path) -> ConfigBox:
    """Load a YAML file and wrap its content in a ``ConfigBox``.

    Args:
        path: Location of the YAML file.

    Returns:
        The parsed content as a ``ConfigBox``.

    Raises:
        Exception: Any error raised while opening, parsing or wrapping the
            file is logged and re-raised unchanged.
    """
    try:
        # Read as UTF-8 rather than the platform's locale encoding.
        with open(path, encoding="utf-8") as yaml_file:
            content = yaml.safe_load(yaml_file)
            logger.info(f"YAML file loaded: {path}")
            return ConfigBox(content)
    except Exception as e:
        logger.error(f"Error reading YAML file {path}: {e}")
        # Bare raise keeps the original traceback intact.
        raise


@ensure_annotations
def create_directories(paths: list, verbose: bool = True):
    """Create every directory in *paths*, including missing parents.

    Args:
        paths: Directory paths to create; existing directories are left alone.
        verbose: Log one line per directory when true.
    """
    for path in paths:
        os.makedirs(path, exist_ok=True)
        if verbose:
            logger.info(f"Created directory: {path}")


@ensure_annotations
def save_json(path: Path, data: Dict):
    """Write *data* to *path* as JSON indented by four spaces.

    Args:
        path: Destination file; overwritten if it exists.
        data: Mapping to serialise.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)
    logger.info(f"JSON file saved: {path}")


@ensure_annotations
def load_json(path: Path) -> ConfigBox:
    """Read a JSON file and wrap its content in a ``ConfigBox``.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded content as a ``ConfigBox``.
    """
    # JSON interchange text is UTF-8; do not depend on the locale default.
    with open(path, encoding="utf-8") as f:
        content = json.load(f)
    logger.info(f"JSON file loaded: {path}")
    return ConfigBox(content)


@ensure_annotations
def save_pickle(path: Path, obj: Any):
    """Serialise *obj* to *path* with ``pickle``.

    Args:
        path: Destination file; overwritten if it exists.
        obj: Object to pickle.
    """
    with open(path, "wb") as f:
        pickle.dump(obj, f)
    logger.info(f"Pickle file saved: {path}")


@ensure_annotations
def load_pickle(path: Path) -> Any:
    """Load and return the object pickled at *path*.

    Args:
        path: Location of the pickle file.

    Returns:
        The unpickled object.
    """
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # files that come from a trusted source.
    with open(path, "rb") as f:
        obj = pickle.load(f)
    logger.info(f"Pickle file loaded: {path}")
    return obj


@ensure_annotations
def save_model(path: Path, model: Any):
    """Persist *model* to *path* with ``joblib.dump``.

    Args:
        path: Destination file.
        model: Object to persist.
    """
    joblib.dump(model, path)
    logger.info(f"Model saved: {path}")


@ensure_annotations
def load_model(path: Path) -> Any:
    """Load and return the object stored at *path* with ``joblib.load``.

    Args:
        path: Location of the persisted model.

    Returns:
        The deserialised object.
    """
    # NOTE(security): joblib files are pickle-based -- only load trusted files.
    model = joblib.load(path)
    logger.info(f"Model loaded: {path}")
    return model
@ensure_annotations
def get_size(path: Path) -> str:
    """Return the size of the file at *path* as a ``"~ N KB"`` string.

    The byte count is divided by 1024 and rounded to a whole number.
    """
    size_in_kb = round(os.path.getsize(path) / 1024)
    return f"~ {size_in_kb} KB"


class MLflowManager:
    """Thin convenience wrapper around the module-level ``mlflow`` API.

    Constructing an instance points MLflow at *tracking_uri* and selects
    *experiment_name*; every method forwards to the matching ``mlflow``
    function and, for most of them, logs a short confirmation line.
    """

    def __init__(self, tracking_uri: str, experiment_name: str):
        """Configure the tracking URI and the active experiment.

        Args:
            tracking_uri: Passed to ``mlflow.set_tracking_uri``.
            experiment_name: Passed to ``mlflow.set_experiment``.
        """
        self.tracking_uri = tracking_uri
        self.experiment_name = experiment_name
        mlflow.set_tracking_uri(tracking_uri)
        mlflow.set_experiment(experiment_name)
        logger.info(f"MLflow tracking URI: {tracking_uri}")
        logger.info(f"MLflow experiment: {experiment_name}")

    def start_run(self, run_name: Optional[str] = None) -> "mlflow.ActiveRun":
        """Start an MLflow run and return its handle.

        Args:
            run_name: Optional display name for the run.

        Returns:
            The object returned by ``mlflow.start_run``. Returning it (the
            method previously returned ``None``) lets callers read the run id,
            e.g. to build the ``model_uri`` handed to ``register_model``.
        """
        active_run = mlflow.start_run(run_name=run_name)
        logger.info(f"Started MLflow run: {run_name or 'auto'}")
        return active_run

    def end_run(self):
        """End the currently active MLflow run."""
        mlflow.end_run()
        logger.info("Ended MLflow run")

    def log_params(self, params: Dict[str, Any]):
        """Log a batch of parameters to the active run."""
        mlflow.log_params(params)
        logger.info(f"Logged {len(params)} parameters")

    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None):
        """Log a batch of metrics, optionally tagged with *step*."""
        mlflow.log_metrics(metrics, step=step)
        logger.info(f"Logged {len(metrics)} metrics")

    def log_metric(self, key: str, value: float, step: Optional[int] = None):
        """Log a single metric, optionally tagged with *step* (not echoed to the log)."""
        mlflow.log_metric(key, value, step=step)

    def log_artifact(self, local_path: str, artifact_path: Optional[str] = None):
        """Upload the file at *local_path*, optionally under *artifact_path*."""
        mlflow.log_artifact(local_path, artifact_path)
        logger.info(f"Logged artifact: {local_path}")

    def log_model(self, model: Any, artifact_path: str, **kwargs):
        """Log *model* through ``mlflow.sklearn.log_model``.

        Extra keyword arguments are forwarded unchanged.
        NOTE(review): the scikit-learn flavour is hard-coded here -- confirm
        every model passed in is compatible with it.
        """
        mlflow.sklearn.log_model(model, artifact_path, **kwargs)
        logger.info(f"Logged model: {artifact_path}")

    def register_model(self, model_uri: str, name: str) -> Any:
        """Register the model at *model_uri* under *name*.

        Returns:
            Whatever ``mlflow.register_model`` returns.
        """
        result = mlflow.register_model(model_uri, name)
        logger.info(f"Registered model: {name}")
        return result

    def set_tag(self, key: str, value: str):
        """Set a single tag on the active run (not echoed to the log)."""
        mlflow.set_tag(key, value)

    def set_tags(self, tags: Dict[str, str]):
        """Set several tags on the active run."""
        mlflow.set_tags(tags)
        logger.info(f"Set {len(tags)} tags")