Abeshith committed on
Commit
cbb53b6
·
1 Parent(s): 5259cf2

Scripts Stages Added

Browse files
scripts/README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utility Scripts
2
+
3
+ Simple CLI scripts for common operations.
4
+
5
+ ## Available Scripts
6
+
7
+ ### 1. Train Pipeline
8
+ Run the complete training pipeline (all 7 stages):
9
+ ```bash
10
+ python scripts/train.py
11
+ ```
12
+
13
+ ### 2. Evaluate Model
14
+ Evaluate the trained model on test data:
15
+ ```bash
16
+ # Use default model path
17
+ python scripts/evaluate.py
18
+
19
+ # Or specify custom model path
20
+ python scripts/evaluate.py --model-path artifacts/model_trainer/model
21
+ ```
22
+
23
+ ### 3. Start API Server
24
+ Start the FastAPI server:
25
+ ```bash
26
+ # Default (binds 0.0.0.0:8000; open http://localhost:8000 locally)
27
+ python scripts/serve.py
28
+
29
+ # Custom host/port
30
+ python scripts/serve.py --host 0.0.0.0 --port 8080
31
+
32
+ # Development mode with auto-reload
33
+ python scripts/serve.py --reload
34
+ ```
35
+
36
+ ### 4. Initialize Airflow
37
+ Set up Airflow database and create admin user:
38
+ ```bash
39
+ python scripts/init_db.py
40
+ ```
41
+
42
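+ Then start Airflow services (first `export AIRFLOW_HOME=<project-root>/airflow` in each terminal, because `init_db.py` sets it only for its own process):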
43
+ ```bash
44
+ airflow scheduler # Terminal 1
45
+ airflow webserver # Terminal 2
46
+ ```
47
+
48
+ ## Quick Examples
49
+
50
+ ```bash
51
+ # Full workflow
52
+ python scripts/train.py # Train model
53
+ python scripts/evaluate.py # Evaluate model
54
+ python scripts/serve.py --reload # Start API server
55
+
56
+ # Airflow setup
57
+ python scripts/init_db.py # Initialize
58
+ airflow webserver # Start UI
59
+ ```
scripts/evaluate.py CHANGED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI script to evaluate the trained model."""
2
+ import sys
3
+ from pathlib import Path
4
+ import argparse
5
+
6
+ # Add project root to path
7
+ project_root = Path(__file__).parent.parent
8
+ sys.path.insert(0, str(project_root))
9
+
10
+ from mlpipeline.components.model_evaluation import ModelEvaluation
11
+ from mlpipeline.config.configuration import ConfigurationManager
12
+ from mlpipeline.logging.logger import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
def main():
    """Evaluate the trained model and log the resulting metrics.

    Command-line options:
        --model-path: optional model directory. When supplied (and
            non-empty) it replaces ``model_path`` on the evaluation config
            obtained from ``ConfigurationManager``.

    Returns:
        int: 0 when evaluation completes, 1 when any exception is raised.
    """
    parser = argparse.ArgumentParser(description="Evaluate AutoML model")
    parser.add_argument(
        "--model-path",
        type=str,
        default=None,
        help="Path to model directory (default: from config)",
    )
    args = parser.parse_args()

    try:
        logger.info("Starting model evaluation...")
        config_manager = ConfigurationManager()
        eval_config = config_manager.get_model_evaluation_config()

        # Override model path if provided
        if args.model_path:
            eval_config.model_path = Path(args.model_path)

        evaluator = ModelEvaluation(eval_config)
        metrics = evaluator.evaluate()

        logger.info("✅ Evaluation completed!")
        logger.info(f"Metrics: {metrics}")
        return 0
    except Exception as e:
        # Top-level CLI boundary: convert any failure into a non-zero exit
        # code, but keep the traceback so the failure can be diagnosed.
        # NOTE(review): assumes the project logger exposes the standard
        # `exception` method -- confirm against mlpipeline.logging.logger.
        logger.exception(f"❌ Evaluation failed: {e}")
        return 1
46
+
47
+
48
if __name__ == "__main__":
    # Use sys.exit rather than the bare `exit` helper: `exit` is injected by
    # the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. under `python -S`).
    sys.exit(main())
scripts/init_db.py CHANGED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI script to initialize Airflow database."""
2
+ import sys
3
+ from pathlib import Path
4
+ import subprocess
5
+ import os
6
+
7
+ # Add project root to path
8
+ project_root = Path(__file__).parent.parent
9
+ sys.path.insert(0, str(project_root))
10
+
11
+ from mlpipeline.logging.logger import get_logger
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
def _run_airflow(*cli_args):
    """Run ``airflow <cli_args>`` and return the completed process with captured text output."""
    return subprocess.run(
        ["airflow", *cli_args],
        capture_output=True,
        text=True,
    )


def main():
    """Initialize the Airflow database and create an admin user.

    Sets ``AIRFLOW_HOME`` to ``<project_root>/airflow`` for this process
    (and therefore for the ``airflow`` subprocesses it spawns), runs
    ``airflow db init``, then ``airflow users create`` for the ``admin``
    account.

    Returns:
        int: 0 when the database was initialized (user creation is
        best-effort and only logged), 1 when database initialization fails,
        the ``airflow`` executable is missing, or another exception occurs.
    """
    try:
        airflow_home = project_root / "airflow"

        # Set AIRFLOW_HOME environment variable; child processes inherit it.
        os.environ["AIRFLOW_HOME"] = str(airflow_home)
        logger.info(f"AIRFLOW_HOME set to: {airflow_home}")

        # Initialize database
        # NOTE(review): newer Airflow releases deprecate `db init` in favour
        # of `db migrate` -- confirm against the Airflow version in use.
        logger.info("Initializing Airflow database...")
        result = _run_airflow("db", "init")

        if result.returncode != 0:
            logger.error(f"Failed to initialize database: {result.stderr}")
            return 1

        logger.info("✅ Database initialized successfully!")

        # Create admin user
        # NOTE(review): the credentials are hard-coded, passed on the command
        # line and echoed to the log -- acceptable for local development only.
        logger.info("Creating admin user...")
        result = _run_airflow(
            "users", "create",
            "--username", "admin",
            "--firstname", "Admin",
            "--lastname", "User",
            "--role", "Admin",
            "--email", "admin@example.com",
            "--password", "admin",
        )

        # The "already exist(s)" notice may arrive on either stream and with
        # either exit status, so inspect both streams before claiming success.
        combined_output = f"{result.stdout}\n{result.stderr}"
        if "already exist" in combined_output:
            logger.info("Admin user already exists (username: admin); left unchanged")
        elif result.returncode != 0:
            # Best-effort step: report the problem but do not fail the run.
            logger.warning(f"User creation note: {result.stderr}")
        else:
            logger.info("✅ Admin user created (username: admin, password: admin)")

        logger.info("\n🚀 Next steps:")
        # AIRFLOW_HOME was only set for this process; new terminals need it too.
        logger.info(f" 0. In each terminal: export AIRFLOW_HOME={airflow_home}")
        logger.info(" 1. Start scheduler: airflow scheduler")
        logger.info(" 2. Start webserver: airflow webserver")
        logger.info(" 3. Access UI: http://localhost:8080")

        return 0
    except FileNotFoundError:
        # Raised by subprocess.run when the `airflow` executable is not on PATH.
        logger.error("❌ Airflow not found. Install with: pip install apache-airflow")
        return 1
    except Exception as e:
        # Top-level CLI boundary: keep the traceback for diagnosis.
        # NOTE(review): assumes the project logger exposes the standard
        # `exception` method -- confirm against mlpipeline.logging.logger.
        logger.exception(f"❌ Initialization failed: {e}")
        return 1
72
+
73
+
74
if __name__ == "__main__":
    # Use sys.exit rather than the bare `exit` helper: `exit` is injected by
    # the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. under `python -S`).
    sys.exit(main())
scripts/serve.py CHANGED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI script to start the FastAPI server."""
2
+ import sys
3
+ from pathlib import Path
4
+ import argparse
5
+ import uvicorn
6
+
7
+ # Add project root to path
8
+ project_root = Path(__file__).parent.parent
9
+ sys.path.insert(0, str(project_root))
10
+
11
+ from mlpipeline.logging.logger import get_logger
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
def main():
    """Start the FastAPI server with uvicorn.

    Command-line options:
        --host: interface to bind (default ``0.0.0.0``).
        --port: TCP port to bind (default ``8000``).
        --reload: enable uvicorn auto-reload for development.

    Returns:
        int: 0 after ``uvicorn.run`` returns, 1 when an exception is raised.
    """
    parser = argparse.ArgumentParser(description="Start AutoML API server")
    parser.add_argument(
        "--host",
        type=str,
        default="0.0.0.0",
        help="Host to bind (default: 0.0.0.0)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port to bind (default: 8000)",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )
    args = parser.parse_args()

    # Wildcard bind addresses are not browsable, so point the docs link at
    # localhost; bracket IPv6 literals so the URL stays well-formed.
    docs_host = "localhost" if args.host in ("0.0.0.0", "::") else args.host
    if ":" in docs_host:
        docs_host = f"[{docs_host}]"

    try:
        logger.info(f"🚀 Starting FastAPI server at {args.host}:{args.port}")
        logger.info(f"📚 API docs: http://{docs_host}:{args.port}/docs")

        # The app is passed as an import string, which uvicorn needs in
        # order to re-import it when --reload is enabled.
        uvicorn.run(
            "app.main:app",
            host=args.host,
            port=args.port,
            reload=args.reload,
        )
        return 0
    except Exception as e:
        # Top-level CLI boundary: keep the traceback for diagnosis.
        # NOTE(review): assumes the project logger exposes the standard
        # `exception` method -- confirm against mlpipeline.logging.logger.
        logger.exception(f"❌ Server failed to start: {e}")
        return 1
52
+
53
+
54
if __name__ == "__main__":
    # Use sys.exit rather than the bare `exit` helper: `exit` is injected by
    # the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. under `python -S`).
    sys.exit(main())
scripts/train.py CHANGED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI script to run the training pipeline."""
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ # Add project root to path
6
+ project_root = Path(__file__).parent.parent
7
+ sys.path.insert(0, str(project_root))
8
+
9
+ from mlpipeline.pipeline.training_pipeline import TrainingPipeline
10
+ from mlpipeline.logging.logger import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
def main():
    """Run the complete training pipeline.

    Instantiates ``TrainingPipeline`` and calls ``run_pipeline()``.

    Returns:
        int: 0 when the pipeline finishes, 1 when any exception is raised.
    """
    try:
        logger.info("Starting AutoML Training Pipeline...")
        pipeline = TrainingPipeline()
        pipeline.run_pipeline()
        logger.info("✅ Training pipeline completed successfully!")
        return 0
    except Exception as e:
        # Top-level CLI boundary: convert any failure into a non-zero exit
        # code, but keep the traceback so the failing stage can be found.
        # NOTE(review): assumes the project logger exposes the standard
        # `exception` method -- confirm against mlpipeline.logging.logger.
        logger.exception(f"❌ Training pipeline failed: {e}")
        return 1
26
+
27
+
28
if __name__ == "__main__":
    # Use sys.exit rather than the bare `exit` helper: `exit` is injected by
    # the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. under `python -S`).
    sys.exit(main())