"""
Runs the full pipeline: data download → feature extraction → training → evaluation → experiments.

A single step (including the optional hyperparameter tuning step, which is not
part of the default run) can be selected with ``--step``.
"""
import argparse
import subprocess
import sys
def run_step(cmd: list[str], step_name: str) -> None:
    """Run one pipeline script in a child Python interpreter.

    The child inherits the current working directory, so relative script
    paths in ``cmd`` are resolved against the directory the pipeline was
    launched from.

    Args:
        cmd: Script path followed by its command-line arguments. The current
            interpreter (``sys.executable``) is prepended before launching.
        step_name: Human-readable label used in the banner and status lines.

    Raises:
        SystemExit: With exit status 1 when the child exits with a non-zero
            return code.
    """
    banner = "═" * 60
    print(f"\n{banner}")
    print(f" STEP: {step_name}")
    # Flush before spawning the child: when stdout is a pipe or a file the
    # parent's buffer would otherwise be written *after* the child's output,
    # putting the banner below the step it is supposed to introduce.
    print(f"{banner}\n", flush=True)

    result = subprocess.run([sys.executable, *cmd])
    if result.returncode != 0:
        print(f"\n[ERROR] Step '{step_name}' failed with code {result.returncode}")
        sys.exit(1)
    print(f"\n[OK] {step_name} complete.\n")
def main(argv: "list[str] | None" = None) -> None:
    """Parse command-line options and run the requested pipeline step(s).

    With ``--step all`` (the default) the data, features, train, evaluate and
    experiment steps run in that order; the tune step only runs when selected
    explicitly. Each step is delegated to ``run_step``, which terminates the
    process if the step fails, so later steps never run after a failure.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(description="Run full project pipeline")
    parser.add_argument(
        "--step",
        type=str,
        default="all",
        choices=["all", "data", "features", "train", "evaluate",
                 "experiment", "tune"],
    )
    parser.add_argument(
        "--max_samples",
        type=int,
        default=None,
        help="Max samples to load (for quick testing)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=3,
        help="Training epochs for deep learning model",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="all",
        choices=["all", "naive", "classical", "deep"],
    )
    args = parser.parse_args(argv)

    data_cmd = ["scripts/make_dataset.py"]
    # Compare against None rather than relying on truthiness so that an
    # explicit ``--max_samples 0`` is forwarded instead of silently dropped.
    if args.max_samples is not None:
        data_cmd += ["--max_samples", str(args.max_samples)]

    # Step key -> (command passed to the interpreter, label shown to the user).
    steps = {
        "data": (data_cmd, "Data Download & Preprocessing"),
        "features": (["scripts/build_features.py"], "Feature Extraction"),
        "tune": (["scripts/tune_hyperparams.py"], "Hyperparameter Tuning"),
        "train": (
            ["scripts/train.py", "--model", args.model,
             "--epochs", str(args.epochs)],
            "Model Training",
        ),
        "evaluate": (["scripts/evaluate.py"], "Model Evaluation"),
        "experiment": (["scripts/experiment.py"], "Experiments"),
    }

    if args.step == "all":
        selected = ["data", "features", "train", "evaluate", "experiment"]
    else:
        selected = [args.step]

    for key in selected:
        cmd, label = steps[key]
        run_step(cmd, label)

    print("\n[DONE] Pipeline complete.")
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|