File size: 3,146 Bytes
f7d11f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
stages:
  # Runs the dataset script. The stage re-executes when that script changes
  # and declares data/raw/heart.csv as its only output.
  download_data:
    cmd: >-
      uv run
      predicting_outcomes_in_heart_failure/data/dataset.py
    deps:
      - predicting_outcomes_in_heart_failure/data/dataset.py
    outs:
      - data/raw/heart.csv

  # Runs the preprocessing script on the raw CSV. Declared outputs are four
  # preprocessed CSVs (full, female-only, male-only, no-sex-column) and a
  # serialized scaler artifact.
  preprocessing:
    cmd: >-
      uv run
      predicting_outcomes_in_heart_failure/data/preprocess.py
    deps:
      # The script itself, so code changes invalidate the stage.
      - predicting_outcomes_in_heart_failure/data/preprocess.py
      # Raw input declared as the output of download_data.
      - data/raw/heart.csv
    outs:
      - data/interim/preprocessed.csv
      - data/interim/preprocessed_female_only.csv
      - data/interim/preprocessed_male_only.csv
      - data/interim/preprocessed_no_sex_column.csv
      - data/interim/preprocess_artifacts/scaler.joblib

  # Expands into one stage per dataset variant; ${item} is the variant name
  # and also names the output directory under data/processed/.
  split_data:
    foreach:
      - all
      - female
      - male
      - nosex
    do:
      cmd: >
        uv run
        predicting_outcomes_in_heart_failure/data/split_data.py
        --variant ${item}
      deps:
        - predicting_outcomes_in_heart_failure/data/split_data.py
        # Every variant depends on all four preprocessed files, so a change
        # to any one of them re-runs all four splits.
        # NOTE(review): presumably each variant reads only its own CSV;
        # confirm against split_data.py before narrowing these deps.
        - data/interim/preprocessed.csv
        - data/interim/preprocessed_female_only.csv
        - data/interim/preprocessed_male_only.csv
        - data/interim/preprocessed_no_sex_column.csv
      outs:
        - data/processed/${item}

  # Expands into one training stage per (variant, model) pair: 4 variants
  # x 3 models = 12 stages. Entry order is kept unchanged.
  training:
    foreach:
      # variant: all
      - variant: all
        model: logreg
      - variant: all
        model: random_forest
      - variant: all
        model: decision_tree
      # variant: female
      - variant: female
        model: logreg
      - variant: female
        model: random_forest
      - variant: female
        model: decision_tree
      # variant: male
      - variant: male
        model: logreg
      - variant: male
        model: random_forest
      - variant: male
        model: decision_tree
      # variant: nosex
      - variant: nosex
        model: logreg
      - variant: nosex
        model: random_forest
      - variant: nosex
        model: decision_tree
    do:
      cmd: >
        uv run
        predicting_outcomes_in_heart_failure/modeling/train.py
        --variant ${item.variant} --model ${item.model}
      deps:
        - predicting_outcomes_in_heart_failure/modeling/train.py
        # Only the train split of the selected variant is tracked here.
        - data/processed/${item.variant}/train.csv
      outs:
        # Serialized model for this (variant, model) pair.
        - models/${item.variant}/${item.model}.joblib
        # Per-model report path (no extension; presumably a directory).
        - reports/${item.variant}/${item.model}

  # Expands into one evaluation stage per (variant, model) pair, mirroring
  # the pairs listed for the training stage. Each stage consumes the trained
  # model and the variant's test split and writes one JSON metrics file.
  evaluation:
    foreach:
      # variant: all
      - variant: all
        model: logreg
      - variant: all
        model: random_forest
      - variant: all
        model: decision_tree
      # variant: female
      - variant: female
        model: logreg
      - variant: female
        model: random_forest
      - variant: female
        model: decision_tree
      # variant: male
      - variant: male
        model: logreg
      - variant: male
        model: random_forest
      - variant: male
        model: decision_tree
      # variant: nosex
      - variant: nosex
        model: logreg
      - variant: nosex
        model: random_forest
      - variant: nosex
        model: decision_tree
    do:
      cmd: >
        uv run predicting_outcomes_in_heart_failure/modeling/evaluate.py
        --variant ${item.variant}
        --model ${item.model}
      deps:
        - predicting_outcomes_in_heart_failure/modeling/evaluate.py
        # Model file declared as an output of the matching training stage.
        - models/${item.variant}/${item.model}.joblib
        - data/processed/${item.variant}/test.csv
      # Declared as metrics rather than plain outs so that
      # `dvc metrics show` / `dvc metrics diff` pick the files up. They
      # remain cached stage outputs at the same path.
      metrics:
        - metrics/test/${item.variant}/${item.model}.json