Spaces:
Sleeping
Sleeping
Yusuf
committed on
Commit
·
2fd4542
1
Parent(s):
9dbc9de
CHORE: remove duplicate clearml task config
Browse files
dataPrep/data_preparation.py
CHANGED
|
@@ -53,7 +53,8 @@ task = Task.init(
|
|
| 53 |
task.set_random_seed(SEED)
|
| 54 |
clearml_logger = task.get_logger()
|
| 55 |
|
| 56 |
-
|
|
|
|
| 57 |
task.connect({
|
| 58 |
"seed": SEED,
|
| 59 |
"dataset": {
|
|
@@ -77,12 +78,6 @@ data_plants, prototyping_dataset, features, clearml_dataset = make_subset(
|
|
| 77 |
DATASET_LINK, DATASET_SUBSET_RATIO, clearml_logger
|
| 78 |
)
|
| 79 |
|
| 80 |
-
# Log subset config to ClearML
|
| 81 |
-
task.connect_configuration(
|
| 82 |
-
{"subset_ratio": DATASET_SUBSET_RATIO},
|
| 83 |
-
name="Data subsetting"
|
| 84 |
-
)
|
| 85 |
-
|
| 86 |
|
| 87 |
# ---- Exploratory data analysis (EDA) ----
|
| 88 |
|
|
@@ -145,7 +140,7 @@ clearml_logger.report_matplotlib_figure(
|
|
| 145 |
# ----------------------------------------------------------------------
|
| 146 |
if __name__ == "__main__":
|
| 147 |
|
| 148 |
-
#
|
| 149 |
aug_config = {
|
| 150 |
'rotation': ROTATION,
|
| 151 |
'brightness': BRIGHTNESS,
|
|
|
|
| 53 |
task.set_random_seed(SEED)
|
| 54 |
clearml_logger = task.get_logger()
|
| 55 |
|
| 56 |
+
|
| 57 |
+
# -------- Track full configuration in ClearML --------
|
| 58 |
task.connect({
|
| 59 |
"seed": SEED,
|
| 60 |
"dataset": {
|
|
|
|
| 78 |
DATASET_LINK, DATASET_SUBSET_RATIO, clearml_logger
|
| 79 |
)
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
# ---- Exploratory data analysis (EDA) ----
|
| 83 |
|
|
|
|
| 140 |
# ----------------------------------------------------------------------
|
| 141 |
if __name__ == "__main__":
|
| 142 |
|
| 143 |
+
# ---------------- Dataset splits ----------------
|
| 144 |
aug_config = {
|
| 145 |
'rotation': ROTATION,
|
| 146 |
'brightness': BRIGHTNESS,
|
dataPrep/helpers/create_dataset.py
CHANGED
|
@@ -9,7 +9,11 @@ from datasets import load_dataset
|
|
| 9 |
from clearml import Dataset
|
| 10 |
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def make_subset(dataset_link, subset_ratio, clearml_logger):
|
| 14 |
|
| 15 |
# Load dataset
|
|
|
|
| 9 |
from clearml import Dataset
|
| 10 |
|
| 11 |
|
| 12 |
+
'''
|
| 13 |
+
Load a dataset from a Hugging Face link and randomly subset it; upload the subset to ClearML.
|
| 14 |
+
Subset indices are uploaded to ClearML for reproducibility
|
| 15 |
+
REPRODUCE: Load the full dataset, then load the indices from ClearML to get the same subset
|
| 16 |
+
'''
|
| 17 |
def make_subset(dataset_link, subset_ratio, clearml_logger):
|
| 18 |
|
| 19 |
# Load dataset
|