Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add comments to generated config
Browse files
- yourbench_space/config.py +19 -4
- yourbench_space/utils.py +17 -0
yourbench_space/config.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
-
import
|
| 2 |
from loguru import logger
|
| 3 |
|
| 4 |
from yourbench_space import PATH
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
|
@@ -82,10 +83,24 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
|
| 82 |
}
|
| 83 |
|
| 84 |
|
| 85 |
-
def save_yaml_file(config:
|
| 86 |
-
"""Saves the given config dictionary to a YAML file"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
with open(path, "w") as file:
|
| 88 |
-
yaml.dump(
|
|
|
|
| 89 |
return path
|
| 90 |
|
| 91 |
|
|
|
|
| 1 |
+
from ruamel.yaml import YAML
|
| 2 |
from loguru import logger
|
| 3 |
|
| 4 |
from yourbench_space import PATH
|
| 5 |
+
from yourbench_space.utils import to_commentable_yaml
|
| 6 |
|
| 7 |
|
| 8 |
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
|
|
|
| 83 |
}
|
| 84 |
|
| 85 |
|
| 86 |
+
def save_yaml_file(config: dict, path: str) -> str:
    """Save the given config dictionary to a YAML file with helpful comments.

    The config is first converted into ruamel.yaml commentable containers so
    that warning comments can be placed above the path settings a user is
    expected to review before running the pipeline.

    Args:
        config: Configuration mapping. It is indexed as
            ``config["pipeline"]["ingestion"]`` and
            ``config["pipeline"]["upload_ingest_to_hub"]``, so both sections
            must be present.
        path: Destination file path. The file is opened in write mode, so
            any existing content is replaced.

    Returns:
        The ``path`` argument, unchanged.

    Raises:
        KeyError: If ``pipeline``, ``ingestion`` or ``upload_ingest_to_hub``
            is missing from the config.
    """
    yaml = YAML()
    yaml.indent(mapping=2, sequence=4, offset=2)

    # Plain dicts/lists cannot carry comments; convert them first.
    config_cm = to_commentable_yaml(config)

    # Now we can add comments above the keys users most likely need to edit
    ingestion = config_cm["pipeline"]["ingestion"]
    ingestion.yaml_set_comment_before_after_key(
        "source_documents_dir",
        before="⚠️ Change this path to match your local directory",
    )
    ingestion.yaml_set_comment_before_after_key(
        "output_dir",
        before="⚠️ This is where ingested data will be saved",
    )

    upload = config_cm["pipeline"]["upload_ingest_to_hub"]
    upload.yaml_set_comment_before_after_key(
        "source_documents_dir",
        before="⚠️ Same as output_dir from ingestion — adjust as needed",
    )

    # The comments contain non-ASCII characters (warning emoji, em dash), so
    # pin the encoding to UTF-8 rather than relying on the platform default,
    # which may be unable to encode them.
    with open(path, "w", encoding="utf-8") as file:
        yaml.dump(config_cm, file)

    return path
|
| 105 |
|
| 106 |
|
yourbench_space/utils.py
CHANGED
|
@@ -5,6 +5,7 @@ import shutil
|
|
| 5 |
import pathlib
|
| 6 |
import subprocess
|
| 7 |
from typing import List, Union, Optional
|
|
|
|
| 8 |
|
| 9 |
import pandas as pd
|
| 10 |
from loguru import logger
|
|
@@ -34,6 +35,22 @@ STAGE_DISPLAY_MAP = {
|
|
| 34 |
"lighteval": "Generate Lighteval Subset",
|
| 35 |
}
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
def map_stage_names(stages: list[str]) -> list[str]:
    """Translate stage identifiers into their display names.

    Each stage is looked up in STAGE_DISPLAY_MAP; a stage with no entry is
    passed through unchanged. Order is preserved.
    """
    display_names = []
    for stage_name in stages:
        display_names.append(STAGE_DISPLAY_MAP.get(stage_name, stage_name))
    return display_names
|
|
|
|
| 5 |
import pathlib
|
| 6 |
import subprocess
|
| 7 |
from typing import List, Union, Optional
|
| 8 |
+
from ruamel.yaml.comments import CommentedMap, CommentedSeq
|
| 9 |
|
| 10 |
import pandas as pd
|
| 11 |
from loguru import logger
|
|
|
|
| 35 |
"lighteval": "Generate Lighteval Subset",
|
| 36 |
}
|
| 37 |
|
| 38 |
+
def to_commentable_yaml(obj):
    """
    Build a ruamel.yaml-commentable copy of a plain Python structure.

    Dicts become CommentedMap and lists become CommentedSeq, with their
    nested values converted recursively, so that comments can be attached
    before dumping. Any other value is returned unchanged.
    """
    # Non-container values need no conversion
    if not isinstance(obj, (dict, list)):
        return obj

    # Mapping: convert every value, then wrap the result in a CommentedMap
    if isinstance(obj, dict):
        converted_entries = {}
        for key, value in obj.items():
            converted_entries[key] = to_commentable_yaml(value)
        return CommentedMap(converted_entries)

    # Sequence: convert every element, then wrap the result in a CommentedSeq
    converted_items = []
    for element in obj:
        converted_items.append(to_commentable_yaml(element))
    return CommentedSeq(converted_items)
|
| 54 |
|
| 55 |
def map_stage_names(stages: list[str]) -> list[str]:
    """Translate stage identifiers into their display names.

    Each stage is looked up in STAGE_DISPLAY_MAP; a stage with no entry is
    passed through unchanged. Order is preserved.
    """
    display_names = []
    for stage_name in stages:
        display_names.append(STAGE_DISPLAY_MAP.get(stage_name, stage_name))
    return display_names
|