Commit ·
e7ee64e
1
Parent(s): d65fba7
Fix prep data job: update the bash command, the config list, and the prep_data.sh script path
Browse files
root_gnn_dgl/jobs/prep_data/run_processing.py
CHANGED
|
@@ -34,7 +34,7 @@ def call_bash_script(config, dataset_key, shuffle_chunks, bash_script):
|
|
| 34 |
"""
|
| 35 |
try:
|
| 36 |
# Construct the command to call the bash script
|
| 37 |
-
command = f"{bash_script}
|
| 38 |
|
| 39 |
print(f"Executing command: {command}") # Debugging: Print the command being executed
|
| 40 |
|
|
@@ -75,14 +75,15 @@ def main():
|
|
| 75 |
|
| 76 |
# List of YAML configuration files (relative to the base directory)
|
| 77 |
configs = [
|
| 78 |
-
"configs/stats_100K/pretraining_multiclass.yaml",
|
| 79 |
-
"configs/stats_100K/ttH_CP_even_vs_odd.yaml",
|
| 80 |
-
"configs/stats_all/pretraining_multiclass.yaml",
|
| 81 |
-
"configs/stats_all/ttH_CP_even_vs_odd.yaml",
|
|
|
|
| 82 |
]
|
| 83 |
|
| 84 |
# Path to the bash script to be called
|
| 85 |
-
bash_script = base_directory + "jobs/prep_data.sh"
|
| 86 |
|
| 87 |
# Use ThreadPoolExecutor to process YAML files concurrently
|
| 88 |
max_workers = os.cpu_count() # Use all available CPU cores
|
|
|
|
| 34 |
"""
|
| 35 |
try:
|
| 36 |
# Construct the command to call the bash script
|
| 37 |
+
command = f"{bash_script} {config} {dataset_key} {shuffle_chunks} --drop_last"
|
| 38 |
|
| 39 |
print(f"Executing command: {command}") # Debugging: Print the command being executed
|
| 40 |
|
|
|
|
| 75 |
|
| 76 |
# List of YAML configuration files (relative to the base directory)
|
| 77 |
configs = [
|
| 78 |
+
# "configs/stats_100K/pretraining_multiclass.yaml",
|
| 79 |
+
# "configs/stats_100K/ttH_CP_even_vs_odd.yaml",
|
| 80 |
+
# "configs/stats_all/pretraining_multiclass.yaml",
|
| 81 |
+
# "configs/stats_all/ttH_CP_even_vs_odd.yaml",
|
| 82 |
+
"configs/attention/ttH_CP_even_vs_odd.yaml",
|
| 83 |
]
|
| 84 |
|
| 85 |
# Path to the bash script to be called
|
| 86 |
+
bash_script = base_directory + "jobs/prep_data/prep_data.sh"
|
| 87 |
|
| 88 |
# Use ThreadPoolExecutor to process YAML files concurrently
|
| 89 |
max_workers = os.cpu_count() # Use all available CPU cores
|