File size: 5,411 Bytes
0828e8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import subprocess
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from loguru import logger
from matharena.configs import load_configs
import re

def matches_models(model_name, models):
    """Return True if `model_name` is selected by the `models` filter.

    Args:
        model_name (str): The model name to test.
        models (list | None): Regex patterns. Each is applied with `re.match`,
            i.e. anchored at the start of `model_name` only. None disables
            filtering, so every model name is accepted.

    Returns:
        bool: True when `models` is None or at least one pattern matches.
    """
    # No filter given: accept everything.
    if models is None:
        return True
    return any(re.match(pattern, model_name) for pattern in models)

def run_configs(config_folder, apis, comp, skip_existing=False, n=4, simul=False, models=None,
                output_folder="outputs", include_old=False):
    """
    Loads configuration files from `config_folder` via `load_configs` and runs
    `scripts/run.py` for every selected config, with the commands executed in parallel.

    Each command's stdout/stderr is redirected to a log file under the folder 'logs':
      - If simul is True, the log file is 'logs/{model}.log', where 'model' is obtained from the
        config and any '/' in it is replaced by '-'. Configs sharing a model name share (and
        overwrite) the same log file.
      - If simul is False, the log file is 'logs/{api}.log' for each api group.

    Commands are executed without a shell (argument list), so config paths and arguments
    containing spaces or shell metacharacters are passed through verbatim. Failures (non-zero
    exit codes, or exceptions raised while launching a command) are logged, not raised.

    Args:
        config_folder (str): The root folder where config files are stored.
        apis (list): A list of valid api names. Only configs with an "api" key matching one of these will be used.
        comp (str): Competition parameter passed to the command.
        skip_existing (bool): If True, pass `--skip_existing` to the command.
        n (int): The number of runs to perform for each config.
        simul (bool): If True, run each config separately; if False, group configs by api.
        models (list): A list of model names to filter configs by. Can be a regex. If None, all configs are used.
        output_folder (str): The folder where model answers are stored.
        include_old (bool): If True, include old configs in the config_folder. Old configs are
            the ones matching a regex listed (one per line) in configs/exclude.txt.

    Raises:
        FileNotFoundError: If `include_old` is False and configs/exclude.txt does not exist.
    """
    # Ensure the log directory exists.
    log_dir = Path("logs")
    log_dir.mkdir(exist_ok=True)

    # Load every config found under config_folder (mapping: config path -> config dict).
    configs = load_configs(config_folder, remove_extension=False)

    if not include_old:
        exclude_file = Path("configs/exclude.txt")
        with exclude_file.open("r") as f:
            # Drop blank lines: an empty pattern matches every path with re.match
            # and would silently exclude all configs.
            exclude_regexes = [line for line in f.read().splitlines() if line.strip()]

        # Iterate over a snapshot of the keys because entries are deleted in the loop.
        for config_path in list(configs.keys()):
            for regex in exclude_regexes:
                if re.match(regex, config_path):
                    logger.info(f"Excluding {config_path} due to {regex}")
                    del configs[config_path]
                    break

    # Keep only configs whose api is requested and whose model passes the filter.
    valid_configs = [
        (file_path, config["api"], config["model"])
        for file_path, config in configs.items()
        if config["api"] in apis and matches_models(config["model"], models)
    ]

    # Each job is (log file stem, list of config paths handed to one command).
    if simul:
        # One command per config, logged under the (path-safe) model name.
        jobs = [(model.replace('/', '-'), [str(cfg_path)]) for cfg_path, _, model in valid_configs]
    else:
        # One command per api, covering all configs of that api.
        api_groups = {}
        for cfg_path, api, _ in valid_configs:
            api_groups.setdefault(api, []).append(str(cfg_path))
        jobs = list(api_groups.items())

    if not jobs:
        logger.warning("No configs matched the requested apis/models; nothing to run.")
        return

    def build_command(cfg_paths):
        """Builds the argument list for one invocation of scripts/run.py."""
        cmd = ["uv", "run", "python", "scripts/run.py",
               "--comp", str(comp),
               "--configs", *cfg_paths,
               "--n", str(n),
               "--output-folder", str(output_folder)]
        if skip_existing:
            cmd.append("--skip_existing")
        return cmd

    def run_command(cmd, log_file):
        """Runs a command (argument list) and redirects its output to the specified log file."""
        logger.info(f"Running: {' '.join(cmd)} -> logging to {log_file}")
        with open(log_file, "w") as lf:
            completed = subprocess.run(cmd, stdout=lf, stderr=subprocess.STDOUT)
        if completed.returncode != 0:
            logger.warning(f"Command exited with code {completed.returncode}, see {log_file}")
        return completed.returncode

    # Use a ThreadPoolExecutor to run commands concurrently; leaving the `with`
    # block waits for all of them to finish.
    with ThreadPoolExecutor() as executor:
        futures = {}
        for log_stem, cfg_paths in jobs:
            log_file = str(log_dir / f"{log_stem}.log")
            futures[executor.submit(run_command, build_command(cfg_paths), log_file)] = log_file

    # Surface exceptions raised inside worker threads (e.g. the executable is
    # missing or the log file is not writable) instead of dropping them.
    for future, log_file in futures.items():
        error = future.exception()
        if error is not None:
            logger.error(f"Failed to run command logging to {log_file}: {error!r}")


if __name__ == "__main__":
    # Command-line entry point: parse the options and forward them to run_configs.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--apis", type=str, nargs="+", required=True,
                        help="Only configs whose api is one of these are run.")
    parser.add_argument("--comp", type=str, required=True,
                        help="Competition parameter passed to scripts/run.py.")
    parser.add_argument("--simul", action="store_true",
                        help="Run each config as a separate command instead of grouping by api.")
    parser.add_argument("--models", type=str, nargs="+", default=None,
                        help="Regexes used to filter configs by model name.")
    parser.add_argument("--skip_existing", action="store_true",
                        help="Pass --skip_existing to scripts/run.py.")
    parser.add_argument("--n", type=int, default=4,
                        help="Number of runs per config.")
    parser.add_argument("--output-folder", type=str, default="outputs",
                        help="Folder where model answers are stored.")
    parser.add_argument("--config-folder", type=str, default="configs",
                        help="Root folder of the config files.")
    parser.add_argument("--include-old", action="store_true",
                        help="Do not filter out configs matched by configs/exclude.txt.")
    args = parser.parse_args()
    # Pass options by keyword so each flag reaches the right parameter; in particular
    # --include-old must be forwarded, otherwise the flag has no effect.
    run_configs(
        args.config_folder,
        args.apis,
        args.comp,
        skip_existing=args.skip_existing,
        n=args.n,
        simul=args.simul,
        models=args.models,
        output_folder=args.output_folder,
        include_old=args.include_old,
    )