remove useless code

OpenAIChatAtomicFlow.py (CHANGED): +29 -107
@@ -48,15 +48,15 @@ class OpenAIChatAtomicFlow(AtomicFlow):
     response_annotators: Optional[Dict[str, MessageAnnotator]] = {}

     default_search_space = {
-        "model": tune.choice(
-            [
-                # "text-ada-001",
-                # "text-babbage-001",
-                # "text-davinci-003",
-                "gpt-3.5-turbo",
-                # "gpt-4",
-            ]
-        ),
+        # "model": tune.choice(
+        #     [
+        #         # "text-ada-001",
+        #         # "text-babbage-001",
+        #         # "text-davinci-003",
+        #         "gpt-3.5-turbo",
+        #         # "gpt-4",
+        #     ]
+        # ),
         "temperature_or_top_p": tune.choice(
             [
                 {"temperature": tune.uniform(0, 2)},
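For reference, here is a minimal sketch (not part of the flow code) of how a search space like `default_search_space` is consumed by `flaml.tune`. The evaluation function and its metric values are placeholders, and the `top_p` alternative is assumed from the key name `temperature_or_top_p`; only the `temperature` choice is visible in this hunk.

```python
from flaml import tune

def evaluate(search_config: dict) -> dict:
    # tune.choice over dicts yields a nested dict such as {"temperature": 1.3},
    # which is why the flow later flattens it with search_config.update(...).
    temperature_or_top_p = search_config.get("temperature_or_top_p", {})
    print("sampled:", temperature_or_top_p)
    return {"success_rate": 0.0, "cost": 0.001}  # placeholder metrics

analysis = tune.run(
    evaluate,
    config={
        "temperature_or_top_p": tune.choice(
            [
                {"temperature": tune.uniform(0, 2)},
                {"top_p": tune.uniform(0, 1)},  # assumed second choice
            ]
        ),
    },
    metric="success_rate",
    mode="max",
    num_samples=4,
)
print(analysis.best_config)
```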
@@ -133,7 +133,10 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         response_annotators = config.get("response_annotators", {})
         if len(response_annotators) > 0:
             for key, config in response_annotators.items():
-                response_annotators[key] = hydra.utils.instantiate(config, _convert_="partial")
+                if isinstance(config, MessageAnnotator):
+                    response_annotators[key] = config
+                else:
+                    response_annotators[key] = hydra.utils.instantiate(config, _convert_="partial")
         return {"response_annotators": response_annotators}

     @classmethod
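The new branch keeps annotators that are already `MessageAnnotator` instances and only instantiates dict-style configs. A minimal sketch of the `hydra.utils.instantiate` pattern used here, with `datetime.timedelta` as a stand-in `_target_` for a real annotator class path:

```python
from omegaconf import OmegaConf
from hydra.utils import instantiate

# Stand-in _target_; in the flow this would point at a MessageAnnotator subclass.
cfg = OmegaConf.create({"_target_": "datetime.timedelta", "days": 1, "hours": 2})
obj = instantiate(cfg, _convert_="partial")
print(obj)  # 1 day, 2:00:00
```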
@@ -361,6 +364,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         optimization_budget: Optional[float] = None,
         num_samples: Optional[int] = 1,
         logging_level: Optional[int] = logging.WARN, # TODO(yeeef)
+        initial_flow_config: Optional[Dict] = None, # if not supplied will use default flow config of the class (xxx.yaml)
         **config,
     ) -> Tuple[Dict, Any]: # tune.ExperimentAnalysis
         """
@@ -396,6 +400,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             - tune.ExperimentAnalysis: The tuning results.
         """

+        initial_flow_config = initial_flow_config or cls.get_config()
         space = cls.default_search_space.copy()

         if config is not None:
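One subtlety of the `or` fallback added here: any falsy value, including an explicitly passed empty dict, falls back to the class default. A tiny illustration (the default dict below is a made-up stand-in for `cls.get_config()`):

```python
def resolve_config(initial_flow_config=None):
    default_config = {"model_name": "gpt-3.5-turbo"}  # stand-in for cls.get_config()
    return initial_flow_config or default_config

print(resolve_config())           # default
print(resolve_config({}))         # also the default: {} is falsy
print(resolve_config({"n": 2}))   # {'n': 2}
```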
@@ -413,100 +418,16 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         space["temperature"] = temperature
         space["top_p"] = top_p
         log.warning("temperature and top_p are not recommended to vary together.")
-
-        # TODO: shall we use cls method?
-        cls._max_valid_n_per_max_tokens, cls._min_invalid_n_per_max_tokens = {}, {}
-        cls.optimization_budget = optimization_budget
-        cls.inference_budget = inference_budget
-        cls._prune_hp = "best_of" if space.get("best_of", 1) != 1 else "n"
-        cls._prompts = space.get("prompt")
-
-        # if cls._prompts is None:
-        #     cls._messages = space.get("messages")
-        #     assert isinstance(cls._messages, list) and isinstance(
-        #         cls._messages[0], (dict, list)
-        #     ), "messages must be a list of dicts or a list of lists."
-        #     if isinstance(cls._messages[0], dict):
-        #         cls._messages = [cls._messages]
-        #     space["messages"] = tune.choice(list(range(len(cls._messages))))
-        # else:
-        #     assert space.get("messages") is None, "messages and prompt cannot be provided at the same time."
-        # assert isinstance(cls._prompts, (str, list)), "prompt must be a string or a list of strings."
-        # if isinstance(cls._prompts, str):
-        #     cls._prompts = [cls._prompts]
-        # space["prompt"] = tune.choice(list(range(len(cls._prompts))))
-        # cls._stops = space.get("stop")
-        # if cls._stops:
-        #     assert isinstance(
-        #         cls._stops, (str, list)
-        #     ), "stop must be a string, a list of strings, or a list of lists of strings."
-        #     if not (isinstance(cls._stops, list) and isinstance(cls._stops[0], list)):
-        #         cls._stops = [cls._stops]
-        #     space["stop"] = tune.choice(list(range(len(cls._stops))))
-
-        # cls._config_list = space.get("config_list")
-        # if cls._config_list is not None:
-        #     is_const = is_constant(cls._config_list)
-        #     if is_const:
-        #         space.pop("config_list")
-        # cls._metric, cls._mode = metric, mode
-        # cls._total_cost = 0 # total optimization cost
-        # cls._eval_func = eval_func
-        # cls.data = data
-        # cls.avg_input_tokens = None
-
-        space_model = space["model"]

-        if not isinstance(space_model, str) and len(space_model) > 1:
-            # make a hierarchical search space
-            subspace = {}
-            if "max_tokens" in space:
-                subspace["max_tokens"] = space.pop("max_tokens")
-            if "temperature_or_top_p" in space:
-                subspace["temperature_or_top_p"] = space.pop("temperature_or_top_p")
-            if "best_of" in space:
-                subspace["best_of"] = space.pop("best_of")
-            if "n" in space:
-                subspace["n"] = space.pop("n")
-            choices = []
-            for model in space["model"]:
-                choices.append({"model": model, **subspace})
-            space["subspace"] = tune.choice(choices)
-            space.pop("model")
-            # start all the models with the same hp config
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-            )
-            config0 = search_alg.suggest("t0")
-            points_to_evaluate = [config0]
-            for model in space_model:
-                if model != config0["subspace"]["model"]:
-                    point = config0.copy()
-                    point["subspace"] = point["subspace"].copy()
-                    point["subspace"]["model"] = model
-                    points_to_evaluate.append(point)
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-                points_to_evaluate=points_to_evaluate,
-            )
-        else:
-            # TODO: currently we always falls in this branch
-            search_alg = BlendSearch(
-                cost_attr="cost",
-                cost_budget=optimization_budget,
-                metric=metric,
-                mode=mode,
-                space=space,
-            )
-
+        # Note: currently we fix the model rather than make it tunable
+        search_alg = BlendSearch(
+            cost_attr="cost",
+            cost_budget=optimization_budget,
+            metric=metric,
+            mode=mode,
+            space=space,
+        )
+
         # Args:
         #     evaluation_function: A user-defined evaluation function.
         #     It takes a configuration as input, outputs a evaluation
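The replacement keeps only the single-model branch: one `BlendSearch` over the remaining space. Below is a self-contained sketch of how such a searcher is typically driven; the constructor arguments mirror the call above, while the evaluation function, metric name, and budget are placeholders (this assumes a flaml version that exports `BlendSearch` at the top level):

```python
from flaml import BlendSearch, tune

def evaluate(search_config: dict) -> dict:
    # Placeholder evaluation: report a metric plus a "cost" entry, which matches
    # cost_attr="cost" so BlendSearch can track spend against cost_budget.
    return {"success_rate": 0.5, "cost": 0.002}

space = {"temperature_or_top_p": tune.choice([{"temperature": tune.uniform(0, 2)}])}
search_alg = BlendSearch(
    cost_attr="cost",
    cost_budget=1.0,      # plays the role of optimization_budget
    metric="success_rate",
    mode="max",
    space=space,
)
analysis = tune.run(evaluate, search_alg=search_alg, num_samples=4)
print(analysis.best_config)
```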
@@ -527,7 +448,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         if temperature_or_top_p is not None:
             search_config.update(temperature_or_top_p)

-        flow_config["model_name"] = search_config
+        flow_config["model_name"] = search_config.get("model", flow_config["model_name"])
         generation_parameters = flow_config["generation_parameters"]
         for generation_parameter in generation_parameters:
             if generation_parameter == "model_kwargs":
@@ -558,11 +479,12 @@ class OpenAIChatAtomicFlow(AtomicFlow):
         log.info(f"Tunning with config: {search_config}")
         # TODO: the code currently only works when there is no subspace, i.e. there is only one model to tune with
         # align search_config with flow_config
-        updated_flow_config = updated_flow_config_with_search_config(flow_config=
+        updated_flow_config = updated_flow_config_with_search_config(flow_config=initial_flow_config, search_config=search_config)
         log.info(f"Updated flow_config: {updated_flow_config}")
         # flow_launcher = FlowAPILauncher(flow, 1, False, 3, 0, ["code"]) TODO: maybe refactor with flow_launcher

-        # TODO: limitations: langchain api call does not give us the cost of the api call
+        # TODO: limitations: langchain api call does not give us the cost of the api call, and only give us
+        # one result no matter the n
         final_metrics = {}
         for sample in tune_dps:
             sample["api_key"] = api_key
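On the TODO above: when the client library does not report cost, a common workaround is to estimate it from token counts. A rough sketch with `tiktoken`; the per-1K-token prices are illustrative placeholders, not authoritative:

```python
import tiktoken

def estimate_cost(prompt: str, completion: str, model: str = "gpt-3.5-turbo") -> float:
    enc = tiktoken.encoding_for_model(model)
    prompt_tokens = len(enc.encode(prompt))
    completion_tokens = len(enc.encode(completion))
    # Illustrative prices (USD per 1K tokens); look up current values for real use.
    price_in, price_out = 0.0015, 0.002
    return (prompt_tokens * price_in + completion_tokens * price_out) / 1000

print(estimate_cost("Write a haiku about tuning.", "Budgets rise and fall..."))
```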
@@ -593,6 +515,6 @@ class OpenAIChatAtomicFlow(AtomicFlow):
             verbose=3,
         )
         best_search_config = analysis.best_config
-        flow_config = updated_flow_config_with_search_config(
+        flow_config = updated_flow_config_with_search_config(initial_flow_config, best_search_config)
         log.info(f"best search config found: {best_search_config}, analysis: {analysis.best_result}")
         return flow_config, analysis
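`updated_flow_config_with_search_config` is defined elsewhere in the repo and its exact behavior is not shown in this diff. The sketch below is only an assumed illustration of the kind of merge it performs (copy the flow config, then overlay the sampled search values); the helper name and key handling here are hypothetical:

```python
import copy

def merge_search_config_into_flow_config(flow_config: dict, search_config: dict) -> dict:
    # Hypothetical merge, not the repo's implementation.
    updated = copy.deepcopy(flow_config)
    if "model" in search_config:
        updated["model_name"] = search_config["model"]
    generation_parameters = updated.setdefault("generation_parameters", {})
    for key in ("temperature", "top_p", "max_tokens", "n"):
        if key in search_config:
            generation_parameters[key] = search_config[key]
    return updated

best_flow_config = merge_search_config_into_flow_config(
    {"model_name": "gpt-3.5-turbo", "generation_parameters": {}},
    {"temperature": 0.7, "max_tokens": 512},
)
print(best_flow_config)
```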