Commit
·
1a3765a
1
Parent(s):
ca366eb
Refactoring.
Browse files
- CF_Code.py +1 -5
- CF_Code.yaml +17 -12
- CF_CodeDebug.yaml +123 -30
- CF_CodeTesting.py +26 -6
- CF_CodeTesting.yaml +68 -66
- CodeTesting.py +12 -14
- __init__.py +63 -63
CF_Code.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
| 1 |
-
import importlib
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
from flows.application_flows import OpenAIChatAtomicFlow
|
| 5 |
|
|
|
|
| 6 |
class CF_Code(OpenAIChatAtomicFlow):
|
| 7 |
def __init__(self, **kwargs):
|
| 8 |
super().__init__(**kwargs)
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
| 2 |
|
| 3 |
+
|
| 4 |
class CF_Code(OpenAIChatAtomicFlow):
    """Code-generation flow built directly on ``OpenAIChatAtomicFlow``.

    The class adds no behaviour of its own in the code shown here; it only
    forwards its constructor arguments to the parent class.
    """

    def __init__(self, **kwargs):
        # Forward every keyword argument unchanged to the parent constructor.
        super().__init__(**kwargs)
|
|
|
|
|
|
CF_Code.yaml
CHANGED
|
@@ -64,22 +64,27 @@ init_human_message_prompt_template:
|
|
| 64 |
code_placeholder: "{{python_code}}"
|
| 65 |
template_format: jinja2
|
| 66 |
|
| 67 |
-
|
| 68 |
-
init_input_keys:
|
| 69 |
- "problem_description"
|
| 70 |
- "input_description"
|
| 71 |
- "output_description"
|
| 72 |
- "io_examples_and_explanation"
|
| 73 |
|
| 74 |
-
|
| 75 |
-
-
|
| 76 |
-
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
| 77 |
-
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
| 78 |
-
input_key: "api_output"
|
| 79 |
-
output_key: "code"
|
| 80 |
-
strip: True
|
| 81 |
-
assert_unique: True
|
| 82 |
|
| 83 |
-
|
| 84 |
- "api_output"
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
code_placeholder: "{{python_code}}"
|
| 65 |
template_format: jinja2
|
| 66 |
|
| 67 |
+
input_interface_non_initialized:
|
|
|
|
| 68 |
- "problem_description"
|
| 69 |
- "input_description"
|
| 70 |
- "output_description"
|
| 71 |
- "io_examples_and_explanation"
|
| 72 |
|
| 73 |
+
input_interface_initialized:
|
| 74 |
+
- "query"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
+
output_interface:
|
| 77 |
- "api_output"
|
| 78 |
+
|
| 79 |
+
#output_data_transformations:
|
| 80 |
+
# - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
| 81 |
+
# regex: '(?<=```python)([\s\S]*?)(?=```)'
|
| 82 |
+
# regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
| 83 |
+
# input_key: "api_output"
|
| 84 |
+
# output_key: "code"
|
| 85 |
+
# strip: True
|
| 86 |
+
# assert_unique: True
|
| 87 |
+
|
| 88 |
+
#output_keys:
|
| 89 |
+
# - "api_output"
|
| 90 |
+
# - "code"
|
CF_CodeDebug.yaml
CHANGED
|
@@ -3,51 +3,144 @@ description: "ToDO: add description"
|
|
| 3 |
|
| 4 |
max_rounds: 2 # ToDo: To increase to 4
|
| 5 |
|
| 6 |
-
|
| 7 |
- "problem_description"
|
| 8 |
- "input_description"
|
| 9 |
- "output_description"
|
| 10 |
- "io_examples_and_explanation"
|
| 11 |
- "public_tests_individual_io"
|
| 12 |
-
|
| 13 |
-
output_keys:
|
| 14 |
- "code"
|
| 15 |
|
| 16 |
subflows_config:
|
| 17 |
CodeGenerator:
|
| 18 |
_target_: .CF_Code.instantiate_from_default_config
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
|
| 28 |
-
```python
|
| 29 |
-
{{code_placeholder}}
|
| 30 |
-
```
|
| 31 |
-
input_variables:
|
| 32 |
-
- testing_results_summary
|
| 33 |
-
partial_variables:
|
| 34 |
-
code_placeholder: "{{python_code}}"
|
| 35 |
-
input_keys:
|
| 36 |
-
- "testing_results_summary"
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
CodeTestingCritic:
|
| 39 |
_target_: .CF_CodeTesting.instantiate_from_default_config
|
| 40 |
|
| 41 |
topology:
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
reset_every_round: false
|
| 44 |
-
output_transformations:
|
| 45 |
-
- _target_: flows.data_transformations.EndOfInteraction
|
| 46 |
-
end_of_interaction_string: "Final answer"
|
| 47 |
-
input_key: "api_output"
|
| 48 |
-
output_key: "end_of_interaction"
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
|
|
|
| 3 |
|
| 4 |
max_rounds: 2 # ToDo: To increase to 4
|
| 5 |
|
| 6 |
+
input_interface:
|
| 7 |
- "problem_description"
|
| 8 |
- "input_description"
|
| 9 |
- "output_description"
|
| 10 |
- "io_examples_and_explanation"
|
| 11 |
- "public_tests_individual_io"
|
| 12 |
+
output_interface:
|
|
|
|
| 13 |
- "code"
|
| 14 |
|
| 15 |
subflows_config:
|
| 16 |
CodeGenerator:
|
| 17 |
_target_: .CF_Code.instantiate_from_default_config
|
| 18 |
+
name: "CodeGenerator"
|
| 19 |
+
model_name: "gpt-4"
|
| 20 |
+
human_message_prompt_template:
|
| 21 |
+
template: |2-
|
| 22 |
+
{{testing_results_summary}}
|
| 23 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
|
| 26 |
+
```python
|
| 27 |
+
{{code_placeholder}}
|
| 28 |
+
```
|
| 29 |
+
input_variables:
|
| 30 |
+
- testing_results_summary
|
| 31 |
+
partial_variables:
|
| 32 |
+
code_placeholder: "{{python_code}}"
|
| 33 |
+
input_interface_initialized:
|
| 34 |
+
- "testing_results_summary"
|
| 35 |
CodeTestingCritic:
|
| 36 |
_target_: .CF_CodeTesting.instantiate_from_default_config
|
| 37 |
|
| 38 |
topology:
|
| 39 |
+
# ~~~ Code Generator ~~~
|
| 40 |
+
- goal: "Generate/refine a solution."
|
| 41 |
+
|
| 42 |
+
### Input Interface
|
| 43 |
+
input_interface:
|
| 44 |
+
_target_: flows.interfaces.KeyInterface
|
| 45 |
+
additional_transformations:
|
| 46 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 47 |
+
|
| 48 |
+
### Flow Specification
|
| 49 |
+
flow: CodeGenerator
|
| 50 |
+
|
| 51 |
+
### Output Interface
|
| 52 |
+
output_interface:
|
| 53 |
+
_target_: flows.interfaces.KeyInterface
|
| 54 |
+
additional_transformations:
|
| 55 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
| 56 |
+
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
| 57 |
+
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
| 58 |
+
input_key: "api_output"
|
| 59 |
+
output_key: "code"
|
| 60 |
+
strip: True
|
| 61 |
+
assert_unique: True
|
| 62 |
+
keys_to_select:
|
| 63 |
+
- "code"
|
| 64 |
+
|
| 65 |
reset_every_round: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
+
# ~~~ Code Testing Critic ~~~
|
| 68 |
+
- goal: "Test the code on the public tests and provide a results summary."
|
| 69 |
+
|
| 70 |
+
### Input Interface
|
| 71 |
+
input_interface:
|
| 72 |
+
_target_: flows.interfaces.KeyInterface
|
| 73 |
+
additional_transformations:
|
| 74 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 75 |
+
|
| 76 |
+
### Flow Specification
|
| 77 |
+
flow: CodeTestingCritic
|
| 78 |
+
|
| 79 |
+
### Output Interface
|
| 80 |
+
output_interface:
|
| 81 |
+
_target_: flows.interfaces.KeyInterface
|
| 82 |
+
additional_transformations:
|
| 83 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
|
| 84 |
+
input_key: "public_tests_results"
|
| 85 |
+
output_key: "all_tests_passed"
|
| 86 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
|
| 87 |
+
output_key: "testing_results_summary"
|
| 88 |
+
|
| 89 |
+
single_test_error_message: True
|
| 90 |
+
|
| 91 |
+
no_error_template: |2-
|
| 92 |
+
${.issue_title}
|
| 93 |
+
All of the executed tests passed.
|
| 94 |
+
|
| 95 |
+
compilation_error_template: |2-
|
| 96 |
+
${.issue_title}
|
| 97 |
+
The execution resulted in a compilation error.
|
| 98 |
+
## Compilation error message:
|
| 99 |
+
{{error_message}}
|
| 100 |
+
timeout_error_template: |2-
|
| 101 |
+
${.issue_title}
|
| 102 |
+
The execution timed out, the solution is not efficient enough.
|
| 103 |
+
runtime_error_template: |2-
|
| 104 |
+
${.issue_title}
|
| 105 |
+
The execution resulted in a runtime error on the following test.
|
| 106 |
+
## [Failed test] Input
|
| 107 |
+
```
|
| 108 |
+
{{test_input}}
|
| 109 |
+
```
|
| 110 |
+
## [Failed test] Runtime error message
|
| 111 |
+
{{error_message}}
|
| 112 |
+
single_test_error_template: |2-
|
| 113 |
+
${.issue_title}
|
| 114 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
|
| 115 |
+
## [Failed test] Input
|
| 116 |
+
```
|
| 117 |
+
{{test_input}}
|
| 118 |
+
```
|
| 119 |
+
## [Failed test] Expected output
|
| 120 |
+
```
|
| 121 |
+
{{expected_output}}
|
| 122 |
+
```
|
| 123 |
+
## [Failed test] Generated output
|
| 124 |
+
```
|
| 125 |
+
{{generated_output}}
|
| 126 |
+
```
|
| 127 |
+
all_tests_header: |2-
|
| 128 |
+
${.issue_title}
|
| 129 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
|
| 130 |
+
test_error_template: |2-
|
| 131 |
+
## [Failed test {{idx}}]
|
| 132 |
+
### [Failed test {{idx}}] Input
|
| 133 |
+
```
|
| 134 |
+
{{test_input}}
|
| 135 |
+
```
|
| 136 |
+
### [Failed test {{idx}}] Expected output
|
| 137 |
+
```
|
| 138 |
+
{{expected_output}}
|
| 139 |
+
```
|
| 140 |
+
### [Failed test {{idx}}] Generated output
|
| 141 |
+
```
|
| 142 |
+
{{generated_output}}
|
| 143 |
+
```
|
| 144 |
+
tests_separator: "\n\n"
|
| 145 |
|
| 146 |
+
issue_title: "# Issue with the last proposed solution"
|
CF_CodeTesting.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
from typing import Any, Dict
|
| 2 |
|
| 3 |
from flows import logging
|
|
|
|
| 4 |
from .src.evaluation import testing_utils_codeforces
|
| 5 |
from .CodeTesting import CodeTesting
|
| 6 |
|
| 7 |
log = logging.get_logger(__name__)
|
| 8 |
|
| 9 |
-
# ToDo: Add a flag to control whether hidden, public or both tests should be used for evaluation
|
| 10 |
-
|
| 11 |
|
| 12 |
class CF_CodeTesting(CodeTesting):
|
| 13 |
REQUIRED_KEYS_CONFIG = []
|
|
@@ -16,17 +15,38 @@ class CF_CodeTesting(CodeTesting):
|
|
| 16 |
def __init__(self, **kwargs):
|
| 17 |
super().__init__(**kwargs)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
def _get_test_data(self, input_data: Dict):
|
| 20 |
"""This function retrieves (or generates) input-output pairs that will be used to test the implementation."""
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
|
| 24 |
testing_results = testing_utils_codeforces.evaluate_solution_for_problem(
|
| 25 |
candidate_solution=input_data["code"],
|
| 26 |
-
|
| 27 |
)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
test_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
return testing_results
|
|
|
|
| 1 |
from typing import Any, Dict
|
| 2 |
|
| 3 |
from flows import logging
|
| 4 |
+
from flows.utils.general_helpers import validate_parameters
|
| 5 |
from .src.evaluation import testing_utils_codeforces
|
| 6 |
from .CodeTesting import CodeTesting
|
| 7 |
|
| 8 |
log = logging.get_logger(__name__)
|
| 9 |
|
|
|
|
|
|
|
| 10 |
|
| 11 |
class CF_CodeTesting(CodeTesting):
|
| 12 |
REQUIRED_KEYS_CONFIG = []
|
|
|
|
| 15 |
def __init__(self, **kwargs):
    """Create the flow by delegating all keyword arguments to the parent class."""
    # No extra state is set up here; construction is handled entirely upstream.
    super().__init__(**kwargs)
|
| 17 |
|
| 18 |
+
@classmethod
def _validate_parameters(cls, kwargs):
    """Check the constructor arguments before the flow is instantiated.

    First runs the shared ``validate_parameters`` helper (imported from
    ``flows.utils.general_helpers``; its exact checks are not visible here),
    then enforces that the flow config names at least one source of tests.

    Args:
        kwargs: Constructor keyword arguments; must contain ``"flow_config"``.

    Raises:
        ValueError: If neither ``public_tests_key`` nor ``hidden_tests_key``
            is present in ``kwargs["flow_config"]``.
    """
    validate_parameters(cls, kwargs)

    flow_config = kwargs["flow_config"]
    tests_keys = ("public_tests_key", "hidden_tests_key")

    # Without either key there would be nothing to evaluate the code against.
    if not any(key in flow_config for key in tests_keys):
        raise ValueError("At least one of 'public_tests_key' "
                         "and 'hidden_tests_key' must be specified in the config.")
|
| 25 |
+
|
| 26 |
def _get_test_data(self, input_data: Dict):
    """Collect the input-output pairs used to test the implementation.

    The flow config may name, via ``public_tests_key`` and/or
    ``hidden_tests_key``, the entries of ``input_data`` that hold the tests.
    A test set whose key is not configured is reported as ``None``.

    Args:
        input_data: The data passed to the flow; it must contain an entry
            for every configured tests key.

    Returns:
        A dict with exactly the keys ``"public_tests_io"`` and
        ``"hidden_tests_io"``.
    """
    config = self.flow_config

    # Public tests are looked up first, then hidden ones.
    public_io = input_data[config["public_tests_key"]] if "public_tests_key" in config else None
    hidden_io = input_data[config["hidden_tests_key"]] if "hidden_tests_key" in config else None

    return {"public_tests_io": public_io, "hidden_tests_io": hidden_io}
|
| 37 |
|
| 38 |
def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
    """Evaluate the candidate solution and flatten each test's input to text.

    Args:
        input_data: The data passed to the flow; the candidate solution is
            read from ``input_data["code"]``.
        test_data: Keyword arguments forwarded unchanged to
            ``testing_utils_codeforces.evaluate_solution_for_problem``.

    Returns:
        The results dict returned by the evaluator. For every entry under
        ``"public_tests_results"`` and ``"hidden_tests_results"`` the
        ``"input"`` field is replaced by its lines joined with newlines.
    """
    testing_results = testing_utils_codeforces.evaluate_solution_for_problem(
        candidate_solution=input_data["code"],
        **test_data,
    )

    # Both result sets get the same post-processing, so handle them in one loop.
    for results_key in ("public_tests_results", "hidden_tests_results"):
        # `or ()` tolerates a key that is present but empty/None
        # (presumably when that test set was not supplied -- TODO confirm).
        for test_output in testing_results.get(results_key) or ():
            test_input = test_output["input"]
            # Joining a value that is already a string would put a newline
            # between every character, so only join real line sequences.
            if not isinstance(test_input, str):
                test_output["input"] = "\n".join(test_input)

    return testing_results
|
CF_CodeTesting.yaml
CHANGED
|
@@ -1,76 +1,78 @@
|
|
| 1 |
name: "CF_CodeTesting"
|
| 2 |
description: "ToDo: add description"
|
| 3 |
|
| 4 |
-
|
| 5 |
-
input_keys:
|
| 6 |
- "code"
|
| 7 |
- "public_tests_individual_io"
|
| 8 |
|
| 9 |
-
|
| 10 |
- "all_tests_passed"
|
| 11 |
- "testing_results_summary"
|
| 12 |
-
output_data_transformations:
|
| 13 |
-
- _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
|
| 14 |
-
input_key: "public_tests_results"
|
| 15 |
-
output_key: "all_tests_passed"
|
| 16 |
-
- _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
|
| 17 |
-
output_key: "testing_results_summary"
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
${.issue_title}
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
{{
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
${.issue_title}
|
| 44 |
-
The
|
| 45 |
-
## [Failed test] Input
|
| 46 |
-
```
|
| 47 |
-
{{test_input}}
|
| 48 |
-
```
|
| 49 |
-
## [Failed test]
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
```
|
| 64 |
-
{{
|
| 65 |
-
```
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
name: "CF_CodeTesting"
|
| 2 |
description: "ToDo: add description"
|
| 3 |
|
| 4 |
+
input_interface:
|
|
|
|
| 5 |
- "code"
|
| 6 |
- "public_tests_individual_io"
|
| 7 |
|
| 8 |
+
output_interface:
|
| 9 |
- "all_tests_passed"
|
| 10 |
- "testing_results_summary"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
public_tests_key: "public_tests_individual_io"
|
| 13 |
|
| 14 |
+
#output_data_transformations:
|
| 15 |
+
# - _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
|
| 16 |
+
# input_key: "public_tests_results"
|
| 17 |
+
# output_key: "all_tests_passed"
|
| 18 |
+
# - _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
|
| 19 |
+
# output_key: "testing_results_summary"
|
| 20 |
+
#
|
| 21 |
+
# single_test_error_message: True
|
| 22 |
+
#
|
| 23 |
+
# no_error_template: |2-
|
| 24 |
+
# ${.issue_title}
|
| 25 |
+
# All of the executed tests passed.
|
| 26 |
+
#
|
| 27 |
+
# compilation_error_template: |2-
|
| 28 |
+
# ${.issue_title}
|
| 29 |
+
# The execution resulted in a compilation error.
|
| 30 |
+
# ## Compilation error message:
|
| 31 |
+
# {{error_message}}
|
| 32 |
+
# timeout_error_template: |2-
|
| 33 |
+
# ${.issue_title}
|
| 34 |
+
# The execution timed out, the solution is not efficient enough.
|
| 35 |
+
# runtime_error_template: |2-
|
| 36 |
+
# ${.issue_title}
|
| 37 |
+
# The execution resulted in a runtime error on the following test.
|
| 38 |
+
# ## [Failed test] Input
|
| 39 |
+
# ```
|
| 40 |
+
# {{test_input}}
|
| 41 |
+
# ```
|
| 42 |
+
# ## [Failed test] Runtime error message
|
| 43 |
+
# {{error_message}}
|
| 44 |
+
# single_test_error_template: |2-
|
| 45 |
+
# ${.issue_title}
|
| 46 |
+
# The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
|
| 47 |
+
# ## [Failed test] Input
|
| 48 |
+
# ```
|
| 49 |
+
# {{test_input}}
|
| 50 |
+
# ```
|
| 51 |
+
# ## [Failed test] Expected output
|
| 52 |
+
# ```
|
| 53 |
+
# {{expected_output}}
|
| 54 |
+
# ```
|
| 55 |
+
# ## [Failed test] Generated output
|
| 56 |
+
# ```
|
| 57 |
+
# {{generated_output}}
|
| 58 |
+
# ```
|
| 59 |
+
# all_tests_header: |2-
|
| 60 |
+
# ${.issue_title}
|
| 61 |
+
# The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
|
| 62 |
+
# test_error_template: |2-
|
| 63 |
+
# ## [Failed test {{idx}}]
|
| 64 |
+
# ### [Failed test {{idx}}] Input
|
| 65 |
+
# ```
|
| 66 |
+
# {{test_input}}
|
| 67 |
+
# ```
|
| 68 |
+
# ### [Failed test {{idx}}] Expected output
|
| 69 |
+
# ```
|
| 70 |
+
# {{expected_output}}
|
| 71 |
+
# ```
|
| 72 |
+
# ### [Failed test {{idx}}] Generated output
|
| 73 |
+
# ```
|
| 74 |
+
# {{generated_output}}
|
| 75 |
+
# ```
|
| 76 |
+
# tests_separator: "\n\n"
|
| 77 |
+
#
|
| 78 |
+
# issue_title: "# Issue with the last proposed solution"
|
CodeTesting.py
CHANGED
|
@@ -21,20 +21,18 @@ class CodeTesting(AtomicFlow):
|
|
| 21 |
def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
|
| 22 |
raise NotImplementedError()
|
| 23 |
|
| 24 |
-
@classmethod
|
| 25 |
-
def instantiate_from_config(cls, config):
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
def run(self,
|
| 36 |
-
input_data: Dict[str, Any]) -> Dict[str, Any]:
|
| 37 |
-
|
| 38 |
# ~~~ Retrieve the test data ~~~
|
| 39 |
test_data = self._get_test_data(input_data)
|
| 40 |
|
|
|
|
| 21 |
def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
|
| 22 |
raise NotImplementedError()
|
| 23 |
|
| 24 |
+
# @classmethod
|
| 25 |
+
# def instantiate_from_config(cls, config):
|
| 26 |
+
# flow_config = deepcopy(config)
|
| 27 |
+
#
|
| 28 |
+
# kwargs = {"flow_config": flow_config}
|
| 29 |
+
# kwargs["input_data_transformations"] = cls._set_up_data_transformations(config["input_data_transformations"])
|
| 30 |
+
# kwargs["output_data_transformations"] = cls._set_up_data_transformations(config["output_data_transformations"])
|
| 31 |
+
#
|
| 32 |
+
# # ~~~ Instantiate flow ~~~
|
| 33 |
+
# return cls(**kwargs)
|
| 34 |
+
|
| 35 |
+
def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
|
|
|
| 36 |
# ~~~ Retrieve the test data ~~~
|
| 37 |
test_data = self._get_test_data(input_data)
|
| 38 |
|
__init__.py
CHANGED
|
@@ -4,84 +4,84 @@ from flows import flow_verse
|
|
| 4 |
# cf-code
|
| 5 |
from .CF_Code import CF_Code
|
| 6 |
# lc-code
|
| 7 |
-
from .LC_Code import LC_Code
|
| 8 |
|
| 9 |
# cf-code_reflect
|
| 10 |
-
from .FixedReply_CodeReflect import FixedReply_CodeReflect
|
| 11 |
-
from .CF_CodeReflect import CF_CodeReflect
|
| 12 |
|
| 13 |
# cf-code_collab
|
| 14 |
-
from .CF_CodeCritic import CF_CodeCritic
|
| 15 |
-
from .CF_CodeCollab import CF_CodeCollab
|
| 16 |
|
| 17 |
# cf-plan-code (and cf-plan_oracle-code)
|
| 18 |
-
from .CF_Plan import CF_Plan
|
| 19 |
-
from .CF_CodeWithPlan import CF_CodeWithPlan
|
| 20 |
-
from .CF_Plan_Code import CF_Plan_Code
|
| 21 |
|
| 22 |
# # cf-plan_reflect-code
|
| 23 |
-
from .FixedReply_PlanReflect import FixedReply_PlanReflect
|
| 24 |
-
from .CF_PlanReflect import CF_PlanReflect
|
| 25 |
-
from .CF_PlanReflect_Code import CF_PlanReflect_Code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
#
|
| 27 |
-
# # cf-plan_collab-code
|
| 28 |
-
from .CF_PlanCritic import CF_PlanCritic
|
| 29 |
-
from .CF_PlanCollab import CF_PlanCollab
|
| 30 |
-
from .CF_PlanCollab_Code import CF_PlanCollab_Code
|
| 31 |
-
|
| 32 |
-
# cf-code_debug
|
| 33 |
from .CF_CodeTesting import CF_CodeTesting
|
| 34 |
from .CF_CodeDebug import CF_CodeDebug
|
| 35 |
|
| 36 |
-
# # cf-code_debug_collab
|
| 37 |
-
from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
|
| 38 |
-
from .CF_CodeDebugCritic import CF_CodeDebugCritic
|
| 39 |
-
from .CF_CodeDebugCollab import CF_CodeDebugCollab
|
| 40 |
-
#
|
| 41 |
-
# # cf-plan_oracle-code_debug_collab
|
| 42 |
-
from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
| 43 |
-
from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
| 44 |
-
from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
| 45 |
|
| 46 |
|
| 47 |
########################## LC ##########################
|
| 48 |
|
| 49 |
-
# lc-code
|
| 50 |
-
from .LC_Code import LC_Code
|
| 51 |
-
|
| 52 |
-
# lc-code_reflect
|
| 53 |
-
from .LC_CodeReflect import LC_CodeReflect
|
| 54 |
-
|
| 55 |
-
# lc-code_collab
|
| 56 |
-
from .LC_CodeCritic import LC_CodeCritic
|
| 57 |
-
from .LC_CodeCollab import LC_CodeCollab
|
| 58 |
-
|
| 59 |
-
# lc-plan-code (and lc-plan_oracle-code)
|
| 60 |
-
from .LC_Plan import LC_Plan
|
| 61 |
-
from .LC_CodeWithPlan import LC_CodeWithPlan
|
| 62 |
-
from .LC_Plan_Code import LC_Plan_Code
|
| 63 |
-
|
| 64 |
-
# lc-plan_reflect-code
|
| 65 |
-
from .LC_PlanReflect import LC_PlanReflect
|
| 66 |
-
from .LC_PlanReflect_Code import LC_PlanReflect_Code
|
| 67 |
-
|
| 68 |
-
# lc-plan_collab-code
|
| 69 |
-
from .LC_PlanCritic import LC_PlanCritic
|
| 70 |
-
from .LC_PlanCollab import LC_PlanCollab
|
| 71 |
-
from .LC_PlanCollab_Code import LC_PlanCollab_Code
|
| 72 |
-
|
| 73 |
-
# lc-code_debug
|
| 74 |
-
from .LC_CodeTesting import LC_CodeTesting
|
| 75 |
-
from .LC_CodeDebug import LC_CodeDebug
|
| 76 |
-
|
| 77 |
-
# lc-code_debug_collab
|
| 78 |
-
from .LC_CodeCriticWrongAttempt import LC_CodeCriticWrongAttempt
|
| 79 |
-
from .LC_CodeDebugCritic import LC_CodeDebugCritic
|
| 80 |
-
from .LC_CodeDebugCollab import LC_CodeDebugCollab
|
| 81 |
-
|
| 82 |
-
# lc-plan_oracle-code_debug_collab
|
| 83 |
-
from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
|
| 84 |
-
from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
|
| 85 |
-
from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan
|
| 86 |
|
| 87 |
|
|
|
|
| 4 |
# cf-code
|
| 5 |
from .CF_Code import CF_Code
|
| 6 |
# lc-code
|
| 7 |
+
# from .LC_Code import LC_Code
|
| 8 |
|
| 9 |
# cf-code_reflect
|
| 10 |
+
# from .FixedReply_CodeReflect import FixedReply_CodeReflect
|
| 11 |
+
# from .CF_CodeReflect import CF_CodeReflect
|
| 12 |
|
| 13 |
# cf-code_collab
|
| 14 |
+
# from .CF_CodeCritic import CF_CodeCritic
|
| 15 |
+
# from .CF_CodeCollab import CF_CodeCollab
|
| 16 |
|
| 17 |
# cf-plan-code (and cf-plan_oracle-code)
|
| 18 |
+
# from .CF_Plan import CF_Plan
|
| 19 |
+
# from .CF_CodeWithPlan import CF_CodeWithPlan
|
| 20 |
+
# from .CF_Plan_Code import CF_Plan_Code
|
| 21 |
|
| 22 |
# # cf-plan_reflect-code
|
| 23 |
+
# from .FixedReply_PlanReflect import FixedReply_PlanReflect
|
| 24 |
+
# from .CF_PlanReflect import CF_PlanReflect
|
| 25 |
+
# from .CF_PlanReflect_Code import CF_PlanReflect_Code
|
| 26 |
+
# #
|
| 27 |
+
# # # cf-plan_collab-code
|
| 28 |
+
# from .CF_PlanCritic import CF_PlanCritic
|
| 29 |
+
# from .CF_PlanCollab import CF_PlanCollab
|
| 30 |
+
# from .CF_PlanCollab_Code import CF_PlanCollab_Code
|
| 31 |
#
|
| 32 |
+
# # cf-code_debug
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
from .CF_CodeTesting import CF_CodeTesting
|
| 34 |
from .CF_CodeDebug import CF_CodeDebug
|
| 35 |
|
| 36 |
+
# # # cf-code_debug_collab
|
| 37 |
+
# from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
|
| 38 |
+
# from .CF_CodeDebugCritic import CF_CodeDebugCritic
|
| 39 |
+
# from .CF_CodeDebugCollab import CF_CodeDebugCollab
|
| 40 |
+
# #
|
| 41 |
+
# # # cf-plan_oracle-code_debug_collab
|
| 42 |
+
# from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
| 43 |
+
# from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
| 44 |
+
# from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
| 45 |
|
| 46 |
|
| 47 |
########################## LC ##########################
|
| 48 |
|
| 49 |
+
# # lc-code
|
| 50 |
+
# from .LC_Code import LC_Code
|
| 51 |
+
#
|
| 52 |
+
# # lc-code_reflect
|
| 53 |
+
# from .LC_CodeReflect import LC_CodeReflect
|
| 54 |
+
#
|
| 55 |
+
# # lc-code_collab
|
| 56 |
+
# from .LC_CodeCritic import LC_CodeCritic
|
| 57 |
+
# from .LC_CodeCollab import LC_CodeCollab
|
| 58 |
+
#
|
| 59 |
+
# # lc-plan-code (and lc-plan_oracle-code)
|
| 60 |
+
# from .LC_Plan import LC_Plan
|
| 61 |
+
# from .LC_CodeWithPlan import LC_CodeWithPlan
|
| 62 |
+
# from .LC_Plan_Code import LC_Plan_Code
|
| 63 |
+
#
|
| 64 |
+
# # lc-plan_reflect-code
|
| 65 |
+
# from .LC_PlanReflect import LC_PlanReflect
|
| 66 |
+
# from .LC_PlanReflect_Code import LC_PlanReflect_Code
|
| 67 |
+
#
|
| 68 |
+
# # lc-plan_collab-code
|
| 69 |
+
# from .LC_PlanCritic import LC_PlanCritic
|
| 70 |
+
# from .LC_PlanCollab import LC_PlanCollab
|
| 71 |
+
# from .LC_PlanCollab_Code import LC_PlanCollab_Code
|
| 72 |
+
#
|
| 73 |
+
# # lc-code_debug
|
| 74 |
+
# from .LC_CodeTesting import LC_CodeTesting
|
| 75 |
+
# from .LC_CodeDebug import LC_CodeDebug
|
| 76 |
+
#
|
| 77 |
+
# # lc-code_debug_collab
|
| 78 |
+
# from .LC_CodeCriticWrongAttempt import LC_CodeCriticWrongAttempt
|
| 79 |
+
# from .LC_CodeDebugCritic import LC_CodeDebugCritic
|
| 80 |
+
# from .LC_CodeDebugCollab import LC_CodeDebugCollab
|
| 81 |
+
#
|
| 82 |
+
# # lc-plan_oracle-code_debug_collab
|
| 83 |
+
# from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
|
| 84 |
+
# from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
|
| 85 |
+
# from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan
|
| 86 |
|
| 87 |
|