Commit
·
b475feb
1
Parent(s):
1a3765a
Propagate changes from refactoring.
Browse files
- CF_CodeCriticWrongAttempt.py +1 -0
- CF_CodeCriticWrongAttempt.yaml +3 -7
- CF_CodeCriticWrongAttemptWithPlan.yaml +3 -8
- CF_CodeDebug.yaml +3 -1
- CF_CodeDebugCollab.yaml +65 -41
- CF_CodeDebugCollabWithPlan.yaml +59 -45
- CF_CodeDebugCritic.yaml +106 -6
- CF_CodeDebugCriticWithPlan.yaml +112 -15
- CF_CodeWithPlan.py +1 -0
- CF_CodeWithPlan.yaml +6 -13
- __init__.py +9 -12
CF_CodeCriticWrongAttempt.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
| 2 |
|
|
|
|
| 3 |
class CF_CodeCriticWrongAttempt(OpenAIChatAtomicFlow):
|
| 4 |
def __init__(self, **kwargs):
|
| 5 |
super().__init__(**kwargs)
|
|
|
|
| 1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
| 2 |
|
| 3 |
+
|
| 4 |
class CF_CodeCriticWrongAttempt(OpenAIChatAtomicFlow):
|
| 5 |
def __init__(self, **kwargs):
|
| 6 |
super().__init__(**kwargs)
|
CF_CodeCriticWrongAttempt.yaml
CHANGED
|
@@ -70,7 +70,7 @@ init_human_message_prompt_template:
|
|
| 70 |
- "testing_results_summary"
|
| 71 |
template_format: jinja2
|
| 72 |
|
| 73 |
-
|
| 74 |
- "problem_description"
|
| 75 |
- "input_description"
|
| 76 |
- "output_description"
|
|
@@ -78,9 +78,5 @@ init_input_keys:
|
|
| 78 |
- "testing_results_summary"
|
| 79 |
- "code"
|
| 80 |
|
| 81 |
-
|
| 82 |
-
-
|
| 83 |
-
old_key2new_key:
|
| 84 |
-
api_output: "code_feedback"
|
| 85 |
-
output_keys:
|
| 86 |
-
- "code_feedback"
|
|
|
|
| 70 |
- "testing_results_summary"
|
| 71 |
template_format: jinja2
|
| 72 |
|
| 73 |
+
input_interface_non_initialized:
|
| 74 |
- "problem_description"
|
| 75 |
- "input_description"
|
| 76 |
- "output_description"
|
|
|
|
| 78 |
- "testing_results_summary"
|
| 79 |
- "code"
|
| 80 |
|
| 81 |
+
output_interface:
|
| 82 |
+
- "api_output"
|
|
|
|
|
|
|
|
|
|
|
|
CF_CodeCriticWrongAttemptWithPlan.yaml
CHANGED
|
@@ -76,8 +76,7 @@ init_human_message_prompt_template:
|
|
| 76 |
- "testing_results_summary"
|
| 77 |
template_format: jinja2
|
| 78 |
|
| 79 |
-
|
| 80 |
-
init_input_keys:
|
| 81 |
- "problem_description"
|
| 82 |
- "input_description"
|
| 83 |
- "output_description"
|
|
@@ -86,9 +85,5 @@ init_input_keys:
|
|
| 86 |
- "plan"
|
| 87 |
- "code"
|
| 88 |
|
| 89 |
-
|
| 90 |
-
-
|
| 91 |
-
old_key2new_key:
|
| 92 |
-
api_output: "code_feedback"
|
| 93 |
-
output_keys:
|
| 94 |
-
- "code_feedback"
|
|
|
|
| 76 |
- "testing_results_summary"
|
| 77 |
template_format: jinja2
|
| 78 |
|
| 79 |
+
input_interface_non_initialized:
|
|
|
|
| 80 |
- "problem_description"
|
| 81 |
- "input_description"
|
| 82 |
- "output_description"
|
|
|
|
| 85 |
- "plan"
|
| 86 |
- "code"
|
| 87 |
|
| 88 |
+
output_interface:
|
| 89 |
+
- "api_output"
|
|
|
|
|
|
|
|
|
|
|
|
CF_CodeDebug.yaml
CHANGED
|
@@ -62,7 +62,7 @@ topology:
|
|
| 62 |
keys_to_select:
|
| 63 |
- "code"
|
| 64 |
|
| 65 |
-
|
| 66 |
|
| 67 |
# ~~~ Code Testing Critic ~~~
|
| 68 |
- goal: "Test the code on the public tests and provide a results summary."
|
|
@@ -144,3 +144,5 @@ topology:
|
|
| 144 |
tests_separator: "\n\n"
|
| 145 |
|
| 146 |
issue_title: "# Issue with the last proposed solution"
|
|
|
|
|
|
|
|
|
| 62 |
keys_to_select:
|
| 63 |
- "code"
|
| 64 |
|
| 65 |
+
reset: false
|
| 66 |
|
| 67 |
# ~~~ Code Testing Critic ~~~
|
| 68 |
- goal: "Test the code on the public tests and provide a results summary."
|
|
|
|
| 144 |
tests_separator: "\n\n"
|
| 145 |
|
| 146 |
issue_title: "# Issue with the last proposed solution"
|
| 147 |
+
|
| 148 |
+
reset: true
|
CF_CodeDebugCollab.yaml
CHANGED
|
@@ -3,58 +3,82 @@ description: "ToDO: add description"
|
|
| 3 |
|
| 4 |
max_rounds: 2 # ToDo: To increase to 4
|
| 5 |
|
| 6 |
-
|
| 7 |
- "problem_description"
|
| 8 |
- "input_description"
|
| 9 |
- "output_description"
|
| 10 |
- "io_examples_and_explanation"
|
| 11 |
- "public_tests_individual_io"
|
| 12 |
-
|
| 13 |
-
output_keys:
|
| 14 |
- "code"
|
| 15 |
|
| 16 |
subflows_config:
|
| 17 |
CodeGenerator:
|
| 18 |
_target_: .CF_Code.instantiate_from_default_config
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
input_keys:
|
| 42 |
-
- "code_feedback"
|
| 43 |
-
- "testing_results_summary"
|
| 44 |
-
|
| 45 |
CodeDebugCritic:
|
| 46 |
_target_: .CF_CodeDebugCritic.instantiate_from_default_config
|
| 47 |
|
| 48 |
topology:
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
max_rounds: 2 # ToDo: To increase to 4
|
| 5 |
|
| 6 |
+
input_interface:
|
| 7 |
- "problem_description"
|
| 8 |
- "input_description"
|
| 9 |
- "output_description"
|
| 10 |
- "io_examples_and_explanation"
|
| 11 |
- "public_tests_individual_io"
|
| 12 |
+
output_interface:
|
|
|
|
| 13 |
- "code"
|
| 14 |
|
| 15 |
subflows_config:
|
| 16 |
CodeGenerator:
|
| 17 |
_target_: .CF_Code.instantiate_from_default_config
|
| 18 |
+
name: "CodeGenerator"
|
| 19 |
+
model_name: "gpt-4"
|
| 20 |
+
human_message_prompt_template:
|
| 21 |
+
_target_: langchain.PromptTemplate
|
| 22 |
+
template: |2-
|
| 23 |
+
{{testing_results_summary}}
|
| 24 |
+
|
| 25 |
+
{{code_feedback}}
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
Consider the problem statement, the last proposed solution, its issue and the provided feedback. Return a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
|
| 29 |
+
```python
|
| 30 |
+
{{code_placeholder}}
|
| 31 |
+
```
|
| 32 |
+
input_variables:
|
| 33 |
+
- code_feedback
|
| 34 |
+
- testing_results_summary
|
| 35 |
+
partial_variables:
|
| 36 |
+
code_placeholder: "{{python_code}}"
|
| 37 |
+
input_interface_initialized:
|
| 38 |
+
- "code_feedback"
|
| 39 |
+
- "testing_results_summary"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
CodeDebugCritic:
|
| 41 |
_target_: .CF_CodeDebugCritic.instantiate_from_default_config
|
| 42 |
|
| 43 |
topology:
|
| 44 |
+
# ~~~ Code Generator ~~~
|
| 45 |
+
- goal: "Generate/refine a solution."
|
| 46 |
+
|
| 47 |
+
### Input Interface
|
| 48 |
+
input_interface:
|
| 49 |
+
_target_: flows.interfaces.KeyInterface
|
| 50 |
+
additional_transformations:
|
| 51 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 52 |
+
|
| 53 |
+
### Flow Specification
|
| 54 |
+
flow: CodeGenerator
|
| 55 |
+
|
| 56 |
+
### Output Interface
|
| 57 |
+
output_interface:
|
| 58 |
+
_target_: flows.interfaces.KeyInterface
|
| 59 |
+
additional_transformations:
|
| 60 |
+
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
| 61 |
+
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
| 62 |
+
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
| 63 |
+
input_key: "api_output"
|
| 64 |
+
output_key: "code"
|
| 65 |
+
strip: True
|
| 66 |
+
assert_unique: True
|
| 67 |
+
keys_to_select:
|
| 68 |
+
- "code"
|
| 69 |
+
|
| 70 |
+
reset: false
|
| 71 |
+
|
| 72 |
+
# ~~~ Code Critic Grounded in Tests ~~~
|
| 73 |
+
- goal: ""
|
| 74 |
+
|
| 75 |
+
### Input Interface
|
| 76 |
+
input_interface:
|
| 77 |
+
_target_: flows.interfaces.KeyInterface
|
| 78 |
+
additional_transformations:
|
| 79 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 80 |
+
|
| 81 |
+
### Flow Specification
|
| 82 |
+
flow: CodeDebugCritic
|
| 83 |
+
|
| 84 |
+
reset: true
|
CF_CodeDebugCollabWithPlan.yaml
CHANGED
|
@@ -1,53 +1,63 @@
|
|
| 1 |
name: "CodeDebugCollabWithPlan_Flow"
|
| 2 |
description: "ToDO: add description"
|
| 3 |
|
| 4 |
-
|
| 5 |
max_rounds: 2 # ToDo: To increase to 4
|
| 6 |
-
early_exit_key: "end_of_interaction"
|
| 7 |
|
| 8 |
-
|
| 9 |
-
input_keys:
|
| 10 |
- "problem_description"
|
| 11 |
- "input_description"
|
| 12 |
- "output_description"
|
| 13 |
- "io_examples_and_explanation"
|
| 14 |
- "public_tests_individual_io"
|
| 15 |
- "plan"
|
| 16 |
-
|
| 17 |
-
output_data_transformations:
|
| 18 |
-
- _target_: flows.data_transformations.KeyRename
|
| 19 |
-
old_key2new_key:
|
| 20 |
-
code: "code"
|
| 21 |
-
output_keys:
|
| 22 |
- "code"
|
| 23 |
|
| 24 |
subflows_config:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
input_variables:
|
| 42 |
-
- code_feedback
|
| 43 |
-
- testing_results_summary
|
| 44 |
-
partial_variables:
|
| 45 |
-
code_placeholder: "{{python_code}}"
|
| 46 |
-
template_format: jinja2
|
| 47 |
-
input_keys:
|
| 48 |
-
- "code_feedback"
|
| 49 |
-
- "testing_results_summary"
|
| 50 |
-
output_data_transformations:
|
| 51 |
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
| 52 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
| 53 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
|
@@ -55,17 +65,21 @@ subflows_config:
|
|
| 55 |
output_key: "code"
|
| 56 |
strip: True
|
| 57 |
assert_unique: True
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
end_of_interaction_string: "Final answer"
|
| 61 |
-
input_key: "api_output"
|
| 62 |
-
output_key: "end_of_interaction"
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
CodeGenerator: False
|
| 71 |
-
CodeDebugCriticWithPlan_Flow: True
|
|
|
|
| 1 |
name: "CodeDebugCollabWithPlan_Flow"
|
| 2 |
description: "ToDO: add description"
|
| 3 |
|
|
|
|
| 4 |
max_rounds: 2 # ToDo: To increase to 4
|
|
|
|
| 5 |
|
| 6 |
+
input_interface:
|
|
|
|
| 7 |
- "problem_description"
|
| 8 |
- "input_description"
|
| 9 |
- "output_description"
|
| 10 |
- "io_examples_and_explanation"
|
| 11 |
- "public_tests_individual_io"
|
| 12 |
- "plan"
|
| 13 |
+
output_interface:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
- "code"
|
| 15 |
|
| 16 |
subflows_config:
|
| 17 |
+
CodeGenerator:
|
| 18 |
+
_target_: .CF_CodeWithPlan.instantiate_from_default_config
|
| 19 |
+
name: "CodeGenerator"
|
| 20 |
+
model_name: "gpt-4"
|
| 21 |
+
human_message_prompt_template:
|
| 22 |
+
_target_: langchain.PromptTemplate
|
| 23 |
+
template: |2-
|
| 24 |
+
{{testing_results_summary}}
|
| 25 |
+
|
| 26 |
+
{{code_feedback}}
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
Consider the problem statement, the last proposed solution, its issue and the provided feedback. Return a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
|
| 30 |
+
```python
|
| 31 |
+
{{code_placeholder}}
|
| 32 |
+
```
|
| 33 |
+
input_variables:
|
| 34 |
+
- code_feedback
|
| 35 |
+
- testing_results_summary
|
| 36 |
+
partial_variables:
|
| 37 |
+
code_placeholder: "{{python_code}}"
|
| 38 |
+
input_interface_initialized:
|
| 39 |
+
- "code_feedback"
|
| 40 |
+
- "testing_results_summary"
|
| 41 |
+
CodeDebugCritic:
|
| 42 |
+
_target_: .CF_CodeDebugCriticWithPlan.instantiate_from_default_config
|
| 43 |
+
|
| 44 |
+
topology:
|
| 45 |
+
# ~~~ Code Generator ~~~
|
| 46 |
+
- goal: "Generate/refine a solution."
|
| 47 |
+
|
| 48 |
+
### Input Interface
|
| 49 |
+
input_interface:
|
| 50 |
+
_target_: flows.interfaces.KeyInterface
|
| 51 |
+
additional_transformations:
|
| 52 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 53 |
|
| 54 |
+
### Flow Specification
|
| 55 |
+
flow: CodeGenerator
|
| 56 |
|
| 57 |
+
### Output Interface
|
| 58 |
+
output_interface:
|
| 59 |
+
_target_: flows.interfaces.KeyInterface
|
| 60 |
+
additional_transformations:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
- _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
|
| 62 |
regex: '(?<=```python)([\s\S]*?)(?=```)'
|
| 63 |
regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
|
|
|
|
| 65 |
output_key: "code"
|
| 66 |
strip: True
|
| 67 |
assert_unique: True
|
| 68 |
+
keys_to_select:
|
| 69 |
+
- "code"
|
| 70 |
|
| 71 |
+
reset: false
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
+
# ~~~ Code Critic Grounded in Tests ~~~
|
| 74 |
+
- goal: ""
|
| 75 |
+
|
| 76 |
+
### Input Interface
|
| 77 |
+
input_interface:
|
| 78 |
+
_target_: flows.interfaces.KeyInterface
|
| 79 |
+
additional_transformations:
|
| 80 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 81 |
+
|
| 82 |
+
### Flow Specification
|
| 83 |
+
flow: CodeDebugCritic
|
| 84 |
|
| 85 |
+
reset: true
|
|
|
|
|
|
CF_CodeDebugCritic.yaml
CHANGED
|
@@ -1,9 +1,7 @@
|
|
| 1 |
name: "CodeDebugCritic_Flow"
|
| 2 |
description: "ToDo: add description"
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
input_keys:
|
| 7 |
- "problem_description"
|
| 8 |
- "input_description"
|
| 9 |
- "output_description"
|
|
@@ -11,11 +9,13 @@ input_keys:
|
|
| 11 |
- "public_tests_individual_io"
|
| 12 |
- "code"
|
| 13 |
|
| 14 |
-
|
| 15 |
- "testing_results_summary"
|
| 16 |
- "all_tests_passed"
|
| 17 |
- "code_feedback"
|
| 18 |
|
|
|
|
|
|
|
| 19 |
subflows_config:
|
| 20 |
CodeTestingCritic:
|
| 21 |
_target_: .CF_CodeTesting.instantiate_from_default_config
|
|
@@ -23,6 +23,106 @@ subflows_config:
|
|
| 23 |
_target_: .CF_CodeCriticWrongAttempt.instantiate_from_default_config
|
| 24 |
|
| 25 |
topology:
|
| 26 |
-
|
| 27 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
|
|
|
| 1 |
name: "CodeDebugCritic_Flow"
|
| 2 |
description: "ToDo: add description"
|
| 3 |
|
| 4 |
+
input_interface:
|
|
|
|
|
|
|
| 5 |
- "problem_description"
|
| 6 |
- "input_description"
|
| 7 |
- "output_description"
|
|
|
|
| 9 |
- "public_tests_individual_io"
|
| 10 |
- "code"
|
| 11 |
|
| 12 |
+
output_interface:
|
| 13 |
- "testing_results_summary"
|
| 14 |
- "all_tests_passed"
|
| 15 |
- "code_feedback"
|
| 16 |
|
| 17 |
+
public_tests_key: "public_tests_individual_io"
|
| 18 |
+
|
| 19 |
subflows_config:
|
| 20 |
CodeTestingCritic:
|
| 21 |
_target_: .CF_CodeTesting.instantiate_from_default_config
|
|
|
|
| 23 |
_target_: .CF_CodeCriticWrongAttempt.instantiate_from_default_config
|
| 24 |
|
| 25 |
topology:
|
| 26 |
+
# ~~~ Code Testing Critic ~~~
|
| 27 |
+
- goal: "Test the code on the public tests and provide a results summary."
|
| 28 |
+
|
| 29 |
+
### Input Interface
|
| 30 |
+
input_interface:
|
| 31 |
+
_target_: flows.interfaces.KeyInterface
|
| 32 |
+
additional_transformations:
|
| 33 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 34 |
+
|
| 35 |
+
### Flow Specification
|
| 36 |
+
flow: CodeTestingCritic
|
| 37 |
+
|
| 38 |
+
### Output Interface
|
| 39 |
+
output_interface:
|
| 40 |
+
_target_: flows.interfaces.KeyInterface
|
| 41 |
+
additional_transformations:
|
| 42 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
|
| 43 |
+
input_key: "public_tests_results"
|
| 44 |
+
output_key: "all_tests_passed"
|
| 45 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
|
| 46 |
+
output_key: "testing_results_summary"
|
| 47 |
+
|
| 48 |
+
single_test_error_message: True
|
| 49 |
+
|
| 50 |
+
no_error_template: |2-
|
| 51 |
+
${.issue_title}
|
| 52 |
+
All of the executed tests passed.
|
| 53 |
+
|
| 54 |
+
compilation_error_template: |2-
|
| 55 |
+
${.issue_title}
|
| 56 |
+
The execution resulted in a compilation error.
|
| 57 |
+
## Compilation error message:
|
| 58 |
+
{{error_message}}
|
| 59 |
+
timeout_error_template: |2-
|
| 60 |
+
${.issue_title}
|
| 61 |
+
The execution timed out, the solution is not efficient enough.
|
| 62 |
+
runtime_error_template: |2-
|
| 63 |
+
${.issue_title}
|
| 64 |
+
The execution resulted in a runtime error on the following test.
|
| 65 |
+
## [Failed test] Input
|
| 66 |
+
```
|
| 67 |
+
{{test_input}}
|
| 68 |
+
```
|
| 69 |
+
## [Failed test] Runtime error message
|
| 70 |
+
{{error_message}}
|
| 71 |
+
single_test_error_template: |2-
|
| 72 |
+
${.issue_title}
|
| 73 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
|
| 74 |
+
## [Failed test] Input
|
| 75 |
+
```
|
| 76 |
+
{{test_input}}
|
| 77 |
+
```
|
| 78 |
+
## [Failed test] Expected output
|
| 79 |
+
```
|
| 80 |
+
{{expected_output}}
|
| 81 |
+
```
|
| 82 |
+
## [Failed test] Generated output
|
| 83 |
+
```
|
| 84 |
+
{{generated_output}}
|
| 85 |
+
```
|
| 86 |
+
all_tests_header: |2-
|
| 87 |
+
${.issue_title}
|
| 88 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
|
| 89 |
+
test_error_template: |2-
|
| 90 |
+
## [Failed test {{idx}}]
|
| 91 |
+
### [Failed test {{idx}}] Input
|
| 92 |
+
```
|
| 93 |
+
{{test_input}}
|
| 94 |
+
```
|
| 95 |
+
### [Failed test {{idx}}] Expected output
|
| 96 |
+
```
|
| 97 |
+
{{expected_output}}
|
| 98 |
+
```
|
| 99 |
+
### [Failed test {{idx}}] Generated output
|
| 100 |
+
```
|
| 101 |
+
{{generated_output}}
|
| 102 |
+
```
|
| 103 |
+
tests_separator: "\n\n"
|
| 104 |
+
|
| 105 |
+
issue_title: "# Issue with the last proposed solution"
|
| 106 |
+
|
| 107 |
+
# ~~~ Feedback Generator ~~~
|
| 108 |
+
- goal: "Generate feedback grounded in the test results summary."
|
| 109 |
+
|
| 110 |
+
### Input Interface
|
| 111 |
+
input_interface:
|
| 112 |
+
_target_: flows.interfaces.KeyInterface
|
| 113 |
+
additional_transformations:
|
| 114 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 115 |
+
|
| 116 |
+
### Flow Specification
|
| 117 |
+
flow: CodeCriticWrongAttempt
|
| 118 |
+
|
| 119 |
+
### Output Interface
|
| 120 |
+
output_interface:
|
| 121 |
+
_target_: flows.interfaces.KeyInterface
|
| 122 |
+
additional_transformations:
|
| 123 |
+
- _target_: flows.data_transformations.KeyRename
|
| 124 |
+
old_key2new_key:
|
| 125 |
+
api_output: "code_feedback"
|
| 126 |
+
|
| 127 |
+
reset: true
|
| 128 |
|
CF_CodeDebugCriticWithPlan.yaml
CHANGED
|
@@ -1,10 +1,7 @@
|
|
| 1 |
name: "CodeDebugCriticWithPlan_Flow"
|
| 2 |
description: "ToDo: add description"
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
input_data_transformations: []
|
| 7 |
-
input_keys:
|
| 8 |
- "problem_description"
|
| 9 |
- "input_description"
|
| 10 |
- "output_description"
|
|
@@ -13,19 +10,119 @@ input_keys:
|
|
| 13 |
- "code"
|
| 14 |
- "plan"
|
| 15 |
|
| 16 |
-
|
| 17 |
-
- _target_: flows.data_transformations.KeyRename
|
| 18 |
-
old_key2new_key:
|
| 19 |
-
testing_results_summary: "testing_results_summary"
|
| 20 |
-
all_tests_passed: "all_tests_passed"
|
| 21 |
-
code_feedback: "code_feedback"
|
| 22 |
-
output_keys:
|
| 23 |
- "testing_results_summary"
|
| 24 |
- "all_tests_passed"
|
| 25 |
- "code_feedback"
|
| 26 |
|
|
|
|
|
|
|
| 27 |
subflows_config:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
name: "CodeDebugCriticWithPlan_Flow"
|
| 2 |
description: "ToDo: add description"
|
| 3 |
|
| 4 |
+
input_interface:
|
|
|
|
|
|
|
|
|
|
| 5 |
- "problem_description"
|
| 6 |
- "input_description"
|
| 7 |
- "output_description"
|
|
|
|
| 10 |
- "code"
|
| 11 |
- "plan"
|
| 12 |
|
| 13 |
+
output_interface:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
- "testing_results_summary"
|
| 15 |
- "all_tests_passed"
|
| 16 |
- "code_feedback"
|
| 17 |
|
| 18 |
+
public_tests_key: "public_tests_individual_io"
|
| 19 |
+
|
| 20 |
subflows_config:
|
| 21 |
+
CodeTestingCritic:
|
| 22 |
+
_target_: .CF_CodeTesting.instantiate_from_default_config
|
| 23 |
+
CodeCriticWrongAttempt:
|
| 24 |
+
_target_: .CF_CodeCriticWrongAttemptWithPlan.instantiate_from_default_config
|
| 25 |
+
|
| 26 |
+
topology:
|
| 27 |
+
# ~~~ Code Testing Critic ~~~
|
| 28 |
+
- goal: "Test the code on the public tests and provide a results summary."
|
| 29 |
+
|
| 30 |
+
### Input Interface
|
| 31 |
+
input_interface:
|
| 32 |
+
_target_: flows.interfaces.KeyInterface
|
| 33 |
+
additional_transformations:
|
| 34 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 35 |
+
|
| 36 |
+
### Flow Specification
|
| 37 |
+
flow: CodeTestingCritic
|
| 38 |
+
|
| 39 |
+
### Output Interface
|
| 40 |
+
output_interface:
|
| 41 |
+
_target_: flows.interfaces.KeyInterface
|
| 42 |
+
additional_transformations:
|
| 43 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
|
| 44 |
+
input_key: "public_tests_results"
|
| 45 |
+
output_key: "all_tests_passed"
|
| 46 |
+
- _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
|
| 47 |
+
output_key: "testing_results_summary"
|
| 48 |
+
|
| 49 |
+
single_test_error_message: True
|
| 50 |
+
|
| 51 |
+
no_error_template: |2-
|
| 52 |
+
${.issue_title}
|
| 53 |
+
All of the executed tests passed.
|
| 54 |
+
|
| 55 |
+
compilation_error_template: |2-
|
| 56 |
+
${.issue_title}
|
| 57 |
+
The execution resulted in a compilation error.
|
| 58 |
+
## Compilation error message:
|
| 59 |
+
{{error_message}}
|
| 60 |
+
timeout_error_template: |2-
|
| 61 |
+
${.issue_title}
|
| 62 |
+
The execution timed out, the solution is not efficient enough.
|
| 63 |
+
runtime_error_template: |2-
|
| 64 |
+
${.issue_title}
|
| 65 |
+
The execution resulted in a runtime error on the following test.
|
| 66 |
+
## [Failed test] Input
|
| 67 |
+
```
|
| 68 |
+
{{test_input}}
|
| 69 |
+
```
|
| 70 |
+
## [Failed test] Runtime error message
|
| 71 |
+
{{error_message}}
|
| 72 |
+
single_test_error_template: |2-
|
| 73 |
+
${.issue_title}
|
| 74 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
|
| 75 |
+
## [Failed test] Input
|
| 76 |
+
```
|
| 77 |
+
{{test_input}}
|
| 78 |
+
```
|
| 79 |
+
## [Failed test] Expected output
|
| 80 |
+
```
|
| 81 |
+
{{expected_output}}
|
| 82 |
+
```
|
| 83 |
+
## [Failed test] Generated output
|
| 84 |
+
```
|
| 85 |
+
{{generated_output}}
|
| 86 |
+
```
|
| 87 |
+
all_tests_header: |2-
|
| 88 |
+
${.issue_title}
|
| 89 |
+
The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
|
| 90 |
+
test_error_template: |2-
|
| 91 |
+
## [Failed test {{idx}}]
|
| 92 |
+
### [Failed test {{idx}}] Input
|
| 93 |
+
```
|
| 94 |
+
{{test_input}}
|
| 95 |
+
```
|
| 96 |
+
### [Failed test {{idx}}] Expected output
|
| 97 |
+
```
|
| 98 |
+
{{expected_output}}
|
| 99 |
+
```
|
| 100 |
+
### [Failed test {{idx}}] Generated output
|
| 101 |
+
```
|
| 102 |
+
{{generated_output}}
|
| 103 |
+
```
|
| 104 |
+
tests_separator: "\n\n"
|
| 105 |
+
|
| 106 |
+
issue_title: "# Issue with the last proposed solution"
|
| 107 |
+
|
| 108 |
+
# ~~~ Feedback Generator ~~~
|
| 109 |
+
- goal: "Generate feedback grounded in the test results summary."
|
| 110 |
+
|
| 111 |
+
### Input Interface
|
| 112 |
+
input_interface:
|
| 113 |
+
_target_: flows.interfaces.KeyInterface
|
| 114 |
+
additional_transformations:
|
| 115 |
+
- _target_: flows.data_transformations.KeyMatchInput
|
| 116 |
+
|
| 117 |
+
### Flow Specification
|
| 118 |
+
flow: CodeCriticWrongAttempt
|
| 119 |
+
|
| 120 |
+
### Output Interface
|
| 121 |
+
output_interface:
|
| 122 |
+
_target_: flows.interfaces.KeyInterface
|
| 123 |
+
additional_transformations:
|
| 124 |
+
- _target_: flows.data_transformations.KeyRename
|
| 125 |
+
old_key2new_key:
|
| 126 |
+
api_output: "code_feedback"
|
| 127 |
+
|
| 128 |
+
reset: true
|
CF_CodeWithPlan.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
| 2 |
|
|
|
|
| 3 |
class CF_CodeWithPlan(OpenAIChatAtomicFlow):
|
| 4 |
def __init__(self, **kwargs):
|
| 5 |
super().__init__(**kwargs)
|
|
|
|
| 1 |
from flows.application_flows import OpenAIChatAtomicFlow
|
| 2 |
|
| 3 |
+
|
| 4 |
class CF_CodeWithPlan(OpenAIChatAtomicFlow):
|
| 5 |
def __init__(self, **kwargs):
|
| 6 |
super().__init__(**kwargs)
|
CF_CodeWithPlan.yaml
CHANGED
|
@@ -70,22 +70,15 @@ init_human_message_prompt_template:
|
|
| 70 |
code_placeholder: "{{python_code}}"
|
| 71 |
template_format: jinja2
|
| 72 |
|
| 73 |
-
|
| 74 |
-
init_input_keys:
|
| 75 |
- "problem_description"
|
| 76 |
- "input_description"
|
| 77 |
- "output_description"
|
| 78 |
- "io_examples_and_explanation"
|
| 79 |
- "plan"
|
| 80 |
|
| 81 |
-
|
| 82 |
-
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
output_key: "code"
|
| 87 |
-
strip: True
|
| 88 |
-
assert_unique: True
|
| 89 |
-
|
| 90 |
-
output_keys:
|
| 91 |
-
- "code"
|
|
|
|
| 70 |
code_placeholder: "{{python_code}}"
|
| 71 |
template_format: jinja2
|
| 72 |
|
| 73 |
+
input_interface_non_initialized:
|
|
|
|
| 74 |
- "problem_description"
|
| 75 |
- "input_description"
|
| 76 |
- "output_description"
|
| 77 |
- "io_examples_and_explanation"
|
| 78 |
- "plan"
|
| 79 |
|
| 80 |
+
input_interface_initialized:
|
| 81 |
+
- "query"
|
| 82 |
+
|
| 83 |
+
output_interface:
|
| 84 |
+
- "api_output"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__init__.py
CHANGED
|
@@ -16,7 +16,7 @@ from .CF_Code import CF_Code
|
|
| 16 |
|
| 17 |
# cf-plan-code (and cf-plan_oracle-code)
|
| 18 |
# from .CF_Plan import CF_Plan
|
| 19 |
-
|
| 20 |
# from .CF_Plan_Code import CF_Plan_Code
|
| 21 |
|
| 22 |
# # cf-plan_reflect-code
|
|
@@ -33,16 +33,15 @@ from .CF_Code import CF_Code
|
|
| 33 |
from .CF_CodeTesting import CF_CodeTesting
|
| 34 |
from .CF_CodeDebug import CF_CodeDebug
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
# #
|
| 41 |
-
# # # cf-plan_oracle-code_debug_collab
|
| 42 |
-
# from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
| 43 |
-
# from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
| 44 |
-
# from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
########################## LC ##########################
|
| 48 |
|
|
@@ -83,5 +82,3 @@ from .CF_CodeDebug import CF_CodeDebug
|
|
| 83 |
# from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
|
| 84 |
# from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
|
| 85 |
# from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan
|
| 86 |
-
|
| 87 |
-
|
|
|
|
| 16 |
|
| 17 |
# cf-plan-code (and cf-plan_oracle-code)
|
| 18 |
# from .CF_Plan import CF_Plan
|
| 19 |
+
from .CF_CodeWithPlan import CF_CodeWithPlan
|
| 20 |
# from .CF_Plan_Code import CF_Plan_Code
|
| 21 |
|
| 22 |
# # cf-plan_reflect-code
|
|
|
|
| 33 |
from .CF_CodeTesting import CF_CodeTesting
|
| 34 |
from .CF_CodeDebug import CF_CodeDebug
|
| 35 |
|
| 36 |
+
# cf-code_debug_collab
|
| 37 |
+
from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
|
| 38 |
+
from .CF_CodeDebugCritic import CF_CodeDebugCritic
|
| 39 |
+
from .CF_CodeDebugCollab import CF_CodeDebugCollab
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
# cf-plan_oracle-code_debug_collab
|
| 42 |
+
from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
|
| 43 |
+
from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
|
| 44 |
+
from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
|
| 45 |
|
| 46 |
########################## LC ##########################
|
| 47 |
|
|
|
|
| 82 |
# from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
|
| 83 |
# from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
|
| 84 |
# from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan
|
|
|
|
|
|