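Refactoring: rename the flow configs' input/output keys to input/output interfaces, move the output data transformations into the CF_CodeDebug topology interfaces, make the tests used by CF_CodeTesting configurable via `public_tests_key` / `hidden_tests_key`, and temporarily disable all package imports except CF_Code, CF_CodeTesting and CF_CodeDebug.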

Browse files

Files changed (7)

CF_Code.py +1 -5
CF_Code.yaml +17 -12
CF_CodeDebug.yaml +123 -30
CF_CodeTesting.py +26 -6
CF_CodeTesting.yaml +68 -66
CodeTesting.py +12 -14
__init__.py +63 -63

CF_Code.py CHANGED Viewed

@@ -1,10 +1,6 @@
-import importlib
-import os
 from flows.application_flows import OpenAIChatAtomicFlow
 class CF_Code(OpenAIChatAtomicFlow):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

 from flows.application_flows import OpenAIChatAtomicFlow
 class CF_Code(OpenAIChatAtomicFlow):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

CF_Code.yaml CHANGED Viewed

@@ -64,22 +64,27 @@ init_human_message_prompt_template:
     code_placeholder: "{{python_code}}"
   template_format: jinja2
-input_data_transformations: []
-init_input_keys:
   - "problem_description"
   - "input_description"
   - "output_description"
   - "io_examples_and_explanation"
-output_data_transformations:
-  - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
-    regex: '(?<=```python)([\s\S]*?)(?=```)'
-    regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
-    input_key: "api_output"
-    output_key: "code"
-    strip: True
-    assert_unique: True
-output_keys:
   - "api_output"
-  - "code"

     code_placeholder: "{{python_code}}"
   template_format: jinja2
+input_interface_non_initialized:
   - "problem_description"
   - "input_description"
   - "output_description"
   - "io_examples_and_explanation"
+input_interface_initialized:
+  - "query"
+output_interface:
   - "api_output"
+#output_data_transformations:
+#  - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
+#    regex: '(?<=```python)([\s\S]*?)(?=```)'
+#    regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
+#    input_key: "api_output"
+#    output_key: "code"
+#    strip: True
+#    assert_unique: True
+#output_keys:
+#  - "api_output"
+#  - "code"

CF_CodeDebug.yaml CHANGED Viewed

@@ -3,51 +3,144 @@ description: "ToDO: add description"
 max_rounds: 2 # ToDo: To increase to 4
-input_keys:
   - "problem_description"
   - "input_description"
   - "output_description"
   - "io_examples_and_explanation"
   - "public_tests_individual_io"
-output_keys:
   - "code"
 subflows_config:
   CodeGenerator:
     _target_: .CF_Code.instantiate_from_default_config
-    overrides:
-      name: "CodeGenerator"
-      model_name: "gpt-4"
-      human_message_prompt_template:
-        template: |2-
-          {{testing_results_summary}}
-          Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
-          ```python
-          {{code_placeholder}}
-          ```
-        input_variables:
-          - testing_results_summary
-        partial_variables:
-          code_placeholder: "{{python_code}}"
-      input_keys:
-        - "testing_results_summary"
   CodeTestingCritic:
     _target_: .CF_CodeTesting.instantiate_from_default_config
 topology:
-  - flow: CodeGenerator
     reset_every_round: false
-    output_transformations:
-      - _target_: flows.data_transformations.EndOfInteraction
-        end_of_interaction_string: "Final answer"
-        input_key: "api_output"
-        output_key: "end_of_interaction"
-  - flow: CodeTestingCritic
-    reset_every_round: true
-early_exit_key: "end_of_interaction"

 max_rounds: 2 # ToDo: To increase to 4
+input_interface:
   - "problem_description"
   - "input_description"
   - "output_description"
   - "io_examples_and_explanation"
   - "public_tests_individual_io"
+output_interface:
   - "code"
 subflows_config:
   CodeGenerator:
     _target_: .CF_Code.instantiate_from_default_config
+    name: "CodeGenerator"
+    model_name: "gpt-4"
+    human_message_prompt_template:
+      template: |2-
+        {{testing_results_summary}}
+        Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
+        ```python
+        {{code_placeholder}}
+        ```
+      input_variables:
+        - testing_results_summary
+      partial_variables:
+        code_placeholder: "{{python_code}}"
+    input_interface_initialized:
+      - "testing_results_summary"
   CodeTestingCritic:
     _target_: .CF_CodeTesting.instantiate_from_default_config
 topology:
+  # ~~~ Code Generator ~~~
+  - goal: "Generate/refine a solution."
+    ### Input Interface
+    input_interface:
+      _target_: flows.interfaces.KeyInterface
+      additional_transformations:
+        - _target_: flows.data_transformations.KeyMatchInput
+    ### Flow Specification
+    flow: CodeGenerator
+    ### Output Interface
+    output_interface:
+      _target_: flows.interfaces.KeyInterface
+      additional_transformations:
+        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
+          regex: '(?<=```python)([\s\S]*?)(?=```)'
+          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
+          input_key: "api_output"
+          output_key: "code"
+          strip: True
+          assert_unique: True
+      keys_to_select:
+        - "code"
     reset_every_round: false
+  # ~~~ Code Testing Critic ~~~
+  - goal: "Test the code on the public tests and provide a results summary."
+    ### Input Interface
+    input_interface:
+      _target_: flows.interfaces.KeyInterface
+      additional_transformations:
+        - _target_: flows.data_transformations.KeyMatchInput
+    ### Flow Specification
+    flow: CodeTestingCritic
+    ### Output Interface
+    output_interface:
+      _target_: flows.interfaces.KeyInterface
+      additional_transformations:
+        - _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
+          input_key: "public_tests_results"
+          output_key: "all_tests_passed"
+        - _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
+          output_key: "testing_results_summary"
+          single_test_error_message: True
+          no_error_template: |2-
+            ${.issue_title}
+            All of the executed tests passed.
+          compilation_error_template: |2-
+            ${.issue_title}
+            The execution resulted in a compilation error.
+            ## Compilation error message:
+            {{error_message}}
+          timeout_error_template: |2-
+            ${.issue_title}
+            The execution timed out, the solution is not efficient enough.
+          runtime_error_template: |2-
+            ${.issue_title}
+            The execution resulted in a runtime error on the following test.
+            ## [Failed test] Input
+            ```
+            {{test_input}}
+            ```
+            ## [Failed test] Runtime error message
+            {{error_message}}
+          single_test_error_template: |2-
+            ${.issue_title}
+            The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
+            ## [Failed test] Input
+            ```
+            {{test_input}}
+            ```
+            ## [Failed test] Expected output
+            ```
+            {{expected_output}}
+            ```
+            ## [Failed test] Generated output
+            ```
+            {{generated_output}}
+            ```
+          all_tests_header: |2-
+            ${.issue_title}
+            The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
+          test_error_template: |2-
+            ## [Failed test {{idx}}]
+            ### [Failed test {{idx}}] Input
+            ```
+            {{test_input}}
+            ```
+            ### [Failed test {{idx}}] Expected output
+            ```
+            {{expected_output}}
+            ```
+            ### [Failed test {{idx}}] Generated output
+            ```
+            {{generated_output}}
+            ```
+          tests_separator: "\n\n"
+          issue_title: "# Issue with the last proposed solution"

CF_CodeTesting.py CHANGED Viewed

@@ -1,13 +1,12 @@
 from typing import Any, Dict
 from flows import logging
 from .src.evaluation import testing_utils_codeforces
 from .CodeTesting import CodeTesting
 log = logging.get_logger(__name__)
-# ToDo: Add a flag to control whether hidden, public or both tests should be used for evaluation
 class CF_CodeTesting(CodeTesting):
     REQUIRED_KEYS_CONFIG = []
@@ -16,17 +15,38 @@ class CF_CodeTesting(CodeTesting):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
     def _get_test_data(self, input_data: Dict):
         """This function retrieves (or generates) input-output pairs that will be used to test the implementation."""
-        return input_data["public_tests_individual_io"]
     def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
         testing_results = testing_utils_codeforces.evaluate_solution_for_problem(
             candidate_solution=input_data["code"],
-            public_tests_io=test_data
         )
-        for test_output in testing_results["public_tests_results"]:
-            test_output["input"] = "\n".join(test_output["input"])
         return testing_results

 from typing import Any, Dict
 from flows import logging
+from flows.utils.general_helpers import validate_parameters
 from .src.evaluation import testing_utils_codeforces
 from .CodeTesting import CodeTesting
 log = logging.get_logger(__name__)
 class CF_CodeTesting(CodeTesting):
     REQUIRED_KEYS_CONFIG = []
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
+    @classmethod
+    def _validate_parameters(cls, kwargs):
+        validate_parameters(cls, kwargs)
+        if "public_tests_key" not in kwargs["flow_config"] and "hidden_tests_key" not in kwargs["flow_config"]:
+            raise ValueError("At least one of 'public_tests_key' "
+                             "and 'hidden_tests_key' must be specified in the config.")
     def _get_test_data(self, input_data: Dict):
         """This function retrieves (or generates) input-output pairs that will be used to test the implementation."""
+        test_data = {"public_tests_io": None, "hidden_tests_io": None}
+        if "public_tests_key" in self.flow_config:
+            test_data["public_tests_io"] = input_data[self.flow_config["public_tests_key"]]
+        if "hidden_tests_key" in self.flow_config:
+            test_data["hidden_tests_io"] = input_data[self.flow_config["hidden_tests_key"]]
+        return test_data
     def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
         testing_results = testing_utils_codeforces.evaluate_solution_for_problem(
             candidate_solution=input_data["code"],
+            **test_data
         )
+        if "public_tests_results" in testing_results:
+            for test_output in testing_results["public_tests_results"]:
+                test_output["input"] = "\n".join(test_output["input"])
+        if "hidden_tests_results" in testing_results:
+            for test_output in testing_results["hidden_tests_results"]:
+                test_output["input"] = "\n".join(test_output["input"])
         return testing_results

CF_CodeTesting.yaml CHANGED Viewed

@@ -1,76 +1,78 @@
 name: "CF_CodeTesting"
 description: "ToDo: add description"
-input_data_transformations: []
-input_keys:
   - "code"
   - "public_tests_individual_io"
-output_keys:
   - "all_tests_passed"
   - "testing_results_summary"
-output_data_transformations:
-  - _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
-    input_key: "public_tests_results"
-    output_key: "all_tests_passed"
-  - _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
-    output_key: "testing_results_summary"
-    single_test_error_message: True
-    no_error_template: |2-
-      ${.issue_title}
-      All of the executed tests passed.
-    compilation_error_template: |2-
-      ${.issue_title}
-      The execution resulted in a compilation error.
-      ## Compilation error message:
-      {{error_message}}
-    timeout_error_template: |2-
-      ${.issue_title}
-      The execution timed out, the solution is not efficient enough.
-    runtime_error_template: |2-
-      ${.issue_title}
-      The execution resulted in a runtime error on the following test.
-      ## [Failed test] Input
-      ```
-      {{test_input}}
-      ```
-      ## [Failed test] Runtime error message
-      {{error_message}}
-    single_test_error_template: |2-
-      ${.issue_title}
-      The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
-      ## [Failed test] Input
-      ```
-      {{test_input}}
-      ```
-      ## [Failed test] Expected output
-      ```
-      {{expected_output}}
-      ```
-      ## [Failed test] Generated output
-      ```
-      {{generated_output}}
-      ```
-    all_tests_header: |2-
-      ${.issue_title}
-      The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
-    test_error_template: |2-
-      ## [Failed test {{idx}}]
-      ### [Failed test {{idx}}] Input
-      ```
-      {{test_input}}
-      ```
-      ### [Failed test {{idx}}] Expected output
-      ```
-      {{expected_output}}
-      ```
-      ### [Failed test {{idx}}] Generated output
-      ```
-      {{generated_output}}
-      ```
-    tests_separator: "\n\n"
-    issue_title: "# Issue with the last proposed solution"

 name: "CF_CodeTesting"
 description: "ToDo: add description"
+input_interface:
   - "code"
   - "public_tests_individual_io"
+output_interface:
   - "all_tests_passed"
   - "testing_results_summary"
+public_tests_key: "public_tests_individual_io"
+#output_data_transformations:
+#  - _target_: martinjosifoski.CC_flows.src.data_transformations.CorrectnessFlag
+#    input_key: "public_tests_results"
+#    output_key: "all_tests_passed"
+#  - _target_: martinjosifoski.CC_flows.src.data_transformations.TestingResultsSummaryGeneration
+#    output_key: "testing_results_summary"
+#
+#    single_test_error_message: True
+#
+#    no_error_template: |2-
+#      ${.issue_title}
+#      All of the executed tests passed.
+#
+#    compilation_error_template: |2-
+#      ${.issue_title}
+#      The execution resulted in a compilation error.
+#      ## Compilation error message:
+#      {{error_message}}
+#    timeout_error_template: |2-
+#      ${.issue_title}
+#      The execution timed out, the solution is not efficient enough.
+#    runtime_error_template: |2-
+#      ${.issue_title}
+#      The execution resulted in a runtime error on the following test.
+#      ## [Failed test] Input
+#      ```
+#      {{test_input}}
+#      ```
+#      ## [Failed test] Runtime error message
+#      {{error_message}}
+#    single_test_error_template: |2-
+#      ${.issue_title}
+#      The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
+#      ## [Failed test] Input
+#      ```
+#      {{test_input}}
+#      ```
+#      ## [Failed test] Expected output
+#      ```
+#      {{expected_output}}
+#      ```
+#      ## [Failed test] Generated output
+#      ```
+#      {{generated_output}}
+#      ```
+#    all_tests_header: |2-
+#      ${.issue_title}
+#      The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
+#    test_error_template: |2-
+#      ## [Failed test {{idx}}]
+#      ### [Failed test {{idx}}] Input
+#      ```
+#      {{test_input}}
+#      ```
+#      ### [Failed test {{idx}}] Expected output
+#      ```
+#      {{expected_output}}
+#      ```
+#      ### [Failed test {{idx}}] Generated output
+#      ```
+#      {{generated_output}}
+#      ```
+#    tests_separator: "\n\n"
+#
+#    issue_title: "# Issue with the last proposed solution"

CodeTesting.py CHANGED Viewed

@@ -21,20 +21,18 @@ class CodeTesting(AtomicFlow):
     def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
         raise NotImplementedError()
-    @classmethod
-    def instantiate_from_config(cls, config):
-        flow_config = deepcopy(config)
-        kwargs = {"flow_config": flow_config}
-        kwargs["input_data_transformations"] = cls._set_up_data_transformations(config["input_data_transformations"])
-        kwargs["output_data_transformations"] = cls._set_up_data_transformations(config["output_data_transformations"])
-        # ~~~ Instantiate flow ~~~
-        return cls(**kwargs)
-    def run(self,
-            input_data: Dict[str, Any]) -> Dict[str, Any]:
         # ~~~ Retrieve the test data ~~~
         test_data = self._get_test_data(input_data)

     def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
         raise NotImplementedError()
+    # @classmethod
+    # def instantiate_from_config(cls, config):
+    #     flow_config = deepcopy(config)
+    #
+    #     kwargs = {"flow_config": flow_config}
+    #     kwargs["input_data_transformations"] = cls._set_up_data_transformations(config["input_data_transformations"])
+    #     kwargs["output_data_transformations"] = cls._set_up_data_transformations(config["output_data_transformations"])
+    #
+    #     # ~~~ Instantiate flow ~~~
+    #     return cls(**kwargs)
+    def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
         # ~~~ Retrieve the test data ~~~
         test_data = self._get_test_data(input_data)

__init__.py CHANGED Viewed

@@ -4,84 +4,84 @@ from flows import flow_verse
 # cf-code
 from .CF_Code import CF_Code
 # lc-code
-from .LC_Code import LC_Code
 # cf-code_reflect
-from .FixedReply_CodeReflect import FixedReply_CodeReflect
-from .CF_CodeReflect import CF_CodeReflect
 # cf-code_collab
-from .CF_CodeCritic import CF_CodeCritic
-from .CF_CodeCollab import CF_CodeCollab
 # cf-plan-code (and cf-plan_oracle-code)
-from .CF_Plan import CF_Plan
-from .CF_CodeWithPlan import CF_CodeWithPlan
-from .CF_Plan_Code import CF_Plan_Code
 # # cf-plan_reflect-code
-from .FixedReply_PlanReflect import FixedReply_PlanReflect
-from .CF_PlanReflect import CF_PlanReflect
-from .CF_PlanReflect_Code import CF_PlanReflect_Code
 #
-# # cf-plan_collab-code
-from .CF_PlanCritic import CF_PlanCritic
-from .CF_PlanCollab import CF_PlanCollab
-from .CF_PlanCollab_Code import CF_PlanCollab_Code
-# cf-code_debug
 from .CF_CodeTesting import CF_CodeTesting
 from .CF_CodeDebug import CF_CodeDebug
-# # cf-code_debug_collab
-from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
-from .CF_CodeDebugCritic import CF_CodeDebugCritic
-from .CF_CodeDebugCollab import CF_CodeDebugCollab
-#
-# # cf-plan_oracle-code_debug_collab
-from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
-from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
-from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
 ########################## LC ##########################
-# lc-code
-from .LC_Code import LC_Code
-# lc-code_reflect
-from .LC_CodeReflect import LC_CodeReflect
-# lc-code_collab
-from .LC_CodeCritic import LC_CodeCritic
-from .LC_CodeCollab import LC_CodeCollab
-# lc-plan-code (and lc-plan_oracle-code)
-from .LC_Plan import LC_Plan
-from .LC_CodeWithPlan import LC_CodeWithPlan
-from .LC_Plan_Code import LC_Plan_Code
-# lc-plan_reflect-code
-from .LC_PlanReflect import LC_PlanReflect
-from .LC_PlanReflect_Code import LC_PlanReflect_Code
-# lc-plan_collab-code
-from .LC_PlanCritic import LC_PlanCritic
-from .LC_PlanCollab import LC_PlanCollab
-from .LC_PlanCollab_Code import LC_PlanCollab_Code
-# lc-code_debug
-from .LC_CodeTesting import LC_CodeTesting
-from .LC_CodeDebug import LC_CodeDebug
-# lc-code_debug_collab
-from .LC_CodeCriticWrongAttempt import LC_CodeCriticWrongAttempt
-from .LC_CodeDebugCritic import LC_CodeDebugCritic
-from .LC_CodeDebugCollab import LC_CodeDebugCollab
-# lc-plan_oracle-code_debug_collab
-from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
-from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
-from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan

 # cf-code
 from .CF_Code import CF_Code
 # lc-code
+# from .LC_Code import LC_Code
 # cf-code_reflect
+# from .FixedReply_CodeReflect import FixedReply_CodeReflect
+# from .CF_CodeReflect import CF_CodeReflect
 # cf-code_collab
+# from .CF_CodeCritic import CF_CodeCritic
+# from .CF_CodeCollab import CF_CodeCollab
 # cf-plan-code (and cf-plan_oracle-code)
+# from .CF_Plan import CF_Plan
+# from .CF_CodeWithPlan import CF_CodeWithPlan
+# from .CF_Plan_Code import CF_Plan_Code
 # # cf-plan_reflect-code
+# from .FixedReply_PlanReflect import FixedReply_PlanReflect
+# from .CF_PlanReflect import CF_PlanReflect
+# from .CF_PlanReflect_Code import CF_PlanReflect_Code
+# #
+# # # cf-plan_collab-code
+# from .CF_PlanCritic import CF_PlanCritic
+# from .CF_PlanCollab import CF_PlanCollab
+# from .CF_PlanCollab_Code import CF_PlanCollab_Code
 #
+# # cf-code_debug
 from .CF_CodeTesting import CF_CodeTesting
 from .CF_CodeDebug import CF_CodeDebug
+# # # cf-code_debug_collab
+# from .CF_CodeCriticWrongAttempt import CF_CodeCriticWrongAttempt
+# from .CF_CodeDebugCritic import CF_CodeDebugCritic
+# from .CF_CodeDebugCollab import CF_CodeDebugCollab
+# #
+# # # cf-plan_oracle-code_debug_collab
+# from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
+# from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
+# from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
 ########################## LC ##########################
+# # lc-code
+# from .LC_Code import LC_Code
+#
+# # lc-code_reflect
+# from .LC_CodeReflect import LC_CodeReflect
+#
+# # lc-code_collab
+# from .LC_CodeCritic import LC_CodeCritic
+# from .LC_CodeCollab import LC_CodeCollab
+#
+# # lc-plan-code (and lc-plan_oracle-code)
+# from .LC_Plan import LC_Plan
+# from .LC_CodeWithPlan import LC_CodeWithPlan
+# from .LC_Plan_Code import LC_Plan_Code
+#
+# # lc-plan_reflect-code
+# from .LC_PlanReflect import LC_PlanReflect
+# from .LC_PlanReflect_Code import LC_PlanReflect_Code
+#
+# # lc-plan_collab-code
+# from .LC_PlanCritic import LC_PlanCritic
+# from .LC_PlanCollab import LC_PlanCollab
+# from .LC_PlanCollab_Code import LC_PlanCollab_Code
+#
+# # lc-code_debug
+# from .LC_CodeTesting import LC_CodeTesting
+# from .LC_CodeDebug import LC_CodeDebug
+#
+# # lc-code_debug_collab
+# from .LC_CodeCriticWrongAttempt import LC_CodeCriticWrongAttempt
+# from .LC_CodeDebugCritic import LC_CodeDebugCritic
+# from .LC_CodeDebugCollab import LC_CodeDebugCollab
+#
+# # lc-plan_oracle-code_debug_collab
+# from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
+# from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
+# from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan