Updated unit test. Updated UI.
Changed files:

- app/gradio_meta_prompt.py +23 -10
- config.yml +43 -31
- meta_prompt/consts.py +44 -30
- meta_prompt/meta_prompt.py +24 -7
- tests/meta_prompt_graph_test.py +42 -46
app/gradio_meta_prompt.py

````diff
@@ -301,14 +301,16 @@ with gr.Blocks(title='Meta Prompt') as demo:
         with gr.Row():
             evaluate_initial_system_message_button = gr.Button(
                 value="Evaluate",
-                variant="secondary"
+                variant="secondary",
+                interactive=False
             )
             generate_initial_system_message_button = gr.Button(
                 value="Generate",
-                variant="secondary"
+                variant="secondary",
+                interactive=False
             )
             pull_task_description_output_button = gr.Button(
-                value="→ Pull
+                value="→ Pull Description", variant="secondary")
             pull_system_message_output_button = gr.Button(
                 value="Pull Output ←", variant="secondary")

@@ -318,10 +320,15 @@ with gr.Blocks(title='Meta Prompt') as demo:
                 show_copy_button=True
             )
         with gr.Row():
-            evaluate_acceptance_criteria_input_button = gr.Button(
+            evaluate_acceptance_criteria_input_button = gr.Button(
+                value="Evaluate",
+                variant="secondary",
+                interactive=False
+            )
             generate_acceptance_criteria_button = gr.Button(
                 value="Generate",
-                variant="secondary"
+                variant="secondary",
+                interactive=False
             )
             pull_acceptance_criteria_output_button = gr.Button(
                 value="Pull Output ←", variant="secondary")

@@ -454,18 +461,18 @@ with gr.Blocks(title='Meta Prompt') as demo:
                 label="System Message", show_copy_button=True)
             with gr.Row():
                 evaluate_system_message_button = gr.Button(
-                    value="Evaluate", variant="secondary")
+                    value="Evaluate", variant="secondary", interactive=False)
             output_output = gr.Textbox(
                 label="Output", show_copy_button=True)
         with gr.Group():
             acceptance_criteria_output = gr.Textbox(
                 label="Acceptance Criteria", show_copy_button=True)
             evaluate_acceptance_criteria_output_button = gr.Button(
-                value="Evaluate", variant="secondary")
+                value="Evaluate", variant="secondary", interactive=False)
         analysis_output = gr.Textbox(
             label="Analysis", show_copy_button=True)
         flag_button = gr.Button(
-            value="Flag", variant="secondary", visible=config.allow_flagging)
+            value="Flag", variant="secondary", visible=config.allow_flagging, interactive=False)
         with gr.Accordion("Details", open=False, visible=config.verbose):
             logs_chatbot = gr.Chatbot(
                 label='Messages', show_copy_button=True, layout='bubble',

@@ -713,9 +720,15 @@ with gr.Blocks(title='Meta Prompt') as demo:
     )

     prompt_inputs_ready_state.change(
-        fn=lambda x: gr.update(interactive=x),
+        fn=lambda x: [gr.update(interactive=x)] * 8,
        inputs=[prompt_inputs_ready_state],
-        outputs=[
+        outputs=[
+            prompt_submit_button,
+            evaluate_initial_system_message_button, generate_initial_system_message_button,
+            evaluate_system_message_button, evaluate_acceptance_criteria_input_button,
+            generate_acceptance_criteria_button, evaluate_acceptance_criteria_output_button,
+            flag_button
+        ],
    )

    simple_llm_tab.select(
````
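The reworked `change` handler returns one `gr.update(...)` per output component, so a single boolean state can enable or disable a whole group of buttons at once. A minimal standalone sketch of the same pattern, assuming Gradio 4.x (component names here are illustrative, not the app's):

```python
import gradio as gr

with gr.Blocks() as demo:
    ready = gr.Checkbox(label="Inputs ready")  # stands in for the app's ready state
    run_button = gr.Button("Run", interactive=False)
    flag_button = gr.Button("Flag", interactive=False)

    # Return one gr.update per output component, in the same order as `outputs`.
    ready.change(
        fn=lambda x: [gr.update(interactive=x)] * 2,
        inputs=[ready],
        outputs=[run_button, flag_button],
    )

if __name__ == "__main__":
    demo.launch()
```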
config.yml

````diff
@@ -336,49 +336,61 @@ prompt_templates:
   prompt_analyzer:
     - role: system
       message: |
-        **TASK:** Compare the Expected Output with the Actual Output according to the Acceptance Criteria. Provide a JSON output with your analysis.
-
-        **Requirements:**
-        - Compare Expected and Actual Outputs strictly following the Acceptance Criteria.
-        - Set `Accept` to "Yes" only if all criteria are met; otherwise, set it to "No."
-        - List acceptable and unacceptable differences based on the criteria.
-
-        **Output Format:** JSON with:
-        - `Accept: (Yes/No)`
-        - `Acceptable Differences: []`
-        - `Unacceptable Differences: []`
-
-        **Example Output:**
-        ```json
         {{
+          "task_description": "Compare the Expected Output with the Actual Output according to the Acceptance Criteria and provide a JSON output with the analysis.",
+          "requirements": [
+            "Strictly follow the Acceptance Criteria to compare Expected and Actual Outputs",
+            "Set 'Accept' to 'Yes' only if all criteria are met, otherwise set it to 'No'",
+            "List acceptable and unacceptable differences based on the criteria"
+          ],
+          "output_format": {{
+            "type": "object",
+            "properties": {{
+              "Accept": {{
+                "type": "string",
+                "enum": ["Yes", "No"]
+              }},
+              "Acceptable Differences": {{
+                "type": "array",
+                "items": {{
+                  "type": "string"
+                }}
+              }},
+              "Unacceptable Differences": {{
+                "type": "array",
+                "items": {{
+                  "type": "string"
+                }}
+              }}
+            }},
+            "required": ["Accept", "Acceptable Differences", "Unacceptable Differences"]
+          }},
+          "output_example": {{
           "Accept": "No",
           "Acceptable Differences": [
-
+            "Spelling variations: 'colour' vs 'color'"
           ],
           "Unacceptable Differences": [
-
-
+            "Missing section: 'Conclusion'",
+            "Incorrect date format: '2023/10/12' vs '12-10-2023'"
           ]
+          }}
         }}
-        ```
-
-        # Acceptance Criteria
-
-        {acceptance_criteria}

     - role: human
       message: |
-
-
-        ```
+        <|Start_Expected_Output|>
         {expected_output}
-
-
-
-
-
+        <|End_Expected_Output|>
+        <|Start_Actual_Output|>
+        {expected_output}
+        <|End_Expected_Output|>
+        <|Start_Actual_Output|>
         {output}
-
+        <|End_Actual_Output|>
+        <|Start_Acceptance_Criteria|>
+        {acceptance_criteria}
+        <|End_Acceptance_Criteria|>

   prompt_suggester:
     - role: system
````
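The doubled braces (`{{`, `}}`) throughout this template are literal braces escaped for format-style substitution; only single-brace fields such as `{acceptance_criteria}` are filled in at run time. A minimal sketch of that convention using plain `str.format`:

```python
template = '{{\n  "Accept": "No",\n  "Criteria": "{acceptance_criteria}"\n}}'
# Doubled braces render as literal braces; single-brace fields are substituted.
print(template.format(acceptance_criteria="No spelling errors"))
# {
#   "Accept": "No",
#   "Criteria": "No spelling errors"
# }
```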
meta_prompt/consts.py

````diff
@@ -222,46 +222,60 @@ Create a [name], Here's the descriptions [description]. Start with "GPT Descript
 """)
 ]),
 NODE_PROMPT_ANALYZER: ChatPromptTemplate.from_messages([
-    ("system", """
-
-
-
-
-
-
-
-
-
-
-
-
-
-{{
+    ("system", """{{
+"task_description": "Compare the Expected Output with the Actual Output according to the Acceptance Criteria and provide a JSON output with the analysis.",
+"requirements": [
+    "Strictly follow the Acceptance Criteria to compare Expected and Actual Outputs",
+    "Set 'Accept' to 'Yes' only if all criteria are met, otherwise set it to 'No'",
+    "List acceptable and unacceptable differences based on the criteria"
+],
+"output_format": {{
+    "type": "object",
+    "properties": {{
+        "Accept": {{
+            "type": "string",
+            "enum": ["Yes", "No"]
+        }},
+        "Acceptable Differences": {{
+            "type": "array",
+            "items": {{
+                "type": "string"
+            }}
+        }},
+        "Unacceptable Differences": {{
+            "type": "array",
+            "items": {{
+                "type": "string"
+            }}
+        }}
+    }},
+    "required": ["Accept", "Acceptable Differences", "Unacceptable Differences"]
+}},
+"output_example": {{
 "Accept": "No",
 "Acceptable Differences": [
-
+    "Spelling variations: 'colour' vs 'color'"
 ],
 "Unacceptable Differences": [
-
-
+    "Missing section: 'Conclusion'",
+    "Incorrect date format: '2023/10/12' vs '12-10-2023'"
 ]
+}}
 }}
 ```
-
-# Acceptance Criteria
-
-{acceptance_criteria}
 """),
-    ("human", """
-
-    ```
+    ("human", """<|Start_Expected_Output|>
 {expected_output}
-
-
-
-
-
+<|End_Expected_Output|>
+<|Start_Actual_Output|>
+{expected_output}
+<|End_Expected_Output|>
+<|Start_Actual_Output|>
 {output}
+<|End_Actual_Output|>
+<|Start_Acceptance_Criteria|>
+{acceptance_criteria}
+<|End_Acceptance_Criteria|>
 ```
 """)
 ]),
````
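`ChatPromptTemplate.from_messages` applies the same brace escaping when it renders each message; a minimal sketch of how a template like the one above is formatted (the field value is illustrative):

```python
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([
    ("system", 'Answer with {{"Accept": "Yes"}} or {{"Accept": "No"}}.'),
    ("human", "<|Start_Expected_Output|>\n{expected_output}\n<|End_Expected_Output|>"),
])
# format_messages substitutes {expected_output} and unescapes {{ }} to { }.
for message in prompt.format_messages(expected_output="a reversed list"):
    print(message.type, ":", message.content)
```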
meta_prompt/meta_prompt.py

````diff
@@ -7,7 +7,9 @@ from langgraph.checkpoint.memory import MemorySaver
 from langgraph.errors import GraphRecursionError
 from langgraph.graph import StateGraph, START, END
 from langchain_core.runnables.base import RunnableLike
-from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
+from langchain_core.runnables import RunnableLambda
+from openai import BadRequestError
 from pydantic import BaseModel
 from typing import Annotated, Dict, Optional, Union, TypedDict
 from .consts import *

@@ -400,17 +402,17 @@ class MetaPromptGraph:
             }
         )

-
+        chain = self.llms[node] | StrOutputParser()
+        response = chain.invoke(formatted_messages)
         logger.debug(
             {
                 'node': node,
                 'action': 'response',
-                '
-                'message': response.content
+                'message': response
             }
         )

-        return {target_attribute: response
+        return {target_attribute: response}


     def _output_history_analyzer(self, state: AgentState) -> AgentState:

@@ -451,7 +453,14 @@ class MetaPromptGraph:

         chain = (
             self.prompt_templates[NODE_OUTPUT_HISTORY_ANALYZER] | self.llms[NODE_OUTPUT_HISTORY_ANALYZER] | JsonOutputParser()
-        )
+        ).with_retry(
+            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
+            wait_exponential_jitter=True,  # Add jitter to the exponential backoff
+            stop_after_attempt=2  # Try twice
+        ).with_fallbacks([RunnableLambda(lambda x: {
+            "analysis": "",
+            "closerOutputID": 0
+        })])
         analysis_dict = chain.invoke(state)

         logger.debug({

@@ -511,7 +520,15 @@ class MetaPromptGraph:

         chain = (
             self.prompt_templates[NODE_PROMPT_ANALYZER] | self.llms[NODE_PROMPT_ANALYZER] | JsonOutputParser()
-        )
+        ).with_retry(
+            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
+            wait_exponential_jitter=True,  # Add jitter to the exponential backoff
+            stop_after_attempt=2  # Try twice
+        ).with_fallbacks([RunnableLambda(lambda x: {
+            "Accept": "No",
+            "Acceptable Differences": [],
+            "Unacceptable Differences": []
+        })])
         result = chain.invoke(state)

         logger.debug({
````
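`with_retry` and `with_fallbacks` are standard LCEL wrappers, so the analyzer chains now degrade to a safe default instead of raising. A minimal self-contained sketch of the same shape, with a flaky `RunnableLambda` standing in for the prompt-LLM-parser chain:

```python
from langchain_core.runnables import RunnableLambda

attempts = {"n": 0}

def flaky_parse(_: dict) -> dict:
    # Fails on the first call, succeeds on the second, standing in
    # for a model call that sometimes returns unparseable output.
    attempts["n"] += 1
    if attempts["n"] < 2:
        raise ValueError("malformed output")
    return {"Accept": "Yes", "Acceptable Differences": [], "Unacceptable Differences": []}

chain = RunnableLambda(flaky_parse).with_retry(
    retry_if_exception_type=(ValueError,),  # retry only this exception type
    wait_exponential_jitter=True,           # jittered exponential backoff
    stop_after_attempt=2,                   # at most two attempts
).with_fallbacks([
    # If retries are exhausted, fall back to a safe default result.
    RunnableLambda(lambda x: {"Accept": "No",
                              "Acceptable Differences": [],
                              "Unacceptable Differences": []})
])

print(chain.invoke({}))  # succeeds on the retry: {'Accept': 'Yes', ...}
```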
tests/meta_prompt_graph_test.py

````diff
@@ -23,12 +23,12 @@ class TestMetaPromptGraph(unittest.TestCase):
         and verifies that the updated state has the output attribute updated with
         the mocked response content.
         """
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y=None: "Mocked response content"
+
         llms = {
-            NODE_PROMPT_INITIAL_DEVELOPER:
-                invoke=MagicMock(
-                    return_value=MagicMock(content="Mocked response content")
-                )
-            )
+            NODE_PROMPT_INITIAL_DEVELOPER: llm
         }

         graph = MetaPromptGraph(llms=llms)

@@ -52,15 +52,11 @@ class TestMetaPromptGraph(unittest.TestCase):
         response and verifies that the updated state has the best output, best
         system message, and best output age updated correctly.
         """
-
-
-
-                content="{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}"
-            )
-        )
-        }
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}"
         prompts = {}
-        meta_prompt_graph = MetaPromptGraph(llms=
+        meta_prompt_graph = MetaPromptGraph(llms=llm, prompts=prompts)
         state = AgentState(
             user_message="How do I reverse a list in Python?",
             expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",

@@ -93,12 +89,13 @@ class TestMetaPromptGraph(unittest.TestCase):
         response and verifies that the updated state has the accepted attribute
         set to True.
         """
-        llms = {
-
-
-
-
-
+        # llms = {
+        #     NODE_PROMPT_ANALYZER: lambda prompt: "{\"Accept\": \"Yes\"}"
+        # }
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Accept\": \"Yes\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(
             output="Test output", expected_output="Expected output",
             acceptance_criteria="Acceptance criteria: ...",

@@ -137,8 +134,8 @@ class TestMetaPromptGraph(unittest.TestCase):
             NODE_ACCEPTANCE_CRITERIA_DEVELOPER: raw_llm,
             NODE_PROMPT_DEVELOPER: raw_llm,
             NODE_PROMPT_EXECUTOR: raw_llm,
-            NODE_OUTPUT_HISTORY_ANALYZER: raw_llm
-            NODE_PROMPT_ANALYZER: raw_llm
+            NODE_OUTPUT_HISTORY_ANALYZER: raw_llm,
+            NODE_PROMPT_ANALYZER: raw_llm,
             NODE_PROMPT_SUGGESTER: raw_llm,
         }

@@ -239,12 +236,14 @@ class TestMetaPromptGraph(unittest.TestCase):
         """
         # Create a mock LLM that returns predefined responses based on the input messages
         llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
         responses = [
-
-
-
+            "Explain how to reverse a list in Python.",  # NODE_PROMPT_INITIAL_DEVELOPER
+            "Here's one way: `my_list[::-1]`",  # NODE_PROMPT_EXECUTOR
+            "{\"Accept\": \"Yes\"}",  # NODE_PROMPT_ANALYZER
         ]
-        llm.invoke
+        # Each time llm.invoke is called, it returns the next item in responses.
+        llm.invoke = lambda x, y=None: responses.pop(0)

         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(

@@ -273,17 +272,18 @@ class TestMetaPromptGraph(unittest.TestCase):
         """
         # Create a mock LLM that returns predefined responses based on the input messages
         llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
         responses = [
-
-
-
-
-
-
-
-
+            "Explain how to reverse a list in Python.",  # NODE_PROMPT_INITIAL_DEVELOPER
+            "Here's one way: `my_list[::-1]`",  # NODE_PROMPT_EXECUTOR
+            "{\"Accept\": \"No\"}",  # NODE_PROMPT_ANALYZER
+            "Try using the `reverse()` method instead.",  # NODE_PROMPT_SUGGESTER
+            "Explain how to reverse a list in Python. Output in a Markdown List.",  # NODE_PROMPT_DEVELOPER
+            "Here's one way: `my_list.reverse()`",  # NODE_PROMPT_EXECUTOR
+            "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}",  # NODE_OUTPUT_HISTORY_ANALYZER
+            "{\"Accept\": \"Yes\"}",  # NODE_PROMPT_ANALYZER
         ]
-        llm.invoke = lambda
+        llm.invoke = lambda x, y=None: responses.pop(0)

         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(

@@ -347,12 +347,10 @@ class TestMetaPromptGraph(unittest.TestCase):
         This test case verifies that the run_acceptance_criteria_graph method
         returns a state with acceptance criteria.
         """
-
-
-
-
-        }
-        meta_prompt_graph = MetaPromptGraph(llms=llms)
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Acceptance criteria\": \"Acceptance criteria: ...\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(
             user_message="How do I reverse a list in Python?",
             expected_output="The output should use the `reverse()` method.",

@@ -372,12 +370,10 @@ class TestMetaPromptGraph(unittest.TestCase):
         This test case verifies that the run_prompt_initial_developer_graph method
         returns a state with an initial developer prompt.
         """
-
-
-
-
-        }
-        meta_prompt_graph = MetaPromptGraph(llms=llms)
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Initial developer prompt\": \"Initial developer prompt: ...\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(user_message="How do I reverse a list in Python?")
         output_state = meta_prompt_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
````
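The tests now script raw-string responses on a `Mock` because the graph pipes each LLM through `StrOutputParser`/`JsonOutputParser`, which accept plain strings. A minimal sketch of the scripted mock, assuming `BaseLanguageModel` comes from `langchain_core.language_models`:

```python
from unittest.mock import Mock
from langchain_core.language_models import BaseLanguageModel

responses = [
    "Explain how to reverse a list in Python.",
    '{"Accept": "Yes"}',
]

llm = Mock(spec=BaseLanguageModel)
llm.config_specs = []  # the tests set this; runnable composition (`prompt | llm`) reads it
# Each call consumes the next scripted response, regardless of the input.
llm.invoke = lambda x, config=None: responses.pop(0)

print(llm.invoke("first call"))   # -> Explain how to reverse a list in Python.
print(llm.invoke("second call"))  # -> {"Accept": "Yes"}
```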