Updated unit test. Updated UI.
Changed files:

- app/gradio_meta_prompt.py +23 -10
- config.yml +43 -31
- meta_prompt/consts.py +44 -30
- meta_prompt/meta_prompt.py +24 -7
- tests/meta_prompt_graph_test.py +42 -46
app/gradio_meta_prompt.py

````diff
@@ -301,14 +301,16 @@ with gr.Blocks(title='Meta Prompt') as demo:
         with gr.Row():
             evaluate_initial_system_message_button = gr.Button(
                 value="Evaluate",
-                variant="secondary"
+                variant="secondary",
+                interactive=False
             )
             generate_initial_system_message_button = gr.Button(
                 value="Generate",
-                variant="secondary"
+                variant="secondary",
+                interactive=False
             )
             pull_task_description_output_button = gr.Button(
-                value="→ Pull
+                value="→ Pull Description", variant="secondary")
             pull_system_message_output_button = gr.Button(
                 value="Pull Output ←", variant="secondary")

@@ -318,10 +320,15 @@ with gr.Blocks(title='Meta Prompt') as demo:
                 show_copy_button=True
             )
         with gr.Row():
-            evaluate_acceptance_criteria_input_button = gr.Button(
+            evaluate_acceptance_criteria_input_button = gr.Button(
+                value="Evaluate",
+                variant="secondary",
+                interactive=False
+            )
             generate_acceptance_criteria_button = gr.Button(
                 value="Generate",
-                variant="secondary"
+                variant="secondary",
+                interactive=False
             )
             pull_acceptance_criteria_output_button = gr.Button(
                 value="Pull Output ←", variant="secondary")

@@ -454,18 +461,18 @@ with gr.Blocks(title='Meta Prompt') as demo:
                 label="System Message", show_copy_button=True)
             with gr.Row():
                 evaluate_system_message_button = gr.Button(
-                    value="Evaluate", variant="secondary")
+                    value="Evaluate", variant="secondary", interactive=False)
             output_output = gr.Textbox(
                 label="Output", show_copy_button=True)
         with gr.Group():
             acceptance_criteria_output = gr.Textbox(
                 label="Acceptance Criteria", show_copy_button=True)
             evaluate_acceptance_criteria_output_button = gr.Button(
-                value="Evaluate", variant="secondary")
+                value="Evaluate", variant="secondary", interactive=False)
         analysis_output = gr.Textbox(
             label="Analysis", show_copy_button=True)
         flag_button = gr.Button(
-            value="Flag", variant="secondary", visible=config.allow_flagging)
+            value="Flag", variant="secondary", visible=config.allow_flagging, interactive=False)
         with gr.Accordion("Details", open=False, visible=config.verbose):
             logs_chatbot = gr.Chatbot(
                 label='Messages', show_copy_button=True, layout='bubble',

@@ -713,9 +720,15 @@ with gr.Blocks(title='Meta Prompt') as demo:
     )

     prompt_inputs_ready_state.change(
-        fn=lambda x: gr.update(interactive=x),
+        fn=lambda x: [gr.update(interactive=x)] * 8,
        inputs=[prompt_inputs_ready_state],
-        outputs=[
+        outputs=[
+            prompt_submit_button,
+            evaluate_initial_system_message_button, generate_initial_system_message_button,
+            evaluate_system_message_button, evaluate_acceptance_criteria_input_button,
+            generate_acceptance_criteria_button, evaluate_acceptance_criteria_output_button,
+            flag_button
+        ],
    )

    simple_llm_tab.select(
````
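The reworked `change` handler returns one `gr.update(...)` per output component, so a single boolean state can enable or disable a whole group of buttons at once. A minimal standalone sketch of the same pattern, assuming Gradio 4.x (component names here are illustrative, not the app's):

```python
import gradio as gr

with gr.Blocks() as demo:
    ready = gr.Checkbox(label="Inputs ready")  # stands in for the app's ready state
    run_button = gr.Button("Run", interactive=False)
    flag_button = gr.Button("Flag", interactive=False)

    # Return one gr.update per output component, in the same order as `outputs`.
    ready.change(
        fn=lambda x: [gr.update(interactive=x)] * 2,
        inputs=[ready],
        outputs=[run_button, flag_button],
    )

if __name__ == "__main__":
    demo.launch()
```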
config.yml

````diff
@@ -336,49 +336,61 @@ prompt_templates:
   prompt_analyzer:
     - role: system
       message: |
-        **TASK:** Compare the Expected Output with the Actual Output according to the Acceptance Criteria. Provide a JSON output with your analysis.
-
-        **Requirements:**
-        - Compare Expected and Actual Outputs strictly following the Acceptance Criteria.
-        - Set `Accept` to "Yes" only if all criteria are met; otherwise, set it to "No."
-        - List acceptable and unacceptable differences based on the criteria.
-
-        **Output Format:** JSON with:
-        - `Accept: (Yes/No)`
-        - `Acceptable Differences: []`
-        - `Unacceptable Differences: []`
-
-        **Example Output:**
-        ```json
         {{
+          "task_description": "Compare the Expected Output with the Actual Output according to the Acceptance Criteria and provide a JSON output with the analysis.",
+          "requirements": [
+            "Strictly follow the Acceptance Criteria to compare Expected and Actual Outputs",
+            "Set 'Accept' to 'Yes' only if all criteria are met, otherwise set it to 'No'",
+            "List acceptable and unacceptable differences based on the criteria"
+          ],
+          "output_format": {{
+            "type": "object",
+            "properties": {{
+              "Accept": {{
+                "type": "string",
+                "enum": ["Yes", "No"]
+              }},
+              "Acceptable Differences": {{
+                "type": "array",
+                "items": {{
+                  "type": "string"
+                }}
+              }},
+              "Unacceptable Differences": {{
+                "type": "array",
+                "items": {{
+                  "type": "string"
+                }}
+              }}
+            }},
+            "required": ["Accept", "Acceptable Differences", "Unacceptable Differences"]
+          }},
+          "output_example": {{
           "Accept": "No",
           "Acceptable Differences": [
-
+            "Spelling variations: 'colour' vs 'color'"
           ],
           "Unacceptable Differences": [
-
-
+            "Missing section: 'Conclusion'",
+            "Incorrect date format: '2023/10/12' vs '12-10-2023'"
           ]
+          }}
         }}
-        ```
-
-        # Acceptance Criteria
-
-        {acceptance_criteria}

     - role: human
       message: |
-
-
-        ```
+        <|Start_Expected_Output|>
         {expected_output}
-
-
-
-
-
+        <|End_Expected_Output|>
+        <|Start_Actual_Output|>
+        {expected_output}
+        <|End_Expected_Output|>
+        <|Start_Actual_Output|>
         {output}
-
+        <|End_Actual_Output|>
+        <|Start_Acceptance_Criteria|>
+        {acceptance_criteria}
+        <|End_Acceptance_Criteria|>

   prompt_suggester:
     - role: system
````
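The doubled braces (`{{`, `}}`) throughout this template are literal braces escaped for format-style substitution; only single-brace fields such as `{acceptance_criteria}` are filled in at run time. A minimal sketch of that convention using plain `str.format`:

```python
template = '{{\n  "Accept": "No",\n  "Criteria": "{acceptance_criteria}"\n}}'
# Doubled braces render as literal braces; single-brace fields are substituted.
print(template.format(acceptance_criteria="No spelling errors"))
# {
#   "Accept": "No",
#   "Criteria": "No spelling errors"
# }
```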
meta_prompt/consts.py

````diff
@@ -222,46 +222,60 @@ Create a [name], Here's the descriptions [description]. Start with "GPT Descript
 """)
 ]),
 NODE_PROMPT_ANALYZER: ChatPromptTemplate.from_messages([
-    ("system", """
-
-
-
-
-
-
-
-
-
-
-
-
-
-{{
+    ("system", """{{
+"task_description": "Compare the Expected Output with the Actual Output according to the Acceptance Criteria and provide a JSON output with the analysis.",
+"requirements": [
+    "Strictly follow the Acceptance Criteria to compare Expected and Actual Outputs",
+    "Set 'Accept' to 'Yes' only if all criteria are met, otherwise set it to 'No'",
+    "List acceptable and unacceptable differences based on the criteria"
+],
+"output_format": {{
+    "type": "object",
+    "properties": {{
+        "Accept": {{
+            "type": "string",
+            "enum": ["Yes", "No"]
+        }},
+        "Acceptable Differences": {{
+            "type": "array",
+            "items": {{
+                "type": "string"
+            }}
+        }},
+        "Unacceptable Differences": {{
+            "type": "array",
+            "items": {{
+                "type": "string"
+            }}
+        }}
+    }},
+    "required": ["Accept", "Acceptable Differences", "Unacceptable Differences"]
+}},
+"output_example": {{
 "Accept": "No",
 "Acceptable Differences": [
-
+    "Spelling variations: 'colour' vs 'color'"
 ],
 "Unacceptable Differences": [
-
-
+    "Missing section: 'Conclusion'",
+    "Incorrect date format: '2023/10/12' vs '12-10-2023'"
 ]
+}}
 }}
 ```
-
-# Acceptance Criteria
-
-{acceptance_criteria}
 """),
-    ("human", """
-
-    ```
+    ("human", """<|Start_Expected_Output|>
 {expected_output}
-
-
-
-
-
+<|End_Expected_Output|>
+<|Start_Actual_Output|>
+{expected_output}
+<|End_Expected_Output|>
+<|Start_Actual_Output|>
 {output}
+<|End_Actual_Output|>
+<|Start_Acceptance_Criteria|>
+{acceptance_criteria}
+<|End_Acceptance_Criteria|>
 ```
 """)
 ]),
````
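`ChatPromptTemplate.from_messages` applies the same brace escaping when it renders each message; a minimal sketch of how a template like the one above is formatted (the field value is illustrative):

```python
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([
    ("system", 'Answer with {{"Accept": "Yes"}} or {{"Accept": "No"}}.'),
    ("human", "<|Start_Expected_Output|>\n{expected_output}\n<|End_Expected_Output|>"),
])
# format_messages substitutes {expected_output} and unescapes {{ }} to { }.
for message in prompt.format_messages(expected_output="a reversed list"):
    print(message.type, ":", message.content)
```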
meta_prompt/meta_prompt.py

````diff
@@ -7,7 +7,9 @@ from langgraph.checkpoint.memory import MemorySaver
 from langgraph.errors import GraphRecursionError
 from langgraph.graph import StateGraph, START, END
 from langchain_core.runnables.base import RunnableLike
-from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
+from langchain_core.runnables import RunnableLambda
+from openai import BadRequestError
 from pydantic import BaseModel
 from typing import Annotated, Dict, Optional, Union, TypedDict
 from .consts import *

@@ -400,17 +402,17 @@ class MetaPromptGraph:
             }
         )

-
+        chain = self.llms[node] | StrOutputParser()
+        response = chain.invoke(formatted_messages)
         logger.debug(
             {
                 'node': node,
                 'action': 'response',
-                '
-                'message': response.content
+                'message': response
             }
         )

-        return {target_attribute: response
+        return {target_attribute: response}


     def _output_history_analyzer(self, state: AgentState) -> AgentState:

@@ -451,7 +453,14 @@ class MetaPromptGraph:

         chain = (
             self.prompt_templates[NODE_OUTPUT_HISTORY_ANALYZER] | self.llms[NODE_OUTPUT_HISTORY_ANALYZER] | JsonOutputParser()
-        )
+        ).with_retry(
+            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
+            wait_exponential_jitter=True,  # Add jitter to the exponential backoff
+            stop_after_attempt=2  # Try twice
+        ).with_fallbacks([RunnableLambda(lambda x: {
+            "analysis": "",
+            "closerOutputID": 0
+        })])
         analysis_dict = chain.invoke(state)

         logger.debug({

@@ -511,7 +520,15 @@ class MetaPromptGraph:

         chain = (
             self.prompt_templates[NODE_PROMPT_ANALYZER] | self.llms[NODE_PROMPT_ANALYZER] | JsonOutputParser()
-        )
+        ).with_retry(
+            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
+            wait_exponential_jitter=True,  # Add jitter to the exponential backoff
+            stop_after_attempt=2  # Try twice
+        ).with_fallbacks([RunnableLambda(lambda x: {
+            "Accept": "No",
+            "Acceptable Differences": [],
+            "Unacceptable Differences": []
+        })])
         result = chain.invoke(state)

         logger.debug({
````
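`with_retry` and `with_fallbacks` are standard LCEL wrappers, so the analyzer chains now degrade to a safe default instead of raising. A minimal self-contained sketch of the same shape, with a flaky `RunnableLambda` standing in for the prompt-LLM-parser chain:

```python
from langchain_core.runnables import RunnableLambda

attempts = {"n": 0}

def flaky_parse(_: dict) -> dict:
    # Fails on the first call, succeeds on the second, standing in
    # for a model call that sometimes returns unparseable output.
    attempts["n"] += 1
    if attempts["n"] < 2:
        raise ValueError("malformed output")
    return {"Accept": "Yes", "Acceptable Differences": [], "Unacceptable Differences": []}

chain = RunnableLambda(flaky_parse).with_retry(
    retry_if_exception_type=(ValueError,),  # retry only this exception type
    wait_exponential_jitter=True,           # jittered exponential backoff
    stop_after_attempt=2,                   # at most two attempts
).with_fallbacks([
    # If retries are exhausted, fall back to a safe default result.
    RunnableLambda(lambda x: {"Accept": "No",
                              "Acceptable Differences": [],
                              "Unacceptable Differences": []})
])

print(chain.invoke({}))  # succeeds on the retry: {'Accept': 'Yes', ...}
```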
tests/meta_prompt_graph_test.py

````diff
@@ -23,12 +23,12 @@ class TestMetaPromptGraph(unittest.TestCase):
         and verifies that the updated state has the output attribute updated with
         the mocked response content.
         """
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y=None: "Mocked response content"
+
         llms = {
-            NODE_PROMPT_INITIAL_DEVELOPER:
-                invoke=MagicMock(
-                    return_value=MagicMock(content="Mocked response content")
-                )
-            )
+            NODE_PROMPT_INITIAL_DEVELOPER: llm
         }

         graph = MetaPromptGraph(llms=llms)

@@ -52,15 +52,11 @@ class TestMetaPromptGraph(unittest.TestCase):
         response and verifies that the updated state has the best output, best
         system message, and best output age updated correctly.
         """
-
-
-
-                content="{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}"
-            )
-        )
-        }
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}"
         prompts = {}
-        meta_prompt_graph = MetaPromptGraph(llms=
+        meta_prompt_graph = MetaPromptGraph(llms=llm, prompts=prompts)
         state = AgentState(
             user_message="How do I reverse a list in Python?",
             expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",

@@ -93,12 +89,13 @@ class TestMetaPromptGraph(unittest.TestCase):
         response and verifies that the updated state has the accepted attribute
         set to True.
         """
-        llms = {
-
-
-
-
-
+        # llms = {
+        #     NODE_PROMPT_ANALYZER: lambda prompt: "{\"Accept\": \"Yes\"}"
+        # }
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Accept\": \"Yes\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(
             output="Test output", expected_output="Expected output",
             acceptance_criteria="Acceptance criteria: ...",

@@ -137,8 +134,8 @@ class TestMetaPromptGraph(unittest.TestCase):
             NODE_ACCEPTANCE_CRITERIA_DEVELOPER: raw_llm,
             NODE_PROMPT_DEVELOPER: raw_llm,
             NODE_PROMPT_EXECUTOR: raw_llm,
-            NODE_OUTPUT_HISTORY_ANALYZER: raw_llm
-            NODE_PROMPT_ANALYZER: raw_llm
+            NODE_OUTPUT_HISTORY_ANALYZER: raw_llm,
+            NODE_PROMPT_ANALYZER: raw_llm,
             NODE_PROMPT_SUGGESTER: raw_llm,
         }

@@ -239,12 +236,14 @@ class TestMetaPromptGraph(unittest.TestCase):
         """
         # Create a mock LLM that returns predefined responses based on the input messages
         llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
         responses = [
-
-
-
+            "Explain how to reverse a list in Python.",  # NODE_PROMPT_INITIAL_DEVELOPER
+            "Here's one way: `my_list[::-1]`",  # NODE_PROMPT_EXECUTOR
+            "{\"Accept\": \"Yes\"}",  # NODE_PROMPT_ANALYZER
         ]
-        llm.invoke
+        # Each time llm.invoke is called, it returns the next item in responses.
+        llm.invoke = lambda x, y=None: responses.pop(0)

         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(

@@ -273,17 +272,18 @@ class TestMetaPromptGraph(unittest.TestCase):
         """
         # Create a mock LLM that returns predefined responses based on the input messages
         llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
         responses = [
-
-
-
-
-
-
-
-
+            "Explain how to reverse a list in Python.",  # NODE_PROMPT_INITIAL_DEVELOPER
+            "Here's one way: `my_list[::-1]`",  # NODE_PROMPT_EXECUTOR
+            "{\"Accept\": \"No\"}",  # NODE_PROMPT_ANALYZER
+            "Try using the `reverse()` method instead.",  # NODE_PROMPT_SUGGESTER
+            "Explain how to reverse a list in Python. Output in a Markdown List.",  # NODE_PROMPT_DEVELOPER
+            "Here's one way: `my_list.reverse()`",  # NODE_PROMPT_EXECUTOR
+            "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}",  # NODE_OUTPUT_HISTORY_ANALYZER
+            "{\"Accept\": \"Yes\"}",  # NODE_PROMPT_ANALYZER
         ]
-        llm.invoke = lambda
+        llm.invoke = lambda x, y=None: responses.pop(0)

         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(

@@ -347,12 +347,10 @@ class TestMetaPromptGraph(unittest.TestCase):
         This test case verifies that the run_acceptance_criteria_graph method
         returns a state with acceptance criteria.
         """
-
-
-
-
-        }
-        meta_prompt_graph = MetaPromptGraph(llms=llms)
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Acceptance criteria\": \"Acceptance criteria: ...\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(
             user_message="How do I reverse a list in Python?",
             expected_output="The output should use the `reverse()` method.",

@@ -372,12 +370,10 @@ class TestMetaPromptGraph(unittest.TestCase):
         This test case verifies that the run_prompt_initial_developer_graph method
         returns a state with an initial developer prompt.
         """
-
-
-
-
-        }
-        meta_prompt_graph = MetaPromptGraph(llms=llms)
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Initial developer prompt\": \"Initial developer prompt: ...\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(user_message="How do I reverse a list in Python?")
         output_state = meta_prompt_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
````
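The tests now script raw-string responses on a `Mock` because the graph pipes each LLM through `StrOutputParser`/`JsonOutputParser`, which accept plain strings. A minimal sketch of the scripted mock, assuming `BaseLanguageModel` comes from `langchain_core.language_models`:

```python
from unittest.mock import Mock
from langchain_core.language_models import BaseLanguageModel

responses = [
    "Explain how to reverse a list in Python.",
    '{"Accept": "Yes"}',
]

llm = Mock(spec=BaseLanguageModel)
llm.config_specs = []  # the tests set this; runnable composition (`prompt | llm`) reads it
# Each call consumes the next scripted response, regardless of the input.
llm.invoke = lambda x, config=None: responses.pop(0)

print(llm.invoke("first call"))   # -> Explain how to reverse a list in Python.
print(llm.invoke("second call"))  # -> {"Accept": "Yes"}
```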