copilot-swe-agent[bot] CatoG commited on
Commit ·
769359f
1
Parent(s): 4b17537
Add supervisor-style multi-role agentic workflow
Browse filesCo-authored-by: CatoG <47473856+CatoG@users.noreply.github.com>
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import warnings
|
|
| 6 |
import traceback
|
| 7 |
from contextvars import ContextVar
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
-
from typing import Dict, List, Optional, Tuple
|
| 10 |
|
| 11 |
import requests
|
| 12 |
from dotenv import load_dotenv
|
|
@@ -26,6 +26,7 @@ import matplotlib.pyplot as plt
|
|
| 26 |
from huggingface_hub.errors import HfHubHTTPError
|
| 27 |
|
| 28 |
from langchain_core.tools import tool
|
|
|
|
| 29 |
from langchain.agents import create_agent
|
| 30 |
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
|
| 31 |
from langchain_community.tools import DuckDuckGoSearchRun, ArxivQueryRun
|
|
@@ -494,6 +495,384 @@ ALL_TOOLS = {
|
|
| 494 |
TOOL_NAMES = list(ALL_TOOLS.keys())
|
| 495 |
|
| 496 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
# ============================================================
|
| 498 |
# Agent builder
|
| 499 |
# ============================================================
|
|
@@ -732,121 +1111,189 @@ with gr.Blocks(title="LLM + Agent tools demo", theme=gr.themes.Soft()) as demo:
|
|
| 732 |
"with selectable tools and extended debugging."
|
| 733 |
)
|
| 734 |
|
| 735 |
-
with gr.
|
| 736 |
-
model_dropdown = gr.Dropdown(
|
| 737 |
-
choices=MODEL_OPTIONS,
|
| 738 |
-
value=DEFAULT_MODEL_ID,
|
| 739 |
-
label="Base model",
|
| 740 |
-
)
|
| 741 |
-
model_status = gr.Textbox(
|
| 742 |
-
value=model_status_text(DEFAULT_MODEL_ID),
|
| 743 |
-
label="Model status",
|
| 744 |
-
interactive=False,
|
| 745 |
-
)
|
| 746 |
|
| 747 |
-
|
| 748 |
-
with gr.
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 754 |
|
| 755 |
with gr.Row():
|
| 756 |
-
|
| 757 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
|
| 759 |
-
|
|
|
|
| 760 |
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
max_lines=1,
|
| 768 |
-
)
|
| 769 |
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 775 |
)
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
|
|
|
|
|
|
| 780 |
)
|
| 781 |
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
|
|
|
| 787 |
|
| 788 |
-
|
| 789 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 790 |
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
|
|
|
|
|
|
| 797 |
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
return new Promise((resolve) => {
|
| 805 |
-
const fallback = async () => {
|
| 806 |
-
try {
|
| 807 |
-
const r = await fetch('https://api.ipify.org?format=json');
|
| 808 |
-
const d = await r.json();
|
| 809 |
-
resolve(['ip:' + d.ip, 'Location: IP-based fallback (approximate)']);
|
| 810 |
-
} catch(e) {
|
| 811 |
-
resolve(['', 'Location detection failed.']);
|
| 812 |
-
}
|
| 813 |
-
};
|
| 814 |
-
if (!navigator.geolocation) { fallback(); return; }
|
| 815 |
-
navigator.geolocation.getCurrentPosition(
|
| 816 |
-
(pos) => {
|
| 817 |
-
const lat = pos.coords.latitude.toFixed(5);
|
| 818 |
-
const lon = pos.coords.longitude.toFixed(5);
|
| 819 |
-
const acc = Math.round(pos.coords.accuracy);
|
| 820 |
-
resolve([lat + ',' + lon, `\u2705 GPS/WiFi location set (\u00b1${acc}m)`]);
|
| 821 |
-
},
|
| 822 |
-
fallback,
|
| 823 |
-
{timeout: 10000, maximumAge: 60000, enableHighAccuracy: true}
|
| 824 |
-
);
|
| 825 |
-
});
|
| 826 |
-
}""",
|
| 827 |
-
show_api=False,
|
| 828 |
-
)
|
| 829 |
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
|
|
|
|
|
|
|
|
|
| 836 |
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
|
| 851 |
if __name__ == "__main__":
|
| 852 |
port = int(os.environ.get("PORT", 7860))
|
|
|
|
| 6 |
import traceback
|
| 7 |
from contextvars import ContextVar
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
+
from typing import Dict, List, Optional, Tuple, TypedDict
|
| 10 |
|
| 11 |
import requests
|
| 12 |
from dotenv import load_dotenv
|
|
|
|
| 26 |
from huggingface_hub.errors import HfHubHTTPError
|
| 27 |
|
| 28 |
from langchain_core.tools import tool
|
| 29 |
+
from langchain_core.messages import SystemMessage, HumanMessage
|
| 30 |
from langchain.agents import create_agent
|
| 31 |
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
|
| 32 |
from langchain_community.tools import DuckDuckGoSearchRun, ArxivQueryRun
|
|
|
|
| 495 |
TOOL_NAMES = list(ALL_TOOLS.keys())
|
| 496 |
|
| 497 |
|
| 498 |
+
# ============================================================
|
| 499 |
+
# Multi-role workflow — supervisor-style orchestration
|
| 500 |
+
# ============================================================
|
| 501 |
+
# Architecture:
|
| 502 |
+
# Planner → Specialist (Creative or Technical) → QA Tester → Planner review
|
| 503 |
+
# The Planner breaks the task, picks a specialist, and reviews QA feedback.
|
| 504 |
+
# If QA fails and retries remain, the Planner revises and loops again.
|
| 505 |
+
# If QA passes (or max retries are reached) the Planner approves a final answer.
|
| 506 |
+
# ============================================================
|
| 507 |
+
|
| 508 |
+
# Limit checked by the revision loop in run_multi_role_workflow; the same value
# is echoed in the workflow trace header and in the UI description text.
# When the limit is hit, the latest specialist draft is returned as the answer.
MAX_REVISIONS = 3 # Maximum QA-driven revision cycles before accepting best attempt
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
class WorkflowState(TypedDict):
    """Shared, inspectable state object threaded through the whole workflow.

    A single dict of this shape is created per workflow run (and per standalone
    tool call) and is mutated in place by each step function.
    """
    # Task text exactly as handed to the workflow or tool.
    user_request: str
    # Planner output; replaced by the "REVISED INSTRUCTIONS" text on a revision.
    plan: str
    # Specialist selected by the Planner; empty for standalone QA tool calls.
    current_role: str # "creative" or "technical"
    # Most recent Creative Expert reply ("" until that role has run).
    creative_output: str
    # Most recent Technical Expert reply ("" until that role has run).
    technical_output: str
    draft_output: str # latest specialist output forwarded to QA
    # Full text of the latest QA Tester reply.
    qa_report: str
    # Verdict parsed from qa_report by _qa_passed_check.
    qa_passed: bool
    # Number of revision rounds requested so far; starts at 0.
    revision_count: int
    # Stays "" until the Planner approves or the revision limit is reached.
    final_answer: str
|
| 523 |
+
|
| 524 |
+
|
| 525 |
+
# --- Role system prompts ---
|
| 526 |
+
|
| 527 |
+
_PLANNER_SYSTEM = (
|
| 528 |
+
"You are the Planner in a multi-role AI workflow.\n"
|
| 529 |
+
"Your job is to:\n"
|
| 530 |
+
"1. Break the user's task into clear subtasks.\n"
|
| 531 |
+
"2. Decide which specialist to call: 'Creative Expert' (ideas, framing, wording)\n"
|
| 532 |
+
" or 'Technical Expert' (code, architecture, implementation).\n"
|
| 533 |
+
"3. State clear success criteria.\n\n"
|
| 534 |
+
"Respond in this exact format:\n"
|
| 535 |
+
"TASK BREAKDOWN:\n<subtask list>\n\n"
|
| 536 |
+
"ROLE TO CALL: <Creative Expert | Technical Expert>\n\n"
|
| 537 |
+
"SUCCESS CRITERIA:\n<what a correct, complete answer looks like>\n\n"
|
| 538 |
+
"GUIDANCE FOR SPECIALIST:\n<any constraints or focus areas>"
|
| 539 |
+
)
|
| 540 |
+
|
| 541 |
+
_CREATIVE_SYSTEM = (
|
| 542 |
+
"You are the Creative Expert in a multi-role AI workflow.\n"
|
| 543 |
+
"You handle brainstorming, alternative ideas, framing, wording, and concept generation.\n\n"
|
| 544 |
+
"Respond in this exact format:\n"
|
| 545 |
+
"IDEAS:\n<list of ideas and alternatives>\n\n"
|
| 546 |
+
"RATIONALE:\n<why these are strong choices>\n\n"
|
| 547 |
+
"RECOMMENDED DRAFT:\n<the best draft output based on the ideas>"
|
| 548 |
+
)
|
| 549 |
+
|
| 550 |
+
_TECHNICAL_SYSTEM = (
|
| 551 |
+
"You are the Technical Expert in a multi-role AI workflow.\n"
|
| 552 |
+
"You handle implementation details, code, architecture, and structured technical solutions.\n\n"
|
| 553 |
+
"Respond in this exact format:\n"
|
| 554 |
+
"TECHNICAL APPROACH:\n<recommended approach>\n\n"
|
| 555 |
+
"IMPLEMENTATION NOTES:\n<key details, steps, and caveats>\n\n"
|
| 556 |
+
"FINAL TECHNICAL DRAFT:\n<the complete technical output or solution>"
|
| 557 |
+
)
|
| 558 |
+
|
| 559 |
+
_QA_SYSTEM = (
|
| 560 |
+
"You are the QA Tester in a multi-role AI workflow.\n"
|
| 561 |
+
"Check whether the specialist output satisfies the original request and success criteria.\n\n"
|
| 562 |
+
"Respond in this exact format:\n"
|
| 563 |
+
"REQUIREMENTS CHECKED:\n<list each requirement and whether it was met>\n\n"
|
| 564 |
+
"ISSUES FOUND:\n<defects or gaps โ or 'None' if all requirements are met>\n\n"
|
| 565 |
+
"RESULT: <PASS | FAIL>\n\n"
|
| 566 |
+
"RECOMMENDED FIXES:\n<specific improvements โ or 'None' if result is PASS>"
|
| 567 |
+
)
|
| 568 |
+
|
| 569 |
+
_PLANNER_REVIEW_SYSTEM = (
|
| 570 |
+
"You are the Planner reviewing QA feedback in a multi-role AI workflow.\n"
|
| 571 |
+
"Based on the QA report, either approve the result or plan a revision.\n\n"
|
| 572 |
+
"If QA PASSED, respond with:\n"
|
| 573 |
+
"DECISION: APPROVED\n"
|
| 574 |
+
"FINAL ANSWER:\n<the approved specialist output, reproduced in full>\n\n"
|
| 575 |
+
"If QA FAILED, respond with:\n"
|
| 576 |
+
"DECISION: REVISE\n"
|
| 577 |
+
"ROLE TO CALL: <Creative Expert | Technical Expert>\n"
|
| 578 |
+
"REVISED INSTRUCTIONS:\n<specific fixes the specialist must address>"
|
| 579 |
+
)
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
# --- Internal helpers ---
|
| 583 |
+
|
| 584 |
+
def _llm_call(chat_model, system_prompt: str, user_content: str) -> str:
    """Run one system + human exchange against the model and return the reply text.

    Args:
        chat_model: Object exposing ``invoke(messages)``; the reply must have a
            ``content`` attribute.
        system_prompt: Role-specific instructions, sent as the system message.
        user_content: Task payload, sent as the human message.

    Returns:
        The reply content after passing through ``content_to_text``.
    """
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_content),
    ]
    reply = chat_model.invoke(messages)
    return content_to_text(reply.content)
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
def _decide_role(text: str) -> str:
|
| 594 |
+
"""Parse which specialist role the Planner wants to invoke.
|
| 595 |
+
|
| 596 |
+
Checks for the expected structured 'ROLE TO CALL:' format first,
|
| 597 |
+
then falls back to a word-boundary search for 'creative'.
|
| 598 |
+
Defaults to 'technical' when no clear signal is found.
|
| 599 |
+
"""
|
| 600 |
+
# Prefer the explicit structured label produced by the Planner prompt
|
| 601 |
+
if "ROLE TO CALL: Creative Expert" in text:
|
| 602 |
+
return "creative"
|
| 603 |
+
if "ROLE TO CALL: Technical Expert" in text:
|
| 604 |
+
return "technical"
|
| 605 |
+
# Fallback: word-boundary match so 'creative' in 'not creative enough' still works,
|
| 606 |
+
# but avoids false hits from unrelated use of the word.
|
| 607 |
+
if re.search(r"\bcreative\b", text, re.IGNORECASE):
|
| 608 |
+
return "creative"
|
| 609 |
+
return "technical"
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
def _qa_passed_check(qa_text: str) -> bool:
|
| 613 |
+
"""Return True only if the QA report contains an explicit PASS result.
|
| 614 |
+
|
| 615 |
+
Relies on the structured 'RESULT: PASS / RESULT: FAIL' line produced by
|
| 616 |
+
the QA Tester prompt. Returns False when the expected format is absent
|
| 617 |
+
to avoid false positives from words like 'bypass' or 'password'.
|
| 618 |
+
"""
|
| 619 |
+
lower = qa_text.lower()
|
| 620 |
+
if "result: pass" in lower:
|
| 621 |
+
return True
|
| 622 |
+
if "result: fail" in lower:
|
| 623 |
+
return False
|
| 624 |
+
# No recognised verdict โ treat as fail to avoid accepting a bad draft
|
| 625 |
+
return False
|
| 626 |
+
|
| 627 |
+
|
| 628 |
+
# --- Workflow step functions ---
|
| 629 |
+
# Each step receives the shared state and an append-only trace list,
|
| 630 |
+
# updates state in place, appends log lines, and returns updated state.
|
| 631 |
+
|
| 632 |
+
def _step_plan(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Planner step: produce a plan and choose the specialist to run next.

    Args:
        chat_model: Model handle forwarded to ``_llm_call``.
        state: Workflow state; ``plan`` and ``current_role`` are overwritten.
        trace: Log list; a header, the plan text and the routing line are appended.

    Returns:
        The same ``state`` object, updated in place.
    """
    trace.append("\nโโโ [PLANNER] Analysing task... โโโ")

    prompt_parts = [f"User request: {state['user_request']}"]
    revision = state["revision_count"]
    if revision > 0:
        # On a re-plan, include the previous QA findings for the Planner.
        prompt_parts.append(
            f"\n\nThis is revision {revision} of {MAX_REVISIONS}."
            f"\nPrevious QA report:\n{state['qa_report']}"
            "\nAdjust the plan to address the QA issues."
        )

    planner_reply = _llm_call(chat_model, _PLANNER_SYSTEM, "".join(prompt_parts))
    state["plan"] = planner_reply
    chosen_role = _decide_role(planner_reply)
    state["current_role"] = chosen_role

    trace.append(planner_reply)
    trace.append(f"โโโ [PLANNER] โ routing to: {chosen_role.upper()} EXPERT โโโ")
    return state
|
| 648 |
+
|
| 649 |
+
|
| 650 |
+
def _specialist_draft(
    chat_model,
    state: WorkflowState,
    trace: List[str],
    system_prompt: str,
    label: str,
    start_note: str,
) -> str:
    """Run one specialist turn and return its reply text.

    Shared body of the creative and technical steps: builds the prompt from the
    request and the plan (plus QA feedback on a revision), calls the model,
    stores the reply as ``draft_output`` and logs it to ``trace``.
    """
    trace.append(f"\nโโโ [{label}] {start_note} โโโ")
    content = (
        f"User request: {state['user_request']}\n\n"
        f"Planner instructions:\n{state['plan']}"
    )
    if state["revision_count"] > 0:
        # Revisions also carry the QA findings the specialist must address.
        content += f"\n\nQA feedback to address:\n{state['qa_report']}"
    text = _llm_call(chat_model, system_prompt, content)
    state["draft_output"] = text
    trace.append(text)
    trace.append(f"โโโ [{label}] Done โโโ")
    return text


def _step_creative(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Creative Expert: brainstorm ideas and produce a recommended draft.

    Writes the reply to ``creative_output`` and ``draft_output`` and returns the
    same ``state`` object.
    """
    state["creative_output"] = _specialist_draft(
        chat_model, state, trace, _CREATIVE_SYSTEM,
        "CREATIVE EXPERT", "Generating ideas...",
    )
    return state


def _step_technical(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Technical Expert: provide implementation details and a complete technical draft.

    Writes the reply to ``technical_output`` and ``draft_output`` and returns the
    same ``state`` object.
    """
    state["technical_output"] = _specialist_draft(
        chat_model, state, trace, _TECHNICAL_SYSTEM,
        "TECHNICAL EXPERT", "Working on implementation...",
    )
    return state
|
| 682 |
+
|
| 683 |
+
|
| 684 |
+
def _step_qa(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """QA Tester: check the draft against the original request and success criteria.

    Args:
        chat_model: Model handle forwarded to ``_llm_call``.
        state: Workflow state; ``qa_report`` and ``qa_passed`` are overwritten.
        trace: Log list; a header, the QA report and the verdict line are appended.

    Returns:
        The same ``state`` object, updated in place.
    """
    trace.append("\nโโโ [QA TESTER] Reviewing output... โโโ")
    content = (
        f"Original user request: {state['user_request']}\n\n"
        f"Planner's plan and success criteria:\n{state['plan']}\n\n"
        f"Specialist output to review:\n{state['draft_output']}"
    )
    text = _llm_call(chat_model, _QA_SYSTEM, content)
    state["qa_report"] = text
    state["qa_passed"] = _qa_passed_check(text)
    # Status glyphs are written as escapes (check mark / cross mark) so the
    # literal stays on one line regardless of the source-file encoding.
    # NOTE(review): glyphs reconstructed from mis-encoded text; confirm upstream.
    result_label = "\u2705 PASS" if state["qa_passed"] else "\u274c FAIL"
    trace.append(text)
    trace.append(f"โโโ [QA TESTER] Result: {result_label} โโโ")
    return state
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
def _step_planner_review(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Planner: review QA feedback and either approve the result or request a revision.

    On approval, ``final_answer`` is set to the text after ``FINAL ANSWER:``, or
    to the specialist draft when that section is missing or empty. It is set
    whenever a draft exists, because the orchestration loop treats an empty
    ``final_answer`` as "not approved".

    On revision, ``plan`` is replaced by the text after ``REVISED INSTRUCTIONS:``
    (kept unchanged when that section is missing or empty) and ``current_role``
    is re-derived from the review.

    Section labels are matched case-insensitively, consistent with the
    case-insensitive ``DECISION: APPROVED`` check.

    Args:
        chat_model: Model handle forwarded to ``_llm_call``.
        state: Workflow state, updated in place as described above.
        trace: Log list; the review text and outcome lines are appended.

    Returns:
        The same ``state`` object.
    """
    trace.append("\nโโโ [PLANNER] Reviewing QA feedback... โโโ")
    content = (
        f"User request: {state['user_request']}\n\n"
        f"Plan:\n{state['plan']}\n\n"
        f"Specialist output:\n{state['draft_output']}\n\n"
        f"QA report:\n{state['qa_report']}"
    )
    review = _llm_call(chat_model, _PLANNER_REVIEW_SYSTEM, content)
    trace.append(review)

    if "DECISION: APPROVED" in review.upper():
        parts = re.split(r"FINAL ANSWER:", review, maxsplit=1, flags=re.IGNORECASE)
        final_answer = parts[1].strip() if len(parts) > 1 else ""
        if not final_answer:
            # Approved without a usable FINAL ANSWER section: use the draft.
            trace.append(" โ FINAL ANSWER section missing; using specialist draft as final answer.")
            final_answer = state["draft_output"]
        state["final_answer"] = final_answer
        # Check-mark glyph written as an escape so the literal stays on one line.
        trace.append("โโโ [PLANNER] โ \u2705 APPROVED โโโ")
    else:
        parts = re.split(r"REVISED INSTRUCTIONS:", review, maxsplit=1, flags=re.IGNORECASE)
        revised_plan = parts[1].strip() if len(parts) > 1 else ""
        if revised_plan:
            state["plan"] = revised_plan
        else:
            # No usable instructions: keep the plan rather than blanking it.
            trace.append(" โ REVISED INSTRUCTIONS section missing; retrying with existing plan.")
        state["current_role"] = _decide_role(review)
        trace.append(
            f"โโโ [PLANNER] โ ๐ REVISE โ routing to {state['current_role'].upper()} EXPERT โโโ"
        )
    return state
|
| 736 |
+
|
| 737 |
+
|
| 738 |
+
# --- Specialist role tools ---
|
| 739 |
+
# These wrap the step functions as @tool so the Planner (or any LangChain agent)
|
| 740 |
+
# can invoke specialists in a standard tool-use pattern.
|
| 741 |
+
|
| 742 |
+
# Holds the active model ID for standalone specialist tool calls.
|
| 743 |
+
# Model id read by the @tool wrappers when they build a chat model.
# run_multi_role_workflow overwrites it (via `global`) with the model id of the
# most recent workflow run.
# NOTE(review): as a plain module global this value is shared by every caller in
# the process; confirm that is acceptable if requests can run concurrently.
_workflow_model_id: str = DEFAULT_MODEL_ID
|
| 744 |
+
|
| 745 |
+
|
| 746 |
+
@tool
def call_creative_expert(task: str) -> str:
    """Call the Creative Expert to brainstorm ideas, framing, and produce a draft for a given task."""
    # The docstring above is kept verbatim: @tool exposes it as the tool description.
    model = build_provider_chat(_workflow_model_id)
    # Fresh single-use state: the task text is both the request and the plan.
    standalone: WorkflowState = WorkflowState(
        user_request=task,
        plan=task,
        current_role="creative",
        creative_output="",
        technical_output="",
        draft_output="",
        qa_report="",
        qa_passed=False,
        revision_count=0,
        final_answer="",
    )
    finished = _step_creative(model, standalone, [])
    return finished["creative_output"]
|
| 757 |
+
|
| 758 |
+
|
| 759 |
+
@tool
def call_technical_expert(task: str) -> str:
    """Call the Technical Expert to produce implementation details and a solution for a given task."""
    # The docstring above is kept verbatim: @tool exposes it as the tool description.
    model = build_provider_chat(_workflow_model_id)
    # Fresh single-use state: the task text is both the request and the plan.
    standalone: WorkflowState = WorkflowState(
        user_request=task,
        plan=task,
        current_role="technical",
        creative_output="",
        technical_output="",
        draft_output="",
        qa_report="",
        qa_passed=False,
        revision_count=0,
        final_answer="",
    )
    finished = _step_technical(model, standalone, [])
    return finished["technical_output"]
|
| 770 |
+
|
| 771 |
+
|
| 772 |
+
@tool
def call_qa_tester(task_and_output: str) -> str:
    """Call the QA Tester to review specialist output against requirements.
    Input format: 'TASK: <description>\nOUTPUT: <specialist output to review>'"""
    # The docstring above is kept verbatim: @tool exposes it as the tool description.
    model = build_provider_chat(_workflow_model_id)

    # Without an "OUTPUT:" marker the whole input is used as both task and output.
    task_text = task_and_output
    reviewed_output = task_and_output
    if "OUTPUT:" in task_and_output:
        before, after = task_and_output.split("OUTPUT:", 1)
        task_text = before.replace("TASK:", "").strip()
        reviewed_output = after.strip()

    # Standalone QA call outside the normal loop, so no specialist role is recorded.
    standalone: WorkflowState = WorkflowState(
        user_request=task_text,
        plan=task_text,
        current_role="",
        creative_output="",
        technical_output="",
        draft_output=reviewed_output,
        qa_report="",
        qa_passed=False,
        revision_count=0,
        final_answer="",
    )
    finished = _step_qa(model, standalone, [])
    return finished["qa_report"]
|
| 792 |
+
|
| 793 |
+
|
| 794 |
+
# --- Orchestration loop ---
|
| 795 |
+
|
| 796 |
+
def run_multi_role_workflow(message: str, model_id: str) -> Tuple[str, str]:
    """Run the supervisor-style multi-role workflow.

    Flow:
        1. Planner analyses the task and picks a specialist.
        2. Specialist (Creative or Technical) generates output.
        3. QA Tester reviews the output.
        4. Planner reviews the QA result and either approves or requests a revision.
        5. Repeat from step 2 while the Planner requests revisions and fewer than
           ``MAX_REVISIONS`` revisions have been run.
        6. Once the limit is reached, return the latest draft; the outstanding
           QA concerns are written to the trace.

    The initial attempt does not count against the limit, so the specialist
    runs at most ``1 + MAX_REVISIONS`` times.

    Args:
        message: The user's task text.
        model_id: Model identifier passed to ``build_provider_chat``; it is also
            stored in the module-level ``_workflow_model_id``.

    Returns:
        (final_answer, workflow_trace_text)

    Exceptions raised inside the workflow steps are caught and logged to the
    trace; the latest draft (or an error message if there is none) is returned.
    """
    global _workflow_model_id
    _workflow_model_id = model_id
    chat_model = build_provider_chat(model_id)

    state: WorkflowState = {
        "user_request": message,
        "plan": "",
        "current_role": "",
        "creative_output": "",
        "technical_output": "",
        "draft_output": "",
        "qa_report": "",
        "qa_passed": False,
        "revision_count": 0,
        "final_answer": "",
    }

    trace: List[str] = [
        "โโโ MULTI-ROLE WORKFLOW STARTED โโโ",
        f"Model : {model_id}",
        f"Request : {message}",
        f"Max revisions: {MAX_REVISIONS}",
    ]

    try:
        # Step 1: Planner creates the initial plan
        state = _step_plan(chat_model, state, trace)

        # Orchestration loop: specialist -> QA -> Planner review -> revise if needed
        while True:
            # Step 2: invoke the chosen specialist
            if state["current_role"] == "creative":
                state = _step_creative(chat_model, state, trace)
            else:
                state = _step_technical(chat_model, state, trace)

            # Step 3: QA reviews the specialist's draft
            state = _step_qa(chat_model, state, trace)

            # Step 4: Planner reviews the QA result
            state = _step_planner_review(chat_model, state, trace)

            # Exit if the Planner approved the result
            if state["final_answer"]:
                trace.append("\nโโโ WORKFLOW COMPLETE โ APPROVED โโโ")
                break

            # Check the limit BEFORE counting the next revision, so the initial
            # attempt is not counted as one and MAX_REVISIONS revisions can run.
            if state["revision_count"] >= MAX_REVISIONS:
                state["final_answer"] = state["draft_output"]
                trace.append(
                    f"\nโโโ MAX REVISIONS REACHED ({MAX_REVISIONS}) โโโ\n"
                    f"Returning best attempt. Outstanding QA concerns:\n{state['qa_report']}"
                )
                break

            state["revision_count"] += 1
            trace.append(f"\nโโโ REVISION {state['revision_count']} / {MAX_REVISIONS} โโโ")

    except Exception as exc:
        # Top-level boundary for the UI: log the failure, return what we have.
        trace.append(f"\n[ERROR] {exc}\n{traceback.format_exc()}")
        state["final_answer"] = state["draft_output"] or f"Workflow error: {exc}"

    return state["final_answer"], "\n".join(trace)
|
| 874 |
+
|
| 875 |
+
|
| 876 |
# ============================================================
|
| 877 |
# Agent builder
|
| 878 |
# ============================================================
|
|
|
|
| 1111 |
"with selectable tools and extended debugging."
|
| 1112 |
)
|
| 1113 |
|
| 1114 |
+
with gr.Tabs():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1115 |
|
| 1116 |
+
# โโ Tab 1: existing single-agent demo (unchanged) โโโโโโโโโโโโโโโโโโ
|
| 1117 |
+
with gr.Tab("Agent Demo"):
|
| 1118 |
+
with gr.Row():
|
| 1119 |
+
model_dropdown = gr.Dropdown(
|
| 1120 |
+
choices=MODEL_OPTIONS,
|
| 1121 |
+
value=DEFAULT_MODEL_ID,
|
| 1122 |
+
label="Base model",
|
| 1123 |
+
)
|
| 1124 |
+
model_status = gr.Textbox(
|
| 1125 |
+
value=model_status_text(DEFAULT_MODEL_ID),
|
| 1126 |
+
label="Model status",
|
| 1127 |
+
interactive=False,
|
| 1128 |
+
)
|
| 1129 |
|
| 1130 |
with gr.Row():
|
| 1131 |
+
with gr.Column(scale=3):
|
| 1132 |
+
chatbot = gr.Chatbot(label="Conversation", height=460, type="messages")
|
| 1133 |
+
user_input = gr.Textbox(
|
| 1134 |
+
label="Message",
|
| 1135 |
+
placeholder="Ask anything...",
|
| 1136 |
+
)
|
| 1137 |
+
|
| 1138 |
+
with gr.Row():
|
| 1139 |
+
send_btn = gr.Button("Send", variant="primary")
|
| 1140 |
+
clear_btn = gr.Button("Clear")
|
| 1141 |
+
|
| 1142 |
+
chart_output = gr.Image(label="Generated chart", type="filepath")
|
| 1143 |
+
|
| 1144 |
+
with gr.Row():
|
| 1145 |
+
location_btn = gr.Button("๐ Share my location", size="sm")
|
| 1146 |
+
location_status = gr.Textbox(
|
| 1147 |
+
value="Location not set โ click the button above before asking 'where am I'",
|
| 1148 |
+
label="Location status",
|
| 1149 |
+
interactive=False,
|
| 1150 |
+
max_lines=1,
|
| 1151 |
+
)
|
| 1152 |
+
|
| 1153 |
+
with gr.Column(scale=1):
|
| 1154 |
+
enabled_tools = gr.CheckboxGroup(
|
| 1155 |
+
choices=TOOL_NAMES,
|
| 1156 |
+
value=TOOL_NAMES,
|
| 1157 |
+
label="Enabled tools",
|
| 1158 |
+
)
|
| 1159 |
+
tool_trace = gr.Textbox(
|
| 1160 |
+
label="Tool trace",
|
| 1161 |
+
lines=18,
|
| 1162 |
+
interactive=False,
|
| 1163 |
+
)
|
| 1164 |
+
|
| 1165 |
+
debug_output = gr.Textbox(
|
| 1166 |
+
label="Debug output",
|
| 1167 |
+
lines=28,
|
| 1168 |
+
interactive=False,
|
| 1169 |
+
)
|
| 1170 |
|
| 1171 |
+
# Hidden: holds "lat,lon" or "ip:<address>" set by the location button
|
| 1172 |
+
client_ip_box = gr.Textbox(visible=False, value="")
|
| 1173 |
|
| 1174 |
+
model_dropdown.change(
|
| 1175 |
+
fn=model_status_text,
|
| 1176 |
+
inputs=[model_dropdown],
|
| 1177 |
+
outputs=[model_status],
|
| 1178 |
+
show_api=False,
|
| 1179 |
+
)
|
|
|
|
|
|
|
| 1180 |
|
| 1181 |
+
# Geolocation button: JS runs in the browser, result goes to hidden box + status label
|
| 1182 |
+
location_btn.click(
|
| 1183 |
+
fn=None,
|
| 1184 |
+
inputs=None,
|
| 1185 |
+
outputs=[client_ip_box, location_status],
|
| 1186 |
+
js="""async () => {
|
| 1187 |
+
return new Promise((resolve) => {
|
| 1188 |
+
const fallback = async () => {
|
| 1189 |
+
try {
|
| 1190 |
+
const r = await fetch('https://api.ipify.org?format=json');
|
| 1191 |
+
const d = await r.json();
|
| 1192 |
+
resolve(['ip:' + d.ip, 'Location: IP-based fallback (approximate)']);
|
| 1193 |
+
} catch(e) {
|
| 1194 |
+
resolve(['', 'Location detection failed.']);
|
| 1195 |
+
}
|
| 1196 |
+
};
|
| 1197 |
+
if (!navigator.geolocation) { fallback(); return; }
|
| 1198 |
+
navigator.geolocation.getCurrentPosition(
|
| 1199 |
+
(pos) => {
|
| 1200 |
+
const lat = pos.coords.latitude.toFixed(5);
|
| 1201 |
+
const lon = pos.coords.longitude.toFixed(5);
|
| 1202 |
+
const acc = Math.round(pos.coords.accuracy);
|
| 1203 |
+
resolve([lat + ',' + lon, `\u2705 GPS/WiFi location set (\u00b1${acc}m)`]);
|
| 1204 |
+
},
|
| 1205 |
+
fallback,
|
| 1206 |
+
{timeout: 10000, maximumAge: 60000, enableHighAccuracy: true}
|
| 1207 |
+
);
|
| 1208 |
+
});
|
| 1209 |
+
}""",
|
| 1210 |
+
show_api=False,
|
| 1211 |
)
|
| 1212 |
+
|
| 1213 |
+
send_btn.click(
|
| 1214 |
+
fn=run_agent,
|
| 1215 |
+
inputs=[user_input, chatbot, enabled_tools, model_dropdown, client_ip_box],
|
| 1216 |
+
outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
|
| 1217 |
+
show_api=False,
|
| 1218 |
)
|
| 1219 |
|
| 1220 |
+
user_input.submit(
|
| 1221 |
+
fn=run_agent,
|
| 1222 |
+
inputs=[user_input, chatbot, enabled_tools, model_dropdown, client_ip_box],
|
| 1223 |
+
outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
|
| 1224 |
+
show_api=False,
|
| 1225 |
+
)
|
| 1226 |
|
| 1227 |
+
clear_btn.click(
|
| 1228 |
+
fn=lambda model_id: ([], "", "", None, model_status_text(model_id), ""),
|
| 1229 |
+
inputs=[model_dropdown],
|
| 1230 |
+
outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
|
| 1231 |
+
show_api=False,
|
| 1232 |
+
)
|
| 1233 |
|
| 1234 |
+
# โโ Tab 2: multi-role workflow demo โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 1235 |
+
with gr.Tab("Multi-Role Workflow"):
|
| 1236 |
+
gr.Markdown(
|
| 1237 |
+
"## Supervisor-style Multi-Role Workflow\n"
|
| 1238 |
+
"**Planner** โ **Specialist** (Creative or Technical) โ **QA Tester** โ **Planner review**\n\n"
|
| 1239 |
+
"The Planner breaks the task, picks the right specialist, and reviews QA feedback. "
|
| 1240 |
+
f"If QA fails, the loop repeats up to **{MAX_REVISIONS}** times before accepting the best attempt."
|
| 1241 |
+
)
|
| 1242 |
|
| 1243 |
+
with gr.Row():
|
| 1244 |
+
wf_model_dropdown = gr.Dropdown(
|
| 1245 |
+
choices=MODEL_OPTIONS,
|
| 1246 |
+
value=DEFAULT_MODEL_ID,
|
| 1247 |
+
label="Model",
|
| 1248 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1249 |
|
| 1250 |
+
wf_input = gr.Textbox(
|
| 1251 |
+
label="Task / Request",
|
| 1252 |
+
placeholder=(
|
| 1253 |
+
"Describe what you want the multi-role team to work onโฆ\n"
|
| 1254 |
+
"e.g. 'Write a short blog post about the benefits of open-source AI'"
|
| 1255 |
+
),
|
| 1256 |
+
lines=3,
|
| 1257 |
+
)
|
| 1258 |
+
wf_submit_btn = gr.Button("โถ Run Multi-Role Workflow", variant="primary")
|
| 1259 |
|
| 1260 |
+
with gr.Row():
|
| 1261 |
+
with gr.Column(scale=2):
|
| 1262 |
+
wf_answer = gr.Textbox(
|
| 1263 |
+
label="โ
Final Answer (Planner approved)",
|
| 1264 |
+
lines=14,
|
| 1265 |
+
interactive=False,
|
| 1266 |
+
)
|
| 1267 |
+
with gr.Column(scale=3):
|
| 1268 |
+
wf_trace = gr.Textbox(
|
| 1269 |
+
label="Workflow Trace โ role-by-role log",
|
| 1270 |
+
lines=28,
|
| 1271 |
+
interactive=False,
|
| 1272 |
+
)
|
| 1273 |
+
|
| 1274 |
+
def _run_workflow_ui(message: str, model_id: str) -> Tuple[str, str]:
|
| 1275 |
+
"""Gradio handler: validate input, run the workflow, return outputs."""
|
| 1276 |
+
if not message or not message.strip():
|
| 1277 |
+
return "No input provided.", ""
|
| 1278 |
+
try:
|
| 1279 |
+
final_answer, trace = run_multi_role_workflow(message.strip(), model_id)
|
| 1280 |
+
return final_answer, trace
|
| 1281 |
+
except Exception as exc:
|
| 1282 |
+
return f"Workflow error: {exc}", traceback.format_exc()
|
| 1283 |
+
|
| 1284 |
+
wf_submit_btn.click(
|
| 1285 |
+
fn=_run_workflow_ui,
|
| 1286 |
+
inputs=[wf_input, wf_model_dropdown],
|
| 1287 |
+
outputs=[wf_answer, wf_trace],
|
| 1288 |
+
show_api=False,
|
| 1289 |
+
)
|
| 1290 |
|
| 1291 |
+
wf_input.submit(
|
| 1292 |
+
fn=_run_workflow_ui,
|
| 1293 |
+
inputs=[wf_input, wf_model_dropdown],
|
| 1294 |
+
outputs=[wf_answer, wf_trace],
|
| 1295 |
+
show_api=False,
|
| 1296 |
+
)
|
| 1297 |
|
| 1298 |
if __name__ == "__main__":
|
| 1299 |
port = int(os.environ.get("PORT", 7860))
|