copilot-swe-agent[bot] CatoG committed on
Commit
769359f
ยท
1 Parent(s): 4b17537

Add supervisor-style multi-role agentic workflow

Browse files

Co-authored-by: CatoG <47473856+CatoG@users.noreply.github.com>

Files changed (1) hide show
  1. app.py +548 -101
app.py CHANGED
@@ -6,7 +6,7 @@ import warnings
6
  import traceback
7
  from contextvars import ContextVar
8
  from datetime import datetime, timezone
9
- from typing import Dict, List, Optional, Tuple
10
 
11
  import requests
12
  from dotenv import load_dotenv
@@ -26,6 +26,7 @@ import matplotlib.pyplot as plt
26
  from huggingface_hub.errors import HfHubHTTPError
27
 
28
  from langchain_core.tools import tool
 
29
  from langchain.agents import create_agent
30
  from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
31
  from langchain_community.tools import DuckDuckGoSearchRun, ArxivQueryRun
@@ -494,6 +495,384 @@ ALL_TOOLS = {
494
  TOOL_NAMES = list(ALL_TOOLS.keys())
495
 
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
  # ============================================================
498
  # Agent builder
499
  # ============================================================
@@ -732,121 +1111,189 @@ with gr.Blocks(title="LLM + Agent tools demo", theme=gr.themes.Soft()) as demo:
732
  "with selectable tools and extended debugging."
733
  )
734
 
735
- with gr.Row():
736
- model_dropdown = gr.Dropdown(
737
- choices=MODEL_OPTIONS,
738
- value=DEFAULT_MODEL_ID,
739
- label="Base model",
740
- )
741
- model_status = gr.Textbox(
742
- value=model_status_text(DEFAULT_MODEL_ID),
743
- label="Model status",
744
- interactive=False,
745
- )
746
 
747
- with gr.Row():
748
- with gr.Column(scale=3):
749
- chatbot = gr.Chatbot(label="Conversation", height=460, type="messages")
750
- user_input = gr.Textbox(
751
- label="Message",
752
- placeholder="Ask anything...",
753
- )
 
 
 
 
 
 
754
 
755
  with gr.Row():
756
- send_btn = gr.Button("Send", variant="primary")
757
- clear_btn = gr.Button("Clear")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
 
759
- chart_output = gr.Image(label="Generated chart", type="filepath")
 
760
 
761
- with gr.Row():
762
- location_btn = gr.Button("๐Ÿ“ Share my location", size="sm")
763
- location_status = gr.Textbox(
764
- value="Location not set โ€” click the button above before asking 'where am I'",
765
- label="Location status",
766
- interactive=False,
767
- max_lines=1,
768
- )
769
 
770
- with gr.Column(scale=1):
771
- enabled_tools = gr.CheckboxGroup(
772
- choices=TOOL_NAMES,
773
- value=TOOL_NAMES,
774
- label="Enabled tools",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
775
  )
776
- tool_trace = gr.Textbox(
777
- label="Tool trace",
778
- lines=18,
779
- interactive=False,
 
 
780
  )
781
 
782
- debug_output = gr.Textbox(
783
- label="Debug output",
784
- lines=28,
785
- interactive=False,
786
- )
 
787
 
788
- # Hidden: holds "lat,lon" or "ip:<address>" set by the location button
789
- client_ip_box = gr.Textbox(visible=False, value="")
 
 
 
 
790
 
791
- model_dropdown.change(
792
- fn=model_status_text,
793
- inputs=[model_dropdown],
794
- outputs=[model_status],
795
- show_api=False,
796
- )
 
 
797
 
798
- # Geolocation button: JS runs in the browser, result goes to hidden box + status label
799
- location_btn.click(
800
- fn=None,
801
- inputs=None,
802
- outputs=[client_ip_box, location_status],
803
- js="""async () => {
804
- return new Promise((resolve) => {
805
- const fallback = async () => {
806
- try {
807
- const r = await fetch('https://api.ipify.org?format=json');
808
- const d = await r.json();
809
- resolve(['ip:' + d.ip, 'Location: IP-based fallback (approximate)']);
810
- } catch(e) {
811
- resolve(['', 'Location detection failed.']);
812
- }
813
- };
814
- if (!navigator.geolocation) { fallback(); return; }
815
- navigator.geolocation.getCurrentPosition(
816
- (pos) => {
817
- const lat = pos.coords.latitude.toFixed(5);
818
- const lon = pos.coords.longitude.toFixed(5);
819
- const acc = Math.round(pos.coords.accuracy);
820
- resolve([lat + ',' + lon, `\u2705 GPS/WiFi location set (\u00b1${acc}m)`]);
821
- },
822
- fallback,
823
- {timeout: 10000, maximumAge: 60000, enableHighAccuracy: true}
824
- );
825
- });
826
- }""",
827
- show_api=False,
828
- )
829
 
830
- send_btn.click(
831
- fn=run_agent,
832
- inputs=[user_input, chatbot, enabled_tools, model_dropdown, client_ip_box],
833
- outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
834
- show_api=False,
835
- )
 
 
 
836
 
837
- user_input.submit(
838
- fn=run_agent,
839
- inputs=[user_input, chatbot, enabled_tools, model_dropdown, client_ip_box],
840
- outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
841
- show_api=False,
842
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
843
 
844
- clear_btn.click(
845
- fn=lambda model_id: ([], "", "", None, model_status_text(model_id), ""),
846
- inputs=[model_dropdown],
847
- outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
848
- show_api=False,
849
- )
850
 
851
  if __name__ == "__main__":
852
  port = int(os.environ.get("PORT", 7860))
 
6
  import traceback
7
  from contextvars import ContextVar
8
  from datetime import datetime, timezone
9
+ from typing import Dict, List, Optional, Tuple, TypedDict
10
 
11
  import requests
12
  from dotenv import load_dotenv
 
26
  from huggingface_hub.errors import HfHubHTTPError
27
 
28
  from langchain_core.tools import tool
29
+ from langchain_core.messages import SystemMessage, HumanMessage
30
  from langchain.agents import create_agent
31
  from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
32
  from langchain_community.tools import DuckDuckGoSearchRun, ArxivQueryRun
 
495
  TOOL_NAMES = list(ALL_TOOLS.keys())
496
 
497
 
498
+ # ============================================================
499
+ # Multi-role workflow โ€” supervisor-style orchestration
500
+ # ============================================================
501
+ # Architecture:
502
+ # Planner โ†’ Specialist (Creative or Technical) โ†’ QA Tester โ†’ Planner review
503
+ # The Planner breaks the task, picks a specialist, and reviews QA feedback.
504
+ # If QA fails and retries remain, the Planner revises and loops again.
505
+ # If QA passes (or max retries are reached) the Planner approves a final answer.
506
+ # ============================================================
507
+
508
MAX_REVISIONS = 3  # Maximum QA-driven revision cycles before accepting best attempt


class WorkflowState(TypedDict):
    """Shared, inspectable state object threaded through the whole workflow."""
    user_request: str    # original task text from the user; never rewritten
    plan: str            # Planner's current plan (replaced by REVISED INSTRUCTIONS on retry)
    current_role: str  # "creative" or "technical"
    creative_output: str   # last output from the Creative Expert step
    technical_output: str  # last output from the Technical Expert step
    draft_output: str  # latest specialist output forwarded to QA
    qa_report: str     # most recent QA Tester report text
    qa_passed: bool    # True when the QA report contained an explicit "RESULT: PASS"
    revision_count: int  # number of completed QA-driven revision cycles so far
    final_answer: str    # set when the Planner approves, or when the retry limit is hit
524
+
525
# --- Role system prompts ---
# Each prompt forces a rigid, labelled output format; the orchestration code
# parses these labels ("ROLE TO CALL:", "RESULT:", "DECISION:", etc.), so the
# prompt text and the parser helpers below must stay in sync.

# Planner: decomposes the task, routes to one specialist, states success criteria.
_PLANNER_SYSTEM = (
    "You are the Planner in a multi-role AI workflow.\n"
    "Your job is to:\n"
    "1. Break the user's task into clear subtasks.\n"
    "2. Decide which specialist to call: 'Creative Expert' (ideas, framing, wording)\n"
    "   or 'Technical Expert' (code, architecture, implementation).\n"
    "3. State clear success criteria.\n\n"
    "Respond in this exact format:\n"
    "TASK BREAKDOWN:\n<subtask list>\n\n"
    "ROLE TO CALL: <Creative Expert | Technical Expert>\n\n"
    "SUCCESS CRITERIA:\n<what a correct, complete answer looks like>\n\n"
    "GUIDANCE FOR SPECIALIST:\n<any constraints or focus areas>"
)

# Creative specialist: brainstorming / framing; its RECOMMENDED DRAFT becomes the QA input.
_CREATIVE_SYSTEM = (
    "You are the Creative Expert in a multi-role AI workflow.\n"
    "You handle brainstorming, alternative ideas, framing, wording, and concept generation.\n\n"
    "Respond in this exact format:\n"
    "IDEAS:\n<list of ideas and alternatives>\n\n"
    "RATIONALE:\n<why these are strong choices>\n\n"
    "RECOMMENDED DRAFT:\n<the best draft output based on the ideas>"
)

# Technical specialist: implementation work; its FINAL TECHNICAL DRAFT becomes the QA input.
_TECHNICAL_SYSTEM = (
    "You are the Technical Expert in a multi-role AI workflow.\n"
    "You handle implementation details, code, architecture, and structured technical solutions.\n\n"
    "Respond in this exact format:\n"
    "TECHNICAL APPROACH:\n<recommended approach>\n\n"
    "IMPLEMENTATION NOTES:\n<key details, steps, and caveats>\n\n"
    "FINAL TECHNICAL DRAFT:\n<the complete technical output or solution>"
)

# QA Tester: emits the "RESULT: <PASS | FAIL>" line parsed by _qa_passed_check.
_QA_SYSTEM = (
    "You are the QA Tester in a multi-role AI workflow.\n"
    "Check whether the specialist output satisfies the original request and success criteria.\n\n"
    "Respond in this exact format:\n"
    "REQUIREMENTS CHECKED:\n<list each requirement and whether it was met>\n\n"
    "ISSUES FOUND:\n<defects or gaps — or 'None' if all requirements are met>\n\n"
    "RESULT: <PASS | FAIL>\n\n"
    "RECOMMENDED FIXES:\n<specific improvements — or 'None' if result is PASS>"
)

# Planner review: emits DECISION/FINAL ANSWER or DECISION/REVISED INSTRUCTIONS,
# which _step_planner_review splits on to approve or loop.
_PLANNER_REVIEW_SYSTEM = (
    "You are the Planner reviewing QA feedback in a multi-role AI workflow.\n"
    "Based on the QA report, either approve the result or plan a revision.\n\n"
    "If QA PASSED, respond with:\n"
    "DECISION: APPROVED\n"
    "FINAL ANSWER:\n<the approved specialist output, reproduced in full>\n\n"
    "If QA FAILED, respond with:\n"
    "DECISION: REVISE\n"
    "ROLE TO CALL: <Creative Expert | Technical Expert>\n"
    "REVISED INSTRUCTIONS:\n<specific fixes the specialist must address>"
)
580
+
581
+
582
+ # --- Internal helpers ---
583
+
584
def _llm_call(chat_model, system_prompt: str, user_content: str) -> str:
    """Send one system+human message pair to the model and return plain text.

    The raw response content is normalised through ``content_to_text`` so every
    caller receives a simple string regardless of the provider payload shape.
    """
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_content),
    ]
    reply = chat_model.invoke(messages)
    return content_to_text(reply.content)
591
+
592
+
593
+ def _decide_role(text: str) -> str:
594
+ """Parse which specialist role the Planner wants to invoke.
595
+
596
+ Checks for the expected structured 'ROLE TO CALL:' format first,
597
+ then falls back to a word-boundary search for 'creative'.
598
+ Defaults to 'technical' when no clear signal is found.
599
+ """
600
+ # Prefer the explicit structured label produced by the Planner prompt
601
+ if "ROLE TO CALL: Creative Expert" in text:
602
+ return "creative"
603
+ if "ROLE TO CALL: Technical Expert" in text:
604
+ return "technical"
605
+ # Fallback: word-boundary match so 'creative' in 'not creative enough' still works,
606
+ # but avoids false hits from unrelated use of the word.
607
+ if re.search(r"\bcreative\b", text, re.IGNORECASE):
608
+ return "creative"
609
+ return "technical"
610
+
611
+
612
+ def _qa_passed_check(qa_text: str) -> bool:
613
+ """Return True only if the QA report contains an explicit PASS result.
614
+
615
+ Relies on the structured 'RESULT: PASS / RESULT: FAIL' line produced by
616
+ the QA Tester prompt. Returns False when the expected format is absent
617
+ to avoid false positives from words like 'bypass' or 'password'.
618
+ """
619
+ lower = qa_text.lower()
620
+ if "result: pass" in lower:
621
+ return True
622
+ if "result: fail" in lower:
623
+ return False
624
+ # No recognised verdict โ€” treat as fail to avoid accepting a bad draft
625
+ return False
626
+
627
+
628
# --- Workflow step functions ---
# Each step receives the shared state and an append-only trace list,
# updates state in place, appends log lines, and returns updated state.

def _step_plan(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Planner: analyse the task, produce a plan, decide which specialist to call."""
    trace.append("\n╔══ [PLANNER] Analysing task... ══╗")
    prompt_body = f"User request: {state['user_request']}"
    revision = state["revision_count"]
    if revision > 0:
        # On a retry, surface the previous QA findings so the new plan targets them.
        prompt_body += (
            f"\n\nThis is revision {revision} of {MAX_REVISIONS}."
            f"\nPrevious QA report:\n{state['qa_report']}"
            "\nAdjust the plan to address the QA issues."
        )
    plan_text = _llm_call(chat_model, _PLANNER_SYSTEM, prompt_body)
    state["plan"] = plan_text
    state["current_role"] = _decide_role(plan_text)
    trace.append(plan_text)
    trace.append(f"╚══ [PLANNER] → routing to: {state['current_role'].upper()} EXPERT ══╝")
    return state
648
+
649
+
650
def _step_creative(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Creative Expert: brainstorm ideas and produce a recommended draft."""
    trace.append("\n╔══ [CREATIVE EXPERT] Generating ideas... ══╗")
    briefing = (
        f"User request: {state['user_request']}\n\n"
        f"Planner instructions:\n{state['plan']}"
    )
    if state["revision_count"] > 0:
        # Include the QA findings so the revised draft addresses the reported gaps.
        briefing += f"\n\nQA feedback to address:\n{state['qa_report']}"
    draft = _llm_call(chat_model, _CREATIVE_SYSTEM, briefing)
    state["creative_output"] = draft
    state["draft_output"] = draft  # forwarded to the QA step
    trace.append(draft)
    trace.append("╚══ [CREATIVE EXPERT] Done ══╝")
    return state
665
+
666
+
667
def _step_technical(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Technical Expert: provide implementation details and a complete technical draft."""
    trace.append("\n╔══ [TECHNICAL EXPERT] Working on implementation... ══╗")
    briefing = (
        f"User request: {state['user_request']}\n\n"
        f"Planner instructions:\n{state['plan']}"
    )
    if state["revision_count"] > 0:
        # Include the QA findings so the revised draft addresses the reported gaps.
        briefing += f"\n\nQA feedback to address:\n{state['qa_report']}"
    draft = _llm_call(chat_model, _TECHNICAL_SYSTEM, briefing)
    state["technical_output"] = draft
    state["draft_output"] = draft  # forwarded to the QA step
    trace.append(draft)
    trace.append("╚══ [TECHNICAL EXPERT] Done ══╝")
    return state
682
+
683
+
684
def _step_qa(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """QA Tester: check the draft against the original request and success criteria."""
    trace.append("\n╔══ [QA TESTER] Reviewing output... ══╗")
    review_request = (
        f"Original user request: {state['user_request']}\n\n"
        f"Planner's plan and success criteria:\n{state['plan']}\n\n"
        f"Specialist output to review:\n{state['draft_output']}"
    )
    report = _llm_call(chat_model, _QA_SYSTEM, review_request)
    state["qa_report"] = report
    state["qa_passed"] = _qa_passed_check(report)
    verdict = "✅ PASS" if state["qa_passed"] else "❌ FAIL"
    trace.append(report)
    trace.append(f"╚══ [QA TESTER] Result: {verdict} ══╝")
    return state
699
+
700
+
701
def _step_planner_review(chat_model, state: WorkflowState, trace: List[str]) -> WorkflowState:
    """Planner: review QA feedback and either approve the result or request a revision."""
    trace.append("\n╔══ [PLANNER] Reviewing QA feedback... ══╗")
    review_input = (
        f"User request: {state['user_request']}\n\n"
        f"Plan:\n{state['plan']}\n\n"
        f"Specialist output:\n{state['draft_output']}\n\n"
        f"QA report:\n{state['qa_report']}"
    )
    review = _llm_call(chat_model, _PLANNER_REVIEW_SYSTEM, review_input)
    trace.append(review)

    if "DECISION: APPROVED" in review.upper():
        # Pull out the final answer that the Planner reproduced in full.
        _, found, answer = review.partition("FINAL ANSWER:")
        if found:
            state["final_answer"] = answer.strip()
        else:
            # Planner approved but omitted the expected FINAL ANSWER section — use draft
            trace.append(" ⚠ FINAL ANSWER section missing; using specialist draft as final answer.")
            state["final_answer"] = state["draft_output"]
        trace.append("╚══ [PLANNER] → ✅ APPROVED ══╝")
        return state

    # Planner requests a revision — adopt the revised instructions as the new plan.
    _, found, revised = review.partition("REVISED INSTRUCTIONS:")
    if found:
        state["plan"] = revised.strip()
    else:
        # Revision requested but REVISED INSTRUCTIONS section missing — keep current plan
        trace.append(" ⚠ REVISED INSTRUCTIONS section missing; retrying with existing plan.")
    state["current_role"] = _decide_role(review)
    trace.append(
        f"╚══ [PLANNER] → 🔄 REVISE — routing to {state['current_role'].upper()} EXPERT ══╝"
    )
    return state
736
+
737
+
738
# --- Specialist role tools ---
# These wrap the step functions as @tool so the Planner (or any LangChain agent)
# can invoke specialists in a standard tool-use pattern.

# Holds the active model ID for standalone specialist tool calls.
# NOTE(review): module-level mutable state written by run_multi_role_workflow —
# concurrent requests selecting different models could race on this; confirm
# the app runs single-worker or make this request-scoped (e.g. ContextVar).
_workflow_model_id: str = DEFAULT_MODEL_ID
744
+
745
+
746
@tool
def call_creative_expert(task: str) -> str:
    """Call the Creative Expert to brainstorm ideas, framing, and produce a draft for a given task."""
    chat = build_provider_chat(_workflow_model_id)
    # Minimal standalone state: the task doubles as the plan, no prior QA cycle.
    fresh: WorkflowState = {
        "user_request": task,
        "plan": task,
        "current_role": "creative",
        "creative_output": "",
        "technical_output": "",
        "draft_output": "",
        "qa_report": "",
        "qa_passed": False,
        "revision_count": 0,
        "final_answer": "",
    }
    return _step_creative(chat, fresh, [])["creative_output"]
757
+
758
+
759
@tool
def call_technical_expert(task: str) -> str:
    """Call the Technical Expert to produce implementation details and a solution for a given task."""
    chat = build_provider_chat(_workflow_model_id)
    # Minimal standalone state: the task doubles as the plan, no prior QA cycle.
    fresh: WorkflowState = {
        "user_request": task,
        "plan": task,
        "current_role": "technical",
        "creative_output": "",
        "technical_output": "",
        "draft_output": "",
        "qa_report": "",
        "qa_passed": False,
        "revision_count": 0,
        "final_answer": "",
    }
    return _step_technical(chat, fresh, [])["technical_output"]
770
+
771
+
772
@tool
def call_qa_tester(task_and_output: str) -> str:
    """Call the QA Tester to review specialist output against requirements.
    Input format: 'TASK: <description>\nOUTPUT: <specialist output to review>'"""
    chat = build_provider_chat(_workflow_model_id)
    task, marker, output = task_and_output.partition("OUTPUT:")
    if marker:
        task = task.replace("TASK:", "").strip()
        output = output.strip()
    else:
        # No structured marker — treat the whole input as both task and output.
        task = task_and_output
        output = task_and_output
    # current_role is left empty — this is a standalone QA call outside the normal loop
    fresh: WorkflowState = {
        "user_request": task,
        "plan": task,
        "current_role": "",
        "creative_output": "",
        "technical_output": "",
        "draft_output": output,
        "qa_report": "",
        "qa_passed": False,
        "revision_count": 0,
        "final_answer": "",
    }
    return _step_qa(chat, fresh, [])["qa_report"]
792
+
793
+
794
# --- Orchestration loop ---

def run_multi_role_workflow(message: str, model_id: str) -> Tuple[str, str]:
    """Run the supervisor-style multi-role workflow.

    Flow:
        1. Planner analyses the task and picks a specialist.
        2. Specialist (Creative or Technical) generates output.
        3. QA Tester reviews the output.
        4. Planner reviews QA result and either approves or requests a revision.
        5. Repeat from step 2 if QA fails and retries remain.
        6. If max retries are reached, return best attempt with QA concerns.

    Args:
        message: The user's task description.
        model_id: Model identifier passed to ``build_provider_chat``.

    Returns:
        (final_answer, workflow_trace_text)
    """
    global _workflow_model_id
    _workflow_model_id = model_id  # lets the standalone @tool wrappers reuse the model
    chat_model = build_provider_chat(model_id)

    state: WorkflowState = {
        "user_request": message,
        "plan": "",
        "current_role": "",
        "creative_output": "",
        "technical_output": "",
        "draft_output": "",
        "qa_report": "",
        "qa_passed": False,
        "revision_count": 0,
        "final_answer": "",
    }

    trace: List[str] = [
        "═══ MULTI-ROLE WORKFLOW STARTED ═══",
        f"Model : {model_id}",
        f"Request : {message}",
        f"Max revisions: {MAX_REVISIONS}",
    ]

    try:
        # Step 1: Planner creates the initial plan
        state = _step_plan(chat_model, state, trace)

        # Orchestration loop: specialist → QA → Planner review → revise if needed
        while True:
            # Step 2: invoke the chosen specialist
            if state["current_role"] == "creative":
                state = _step_creative(chat_model, state, trace)
            else:
                state = _step_technical(chat_model, state, trace)

            # Step 3: QA reviews the specialist's draft
            state = _step_qa(chat_model, state, trace)

            # Step 4: Planner reviews the QA result
            state = _step_planner_review(chat_model, state, trace)

            # Exit if the Planner approved the result
            if state["final_answer"]:
                trace.append("\n═══ WORKFLOW COMPLETE — APPROVED ═══")
                break

            # Count the completed cycle and enforce the retry limit.
            # BUGFIX: the previous `>=` comparison stopped after only
            # MAX_REVISIONS - 1 revision cycles (the trace could never reach
            # "REVISION 3 / 3"); `>` allows exactly MAX_REVISIONS revisions
            # as documented on the constant.
            state["revision_count"] += 1
            if state["revision_count"] > MAX_REVISIONS:
                state["final_answer"] = state["draft_output"]
                trace.append(
                    f"\n═══ MAX REVISIONS REACHED ({MAX_REVISIONS}) ═══\n"
                    f"Returning best attempt. Outstanding QA concerns:\n{state['qa_report']}"
                )
                break

            trace.append(f"\n═══ REVISION {state['revision_count']} / {MAX_REVISIONS} ═══")

    except Exception as exc:
        # Fail soft: surface the traceback in the trace panel and fall back to
        # the best draft produced so far (or an error string if none exists).
        trace.append(f"\n[ERROR] {exc}\n{traceback.format_exc()}")
        state["final_answer"] = state["draft_output"] or f"Workflow error: {exc}"

    return state["final_answer"], "\n".join(trace)
874
+
875
+
876
  # ============================================================
877
  # Agent builder
878
  # ============================================================
 
1111
  "with selectable tools and extended debugging."
1112
  )
1113
 
1114
+ with gr.Tabs():
 
 
 
 
 
 
 
 
 
 
1115
 
1116
+ # โ”€โ”€ Tab 1: existing single-agent demo (unchanged) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1117
+ with gr.Tab("Agent Demo"):
1118
+ with gr.Row():
1119
+ model_dropdown = gr.Dropdown(
1120
+ choices=MODEL_OPTIONS,
1121
+ value=DEFAULT_MODEL_ID,
1122
+ label="Base model",
1123
+ )
1124
+ model_status = gr.Textbox(
1125
+ value=model_status_text(DEFAULT_MODEL_ID),
1126
+ label="Model status",
1127
+ interactive=False,
1128
+ )
1129
 
1130
  with gr.Row():
1131
+ with gr.Column(scale=3):
1132
+ chatbot = gr.Chatbot(label="Conversation", height=460, type="messages")
1133
+ user_input = gr.Textbox(
1134
+ label="Message",
1135
+ placeholder="Ask anything...",
1136
+ )
1137
+
1138
+ with gr.Row():
1139
+ send_btn = gr.Button("Send", variant="primary")
1140
+ clear_btn = gr.Button("Clear")
1141
+
1142
+ chart_output = gr.Image(label="Generated chart", type="filepath")
1143
+
1144
+ with gr.Row():
1145
+ location_btn = gr.Button("๐Ÿ“ Share my location", size="sm")
1146
+ location_status = gr.Textbox(
1147
+ value="Location not set โ€” click the button above before asking 'where am I'",
1148
+ label="Location status",
1149
+ interactive=False,
1150
+ max_lines=1,
1151
+ )
1152
+
1153
+ with gr.Column(scale=1):
1154
+ enabled_tools = gr.CheckboxGroup(
1155
+ choices=TOOL_NAMES,
1156
+ value=TOOL_NAMES,
1157
+ label="Enabled tools",
1158
+ )
1159
+ tool_trace = gr.Textbox(
1160
+ label="Tool trace",
1161
+ lines=18,
1162
+ interactive=False,
1163
+ )
1164
+
1165
+ debug_output = gr.Textbox(
1166
+ label="Debug output",
1167
+ lines=28,
1168
+ interactive=False,
1169
+ )
1170
 
1171
+ # Hidden: holds "lat,lon" or "ip:<address>" set by the location button
1172
+ client_ip_box = gr.Textbox(visible=False, value="")
1173
 
1174
+ model_dropdown.change(
1175
+ fn=model_status_text,
1176
+ inputs=[model_dropdown],
1177
+ outputs=[model_status],
1178
+ show_api=False,
1179
+ )
 
 
1180
 
1181
+ # Geolocation button: JS runs in the browser, result goes to hidden box + status label
1182
+ location_btn.click(
1183
+ fn=None,
1184
+ inputs=None,
1185
+ outputs=[client_ip_box, location_status],
1186
+ js="""async () => {
1187
+ return new Promise((resolve) => {
1188
+ const fallback = async () => {
1189
+ try {
1190
+ const r = await fetch('https://api.ipify.org?format=json');
1191
+ const d = await r.json();
1192
+ resolve(['ip:' + d.ip, 'Location: IP-based fallback (approximate)']);
1193
+ } catch(e) {
1194
+ resolve(['', 'Location detection failed.']);
1195
+ }
1196
+ };
1197
+ if (!navigator.geolocation) { fallback(); return; }
1198
+ navigator.geolocation.getCurrentPosition(
1199
+ (pos) => {
1200
+ const lat = pos.coords.latitude.toFixed(5);
1201
+ const lon = pos.coords.longitude.toFixed(5);
1202
+ const acc = Math.round(pos.coords.accuracy);
1203
+ resolve([lat + ',' + lon, `\u2705 GPS/WiFi location set (\u00b1${acc}m)`]);
1204
+ },
1205
+ fallback,
1206
+ {timeout: 10000, maximumAge: 60000, enableHighAccuracy: true}
1207
+ );
1208
+ });
1209
+ }""",
1210
+ show_api=False,
1211
  )
1212
+
1213
+ send_btn.click(
1214
+ fn=run_agent,
1215
+ inputs=[user_input, chatbot, enabled_tools, model_dropdown, client_ip_box],
1216
+ outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
1217
+ show_api=False,
1218
  )
1219
 
1220
+ user_input.submit(
1221
+ fn=run_agent,
1222
+ inputs=[user_input, chatbot, enabled_tools, model_dropdown, client_ip_box],
1223
+ outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
1224
+ show_api=False,
1225
+ )
1226
 
1227
+ clear_btn.click(
1228
+ fn=lambda model_id: ([], "", "", None, model_status_text(model_id), ""),
1229
+ inputs=[model_dropdown],
1230
+ outputs=[chatbot, tool_trace, user_input, chart_output, model_status, debug_output],
1231
+ show_api=False,
1232
+ )
1233
 
1234
+ # โ”€โ”€ Tab 2: multi-role workflow demo โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1235
+ with gr.Tab("Multi-Role Workflow"):
1236
+ gr.Markdown(
1237
+ "## Supervisor-style Multi-Role Workflow\n"
1238
+ "**Planner** โ†’ **Specialist** (Creative or Technical) โ†’ **QA Tester** โ†’ **Planner review**\n\n"
1239
+ "The Planner breaks the task, picks the right specialist, and reviews QA feedback. "
1240
+ f"If QA fails, the loop repeats up to **{MAX_REVISIONS}** times before accepting the best attempt."
1241
+ )
1242
 
1243
+ with gr.Row():
1244
+ wf_model_dropdown = gr.Dropdown(
1245
+ choices=MODEL_OPTIONS,
1246
+ value=DEFAULT_MODEL_ID,
1247
+ label="Model",
1248
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1249
 
1250
+ wf_input = gr.Textbox(
1251
+ label="Task / Request",
1252
+ placeholder=(
1253
+ "Describe what you want the multi-role team to work onโ€ฆ\n"
1254
+ "e.g. 'Write a short blog post about the benefits of open-source AI'"
1255
+ ),
1256
+ lines=3,
1257
+ )
1258
+ wf_submit_btn = gr.Button("โ–ถ Run Multi-Role Workflow", variant="primary")
1259
 
1260
+ with gr.Row():
1261
+ with gr.Column(scale=2):
1262
+ wf_answer = gr.Textbox(
1263
+ label="โœ… Final Answer (Planner approved)",
1264
+ lines=14,
1265
+ interactive=False,
1266
+ )
1267
+ with gr.Column(scale=3):
1268
+ wf_trace = gr.Textbox(
1269
+ label="Workflow Trace โ€” role-by-role log",
1270
+ lines=28,
1271
+ interactive=False,
1272
+ )
1273
+
1274
+ def _run_workflow_ui(message: str, model_id: str) -> Tuple[str, str]:
1275
+ """Gradio handler: validate input, run the workflow, return outputs."""
1276
+ if not message or not message.strip():
1277
+ return "No input provided.", ""
1278
+ try:
1279
+ final_answer, trace = run_multi_role_workflow(message.strip(), model_id)
1280
+ return final_answer, trace
1281
+ except Exception as exc:
1282
+ return f"Workflow error: {exc}", traceback.format_exc()
1283
+
1284
+ wf_submit_btn.click(
1285
+ fn=_run_workflow_ui,
1286
+ inputs=[wf_input, wf_model_dropdown],
1287
+ outputs=[wf_answer, wf_trace],
1288
+ show_api=False,
1289
+ )
1290
 
1291
+ wf_input.submit(
1292
+ fn=_run_workflow_ui,
1293
+ inputs=[wf_input, wf_model_dropdown],
1294
+ outputs=[wf_answer, wf_trace],
1295
+ show_api=False,
1296
+ )
1297
 
1298
  if __name__ == "__main__":
1299
  port = int(os.environ.get("PORT", 7860))