Spaces:

antonisbast
/

clue-agent

Sleeping

antonisbast Claude Opus 4.6 committed on Mar 11

Commit

66c7278

1 Parent(s): a729a7d

Add Gradio app for Hugging Face Spaces deployment

- Add app.py: Gradio UI using WaldiezRunner to run multi-agent pipeline
- Add HF Spaces YAML frontmatter to README.md
- Add gradio to requirements.txt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show

README.md +12 -0
app.py +217 -0
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -1,3 +1,15 @@
 # Multi-Agent System for Temporal Clue
 A multi-agent AI system that solves [Temporal Clue](https://github.com/bradhilton/temporal-clue) murder mystery logic puzzles using 3 specialized agents coordinated via group chat. Built with [AG2 (AutoGen v2)](https://github.com/ag2ai/ag2) and [Waldiez Studio](https://waldiez.github.io/), the system achieves **38.0% mean accuracy** — a **+7.9% improvement** over the single-agent baseline.

+---
+title: Temporal Clue Multi-Agent Solver
+emoji: 🔍
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: 6.9.0
+app_file: app.py
+pinned: false
+license: mit
+---
 # Multi-Agent System for Temporal Clue
 A multi-agent AI system that solves [Temporal Clue](https://github.com/bradhilton/temporal-clue) murder mystery logic puzzles using 3 specialized agents coordinated via group chat. Built with [AG2 (AutoGen v2)](https://github.com/ag2ai/ag2) and [Waldiez Studio](https://waldiez.github.io/), the system achieves **38.0% mean accuracy** — a **+7.9% improvement** over the single-agent baseline.

app.py ADDED Viewed

	@@ -0,0 +1,217 @@

+import os
+import sys
+import io
+import re
+import ast
+import tempfile
+import pandas as pd
+import gradio as gr
+from pathlib import Path
+from waldiez import WaldiezRunner
+DATA_FILE = "train_with_baselines.csv"
+FLOW_FILE = "Clue_v5.waldiez"
+df = pd.read_csv(DATA_FILE)
+puzzle_choices = [f"Puzzle {i + 1}" for i in range(len(df))]
+def extract_answer(text: str) -> dict | None:
+    """Extract answer dictionary from agent output text."""
+    pattern = r"\{[^{}]*['\"]A['\"]\s*:"
+    for match in re.finditer(pattern, text):
+        start = match.start()
+        depth = 0
+        for i in range(start, len(text)):
+            if text[i] == "{":
+                depth += 1
+            elif text[i] == "}":
+                depth -= 1
+                if depth == 0:
+                    try:
+                        return ast.literal_eval(text[start : i + 1])
+                    except (ValueError, SyntaxError):
+                        break
+    return None
+def parse_conversation(output_text: str) -> list[dict]:
+    """Parse AG2 stdout into chatbot messages."""
+    messages = []
+    agent_pattern = re.compile(r"^([\w_ ]+?)\s+\(to\s+([\w_ ]+?)\):\s*$")
+    separator_pattern = re.compile(r"^-{10,}$")
+    current_sender = None
+    current_content: list[str] = []
+    for line in output_text.split("\n"):
+        m = agent_pattern.match(line)
+        if m:
+            if current_sender and current_content:
+                content = "\n".join(current_content).strip()
+                if content:
+                    messages.append(
+                        {
+                            "role": "assistant",
+                            "content": f"**{current_sender}:**\n\n{content}",
+                        }
+                    )
+            current_sender = m.group(1)
+            current_content = []
+        elif separator_pattern.match(line.strip()):
+            if current_sender and current_content:
+                content = "\n".join(current_content).strip()
+                if content:
+                    messages.append(
+                        {
+                            "role": "assistant",
+                            "content": f"**{current_sender}:**\n\n{content}",
+                        }
+                    )
+            current_sender = None
+            current_content = []
+        elif current_sender is not None:
+            current_content.append(line)
+    if current_sender and current_content:
+        content = "\n".join(current_content).strip()
+        if content:
+            messages.append(
+                {"role": "assistant", "content": f"**{current_sender}:**\n\n{content}"}
+            )
+    return messages
+def solve_puzzle(puzzle_idx: str, progress=gr.Progress()):
+    """Run the multi-agent pipeline on the selected puzzle."""
+    if not os.environ.get("NIM_API_KEY"):
+        return (
+            [
+                {
+                    "role": "assistant",
+                    "content": (
+                        "**Error:** `NIM_API_KEY` not set. "
+                        "Add it as a Space secret (Settings > Secrets)."
+                    ),
+                }
+            ],
+            pd.DataFrame(),
+            "No API key",
+        )
+    idx = int(puzzle_idx.split(" ")[1]) - 1
+    question = df.iloc[idx]["question"]
+    ground_truth = ast.literal_eval(df.iloc[idx]["ground_truth"])
+    progress(0.1, desc="Loading workflow...")
+    captured = io.StringIO()
+    old_stdout = sys.stdout
+    tmp_path = None
+    try:
+        sys.stdout = captured
+        runner = WaldiezRunner.load(Path(FLOW_FILE))
+        progress(0.2, desc="Agents working (this takes ~60s)...")
+        tmp_fd, tmp_path = tempfile.mkstemp(suffix=".py", dir=".")
+        os.close(tmp_fd)
+        result = runner.run(output_path=tmp_path, message=question)
+    except Exception as e:
+        result = None
+        error_msg = str(e)
+    finally:
+        sys.stdout = old_stdout
+        if tmp_path:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+    progress(0.9, desc="Parsing results...")
+    conv_text = captured.getvalue()
+    chat_messages = parse_conversation(conv_text)
+    if not chat_messages and conv_text.strip():
+        chat_messages = [
+            {"role": "assistant", "content": conv_text[:3000]}
+        ]
+    if result is None:
+        chat_messages.append(
+            {"role": "assistant", "content": f"**Pipeline error:** {error_msg}"}
+        )
+        return chat_messages, pd.DataFrame(), "Error"
+    prediction = extract_answer(str(result))
+    rows = []
+    correct = 0
+    total = 0
+    for key in sorted(ground_truth.keys()):
+        gt_val = ground_truth[key]
+        pred_val = prediction.get(key, "\u2014") if prediction else "\u2014"
+        match = (
+            pred_val.strip() == gt_val.strip()
+            if prediction and pred_val != "\u2014"
+            else False
+        )
+        if match:
+            correct += 1
+        total += 1
+        rows.append(
+            {
+                "Letter": key,
+                "Predicted": pred_val,
+                "Ground Truth": gt_val,
+                "Match": "\u2713" if match else "\u2717",
+            }
+        )
+    results_df = pd.DataFrame(rows)
+    accuracy = f"{correct}/{total} ({100 * correct / total:.1f}%)"
+    return chat_messages, results_df, accuracy
+# Build the Gradio UI: an intro, a puzzle selector with a Solve button, and a
+# results row holding the agent conversation, the accuracy and the table.
+with gr.Blocks(title="Temporal Clue Multi-Agent Solver") as demo:
+    gr.Markdown(
+        "# Temporal Clue Multi-Agent Solver\n\n"
+        "A multi-agent system that solves murder mystery logic puzzles from the "
+        "[Temporal Clue](https://github.com/bradhilton/temporal-clue) benchmark. "
+        "Three agents &mdash; **Evidence Analyst** (7B), **Detective** (70B + spatial tool), "
+        "and **Format Agent** (7B) &mdash; collaborate via round-robin group chat.\n\n"
+        "Select a puzzle and click **Solve** to run the pipeline live."
+    )
+    # Input row: puzzle selector (defaults to the first puzzle) and the button.
+    with gr.Row():
+        puzzle_dropdown = gr.Dropdown(
+            choices=puzzle_choices,
+            value=puzzle_choices[0],
+            label="Select Puzzle",
+        )
+        solve_btn = gr.Button("Solve", variant="primary", scale=0)
+    # Output row: conversation in the wider column, scoring in the narrower.
+    with gr.Row():
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(
+                label="Agent Conversation",
+                height=500,
+            )
+        with gr.Column(scale=1):
+            accuracy_display = gr.Textbox(label="Accuracy", interactive=False)
+            results_table = gr.Dataframe(label="Results")
+    # The output order matches the tuple returned by solve_puzzle.
+    # NOTE(review): solve_puzzle swaps the process-wide sys.stdout while it
+    # runs, so concurrency_limit=1 looks required here; confirm before raising.
+    solve_btn.click(
+        fn=solve_puzzle,
+        inputs=[puzzle_dropdown],
+        outputs=[chatbot, results_table, accuracy_display],
+        concurrency_limit=1,
+    )
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ waldiez
 pandas
 openpyxl
 jupyter

 pandas
 openpyxl
 jupyter
+gradio