antonisbast Claude Opus 4.6 committed on
Commit
66c7278
·
1 Parent(s): a729a7d

Add Gradio app for Hugging Face Spaces deployment

Browse files

- Add app.py: Gradio UI using WaldiezRunner to run multi-agent pipeline
- Add HF Spaces YAML frontmatter to README.md
- Add gradio to requirements.txt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. README.md +12 -0
  2. app.py +217 -0
  3. requirements.txt +1 -0
README.md CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Multi-Agent System for Temporal Clue
2
 
3
  A multi-agent AI system that solves [Temporal Clue](https://github.com/bradhilton/temporal-clue) murder mystery logic puzzles using 3 specialized agents coordinated via group chat. Built with [AG2 (AutoGen v2)](https://github.com/ag2ai/ag2) and [Waldiez Studio](https://waldiez.github.io/), the system achieves **38.0% mean accuracy** — a **+7.9% improvement** over the single-agent baseline.
 
1
+ ---
2
+ title: Temporal Clue Multi-Agent Solver
3
+ emoji: 🔍
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 6.9.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
  # Multi-Agent System for Temporal Clue
14
 
15
  A multi-agent AI system that solves [Temporal Clue](https://github.com/bradhilton/temporal-clue) murder mystery logic puzzles using 3 specialized agents coordinated via group chat. Built with [AG2 (AutoGen v2)](https://github.com/ag2ai/ag2) and [Waldiez Studio](https://waldiez.github.io/), the system achieves **38.0% mean accuracy** — a **+7.9% improvement** over the single-agent baseline.
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import io
4
+ import re
5
+ import ast
6
+ import tempfile
7
+
8
+ import pandas as pd
9
+ import gradio as gr
10
+ from pathlib import Path
11
+ from waldiez import WaldiezRunner
12
+
13
# Input artefacts, given as bare file names (resolved against the working
# directory of the process).
DATA_FILE = "train_with_baselines.csv"
FLOW_FILE = "Clue_v5.waldiez"

# Puzzle table, loaded once at import time. solve_puzzle reads the
# "question" and "ground_truth" columns of the selected row.
df = pd.read_csv(DATA_FILE)

# Dropdown labels are 1-based ("Puzzle 1" is row 0); solve_puzzle converts the
# label back into a row index by subtracting one.
puzzle_choices = [f"Puzzle {number}" for number in range(1, len(df) + 1)]
18
+
19
+
20
+ def extract_answer(text: str) -> dict | None:
21
+ """Extract answer dictionary from agent output text."""
22
+ pattern = r"\{[^{}]*['\"]A['\"]\s*:"
23
+ for match in re.finditer(pattern, text):
24
+ start = match.start()
25
+ depth = 0
26
+ for i in range(start, len(text)):
27
+ if text[i] == "{":
28
+ depth += 1
29
+ elif text[i] == "}":
30
+ depth -= 1
31
+ if depth == 0:
32
+ try:
33
+ return ast.literal_eval(text[start : i + 1])
34
+ except (ValueError, SyntaxError):
35
+ break
36
+ return None
37
+
38
+
39
+ def parse_conversation(output_text: str) -> list[dict]:
40
+ """Parse AG2 stdout into chatbot messages."""
41
+ messages = []
42
+ agent_pattern = re.compile(r"^([\w_ ]+?)\s+\(to\s+([\w_ ]+?)\):\s*$")
43
+ separator_pattern = re.compile(r"^-{10,}$")
44
+
45
+ current_sender = None
46
+ current_content: list[str] = []
47
+
48
+ for line in output_text.split("\n"):
49
+ m = agent_pattern.match(line)
50
+ if m:
51
+ if current_sender and current_content:
52
+ content = "\n".join(current_content).strip()
53
+ if content:
54
+ messages.append(
55
+ {
56
+ "role": "assistant",
57
+ "content": f"**{current_sender}:**\n\n{content}",
58
+ }
59
+ )
60
+ current_sender = m.group(1)
61
+ current_content = []
62
+ elif separator_pattern.match(line.strip()):
63
+ if current_sender and current_content:
64
+ content = "\n".join(current_content).strip()
65
+ if content:
66
+ messages.append(
67
+ {
68
+ "role": "assistant",
69
+ "content": f"**{current_sender}:**\n\n{content}",
70
+ }
71
+ )
72
+ current_sender = None
73
+ current_content = []
74
+ elif current_sender is not None:
75
+ current_content.append(line)
76
+
77
+ if current_sender and current_content:
78
+ content = "\n".join(current_content).strip()
79
+ if content:
80
+ messages.append(
81
+ {"role": "assistant", "content": f"**{current_sender}:**\n\n{content}"}
82
+ )
83
+
84
+ return messages
85
+
86
+
87
def _score_prediction(
    ground_truth: dict, prediction: dict | None
) -> tuple[list[dict], int]:
    """Compare ``prediction`` with ``ground_truth`` key by key; return the table rows and the number of matches."""
    missing = "\u2014"  # em dash shown when the prediction has no value for a key
    rows = []
    correct = 0
    for key in sorted(ground_truth):
        gt_val = ground_truth[key]
        pred_val = prediction.get(key, missing) if prediction else missing
        # Compare as stripped strings so non-string values (numbers, None)
        # are scored instead of raising AttributeError on .strip().
        is_match = bool(
            prediction
            and pred_val != missing
            and str(pred_val).strip() == str(gt_val).strip()
        )
        if is_match:
            correct += 1
        rows.append(
            {
                "Letter": key,
                "Predicted": pred_val,
                "Ground Truth": gt_val,
                "Match": "\u2713" if is_match else "\u2717",
            }
        )
    return rows, correct


def solve_puzzle(puzzle_idx: str, progress=gr.Progress()):
    """Run the multi-agent pipeline on the selected puzzle.

    Args:
        puzzle_idx: Dropdown label of the form ``"Puzzle <n>"`` with a 1-based
            ``n``; it selects row ``n - 1`` of the module-level ``df``.
        progress: Gradio progress tracker used to report coarse stages.

    Returns:
        A 3-tuple ``(chat_messages, results_df, accuracy)``:

        * ``chat_messages``: list of chatbot message dictionaries parsed from
          the stdout captured during the run, plus an error message when the
          pipeline failed;
        * ``results_df``: one row per ground-truth key with the predicted
          value, the expected value and a match mark (empty on error);
        * ``accuracy``: ``"<correct>/<total> (<pct>%)"``, or a short status
          string (``"No API key"`` / ``"Error"``) when nothing was scored.
    """
    if not os.environ.get("NIM_API_KEY"):
        return (
            [
                {
                    "role": "assistant",
                    "content": (
                        "**Error:** `NIM_API_KEY` not set. "
                        "Add it as a Space secret (Settings > Secrets)."
                    ),
                }
            ],
            pd.DataFrame(),
            "No API key",
        )

    idx = int(puzzle_idx.split(" ")[1]) - 1
    row = df.iloc[idx]
    question = row["question"]
    ground_truth = ast.literal_eval(row["ground_truth"])

    progress(0.1, desc="Loading workflow...")

    captured = io.StringIO()
    old_stdout = sys.stdout
    tmp_path = None
    result = None
    # Pre-set so the error branch below has a message even when the runner
    # returns None without raising.
    error_msg = "Pipeline returned no result."

    try:
        # The conversation is read back from stdout. sys.stdout is
        # process-global, so it is always restored in the finally clause.
        sys.stdout = captured
        runner = WaldiezRunner.load(Path(FLOW_FILE))

        progress(0.2, desc="Agents working (this takes ~60s)...")

        tmp_fd, tmp_path = tempfile.mkstemp(suffix=".py", dir=".")
        os.close(tmp_fd)

        result = runner.run(output_path=tmp_path, message=question)
    except Exception as e:
        # Boundary handler: any pipeline failure is reported in the UI.
        result = None
        error_msg = str(e)
    finally:
        sys.stdout = old_stdout
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup of the temporary output file.
                pass

    progress(0.9, desc="Parsing results...")

    conv_text = captured.getvalue()
    chat_messages = parse_conversation(conv_text)
    if not chat_messages and conv_text.strip():
        # Nothing recognisable was parsed: show the (truncated) raw output.
        chat_messages = [{"role": "assistant", "content": conv_text[:3000]}]

    if result is None:
        chat_messages.append(
            {"role": "assistant", "content": f"**Pipeline error:** {error_msg}"}
        )
        return chat_messages, pd.DataFrame(), "Error"

    prediction = extract_answer(str(result))
    rows, correct = _score_prediction(ground_truth, prediction)
    total = len(rows)

    results_df = pd.DataFrame(rows)
    if total:
        accuracy = f"{correct}/{total} ({100 * correct / total:.1f}%)"
    else:
        # Empty ground truth: avoid dividing by zero.
        accuracy = "0/0 (n/a)"

    return chat_messages, results_df, accuracy
179
+
180
+
181
# UI layout: a puzzle selector with a Solve button on top, the agent
# conversation on the left, and the accuracy summary and per-key results table
# on the right.
with gr.Blocks(title="Temporal Clue Multi-Agent Solver") as demo:
    gr.Markdown(
        "# Temporal Clue Multi-Agent Solver\n\n"
        "A multi-agent system that solves murder mystery logic puzzles from the "
        "[Temporal Clue](https://github.com/bradhilton/temporal-clue) benchmark. "
        "Three agents &mdash; **Evidence Analyst** (7B), **Detective** (70B + spatial tool), "
        "and **Format Agent** (7B) &mdash; collaborate via round-robin group chat.\n\n"
        "Select a puzzle and click **Solve** to run the pipeline live."
    )

    with gr.Row():
        puzzle_dropdown = gr.Dropdown(
            choices=puzzle_choices,
            # Guard against an empty puzzle list so that building the UI does
            # not raise IndexError at import time.
            value=puzzle_choices[0] if puzzle_choices else None,
            label="Select Puzzle",
        )
        solve_btn = gr.Button("Solve", variant="primary", scale=0)

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="Agent Conversation",
                height=500,
            )
        with gr.Column(scale=1):
            accuracy_display = gr.Textbox(label="Accuracy", interactive=False)
            results_table = gr.Dataframe(label="Results")

    # Output order must match the tuple returned by solve_puzzle:
    # (chat messages, results table, accuracy string).
    # solve_puzzle swaps the process-global sys.stdout while it runs, so only
    # one run is allowed at a time.
    solve_btn.click(
        fn=solve_puzzle,
        inputs=[puzzle_dropdown],
        outputs=[chatbot, results_table, accuracy_display],
        concurrency_limit=1,
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt CHANGED
@@ -2,3 +2,4 @@ waldiez
2
  pandas
3
  openpyxl
4
  jupyter
 
 
2
  pandas
3
  openpyxl
4
  jupyter
5
+ gradio