File size: 11,796 Bytes
aa90967
 
 
 
057f972
aa90967
cdd5c49
 
 
aa90967
2ab9024
5f83d1c
e9881d4
 
baf121b
3e35b1c
5f83d1c
aa90967
78ab02b
 
ecd2178
78ab02b
ebbf3cb
cbbac96
cdd5c49
60a8793
cdd5c49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05f6f94
cdd5c49
 
 
 
 
 
 
05f6f94
cdd5c49
 
 
 
 
05f6f94
cdd5c49
60a8793
aa90967
 
cdd5c49
aa90967
 
 
 
463e06e
 
 
aa90967
cdd5c49
 
aa90967
 
9ba1127
 
cdd5c49
 
aa90967
 
9ba1127
 
cdd5c49
 
aa90967
 
9ba1127
 
cdd5c49
 
aa90967
 
9ba1127
 
cdd5c49
 
7cac1b6
12e39ca
f2956bf
 
12e39ca
9ba1127
cdd5c49
 
7cac1b6
cdd5c49
 
 
aa90967
 
4d08f8e
 
17f8ea6
 
 
aa90967
17f8ea6
 
 
 
cdd5c49
 
 
 
 
 
 
 
 
 
 
 
60a8793
cdd5c49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa90967
9ba1127
 
cdd5c49
aa90967
 
 
 
 
4542454
aa90967
 
 
a051fa3
 
 
 
 
aa90967
 
e4fd2d1
0567f30
c0e4df1
80ff929
 
c3efb0a
057f972
 
 
4542454
e4fd2d1
78ab02b
61e315b
cfa2c31
61e315b
12eeddf
aa90967
3ec9c21
e90befc
23bea4b
4e65386
23bea4b
e90befc
c87003c
378a750
 
c87003c
aa90967
cd8d999
aa90967
c87003c
cd8d999
 
aa90967
 
23bea4b
aa90967
3ec9c21
 
aa90967
e90befc
812da35
f7e408f
f04302b
 
 
78ab02b
f04302b
 
 
 
 
 
f46b7b4
f04302b
404c1fe
df4336e
6c2487d
f04302b
 
 
 
ce924a8
1c2f67f
df4336e
6c2487d
f04302b
 
 
95cb6a8
df4336e
6c2487d
09e8d5f
f04302b
 
 
 
 
 
 
 
 
 
416f8f9
404c1fe
 
f04302b
 
 
 
416f8f9
404c1fe
 
f04302b
 
 
 
 
416f8f9
404c1fe
 
f04302b
6e4a927
 
f04302b
 
 
 
 
df4336e
6c2487d
d6b79a6
057f972
cdd5c49
60a8793
057f972
60a8793
c3efb0a
cdd5c49
 
60a8793
 
33214b0
 
 
 
 
f04302b
 
36526e4
60a8793
f04302b
 
 
 
 
 
 
 
c72df5c
f04302b
 
5f4828e
0994767
057f972
f04302b
 
fc406e5
f04302b
 
5f4828e
0994767
057f972
f04302b
 
9444ba7
f04302b
 
5f4828e
0994767
057f972
f04302b
 
a9675c5
f04302b
 
5f4828e
0994767
057f972
f04302b
 
4bde233
26ab2ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# References:

# https://www.gradio.app/guides/quickstart

import gradio.utils, os, sys
import gradio as gr
import threading
import queue
import time
from agents.crew import run_crew
from huggingface_hub import whoami
from utils.utils import (
    DATASET_TYPE_GAIA,
    DATASET_TYPE_HLE,
    get_dataset,
    validate_input
)

# Configuration

# Hugging Face Spaces injects SPACE_ID as "<owner>/<space-name>"; None when running locally.
SPACE_ID = os.environ.get("SPACE_ID")

# Base URL for linking to files inside this Space's repository.
# NOTE(review): with SPACE_ID unset this renders ".../spaces/None/blob/main" — confirm local runs don't surface these links.
BASE_URL = f"https://huggingface.co/spaces/{SPACE_ID}/blob/main"

# Streaming console output capture

class StreamingCapture:
    """Tee sys.stdout/sys.stderr into a queue so console output can be polled incrementally.

    Usage: start() swaps the process streams for tee writers, get_new_output()
    drains whatever has been written since the last call, stop() restores the
    original streams.
    """

    def __init__(self):
        self.queue = queue.Queue()      # chunks of captured text, in write order
        self.old_stdout = None          # original stream, saved by start()
        self.old_stderr = None
        self.stopped = False            # set by stop(); informational flag only

    def start(self):
        """Replace sys.stdout/sys.stderr with writers that tee into the queue."""
        self.old_stdout = sys.stdout
        self.old_stderr = sys.stderr

        class _TeeWriter:
            """Minimal file-like object: forwards writes to the original stream and the queue."""

            def __init__(self, original, sink):
                self._original = original
                self._sink = sink

            def write(self, text):
                # Skip empty writes so the queue only holds real output.
                if text:
                    self._sink.put(text)
                    self._original.write(text)

            def flush(self):
                self._original.flush()

        sys.stdout = _TeeWriter(self.old_stdout, self.queue)
        sys.stderr = _TeeWriter(self.old_stderr, self.queue)

    def stop(self):
        """Restore the original streams saved by start()."""
        self.stopped = True

        if self.old_stdout:
            sys.stdout = self.old_stdout
        if self.old_stderr:
            sys.stderr = self.old_stderr

    def get_new_output(self):
        """Drain everything currently queued and return it as a single string."""
        chunks = []

        # Pull until the queue reports empty; get_nowait never blocks.
        while True:
            try:
                chunks.append(self.queue.get_nowait())
            except queue.Empty:
                break

        return "".join(chunks)

# MCP server functions

def ask(oauth_token: gr.OAuthToken, question: str, openai_api_key: str, gemini_api_key: str, anthropic_api_key: str, file_name: str = ""):
    """
    Ask General AI Assistant a question to answer.
    Args:
        oauth_token (gr.OAuthToken): OAuth token injected by Gradio for the logged-in user
        question (str): The question to answer
        openai_api_key (str): OpenAI API key (always used); "*" selects the sponsored key
        gemini_api_key (str): Gemini API key (always used); "*" selects the sponsored key
        anthropic_api_key (str): Anthropic API key (only used by Stagehand tool); "*" selects the sponsored key
        file_name (str): Optional file name, resolved under the files/ directory
    Yields:
        tuple: (answer, console_logs) - The answer to the question and captured console output (streamed)
    """
    # Required-field checks: warn in the UI and surface the message in the logs pane.
    required = (
        (question, "Question is required."),
        (openai_api_key, "OpenAI API Key is required."),
        (gemini_api_key, "Gemini API Key is required."),
        (anthropic_api_key, "Anthropic API Key is required."),
    )
    for value, msg in required:
        if not value:
            gr.Warning(msg)
            yield None, msg
            return

    if not validate_input(question, openai_api_key, gemini_api_key, anthropic_api_key):
        msg = "Invalid input"
        gr.Warning(msg)
        yield None, msg
        return

    # Task files live under files/ in the Space repository.
    task_file_name = f"files/{file_name}" if file_name else file_name

    try:
        # "*" means "use the sponsored key from the environment" (sponsor credit).
        openai_key = openai_api_key if openai_api_key != "*" else os.environ.get("OPENAI_API_KEY")
        gemini_key = gemini_api_key if gemini_api_key != "*" else os.environ.get("GEMINI_API_KEY")
        anthropic_key = anthropic_api_key if anthropic_api_key != "*" else os.environ.get("ANTHROPIC_API_KEY")

        # Only set variables that resolved to a real key: os.environ raises
        # TypeError for None values (sponsored key requested but env var unset).
        for env_name, key in (
            ("OPENAI_API_KEY", openai_key),
            ("GEMINI_API_KEY", gemini_key),
            ("ANTHROPIC_API_KEY", anthropic_key),
        ):
            if key:
                os.environ[env_name] = key

        # Tee stdout/stderr so the crew's console output can be streamed to the UI.
        capture = StreamingCapture()
        capture.start()

        result = {"answer": None, "error": None}
        all_logs = ""

        def run_task():
            # Runs in a worker thread; errors are reported via `result`, not raised.
            try:
                result["answer"] = run_crew(question, task_file_name)
            except Exception as e:
                result["error"] = str(e)

        # Run in background thread
        thread = threading.Thread(target=run_task)
        thread.start()

        try:
            # Stream logs while waiting; answer stays None until the crew finishes.
            while thread.is_alive():
                new_output = capture.get_new_output()
                if new_output:
                    all_logs += new_output
                    yield None, all_logs
                time.sleep(0.1)  # Poll interval

            thread.join()
        finally:
            # Always restore stdout/stderr — even if the generator is abandoned
            # mid-stream (GeneratorExit) or an unexpected error escapes the loop.
            capture.stop()

        # Drain any output emitted between the last poll and thread exit.
        new_output = capture.get_new_output()
        if new_output:
            all_logs += new_output

        if result["error"]:
            gr.Warning(result["error"])
            yield None, all_logs + f"\n\nError: {result['error']}"
        else:
            yield result["answer"], all_logs

    except Exception as e:
        msg = str(e)
        gr.Warning(msg)
        yield None, f"Error: {msg}"

# Helper functions

def update_file_link(file_name):
    """Return an HTML anchor to the task file in the Space repo, or "" when no file is set."""
    if not file_name:
        return ""

    return f"<a href='{BASE_URL}/files/{file_name}' target='_blank'>Open File</a>"

def watchfn(*args, **kwargs):
    # Intentional no-op: accepts and discards any arguments.
    pass

# Replace Gradio's Spaces watcher hook with the no-op above.
# NOTE(review): depends on the private attribute name `gradio.utils.watchfn_spaces`
# — verify it still exists after Gradio version upgrades.
gradio.utils.watchfn_spaces = watchfn

# Graphical user interface

# Intro text rendered as Markdown/HTML at the top of the app.
DESCRIPTION = (
    f"Prototype <strong>multi-agent AI platform</strong> with high autonomy, "
    f"including code generation & execution, browser automation, and multi-modal reasoning. "
    f"The system can solve multiple <a href='https://arxiv.org/pdf/2311.12983'>GAIA Benchmark</a> "
    f"Level 1, 2, 3 and even <a href='https://arxiv.org/pdf/2501.14249'>Humanity's Last Exam</a> "
    f"problems. To get started, select from the examples below. "
    f"Processing can take minutes depending on question complexity. "
    f"Console logs are provided below for transparency. "
    f"API keys are provided temporarily thanks to sponsor credit. "
    # Fixed: removed a stray closing </p> that had no matching opening <p> tag.
    f"<a href='{BASE_URL}/README.md'>Documentation</a>"
)

# Default example shown in the question box, with its expected (ground-truth) answer.
DEFAULT_QUESTION = "How many public GitHub repos does the person who submitted the 'General AI Assistant' solution in MCP's 1st Birthday Hackathon have?"

DEFAULT_GROUND_TRUTH = "18"

# CSS injected via gr.HTML to stretch the app to the full viewport width;
# .content-padding restores horizontal padding for the inner column.
CSS_FULL_WIDTH = """
<style>
html,
body,
main,
.gradio-app {
    width: 100% !important;
    max-width: 100% !important;
    margin: 0 !important;
    padding: 0 !important;
    overflow-x: hidden !important;
}
.full-width-app {
    width: 100% !important;
    max-width: 100% !important;
    margin: 0 !important;
    padding: 0 !important;
}
.content-padding {
    padding: 0 1.5rem 0;
}
</style>
"""

# Top-level UI layout. Component variables created here are captured below for
# event wiring (submit_btn.click, file_name.change) and for the Examples tabs.
with gr.Blocks(elem_classes=["full-width-app"]) as gaia:
    # Inject the full-width CSS overrides defined in CSS_FULL_WIDTH.
    gr.HTML(CSS_FULL_WIDTH)

    with gr.Column(elem_classes=["content-padding"]):
        gr.Markdown("## General AI Assistant")

        gr.Markdown(DESCRIPTION)
        
        with gr.Row():
            # Left column: inputs (question, ground truth, file, API keys).
            with gr.Column(scale=3):
                with gr.Row():
                    question = gr.Textbox(
                        label="Question *",
                        value=DEFAULT_QUESTION,
                        interactive=True,
                        max_length = 500,
                        lines=1,
                        max_lines=5
                    )
                with gr.Row():
                    # Ground truth and file name are read-only; they are filled
                    # by selecting an example below.
                    ground_truth = gr.Textbox(
                        label="Ground Truth",
                        value=DEFAULT_GROUND_TRUTH,
                        interactive=False,
                        lines=1,
                        max_lines=2
                    )
                    file_name = gr.Textbox(
                        label="File Name",
                        interactive=False,
                        lines=1,
                        max_lines=2,
                        scale=2
                    )
                    file_link = gr.HTML(
                        label="File Link",
                        value=""
                    )
                with gr.Row():
                    # Key boxes default to "*", which ask() maps to the sponsored
                    # keys from the environment.
                    openai_api_key = gr.Textbox(
                        label="OpenAI API Key *",
                        type="password",
                        # NOTE(review): placeholder contains a non-ASCII hyphen
                        # (U+2011) — probably meant "sk-..."; confirm intended.
                        placeholder="sk‑...",
                        value="*", # API keys are provided thanks to sponsor credit
                        interactive=True,
                        max_length = 150
                    )
                    gemini_api_key = gr.Textbox(
                        label="Gemini API Key *",
                        type="password",
                        value="*", # API keys are provided thanks to sponsor credit
                        interactive=True,
                        max_length = 150
                    )
                    anthropic_api_key = gr.Textbox(
                        label="Anthropic API Key *",
                        type="password",
                        placeholder="sk-ant-...",
                        value="*", # API keys are provided thanks to sponsor credit
                        interactive=True,
                        max_length = 150
                    )
                with gr.Row():
                    gr.LoginButton()
                    submit_btn = gr.Button("Submit", variant="primary")
            # Right column: the answer produced by ask().
            with gr.Column(scale=1):
                answer = gr.Textbox(
                    label="Answer",
                    interactive=False,
                    lines=1,
                    max_lines=5
                )

        # Streamed console output from ask() lands here (autoscroll keeps the
        # newest lines visible).
        with gr.Accordion("Console Logs", open=True):
            console_logs = gr.Textbox(
                label="Output",
                interactive=False,
                lines=25,
                max_lines=25,
                autoscroll=True
            )
        
        with gr.Row():
            clear_btn = gr.ClearButton(
                components=[question, ground_truth, file_name, file_link, answer, console_logs]
            )

        # ask() declares an extra leading gr.OAuthToken parameter; it is not in
        # this inputs list — presumably Gradio injects it from the login session
        # based on the type annotation. TODO confirm against Gradio OAuth docs.
        submit_btn.click(
            fn=ask,
            inputs=[question, openai_api_key, gemini_api_key, anthropic_api_key, file_name],
            outputs=[answer, console_logs]
        )

        # Keep the "Open File" link in sync with the selected example's file.
        file_name.change(
            fn=update_file_link,
            inputs=[file_name],
            outputs=[file_link]
        )

        # Example datasets per benchmark tier; selecting a row populates the
        # question, ground-truth and file-name boxes.
        # NOTE(review): the "" entries in `inputs` are not components — verify
        # Gradio accepts them as placeholders for unused example columns.
        with gr.Tabs():
            with gr.TabItem("GAIA Benchmark Level 1"):
                gr.Examples(
                    examples=get_dataset(DATASET_TYPE_GAIA, 1),
                    inputs=[question, ground_truth, file_name, "", "", ""],
                    examples_per_page=5,
                    cache_examples=False
                )

            with gr.TabItem("GAIA Benchmark Level 2"):
                gr.Examples(
                    examples=get_dataset(DATASET_TYPE_GAIA, 2),
                    inputs=[question, ground_truth, file_name, "", "", ""],
                    examples_per_page=5,
                    cache_examples=False
                )

            with gr.TabItem("GAIA Benchmark Level 3"):
                gr.Examples(
                    examples=get_dataset(DATASET_TYPE_GAIA, 3),
                    inputs=[question, ground_truth, file_name, "", "", ""],
                    examples_per_page=5,
                    cache_examples=False
                )

            with gr.TabItem("Humanity's Last Exam"):
                gr.Examples(
                    examples=get_dataset(DATASET_TYPE_HLE, 0),
                    inputs=[question, ground_truth, file_name, "", "", ""],
                    examples_per_page=5,
                    cache_examples=False
                )

# Serve the app; mcp_server=True also exposes ask() as an MCP tool.
gaia.launch(mcp_server=True, ssr_mode=False)