"""Gradio app that answers GAIA validation questions with a FLAN-T5 baseline."""

import json
import logging
import os

import gradio as gr
from transformers import pipeline

# Location of the JSON file holding the validation questions.
DATA_PATH = "/data/gaia_validation_questions.json"

# Shared text2text-generation pipeline, constructed once when the module
# loads and reused for every question (lightweight model).
qa_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    max_new_tokens=64,
)


def solve_question(question: str) -> str:
    """Answer one question with the shared text2text pipeline.

    Very simple baseline solver: the question is passed to ``qa_pipeline``
    unchanged and the first generated candidate is returned with surrounding
    whitespace stripped.

    Args:
        question: Question text to feed to the model.

    Returns:
        The generated answer, or the literal string ``"unknown"`` if the
        pipeline call or the lookup into its result raises.
    """
    try:
        result = qa_pipeline(question)
        return result[0]["generated_text"].strip()
    except Exception:
        # Best-effort by design: one failing question must not abort the whole
        # run. Log the traceback so failures are visible instead of being
        # silently turned into "unknown".
        logging.getLogger(__name__).exception(
            "Failed to answer question %.80r; returning 'unknown'", question
        )
        return "unknown"


def run_evaluation() -> dict:
    """Answer every question in the dataset file.

    Loads the JSON document at ``DATA_PATH``, iterates over its items, and
    calls ``solve_question`` on each item's ``"question"`` value.

    Returns:
        A dict mapping each item's ``"id"`` to the answer string returned by
        ``solve_question``. If an id occurs more than once, the last
        occurrence wins.

    Raises:
        OSError: If the file at ``DATA_PATH`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an item lacks an ``"id"`` or ``"question"`` key.
    """
    # JSON text is UTF-8 by specification; decode it explicitly rather than
    # relying on the platform's locale-dependent default encoding.
    with open(DATA_PATH, encoding="utf-8") as f:
        data = json.load(f)

    return {item["id"]: solve_question(item["question"]) for item in data}


def submit():
    """Produce the submission payload shown in the UI.

    Thin entry point that delegates all work to ``run_evaluation``.

    Returns:
        The ``{question_id: answer}`` dict built by ``run_evaluation``.
    """
    answers = run_evaluation()
    return answers


# Single-page UI: a heading, one trigger button, and a JSON viewer. Components
# are created in display order.
with gr.Blocks() as demo:
    gr.Markdown(value="# GAIA Unit 4 – Basic Agent Runner")
    run_btn = gr.Button(value="Run Evaluation & Submit")
    output = gr.JSON(label="Submission Result")

    # The handler takes no inputs; the dict it returns is rendered in the
    # JSON viewer.
    run_btn.click(fn=submit, inputs=None, outputs=output)

# Serve on all network interfaces, port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)