File size: 2,909 Bytes
7e2b21d
10e9b7d
883f765
 
1ef4d7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
883f765
1ef4d7b
9a446a5
1ef4d7b
 
 
9a446a5
1ef4d7b
9a446a5
1ef4d7b
 
 
 
9a446a5
1ef4d7b
9a446a5
 
1ef4d7b
 
9a446a5
 
1ef4d7b
ee4f812
9a446a5
 
465335a
9a446a5
1ef4d7b
 
 
 
 
9a446a5
 
 
 
1ef4d7b
9a446a5
 
 
 
 
 
 
 
 
 
 
465335a
1ef4d7b
9a446a5
 
 
1ef4d7b
 
 
 
9a446a5
 
 
5c36832
1ef4d7b
 
 
e80aab9
1ef4d7b
5c36832
ee4f812
1ef4d7b
 
 
8f3f1d7
1ef4d7b
9a446a5
1ef4d7b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import gradio as gr
import requests
import pandas as pd
from openai import OpenAI

# =============================
# CONSTANTS (DO NOT CHANGE)
# =============================
# Base URL of the scoring service. run_and_submit_all() appends "/questions"
# (GET) and "/submit" (POST) to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# System message sent with every chat completion request in llm_answer().
# It asks the model to end its reply with "FINAL ANSWER: <answer>"; that exact
# marker is what llm_answer() searches for when extracting the answer, so the
# two must stay in sync.
SYSTEM_PROMPT = """
You are a general AI assistant.
Answer the question and finish your response with:

FINAL ANSWER: <answer>

Rules:
- If number: no commas, no units unless specified
- If string: no articles, no abbreviations
- If list: comma-separated, minimal words
"""

# =============================
# MODEL
# =============================
# Module-level OpenAI client shared by every llm_answer() call. It is created
# at import time with the key read from the OPENAI_API_KEY environment variable.
# NOTE(review): os.getenv returns None when the variable is unset — confirm how
# the OpenAI constructor behaves in that case for the deployed environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def llm_answer(question: str) -> str:
    """Ask the chat model a question and return only its final answer.

    The question is sent to ``gpt-4o-mini`` together with ``SYSTEM_PROMPT``
    at temperature 0. The reply is expected to end with
    ``FINAL ANSWER: <answer>``; everything after the last occurrence of that
    marker is returned. If the marker is absent, the whole reply is returned.

    Args:
        question: The question text to send as the user message.

    Returns:
        The extracted answer with surrounding whitespace removed. An empty
        string is returned when the model produced no text content.

    Raises:
        Any exception raised by the OpenAI client call is propagated
        unchanged; callers decide how to fall back.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": question},
        ],
        temperature=0,
    )

    # ``message.content`` is optional in the API response and can be None;
    # normalise it to "" so the string handling below cannot raise TypeError.
    text = response.choices[0].message.content or ""

    # rpartition splits on the LAST marker; when the marker is missing the
    # third element is the full text, so one expression covers both cases.
    return text.rpartition("FINAL ANSWER:")[2].strip()

# =============================
# GAIA PIPELINE
# =============================
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every question from the scoring API and submit the results.

    Steps: fetch the question list from ``DEFAULT_API_URL``, answer each
    question with ``llm_answer``, POST all answers to ``/submit`` and format
    the server's reply as a status message.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message
        (success summary or an error prefixed with "❌"). ``table`` is a
        DataFrame with the columns "Task ID", "Question" and "Answer", one
        row per answered question, or None when no question was processed
        (not logged in, or the questions could not be fetched).
    """
    if not profile:
        return "❌ Please login to Hugging Face", None

    username = profile.username.strip()
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions. A timeout keeps the UI callback from hanging forever,
    # and HTTP / JSON failures are reported instead of crashing the callback.
    try:
        questions_resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        questions_resp.raise_for_status()
        questions = questions_resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        return f"❌ Failed to fetch questions: {e}", None

    if not questions:
        return "❌ No questions received from the scoring server", None

    answers = []
    logs = []

    for q in questions:
        # Skip malformed entries rather than aborting the whole run.
        try:
            task_id = q["task_id"]
            question = q["question"]
        except (KeyError, TypeError):
            print(f"Skipping malformed question entry: {q!r}")
            continue

        try:
            ans = llm_answer(question)
        except Exception as e:
            # Best effort: one failed question must not lose the others, but
            # the cause is logged so failures are diagnosable.
            print(f"Error answering task {task_id}: {e!r}")
            ans = "I don't know"

        answers.append({
            "task_id": task_id,
            "submitted_answer": ans,
        })

        logs.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": ans,
        })

    table = pd.DataFrame(logs)

    if not answers:
        return "❌ No answers were produced, nothing to submit", table

    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }

    # Only report success when the server actually accepted the submission.
    try:
        submit_resp = requests.post(
            f"{DEFAULT_API_URL}/submit", json=payload, timeout=60
        )
        submit_resp.raise_for_status()
        res = submit_resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        return f"❌ Submission failed: {e}", table

    status = (
        f"✅ Submission Successful!\n"
        f"User: {res.get('username')}\n"
        f"Score: {res.get('score')}%\n"
        f"Correct: {res.get('correct_count')}/{res.get('total_attempted')}\n"
        f"Message: {res.get('message')}"
    )

    return status, table

# =============================
# UI
# =============================
# Build the Gradio page. Components are created in the order they are listed
# inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Level-1 Agent (Unit-4)")
    # run_and_submit_all() refuses to run without a profile, so the user must
    # log in first.
    # NOTE(review): presumably Gradio injects the gr.OAuthProfile argument from
    # this login — confirm against the Gradio OAuth documentation.
    gr.LoginButton()

    run_btn = gr.Button("Run Evaluation & Submit")
    status = gr.Textbox(label="Submission Result", lines=5)
    table = gr.Dataframe(label="Questions & Answers")

    # No explicit inputs are wired; the two return values of
    # run_and_submit_all() (status text, results table) fill these outputs.
    run_btn.click(run_and_submit_all, outputs=[status, table])

# NOTE(review): launch() runs at import time because there is no
# `if __name__ == "__main__":` guard — importing this module starts the app.
demo.launch()