File size: 3,672 Bytes
4f7d0fd
 
10e9b7d
3db6293
0166bad
33a40c7
 
 
 
 
 
4f7d0fd
33a40c7
 
 
4f7d0fd
8727844
33a40c7
 
 
 
 
 
 
 
 
 
 
4f7d0fd
2f74e6e
51ce1ed
33a40c7
4f7d0fd
33a40c7
 
 
 
8727844
31243f4
33a40c7
31243f4
33a40c7
 
 
 
8727844
f43898e
33a40c7
 
 
e80aab9
33a40c7
 
 
 
 
 
 
 
 
f43898e
33a40c7
e80aab9
8727844
4f7d0fd
 
 
33a40c7
 
3c4371f
4f7d0fd
7d65c66
4f7d0fd
e80aab9
 
33a40c7
 
 
31243f4
33a40c7
7d65c66
33a40c7
e50ad54
e80aab9
4f7d0fd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os, requests, pandas as pd, gradio as gr
from agent import BasicAgent

# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it to build the two endpoints it calls.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

def run_and_submit_all(profile: gr.OAuthProfile | None = None, *args, **kwargs):
    """Fetch the questions, run the agent on each one, and submit the answers.

    Args:
        profile: Logged-in Hugging Face profile. When it is falsy the run is
            refused with a login prompt and nothing is fetched or submitted.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is ``None`` when
        the run stops before any question is processed; otherwise it is a
        ``pandas.DataFrame`` with one row per attempted question
        ("Task ID", "Question", "Submitted Answer").

    Every failure is reported through the status message rather than raised,
    because the return value is displayed directly in the UI.
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = str(profile.username)

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = BasicAgent(api_url, corpus_path=os.getenv("CORPUS_PATH", "corpus.txt"))
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link to this Space's code, sent along with the submission; empty when
    # SPACE_ID is not set in the environment.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A JSON object or scalar would otherwise be iterated key-by-key /
    # char-by-char and crash on item.get below, so require a non-empty list.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    results_log, answers_payload = [], []
    for item in questions_data:
        # Skip malformed entries instead of failing the whole run.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text, task_id)
        except Exception as e:
            # Deliberate best-effort: a failing question is still submitted
            # (as an error string) so the remaining questions are not lost.
            submitted_answer = f"AGENT ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        try:
            detail = e.response.json().get("detail", e.response.text)
        except Exception:
            # Body is not JSON, or not a JSON object: fall back to raw text.
            detail = e.response.text
        # "detail" may be a list or dict (e.g. structured validation errors),
        # so stringify before truncating; slicing a dict would raise here.
        msg = f"Submission Failed: HTTP {e.response.status_code}. Detail: {str(detail)[:500]}"
        return msg, pd.DataFrame(results_log)
    except requests.exceptions.Timeout:
        return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

# UI definition: components are created in display order inside the Blocks
# context, so the statement order below is the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner (RAG from .txt)")
    gr.Markdown("Log in, then click **Run Evaluation & Submit All Answers**.")
    # run_and_submit_all refuses to run (returns a login prompt) when it
    # receives no profile, so the login button comes before the run button.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No inputs= are wired; the handler's two return values fill the two
    # outputs in order (status text, then results table).
    # NOTE(review): `profile` is presumably injected by Gradio based on the
    # gr.OAuthProfile annotation on the handler — confirm against Gradio docs.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# Start the Gradio app only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)