Psiska committed on
Commit
48910c1
Β·
1 Parent(s): cb80af5

Evaluation 5

Browse files
Files changed (1) hide show
  1. app.py +150 -138
app.py CHANGED
@@ -1,143 +1,155 @@
1
- import os
2
- import json
3
- import random
4
- import requests
5
- import uvicorn
6
-
7
- from fastapi import FastAPI, HTTPException
8
- from fastapi.responses import FileResponse
9
  import gradio as gr
10
- import pandas as pd
11
-
12
  from crew import run_crew
13
-
14
- # ─── Configuration ─────────────────────────────────────────────────────────
15
- PORT = int(os.getenv("PORT", 7860))
16
- LOCAL_API = f"http://127.0.0.1:{PORT}"
17
- SPACE_ID = os.getenv("SPACE_ID", "Psiska/General_AI_Assistant")
18
- QUESTIONS_PATH = os.getenv("QUESTIONS_PATH", "data/gaia_validation.jsonl")
19
-
20
- # ─── FastAPI setup ──────────────────────────────────────────────────────────
21
- api = FastAPI(title="GAIA Evaluation API")
22
-
23
- # Load questions from JSONL
24
- questions = []
25
- with open(QUESTIONS_PATH, 'r') as f:
26
- for line in f:
27
- questions.append(json.loads(line))
28
-
29
- @api.get("/questions")
30
- def get_questions():
31
- return questions
32
-
33
- @api.get("/random-question")
34
- def get_random_question():
35
- return random.choice(questions)
36
-
37
- @api.get("/files/{task_id}")
38
- def get_file(task_id: str):
39
- entry = next(
40
- (q for q in questions if str(q.get("task_id") or q.get("id")) == task_id),
41
- None
42
- )
43
- if not entry or not entry.get("file_name"):
44
- raise HTTPException(status_code=404, detail="File not found for this task")
45
- file_path = os.path.join("data", entry["file_name"])
46
- if not os.path.exists(file_path):
47
- raise HTTPException(status_code=404, detail="File missing on disk")
48
- return FileResponse(file_path)
49
-
50
- @api.post("/submit")
51
- def submit(batch: dict):
52
- username = batch.get("username", "")
53
- agent_code = batch.get("agent_code", "")
54
- answers = batch.get("answers", [])
55
-
56
- total = len(answers)
57
- correct = 0
58
-
59
- # Map task_id -> ground-truth
60
- truth_map = {
61
- str(q.get("task_id") or q.get("id")): str(q.get("Final answer") or q.get("final_answer") or "")
62
- for q in questions
63
- }
64
- for ans in answers:
65
- tid = str(ans.get("task_id"))
66
- if str(ans.get("submitted_answer", "")) == truth_map.get(tid, ""):
67
- correct += 1
68
-
69
- score = round(100 * correct / total) if total else 0
70
- return {
71
- "username": username,
72
- "agent_code": agent_code,
73
- "score": score,
74
- "correct_count": correct,
75
- "total_attempted": total
76
- }
77
-
78
- # ─── Gradio UI setup ─────────────────────────────────────────────────────────
79
- def run_and_submit_all(username: str):
80
- if not username:
81
- return "πŸ”’ Please enter your Hugging Face username.", None
82
- try:
83
- # Fetch questions from local API
84
- resp = requests.get(f"{LOCAL_API}/questions", timeout=15)
85
- resp.raise_for_status()
86
- qs = resp.json()
87
-
88
- logs, payload = [], []
89
- for q in qs:
90
- task_id = str(q.get("task_id") or q.get("id"))
91
- question = q.get("question", "")
92
- file_name= q.get("file_name", "")
93
-
94
- # Download file if exists
95
- if file_name:
96
- file_resp = requests.get(f"{LOCAL_API}/files/{task_id}", timeout=15)
97
- file_resp.raise_for_status()
98
- os.makedirs("data", exist_ok=True)
99
- path = os.path.join("data", file_name)
100
- with open(path, "wb") as fd:
101
- fd.write(file_resp.content)
102
-
103
  answer = run_crew(question, file_name)
104
- payload.append({"task_id": task_id, "submitted_answer": answer})
105
- logs.append({"Task ID": task_id, "Question": question, "Answer": answer})
106
-
107
- submission = {
108
- "username": username,
109
- "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
110
- "answers": payload
111
- }
112
- sub_resp = requests.post(f"{LOCAL_API}/submit", json=submission, timeout=60)
113
- sub_resp.raise_for_status()
114
- result = sub_resp.json()
115
-
116
- status = (
117
- f"βœ… {result['username']} scored {result['score']}% "
118
- f"({result['correct_count']}/{result['total_attempted']} correct)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  )
120
- return status, pd.DataFrame(logs)
121
- except Exception as e:
122
- return f"❌ Error: {e}", None
123
-
124
- # Build Gradio interface
125
- with gr.Blocks(title="GAIA Evaluation Runner") as demo:
126
- gr.Markdown("# GAIA Evaluation Runner")
127
- user_input = gr.Textbox(label="Hugging Face Username")
128
- run_btn = gr.Button("Run & Submit All Answers")
129
- status = gr.Textbox(label="Status", interactive=False)
130
- table = gr.DataFrame(headers=["Task ID", "Question", "Answer"], label="Log of Q&A")
131
-
132
- run_btn.click(
133
- fn=run_and_submit_all,
134
- inputs=[user_input],
135
- outputs=[status, table]
136
- )
137
-
138
- # Mount Gradio app on FastAPI
139
- api.mount("/", demo)
140
-
141
- if __name__ == "__main__":
142
- uvicorn.run(api, host="0.0.0.0", port=PORT)
 
 
 
 
 
143
 
 
 
1
+ import os, threading
 
 
 
 
 
 
 
2
  import gradio as gr
3
+ from crew import run_parallel_crew
 
4
  from crew import run_crew
5
+ from utils import get_questions
6
+
7
+
8
# Module-level lock: concurrent Gradio requests mutate os.environ below, so
# they must be serialized. (A lock created inside the function would be a new
# object per call and would guard nothing.)
_ENV_LOCK = threading.Lock()


def ask(question, openai_api_key, gemini_api_key, anthropic_api_key, file_name=""):
    """
    Ask the General AI Assistant a question to answer.

    Args:
        question (str): The question to answer
        openai_api_key (str): OpenAI API key
        gemini_api_key (str): Gemini API key
        anthropic_api_key (str): Anthropic API key
        file_name (str): Optional file name (resolved under the ``data/`` directory)

    Returns:
        str: The answer to the question

    Raises:
        gr.Error: If a required field is missing or the crew run fails.
    """
    if not question:
        raise gr.Error("Question is required.")

    if not openai_api_key:
        raise gr.Error("OpenAI API Key is required.")

    if not gemini_api_key:
        raise gr.Error("Gemini API Key is required.")

    if not anthropic_api_key:
        raise gr.Error("Anthropic API Key is required.")

    if file_name:
        file_name = f"data/{file_name}"

    # Keys the crew reads from the environment for the duration of the run.
    keys = {
        "OPENAI_API_KEY": openai_api_key,
        "GEMINI_API_KEY": gemini_api_key,
        "MODEL_API_KEY": anthropic_api_key,
    }

    with _ENV_LOCK:
        # Remember any pre-existing values so they can be restored afterwards
        # instead of being unconditionally deleted.
        previous = {k: os.environ.get(k) for k in keys}
        try:
            os.environ.update(keys)
            answer = run_crew(question, file_name)
        except Exception as e:
            # Surface the failure in the UI; keep the original traceback chained.
            raise gr.Error(str(e)) from e
        finally:
            for key, old_value in previous.items():
                if old_value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = old_value

    return answer
57
+
58
# ─── Gradio UI ──────────────────────────────────────────────────────────────

# Close any Gradio instances left over from a previous run (e.g. a Space
# restart) so this app can bind cleanly.
gr.close_all()

# JSONL file containing the GAIA validation questions shown as examples.
QUESTION_FILE_PATH = "data/gaia_validation.jsonl"

with gr.Blocks() as grady:
    gr.Markdown("## Grady - General AI Assistant")

    with gr.Tab("Solution"):
        # DESCRIPTION is supplied via a Space variable/secret; default to ""
        # so a missing variable does not render the literal string "None".
        gr.Markdown(os.environ.get("DESCRIPTION", ""))

        with gr.Row():
            with gr.Column(scale=3):
                with gr.Row():
                    question = gr.Textbox(
                        label="Question *",
                        placeholder="In the 2025 Gradio Agents & MCP Hackathon, what percentage of participants submitted a solution during the last 24 hours?",
                        interactive=True
                    )
                with gr.Row():
                    level = gr.Radio(
                        choices=[1, 2, 3],
                        label="GAIA Benchmark Level",
                        interactive=True,
                        scale=1
                    )
                    ground_truth = gr.Textbox(
                        label="Ground Truth",
                        interactive=True,
                        scale=1
                    )
                    file_name = gr.Textbox(
                        label="File Name",
                        interactive=True,
                        scale=2
                    )
                with gr.Row():
                    # Placeholder uses a plain ASCII hyphen (the previous
                    # revision had a non-breaking hyphen U+2011 in "sk‑...").
                    openai_api_key = gr.Textbox(
                        label="OpenAI API Key *",
                        type="password",
                        placeholder="sk-...",
                        interactive=True
                    )
                    gemini_api_key = gr.Textbox(
                        label="Gemini API Key *",
                        type="password",
                        interactive=True
                    )
                    anthropic_api_key = gr.Textbox(
                        label="Anthropic API Key *",
                        type="password",
                        placeholder="sk-ant-...",
                        interactive=True
                    )
                with gr.Row():
                    clear_btn = gr.ClearButton(
                        components=[question, level, ground_truth, file_name]
                    )
                    submit_btn = gr.Button("Submit", variant="primary")
            with gr.Column(scale=1):
                answer = gr.Textbox(
                    label="Answer",
                    lines=1,
                    interactive=False
                )

        submit_btn.click(
            fn=ask,
            inputs=[question, openai_api_key, gemini_api_key, anthropic_api_key, file_name],
            outputs=answer
        )

        # One example table per GAIA level; clicking a row populates the
        # input components listed below.
        example_inputs = [
            question, level, ground_truth, file_name,
            openai_api_key, gemini_api_key, anthropic_api_key
        ]
        for benchmark_level in (1, 2, 3):
            gr.Examples(
                label=f"GAIA Benchmark Level {benchmark_level} Problems",
                examples=get_questions(QUESTION_FILE_PATH, benchmark_level),
                inputs=example_inputs,
                outputs=answer,
                cache_examples=False
            )

    with gr.Tab("Documentation"):
        # Same default-to-empty treatment as DESCRIPTION above.
        gr.Markdown(os.environ.get("DOCUMENTATION", ""))

# Launch with the MCP server enabled (mcp_server=True).
grady.launch(mcp_server=True)