Psiska committed on
Commit
a9d6ab6
·
1 Parent(s): 3fe1356

Evaluation 2

Browse files
Files changed (1) hide show
  1. app.py +101 -56
app.py CHANGED
@@ -1,66 +1,112 @@
1
  import os
2
- import requests
3
- import pandas as pd
4
- import gradio as gr
5
-
6
- from crew import run_crew
 
7
 
8
- # Configuration: endpoint for GAIA evaluation API
9
- API_URL = os.getenv("GAIA_API_URL", "https://huggingface.co/spaces/Psiska/General_AI_Assistant")
10
- # Your Space identifier for generating the agent_code URL
11
- SPACE_ID = os.getenv("SPACE_ID", "Psiska/General_AI_Assistant")
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def run_and_submit_all(username: str):
15
- """
16
- Fetches all evaluation questions, runs your agent on each,
17
- and submits the batch to the /submit endpoint.
18
- Returns a status message and a DataFrame of logs.
19
- """
20
  if not username:
21
  return "πŸ”’ Please enter your Hugging Face username.", None
22
 
23
  try:
24
- # 1) Fetch questions
25
- resp = requests.get(f"{API_URL}/questions", timeout=15)
26
  resp.raise_for_status()
27
- questions = resp.json()
28
-
29
- # 2) Run agent on each question
30
- logs = []
31
- answers = []
32
- for item in questions:
33
- task_id = item.get("task_id") or item.get("id")
34
- question = item.get("question", "")
35
- file_name = item.get("file_name", "")
36
-
37
- # Optional: download attached file
38
- if file_name:
39
- file_resp = requests.get(f"{API_URL}/files/{task_id}", timeout=15)
40
  file_resp.raise_for_status()
41
- local_path = os.path.join("data", file_name)
42
- os.makedirs(os.path.dirname(local_path), exist_ok=True)
43
- with open(local_path, "wb") as f:
44
  f.write(file_resp.content)
45
 
46
- # Get agent's answer
47
- answer = run_crew(question, file_name)
48
- answers.append({"task_id": task_id, "submitted_answer": answer})
49
- logs.append({"Task ID": task_id, "Question": question, "Answer": answer})
50
 
51
- # 3) Prepare payload
52
- payload = {
53
  "username": username,
54
- "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
55
- "answers": answers
56
  }
57
-
58
- # 4) Submit answers
59
- submit_resp = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
60
  submit_resp.raise_for_status()
61
  result = submit_resp.json()
62
 
63
- # Format status
64
  status = (
65
  f"βœ… {result['username']} scored {result['score']}% "
66
  f"({result['correct_count']}/{result['total_attempted']} correct)"
@@ -68,23 +114,22 @@ def run_and_submit_all(username: str):
68
  return status, pd.DataFrame(logs)
69
 
70
  except Exception as e:
71
- return f"❌ Error: {str(e)}", None
72
-
73
 
74
- # Build Gradio interface
75
  with gr.Blocks(title="GAIA Evaluation Runner") as demo:
76
  gr.Markdown("# GAIA Evaluation Runner")
77
- username_input = gr.Textbox(label="Hugging Face Username")
78
-
79
  run_btn = gr.Button("Run & Submit All Answers")
80
  status = gr.Textbox(label="Status", interactive=False)
81
- table = gr.DataFrame(headers=["Task ID", "Question", "Answer"], label="Log of Q&A")
 
 
 
 
 
 
82
 
83
- run_btn.click(
84
- fn=run_and_submit_all,
85
- inputs=[username_input],
86
- outputs=[status, table]
87
- )
88
 
89
  if __name__ == "__main__":
90
- demo.launch()
 
1
  import os
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.responses import JSONResponse, FileResponse
4
+ from starlette.staticfiles import StaticFiles
5
+ import uvicorn
6
+ import random
7
+ import json
8
 
9
+ import gradio as gr
10
+ import pandas as pd
11
+ import requests
 
12
 
13
+ from crew import run_crew # your agent runner
14
+ from utils import read_file_json # your file‐reading helpers
15
+
16
+ # ─── 1) FastAPI setup ───────────────────────────────────────────────────────
17
+
api = FastAPI(title="GAIA Evaluation API")

# Load all questions once, at import time, so every endpoint shares one list.
QUESTIONS_PATH = "data/gaia_validation.jsonl"
# The file is read as JSON Lines (one JSON document per line). It is opened
# explicitly as UTF-8 rather than with the platform's locale encoding, and
# blank lines (e.g. a trailing newline at end of file) are skipped because
# json.loads rejects an empty string.
with open(QUESTIONS_PATH, encoding="utf-8") as f:
    questions = [json.loads(line) for line in f if line.strip()]
24
+
# GET /questions
@api.get("/questions")
def get_questions() -> list:
    """Return the full list of question records loaded at startup."""
    return questions
29
+
# GET /random-question
@api.get("/random-question")
def get_random():
    """Return one question record picked at random.

    Raises:
        HTTPException: 404 when no questions are loaded; random.choice would
            otherwise raise IndexError and surface as a 500.
    """
    if not questions:
        raise HTTPException(404, "No questions available")
    return random.choice(questions)
34
+
# GET /files/{task_id}
@api.get("/files/{task_id}")
def get_file(task_id: str):
    """Serve the attachment that belongs to the question with this task id.

    Args:
        task_id: Task identifier taken from the URL path; compared against
            the stringified ``task_id`` of each loaded question.

    Returns:
        A FileResponse for ``data/<file_name>`` of the matching question.

    Raises:
        HTTPException: 404 when no question matches, the question has no
            ``file_name``, or the file does not exist on disk.
    """
    # Find the matching question entry; entries without a task_id are skipped
    # instead of raising KeyError.
    entry = next(
        (q for q in questions if "task_id" in q and str(q["task_id"]) == task_id),
        None,
    )
    if not entry or not entry.get("file_name"):
        raise HTTPException(404, "No file for that task")

    path = os.path.join("data", entry["file_name"])
    # Check up front so a missing file is reported as 404 rather than failing
    # later while the response is being sent.
    if not os.path.isfile(path):
        raise HTTPException(404, "No file for that task")
    return FileResponse(path)
44
+
# POST /submit
@api.post("/submit")
def submit(batch: dict):
    """Score a batch of submitted answers by exact string match.

    Args:
        batch: Request body. Reads the keys ``username``, ``agent_code`` and
            ``answers``; each answer is a dict with ``task_id`` and
            ``submitted_answer``.

    Returns:
        A dict with ``username``, ``agent_code``, ``score`` (rounded
        percentage of attempted answers that were correct),
        ``correct_count`` and ``total_attempted``.
    """
    username = batch.get("username", "")
    agent_code = batch.get("agent_code", "")
    answers = batch.get("answers") or []

    # Simple exact-match scoring against the loaded questions. Entries that
    # lack either key are left out instead of raising KeyError.
    truth_map = {
        str(q["task_id"]): str(q["Final answer"])
        for q in questions
        if "task_id" in q and "Final answer" in q
    }

    total = 0
    correct = 0
    for ans in answers:
        submitted = ans.get("submitted_answer")
        if submitted is None:
            # Not attempted: counts neither towards total nor correct.
            continue
        total += 1
        expected = truth_map.get(str(ans.get("task_id")))
        # Unknown task ids never score (previously an empty answer matched
        # the "" default). Both sides are compared as strings because the
        # ground truth is stored stringified.
        if expected is not None and str(submitted) == expected:
            correct += 1

    score = round(100 * correct / total) if total else 0
    return {
        "username": username,
        "agent_code": agent_code,
        "score": score,
        "correct_count": correct,
        "total_attempted": total,
    }
69
+
70
+ # ─── 2) Gradio UI setup ────────────────────────────────────────────────────
71
 
def run_and_submit_all(username: str):
    """Fetch every question, answer each with run_crew, and submit the batch.

    Args:
        username: Hugging Face username to submit under.

    Returns:
        A ``(status, table)`` tuple: a status message and a DataFrame with
        the columns "Task ID", "Question" and "Answer". ``table`` is None
        when the username is empty or when any step raised.
    """
    if not username:
        return "🔒 Please enter your Hugging Face username.", None

    # The API is served by this same process; follow the PORT variable the
    # entry point binds to instead of assuming 7860.
    base_url = f"http://localhost:{os.getenv('PORT', '7860')}"
    # Fall back to this project's Space id so the URL never contains "None".
    space_id = os.getenv("SPACE_ID", "Psiska/General_AI_Assistant")

    try:
        # Fetch questions.
        resp = requests.get(f"{base_url}/questions", timeout=15)
        resp.raise_for_status()
        qs = resp.json()

        logs, payload = [], []
        for q in qs:
            tid = q["task_id"]
            # NOTE(review): the scoring endpoint reads "Final answer", which
            # suggests GAIA-style keys where the text is under "Question";
            # accept either spelling - confirm against the data file.
            question = q.get("question") or q.get("Question", "")
            fname = q.get("file_name", "")

            # Download the attachment if the question has one.
            if fname:
                file_resp = requests.get(f"{base_url}/files/{tid}", timeout=15)
                file_resp.raise_for_status()
                local = os.path.join("data", fname)
                os.makedirs(os.path.dirname(local), exist_ok=True)
                with open(local, "wb") as f:
                    f.write(file_resp.content)

            ans = run_crew(question, fname)
            payload.append({"task_id": tid, "submitted_answer": ans})
            logs.append({"Task ID": tid, "Question": question, "Answer": ans})

        sub = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
            "answers": payload,
        }
        submit_resp = requests.post(f"{base_url}/submit", json=sub, timeout=60)
        submit_resp.raise_for_status()
        result = submit_resp.json()

        status = (
            f"✅ {result['username']} scored {result['score']}% "
            f"({result['correct_count']}/{result['total_attempted']} correct)"
        )
        return status, pd.DataFrame(logs)

    except Exception as e:
        # UI boundary: report any failure in the status box instead of raising.
        return f"❌ Error: {e}", None
 
118
 
 
with gr.Blocks(title="GAIA Evaluation Runner") as demo:
    gr.Markdown("# GAIA Evaluation Runner")
    user_in = gr.Textbox(label="Hugging Face Username")
    run_btn = gr.Button("Run & Submit All Answers")
    status = gr.Textbox(label="Status", interactive=False)
    table = gr.DataFrame(headers=["Task ID", "Question", "Answer"], label="Log of Q&A")

    # Event listeners are registered inside the Blocks context.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[user_in],
        outputs=[status, table],
    )

# Mount Gradio under "/" so that FastAPI serves both API and UI.
# A gr.Blocks object is not itself an ASGI application, so it cannot be
# handed to api.mount(); gr.mount_gradio_app builds the ASGI app and attaches
# it at the given path. The API routes registered earlier still match first.
api = gr.mount_gradio_app(api, demo, path="/")
131
 
# ─── 3) Entry point ────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
    port = int(os.getenv("PORT", 7860))
    uvicorn.run(api, host="0.0.0.0", port=port)