Psiska committed on
Commit
4eb9c01
·
1 Parent(s): a9d6ab6

Evaluation 3

Browse files
Files changed (1) hide show
  1. app.py +76 -68
app.py CHANGED
@@ -1,135 +1,143 @@
1
  import os
2
- from fastapi import FastAPI, HTTPException
3
- from fastapi.responses import JSONResponse, FileResponse
4
- from starlette.staticfiles import StaticFiles
5
- import uvicorn
6
- import random
7
  import json
 
 
 
8
 
 
 
9
  import gradio as gr
10
  import pandas as pd
11
- import requests
12
 
13
- from crew import run_crew # your agent runner
14
- from utils import read_file_json # your file‐reading helpers
15
 
16
- # ─── 1) FastAPI setup ───────────────────────────────────────────────────────
 
 
 
 
17
 
 
18
  api = FastAPI(title="GAIA Evaluation API")
19
 
20
- # Load all questions once
21
- QUESTIONS_PATH = "data/gaia_validation.jsonl"
22
- with open(QUESTIONS_PATH) as f:
23
- questions = [json.loads(line) for line in f]
 
24
 
25
- # GET /questions
26
  @api.get("/questions")
27
  def get_questions():
28
  return questions
29
 
30
- # GET /random-question
31
  @api.get("/random-question")
32
- def get_random():
33
  return random.choice(questions)
34
 
35
- # GET /files/{task_id}
36
  @api.get("/files/{task_id}")
37
  def get_file(task_id: str):
38
- # find matching question entry
39
- entry = next((q for q in questions if str(q["task_id"]) == task_id), None)
 
 
40
  if not entry or not entry.get("file_name"):
41
- raise HTTPException(404, "No file for that task")
42
- path = os.path.join("data", entry["file_name"])
43
- return FileResponse(path)
 
 
44
 
45
- # POST /submit
46
  @api.post("/submit")
47
  def submit(batch: dict):
48
  username = batch.get("username", "")
49
  agent_code = batch.get("agent_code", "")
50
  answers = batch.get("answers", [])
51
- total = len([a for a in answers if a.get("submitted_answer") is not None])
52
- correct = 0
53
 
54
- # simple exact‐match scoring
55
- truth_map = {str(q["task_id"]): str(q["Final answer"]) for q in questions}
 
 
 
 
 
 
56
  for ans in answers:
57
- tid = str(ans["task_id"])
58
- if ans["submitted_answer"] == truth_map.get(tid, ""):
59
  correct += 1
60
 
61
  score = round(100 * correct / total) if total else 0
62
  return {
63
- "username": username,
64
- "agent_code": agent_code,
65
- "score": score,
66
- "correct_count": correct,
67
  "total_attempted": total
68
  }
69
 
70
- # ─── 2) Gradio UI setup ────────────────────────────────────────────────────
71
-
72
  def run_and_submit_all(username: str):
73
  if not username:
74
  return "🔒 Please enter your Hugging Face username.", None
75
-
76
  try:
77
- # fetch questions
78
- resp = requests.get("http://localhost:7860/questions", timeout=15)
79
  resp.raise_for_status()
80
  qs = resp.json()
81
 
82
  logs, payload = [], []
83
  for q in qs:
84
- tid = q["task_id"]
85
- question = q["question"]
86
- fname = q.get("file_name", "")
87
 
88
- # download file if exists
89
- if fname:
90
- file_resp = requests.get(f"http://localhost:7860/files/{tid}", timeout=15)
91
  file_resp.raise_for_status()
92
- local = os.path.join("data", fname)
93
- os.makedirs(os.path.dirname(local), exist_ok=True)
94
- with open(local, "wb") as f:
95
- f.write(file_resp.content)
96
 
97
- ans = run_crew(question, fname)
98
- payload.append({"task_id": tid, "submitted_answer": ans})
99
- logs.append({"Task ID": tid, "Question": question, "Answer": ans})
100
 
101
- sub = {
102
  "username": username,
103
- "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
104
  "answers": payload
105
  }
106
- submit_resp = requests.post("http://localhost:7860/submit", json=sub, timeout=60)
107
- submit_resp.raise_for_status()
108
- result = submit_resp.json()
109
 
110
  status = (
111
  f"✅ {result['username']} scored {result['score']}% "
112
  f"({result['correct_count']}/{result['total_attempted']} correct)"
113
  )
114
  return status, pd.DataFrame(logs)
115
-
116
  except Exception as e:
117
  return f"❌ Error: {e}", None
118
 
 
119
  with gr.Blocks(title="GAIA Evaluation Runner") as demo:
120
  gr.Markdown("# GAIA Evaluation Runner")
121
- user_in = gr.Textbox(label="Hugging Face Username")
122
- run_btn = gr.Button("Run & Submit All Answers")
123
- status = gr.Textbox(label="Status", interactive=False)
124
- table = gr.DataFrame(headers=["Task ID","Question","Answer"], label="Log of Q&A")
125
-
126
- run_btn.click(fn=run_and_submit_all,
127
- inputs=[user_in], outputs=[status, table])
128
 
129
- # Mount Gradio under “/” so that FastAPI serves both API and UI
130
- api.mount("/", demo, name="gradio")
 
 
 
131
 
132
- # ─── 3) Entry point ────────────────────────────────────────────────────────
 
133
 
134
  if __name__ == "__main__":
135
- uvicorn.run(api, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
 
 
1
  import os
 
 
 
 
 
2
  import json
3
+ import random
4
+ import requests
5
+ import uvicorn
6
 
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.responses import FileResponse
9
  import gradio as gr
10
  import pandas as pd
 
11
 
12
+ from crew import run_crew
 
13
 
14
+ # ─── Configuration ─────────────────────────────────────────────────────────
15
+ PORT = int(os.getenv("PORT", 7860))
16
+ LOCAL_API = f"http://127.0.0.1:{PORT}"
17
+ SPACE_ID = os.getenv("SPACE_ID", "Psiska/General_AI_Assistant")
18
+ QUESTIONS_PATH = os.getenv("QUESTIONS_PATH", "data/gaia_validation.jsonl")
19
 
20
+ # ─── FastAPI setup ──────────────────────────────────────────────────────────
21
  api = FastAPI(title="GAIA Evaluation API")
22
 
23
+ # Load questions from JSONL
24
+ questions = []
25
+ with open(QUESTIONS_PATH, 'r') as f:
26
+ for line in f:
27
+ questions.append(json.loads(line))
28
 
 
29
  @api.get("/questions")
30
  def get_questions():
31
  return questions
32
 
 
33
  @api.get("/random-question")
34
+ def get_random_question():
35
  return random.choice(questions)
36
 
 
37
  @api.get("/files/{task_id}")
38
  def get_file(task_id: str):
39
+ entry = next(
40
+ (q for q in questions if str(q.get("task_id") or q.get("id")) == task_id),
41
+ None
42
+ )
43
  if not entry or not entry.get("file_name"):
44
+ raise HTTPException(status_code=404, detail="File not found for this task")
45
+ file_path = os.path.join("data", entry["file_name"])
46
+ if not os.path.exists(file_path):
47
+ raise HTTPException(status_code=404, detail="File missing on disk")
48
+ return FileResponse(file_path)
49
 
 
50
  @api.post("/submit")
51
  def submit(batch: dict):
52
  username = batch.get("username", "")
53
  agent_code = batch.get("agent_code", "")
54
  answers = batch.get("answers", [])
 
 
55
 
56
+ total = len(answers)
57
+ correct = 0
58
+
59
+ # Map task_id -> ground-truth
60
+ truth_map = {
61
+ str(q.get("task_id") or q.get("id")): str(q.get("Final answer") or q.get("final_answer") or "")
62
+ for q in questions
63
+ }
64
  for ans in answers:
65
+ tid = str(ans.get("task_id"))
66
+ if str(ans.get("submitted_answer", "")) == truth_map.get(tid, ""):
67
  correct += 1
68
 
69
  score = round(100 * correct / total) if total else 0
70
  return {
71
+ "username": username,
72
+ "agent_code": agent_code,
73
+ "score": score,
74
+ "correct_count": correct,
75
  "total_attempted": total
76
  }
77
 
78
+ # ─── Gradio UI setup ─────────────────────────────────────────────────────────
 
79
  def run_and_submit_all(username: str):
80
  if not username:
81
  return "🔒 Please enter your Hugging Face username.", None
 
82
  try:
83
+ # Fetch questions from local API
84
+ resp = requests.get(f"{LOCAL_API}/questions", timeout=15)
85
  resp.raise_for_status()
86
  qs = resp.json()
87
 
88
  logs, payload = [], []
89
  for q in qs:
90
+ task_id = str(q.get("task_id") or q.get("id"))
91
+ question = q.get("question", "")
92
+ file_name= q.get("file_name", "")
93
 
94
+ # Download file if exists
95
+ if file_name:
96
+ file_resp = requests.get(f"{LOCAL_API}/files/{task_id}", timeout=15)
97
  file_resp.raise_for_status()
98
+ os.makedirs("data", exist_ok=True)
99
+ path = os.path.join("data", file_name)
100
+ with open(path, "wb") as fd:
101
+ fd.write(file_resp.content)
102
 
103
+ answer = run_crew(question, file_name)
104
+ payload.append({"task_id": task_id, "submitted_answer": answer})
105
+ logs.append({"Task ID": task_id, "Question": question, "Answer": answer})
106
 
107
+ submission = {
108
  "username": username,
109
+ "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
110
  "answers": payload
111
  }
112
+ sub_resp = requests.post(f"{LOCAL_API}/submit", json=submission, timeout=60)
113
+ sub_resp.raise_for_status()
114
+ result = sub_resp.json()
115
 
116
  status = (
117
  f"✅ {result['username']} scored {result['score']}% "
118
  f"({result['correct_count']}/{result['total_attempted']} correct)"
119
  )
120
  return status, pd.DataFrame(logs)
 
121
  except Exception as e:
122
  return f"❌ Error: {e}", None
123
 
124
+ # Build Gradio interface
125
  with gr.Blocks(title="GAIA Evaluation Runner") as demo:
126
  gr.Markdown("# GAIA Evaluation Runner")
127
+ user_input = gr.Textbox(label="Hugging Face Username")
128
+ run_btn = gr.Button("Run & Submit All Answers")
129
+ status = gr.Textbox(label="Status", interactive=False)
130
+ table = gr.DataFrame(headers=["Task ID", "Question", "Answer"], label="Log of Q&A")
 
 
 
131
 
132
+ run_btn.click(
133
+ fn=run_and_submit_all,
134
+ inputs=[user_input],
135
+ outputs=[status, table]
136
+ )
137
 
138
+ # Mount Gradio app on FastAPI
139
+ api.mount("/", demo)
140
 
141
  if __name__ == "__main__":
142
+ uvicorn.run(api, host="0.0.0.0", port=PORT)
143
+