General_AI_Assistant

Paused

App Files Files Community

Psiska committed on Jun 30, 2025

Commit

a9d6ab6

1 Parent(s): 3fe1356

Evaluation 2

Browse files

Files changed (1) hide show

app.py +101 -56

app.py CHANGED Viewed

@@ -1,66 +1,112 @@
 import os
-import requests
-import pandas as pd
-import gradio as gr
-from crew import run_crew
-# Configuration: endpoint for GAIA evaluation API
-API_URL = os.getenv("GAIA_API_URL", "https://huggingface.co/spaces/Psiska/General_AI_Assistant")
-# Your Space identifier for generating the agent_code URL
-SPACE_ID = os.getenv("SPACE_ID", "Psiska/General_AI_Assistant")
 def run_and_submit_all(username: str):
-    """
-    Fetches all evaluation questions, runs your agent on each,
-    and submits the batch to the /submit endpoint.
-    Returns a status message and a DataFrame of logs.
-    """
     if not username:
         return "🔒 Please enter your Hugging Face username.", None
     try:
-        # 1) Fetch questions
-        resp = requests.get(f"{API_URL}/questions", timeout=15)
         resp.raise_for_status()
-        questions = resp.json()
-        # 2) Run agent on each question
-        logs = []
-        answers = []
-        for item in questions:
-            task_id   = item.get("task_id") or item.get("id")
-            question  = item.get("question", "")
-            file_name = item.get("file_name", "")
-            # Optional: download attached file
-            if file_name:
-                file_resp = requests.get(f"{API_URL}/files/{task_id}", timeout=15)
                 file_resp.raise_for_status()
-                local_path = os.path.join("data", file_name)
-                os.makedirs(os.path.dirname(local_path), exist_ok=True)
-                with open(local_path, "wb") as f:
                     f.write(file_resp.content)
-            # Get agent's answer
-            answer = run_crew(question, file_name)
-            answers.append({"task_id": task_id, "submitted_answer": answer})
-            logs.append({"Task ID": task_id, "Question": question, "Answer": answer})
-        # 3) Prepare payload
-        payload = {
             "username":   username,
-            "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
-            "answers":    answers
         }
-        # 4) Submit answers
-        submit_resp = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
         submit_resp.raise_for_status()
         result = submit_resp.json()
-        # Format status
         status = (
             f"✅ {result['username']} scored {result['score']}% "
             f"({result['correct_count']}/{result['total_attempted']} correct)"
@@ -68,23 +114,22 @@ def run_and_submit_all(username: str):
         return status, pd.DataFrame(logs)
     except Exception as e:
-        return f"❌ Error: {str(e)}", None
-# Build Gradio interface
 with gr.Blocks(title="GAIA Evaluation Runner") as demo:
     gr.Markdown("# GAIA Evaluation Runner")
-    username_input = gr.Textbox(label="Hugging Face Username")
     run_btn = gr.Button("Run & Submit All Answers")
     status  = gr.Textbox(label="Status", interactive=False)
-    table   = gr.DataFrame(headers=["Task ID", "Question", "Answer"], label="Log of Q&A")
-    run_btn.click(
-        fn=run_and_submit_all,
-        inputs=[username_input],
-        outputs=[status, table]
-    )
 if __name__ == "__main__":
-    demo.launch()

 import os
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse, FileResponse
+from starlette.staticfiles import StaticFiles
+import uvicorn
+import random
+import json
+import gradio as gr
+import pandas as pd
+import requests
+from crew import run_crew         # your agent runner
+from utils import read_file_json  # your file-reading helpers
+# ─── 1) FastAPI setup ───────────────────────────────────────────────────────
+api = FastAPI(title="GAIA Evaluation API")
+# Load all questions once at import time; the endpoints below read this list.
+QUESTIONS_PATH = "data/gaia_validation.jsonl"
+# JSON Lines: one JSON object per line. Decode explicitly as UTF-8 instead of
+# relying on the platform default, and skip blank lines, which json.loads
+# would reject with a JSONDecodeError.
+with open(QUESTIONS_PATH, encoding="utf-8") as f:
+    questions = [json.loads(line) for line in f if line.strip()]
+# GET /questions
+@api.get("/questions")
+def get_questions():
+    """Return the full list of question records loaded from QUESTIONS_PATH."""
+    return questions
+# GET /random-question
+@api.get("/random-question")
+def get_random():
+    """Return one question record picked at random from the loaded list.
+
+    Raises:
+        HTTPException: 404 when no questions are loaded. ``random.choice``
+            raises IndexError on an empty list, which would otherwise reach
+            the client as a 500.
+    """
+    if not questions:
+        raise HTTPException(404, "No questions available")
+    return random.choice(questions)
+# GET /files/{task_id}
+@api.get("/files/{task_id}")
+def get_file(task_id: str):
+    """Serve the attachment of the question whose ``task_id`` matches.
+
+    Args:
+        task_id: Task identifier taken from the URL path; compared against
+            the stringified ``task_id`` of each question record.
+
+    Returns:
+        A FileResponse for ``data/<file_name>`` of the matching record.
+
+    Raises:
+        HTTPException: 404 when no record matches, the record names no file,
+            or the named file is not present on disk.
+    """
+    # Records without a "task_id" key are skipped instead of raising KeyError.
+    entry = next(
+        (q for q in questions if "task_id" in q and str(q["task_id"]) == task_id),
+        None,
+    )
+    if not entry or not entry.get("file_name"):
+        raise HTTPException(404, "No file for that task")
+    path = os.path.join("data", entry["file_name"])
+    # Check up front so a file that is listed but missing on disk is reported
+    # as 404 rather than failing later while the response is being sent.
+    if not os.path.isfile(path):
+        raise HTTPException(404, "No file for that task")
+    return FileResponse(path)
+# POST /submit
+@api.post("/submit")
+def submit(batch: dict):
+    """Score a batch of answers by exact match against each "Final answer".
+
+    Args:
+        batch: Request body with optional keys ``username``, ``agent_code``
+            and ``answers`` (a list of dicts carrying ``task_id`` and
+            ``submitted_answer``).
+
+    Returns:
+        A dict with ``username``, ``agent_code``, ``score`` (rounded
+        percentage of attempted answers that are correct),
+        ``correct_count`` and ``total_attempted``.
+    """
+    username   = batch.get("username", "")
+    agent_code = batch.get("agent_code", "")
+    # An explicit null for "answers" is treated like an empty list.
+    answers    = batch.get("answers") or []
+    # simple exact-match scoring
+    truth_map = {str(q["task_id"]): str(q["Final answer"]) for q in questions}
+    total   = 0
+    correct = 0
+    for ans in answers:
+        submitted = ans.get("submitted_answer")
+        # Entries without an answer are not counted as attempted.
+        if submitted is None:
+            continue
+        total += 1
+        tid = str(ans.get("task_id"))
+        # Require the task to exist: defaulting the truth to "" would score an
+        # empty answer for an unknown task as correct. Compare as strings
+        # because the truth values are stringified above.
+        if tid in truth_map and str(submitted) == truth_map[tid]:
+            correct += 1
+    score = round(100 * correct / total) if total else 0
+    return {
+        "username":      username,
+        "agent_code":    agent_code,
+        "score":         score,
+        "correct_count": correct,
+        "total_attempted": total
+    }
+# ─── 2) Gradio UI setup ────────────────────────────────────────────────────
 def run_and_submit_all(username: str):
+    """Fetch every question, run the agent on each, and submit the batch.
+
+    Args:
+        username: Hugging Face username included in the submission payload.
+
+    Returns:
+        A ``(status, logs)`` tuple: a status string and a DataFrame with the
+        columns "Task ID", "Question" and "Answer". On an empty username or
+        on any error the second element is ``None``.
+    """
     if not username:
         return "🔒 Please enter your Hugging Face username.", None
+    # Talk to the API on the port the entry point binds (PORT, default 7860)
+    # instead of a hard-coded 7860.
+    base_url = f"http://localhost:{os.getenv('PORT', 7860)}"
     try:
+        # fetch questions
+        resp = requests.get(f"{base_url}/questions", timeout=15)
         resp.raise_for_status()
+        qs = resp.json()
+        logs, payload = [], []
+        for q in qs:
+            tid   = q["task_id"]
+            question = q["question"]
+            fname    = q.get("file_name", "")
+            # download file if exists
+            if fname:
+                file_resp = requests.get(f"{base_url}/files/{tid}", timeout=15)
                 file_resp.raise_for_status()
+                local = os.path.join("data", fname)
+                os.makedirs(os.path.dirname(local), exist_ok=True)
+                with open(local, "wb") as f:
                     f.write(file_resp.content)
+            ans = run_crew(question, fname)
+            payload.append({"task_id": tid, "submitted_answer": ans})
+            logs.append({"Task ID": tid, "Question": question, "Answer": ans})
+        # Fall back to this Space's id so an unset SPACE_ID does not render
+        # as the literal text "None" inside the URL.
+        space_id = os.getenv("SPACE_ID", "Psiska/General_AI_Assistant")
+        sub = {
             "username":   username,
+            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
+            "answers":    payload
         }
+        submit_resp = requests.post(f"{base_url}/submit", json=sub, timeout=60)
         submit_resp.raise_for_status()
         result = submit_resp.json()
         status = (
             f"✅ {result['username']} scored {result['score']}% "
             f"({result['correct_count']}/{result['total_attempted']} correct)"
         )
         return status, pd.DataFrame(logs)
     except Exception as e:
+        # UI boundary: report any failure in the status box instead of raising.
+        return f"❌ Error: {e}", None
 with gr.Blocks(title="GAIA Evaluation Runner") as demo:
     gr.Markdown("# GAIA Evaluation Runner")
+    user_in = gr.Textbox(label="Hugging Face Username")
     run_btn = gr.Button("Run & Submit All Answers")
     status  = gr.Textbox(label="Status", interactive=False)
+    table   = gr.DataFrame(headers=["Task ID","Question","Answer"], label="Log of Q&A")
+    run_btn.click(fn=run_and_submit_all,
+                  inputs=[user_in], outputs=[status, table])
+# Mount Gradio under “/” so that FastAPI serves both API and UI
+api.mount("/", demo, name="gradio")
+# ─── 3) Entry point ────────────────────────────────────────────────────────
 if __name__ == "__main__":
+    # Listen on all interfaces; the port comes from PORT and defaults to 7860.
+    listen_port = int(os.getenv("PORT", 7860))
+    uvicorn.run(api, host="0.0.0.0", port=listen_port)