General_AI_Assistant

Configuration error

App Files Files Community

Psiska committed on Jun 30, 2025

Commit

4475dcb

1 Parent(s): a410403

Evaluation

Browse files

Files changed (3) hide show

__pycache__/crew.cpython-310.pyc +0 -0
evaluation.py +92 -0
requirements.txt +2 -0

__pycache__/crew.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/crew.cpython-310.pyc and b/__pycache__/crew.cpython-310.pyc differ

evaluation.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import os
+import requests
+import pandas as pd
+import gradio as gr
+from crew import run_crew
+# Configuration: base URL of the GAIA evaluation API. The routes "/questions",
+# "/files/<task_id>" and "/submit" are appended to it by run_and_submit_all.
+# Can be overridden with the GAIA_API_URL environment variable.
+# NOTE(review): the default below is the Space's own page URL rather than an
+# obvious API host - confirm it really serves these routes, otherwise set
+# GAIA_API_URL explicitly.
+API_URL = os.getenv("GAIA_API_URL", "https://huggingface.co/spaces/Psiska/General_AI_Assistant")
+# Space identifier used to build the agent_code URL sent with the submission.
+# Can be overridden with the SPACE_ID environment variable.
+SPACE_ID = os.getenv("SPACE_ID", "Psiska/General_AI_Assistant")
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetches all evaluation questions, runs your agent on each,
+    and submits the batch to the /submit endpoint.
+    Returns a status message and a DataFrame of logs.
+    """
+    if profile is None:
+        return "🔒 Please log in with your Hugging Face account.", None
+    username = profile.username
+    try:
+        # 1) Fetch questions
+        resp = requests.get(f"{API_URL}/questions", timeout=15)
+        resp.raise_for_status()
+        questions = resp.json()
+        # 2) Run agent on each question
+        logs = []
+        answers = []
+        for item in questions:
+            task_id   = item.get("task_id") or item.get("id")
+            question  = item.get("question", "")
+            file_name = item.get("file_name", "")
+            # Optional: download attached file
+            if file_name:
+                file_resp = requests.get(f"{API_URL}/files/{task_id}", timeout=15)
+                file_resp.raise_for_status()
+                local_path = os.path.join("data", file_name)
+                os.makedirs(os.path.dirname(local_path), exist_ok=True)
+                with open(local_path, "wb") as f:
+                    f.write(file_resp.content)
+                # pass file_name or path to your agent if needed
+            # Get agent's answer
+            answer = run_crew(question, file_name)
+            answers.append({"task_id": task_id, "submitted_answer": answer})
+            logs.append({"Task ID": task_id, "Question": question, "Answer": answer})
+        # 3) Prepare payload
+        payload = {
+            "username":   username,
+            "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
+            "answers":    answers
+        }
+        # 4) Submit answers
+        submit_resp = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
+        submit_resp.raise_for_status()
+        result = submit_resp.json()
+        # Format status
+        status = (
+            f"✅ {result['username']} scored {result['score']}% "
+            f"({result['correct_count']}/{result['total_attempted']} correct)"
+        )
+        return status, pd.DataFrame(logs)
+    except Exception as e:
+        return f"❌ Error: {str(e)}", None
+# Build Gradio interface
+with gr.Blocks(title="GAIA Evaluation Runner") as demo:
+    gr.Markdown("# GAIA Evaluation Runner")
+    login = gr.LoginButton()
+    run_btn = gr.Button("Run & Submit All Answers")
+    status  = gr.Textbox(label="Status", interactive=False)
+    table   = gr.DataFrame(headers=["Task ID", "Question", "Answer"], label="Log of Q&A")
+    run_btn.click(
+        fn=run_and_submit_all,
+        inputs=[login],
+        outputs=[status, table]
+    )
+if __name__ == "__main__":
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -11,3 +11,5 @@ langchain
 redis==4.5.5         # if you choose Redis for persistence
 python-dotenv        # to load REDIS_URL from .env
 faiss-cpu

 redis==4.5.5         # if you choose Redis for persistence
 python-dotenv        # to load REDIS_URL from .env
 faiss-cpu
+requests
+pandas