FinalTest

Runtime error

App Files Files Community

yoshizen committed on May 29, 2025

Commit

af37df4

verified ·

1 Parent(s): 19870ea

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -153

app.py CHANGED Viewed

@@ -1,158 +1,56 @@
-import json
-import time
-import requests
 import gradio as gr
-import pandas as pd
-from tqdm import tqdm
-from typing import Tuple, List, Dict, Any
-from agent import GAIAExpertAgent
-# Constants
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # default base URL for EvaluationRunner
-class EvaluationRunner:
-    """Optimized evaluation handler"""
-    def __init__(self, api_url=DEFAULT_API_URL):
-        self.api_url = api_url
-        self.questions_url = f"{api_url}/questions"  # read by _fetch_questions
-        self.submit_url = f"{api_url}/submit"  # posted to by _submit_answers
-        self.results_url = f"{api_url}/results"  # not used by any method of this class
-        self.correct_answers = 0  # never updated by this class
-        self.total_questions = 0  # set by _fetch_questions
-    def run_evaluation(self, agent, username: str, agent_code: str) -> Tuple[str, pd.DataFrame]:
-        questions_data = self._fetch_questions()
-        if not isinstance(questions_data, list):  # e.g. the error string returned by _fetch_questions
-            return questions_data, pd.DataFrame()
-        results_log, answers_payload = self._run_agent_on_questions(agent, questions_data)
-        if not answers_payload:  # nothing to submit
-            return "No answers generated", pd.DataFrame()
-        submission_result = self._submit_answers(username, agent_code, answers_payload)
-        return submission_result, pd.DataFrame(results_log)
-    def _fetch_questions(self):
-        try:
-            response = requests.get(self.questions_url, timeout=30)
-            response.raise_for_status()
-            questions_data = response.json()
-            self.total_questions = len(questions_data)
-            print(f"Fetched {self.total_questions} questions")
-            return questions_data
-        except Exception as e:  # any failure is returned as a string rather than raised
-            return f"Error: {str(e)}"
-    def _run_agent_on_questions(self, agent, questions_data):
-        results_log = []
-        answers_payload = []
-        print(f"Processing {len(questions_data)} questions...")
-        for item in tqdm(questions_data, desc="Questions"):
-            task_id = item.get("task_id")
-            question_text = item.get("question")
-            if not task_id or not question_text:  # skip items missing an id or question text
-                continue
-            try:
-                json_response = agent(question_text, task_id)  # result is parsed as JSON on the next line
-                response_obj = json.loads(json_response)
-                answer = response_obj.get("final_answer", "")  # a missing "final_answer" becomes ""
-                answers_payload.append({"task_id": task_id, "submitted_answer": answer})
-                results_log.append({
-                    "Task ID": task_id,
-                    "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,  # truncated for display
-                    "Answer": answer[:50] + "..." if len(answer) > 50 else answer  # truncated for display
-                })
-            except Exception as e:  # failures are logged and still submitted as "ERROR: ..." answers
-                answers_payload.append({"task_id": task_id, "submitted_answer": f"ERROR: {str(e)}"})
-                results_log.append({
-                    "Task ID": task_id,
-                    "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                    "Answer": f"ERROR: {str(e)}"
-                })
-        return results_log, answers_payload
-    def _submit_answers(self, username: str, agent_code: str, answers_payload):
-        submission_data = {
-            "username": username.strip(),
-            "agent_code": agent_code.strip(),
-            "answers": answers_payload
-        }
-        print("Submitting answers...")
-        try:
-            response = requests.post(
-                self.submit_url,
-                json=submission_data,
-                headers={"Content-Type": "application/json"},
-                timeout=60
             )
-            response.raise_for_status()
-            return response.json().get("message", "Answers submitted successfully")  # generic text if no "message" key
-        except Exception as e:  # any failure is returned as a string rather than raised
-            return f"Submission failed: {str(e)}"
-def run_evaluation(username: str, agent_code: str, model_name: str):
-    print("Initializing GAIA Expert Agent...")
-    agent = GAIAExpertAgent(model_name=model_name)  # a new agent is built on every call
-    print("Starting evaluation...")
-    runner = EvaluationRunner()
-    result, results_df = runner.run_evaluation(agent, username, agent_code)
-    # Add the question counters
-    total_questions = runner.total_questions
-    # For simplicity, assume the correct answers are unknown (the GAIA API does not return them immediately)
-    correct_answers = 0
-    return result, correct_answers, total_questions, results_df
-def create_gradio_interface():  # builds the Blocks UI and returns it without launching
-    with gr.Blocks(title="GAIA Expert Agent") as demo:
-        gr.Markdown("# 🧠 GAIA Expert Agent Evaluation")
-        with gr.Row():
-            with gr.Column():  # first column: inputs
-                gr.Markdown("### Configuration")
-                username = gr.Textbox(label="Hugging Face Username", value="yoshizen")
-                agent_code = gr.Textbox(
-                    label="Agent Code",
-                    value="https://huggingface.co/spaces/yoshizen/FinalTest"
-                )
-                model_name = gr.Dropdown(
-                    label="Model",
-                    choices=[
-                        "google/flan-t5-small",
-                        "google/flan-t5-base",
-                        "google/flan-t5-large"
-                    ],
-                    value="google/flan-t5-large"
-                )
-                run_button = gr.Button("🚀 Run Evaluation", variant="primary")
-            with gr.Column():  # second column: outputs
-                gr.Markdown("### Results")
-                result_text = gr.Textbox(label="Submission Status")
-                correct_answers = gr.Number(label="Correct Answers")
-                total_questions = gr.Number(label="Total Questions")
-                results_table = gr.Dataframe(label="Processed Questions", interactive=False)
-        run_button.click(  # outputs are listed in the order run_evaluation returns them
-            fn=run_evaluation,
-            inputs=[username, agent_code, model_name],
-            outputs=[result_text, correct_answers, total_questions, results_table]
-        )
-    return demo
 if __name__ == "__main__":
-    demo = create_gradio_interface()  # the UI is only built when run as a script
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+from gaia_agent import GAIAExpertAgent
+from evaluation_runner import EvaluationRunner
+# Component initialization
+agent = GAIAExpertAgent(model_name="google/flan-t5-large")  # created once at import time; model name is now fixed
+runner = EvaluationRunner()  # shared by every run_evaluation call
+def run_evaluation(username: str, agent_code: str):
+    """Main function for running the evaluation"""
+    try:
+        result, correct, total, df = runner.run_evaluation(  # NOTE(review): unpacks four values; the EvaluationRunner removed by this commit returned two — confirm against evaluation_runner
+            agent=agent,
+            username=username,
+            agent_code=agent_code
+        )
+        return result, correct, total, df
+    except Exception as e:  # any failure becomes a status string with zeroed counters and no table
+        return f"Error: {str(e)}", 0, 0, None
+# Gradio interface
+with gr.Blocks(title="GAIA Agent Evaluation") as demo:  # built at module level, so importing the file constructs the UI
+    gr.Markdown("# 🏆 GAIA Agent Certification")
+    with gr.Row():
+        with gr.Column():  # first column: inputs
+            gr.Markdown("### Configuration")
+            username = gr.Textbox(
+                label="Hugging Face Username",
+                value="yoshizen"
             )
+            agent_code = gr.Textbox(
+                label="Agent Code",
+                value="https://huggingface.co/spaces/yoshizen/FinalTest"
+            )
+            run_btn = gr.Button("Run Evaluation", variant="primary")
+        with gr.Column():  # second column: outputs
+            gr.Markdown("### Results")
+            result_output = gr.Textbox(label="Status")
+            correct_output = gr.Number(label="Correct Answers")
+            total_output = gr.Number(label="Total Questions")
+            results_table = gr.Dataframe(label="Details")
+    run_btn.click(  # outputs are listed in the order run_evaluation returns them
+        fn=run_evaluation,
+        inputs=[username, agent_code],
+        outputs=[result_output, correct_output, total_output, results_table]
+    )
 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False  # Keep False for Spaces
+    )