FinalTest

Runtime error

App Files Files Community

yoshizen committed on May 29

Commit

0d32a9e

verified ·

1 Parent(s): b8312c7

Update app.py

Browse files

Files changed (1) hide show

app.py +164 -146

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ logger = logging.getLogger("GAIA-Mastermind")
 # Конфигурация
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MODEL_NAME = "google/flan-t5-xxl"
 API_RETRIES = 3
 API_TIMEOUT = 45
@@ -32,25 +32,29 @@ class GAIAThoughtProcessor:
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"⚡ Инициализация GAIAThoughtProcessor на {self.device.upper()}")
-        # Оптимизированная загрузка модели
-        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-        self.model = AutoModelForSeq2SeqLM.from_pretrained(
-            MODEL_NAME,
-            device_map="auto",
-            torch_dtype=torch.float16 if "cuda" in self.device else torch.float32,
-            low_cpu_mem_usage=True
-        ).eval()
-        # Создаем пайплайн для генерации текста
-        self.text_generator = pipeline(
-            "text2text-generation",
-            model=self.model,
-            tokenizer=self.tokenizer,
-            device=self.device,
-            max_new_tokens=512
-        )
-        logger.info("✅ GAIAThoughtProcessor готов")
     def _math_solver(self, expression: str) -> str:
         """Безопасное вычисление математических выражений"""
@@ -138,7 +142,7 @@ class GAIAThoughtProcessor:
             # Базовый анализ изображения
             description = (
                 f"Format: {img.format}, Size: {img.size}, "
-                f"Mode: {img.mode}, Colors: {len(set(img.getdata()))}"
             )
             return description
         except (UnidentifiedImageError, requests.exceptions.RequestException) as e:
@@ -189,53 +193,15 @@ class GAIAThoughtProcessor:
     def process_question(self, question: str, task_id: str) -> str:
         """Обработка вопроса с декомпозицией на шаги"""
         try:
-            # Шаг 1: Декомпозиция задачи
-            decomposition_prompt = (
-                f"Декомпозируй задачу GAIA ({task_id}) на шаги. "
-                f"Используй инструменты: math_solver, table_analyzer, text_processor, image_processor.\n\n"
-                f"Задача: {question}\n\n"
-                "Шаги (формат: [tool_name] arguments):"
-            )
-            steps_response = self._generate_response(decomposition_prompt)
-            steps = [s.strip() for s in steps_response.split("\n") if s.strip()]
-            # Шаг 2: Выполнение шагов
-            results = []
-            for step in steps:
-                if step:
-                    try:
-                        # Извлечение инструмента и аргументов
-                        match = re.match(r"\[(\w+)\]\s*(.+)", step)
-                        if match:
-                            tool_name = match.group(1)
-                            arguments = match.group(2)
-                            result = self._call_tool(tool_name, arguments)
-                            results.append(f"{step} -> {result}")
-                        else:
-                            results.append(f"{step} -> ERROR: Invalid format")
-                    except Exception as e:
-                        results.append(f"{step} -> ERROR: {str(e)}")
-            # Шаг 3: Синтез финального ответа
-            synthesis_prompt = (
-                f"Задача GAIA {task_id}:\n{question}\n\n"
-                "Выполненные шаги:\n" + "\n".join(results) +
-                "\n\nФинальный ответ в формате JSON (только поле final_answer):"
-            )
-            final_response = self._generate_response(synthesis_prompt)
             # Извлечение чистого ответа
-            if "final_answer" in final_response:
-                return json.dumps({"final_answer": final_response})
             else:
-                # Попробуем извлечь ответ из текста
-                answer_match = re.search(r'\{.*\}', final_response, re.DOTALL)
-                if answer_match:
-                    return answer_match.group(0)
-                else:
-                    return json.dumps({"final_answer": final_response.strip()})
         except Exception as e:
             logger.exception("Processing failed")
             return json.dumps({
@@ -262,7 +228,14 @@ class GAIAEvaluationRunner:
         # Получение вопросов
         questions, status = self._fetch_questions()
         if status != "success":
-            return status, 0, 0, pd.DataFrame()
         # Обработка вопросов
         results = []
@@ -290,8 +263,8 @@ class GAIAEvaluationRunner:
                 # Запись результатов
                 results.append({
                     "Task ID": task_id,
-                    "Question": q["question"][:150] + "..." if len(q["question"]) > 150 else q["question"],
-                    "Answer": final_answer[:200],
                     "Status": "Processed"
                 })
             except Exception as e:
@@ -308,12 +281,22 @@ class GAIAEvaluationRunner:
                 })
         # Отправка ответов
-        submission_result, score = self._submit_answers(username, agent_code, answers)
-        return submission_result, score, len(questions), pd.DataFrame(results)
     def _fetch_questions(self) -> Tuple[list, str]:
         """Получение вопросов с API"""
-        for _ in range(API_RETRIES):
             try:
                 response = self.session.get(
                     self.questions_url,
@@ -323,7 +306,7 @@ class GAIAEvaluationRunner:
                 if response.status_code == 200:
                     questions = response.json()
                     if not isinstance(questions, list):
-                        return [], "Invalid response format: expected list"
                     # Добавление task_id если отсутствует
                     for q in questions:
@@ -331,18 +314,22 @@ class GAIAEvaluationRunner:
                     return questions, "success"
                 elif response.status_code == 429:
-                    logger.warning("Rate limited, retrying...")
-                    time.sleep(5)
                     continue
                 else:
-                    return [], f"API error: HTTP {response.status_code}"
             except Exception as e:
-                logger.error(f"Fetch error: {e}")
-                return [], f"Connection error: {str(e)}"
-        return [], "API unavailable after retries"
     def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[str, int]:
         """Отправка ответов на сервер"""
@@ -363,89 +350,120 @@ class GAIAEvaluationRunner:
                 if response.status_code == 200:
                     result = response.json()
                     score = result.get("score", 0)
-                    return result.get("message", "Answers submitted"), score
                 elif response.status_code == 400:
-                    error = response.json().get("error", "Invalid request")
-                    logger.error(f"Validation error: {error}")
-                    return f"Validation Error: {error}", 0
                 elif response.status_code == 429:
-                    logger.warning("Rate limited, retrying...")
-                    time.sleep(10)
                     continue
                 else:
-                    return f"HTTP Error {response.status_code}", 0
             except Exception as e:
-                logger.error(f"Submit error: {e}")
-                return f"Connection Error: {str(e)}", 0
-        return "Submission failed after retries", 0
 # === ИНТЕРФЕЙС GRADIO ===
 def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
-    progress(0, desc="⚡ Инициализация GAIA Mastermind...")
     try:
         agent = GAIAThoughtProcessor()
-    except Exception as e:
-        logger.exception("Agent initialization failed")
-        return f"Agent Error: {str(e)}", 0, 0, pd.DataFrame()
-    progress(0.1, desc="🌐 Подключение к GAIA API...")
-    runner = GAIAEvaluationRunner()
-    # Получение вопросов
-    questions, status = runner._fetch_questions()
-    if status != "success":
-        return status, 0, 0, pd.DataFrame()
-    # Обработка вопросов с прогрессом
-    results = []
-    answers = []
-    total = len(questions)
-    for i, q in enumerate(questions):
-        progress(i / total, desc=f"🧠 Обработка задач ({i+1}/{total})")
-        try:
-            task_id = q.get("task_id", f"unknown_{i}")
-            json_response = agent.process_question(q["question"], task_id)
-            # Парсинг ответа
-            try:
-                response_obj = json.loads(json_response)
-                final_answer = response_obj.get("final_answer", "")
-            except:
-                final_answer = json_response
-            answers.append({
-                "task_id": task_id,
-                "answer": str(final_answer)[:500]
-            })
-            results.append({
-                "Task ID": task_id,
-                "Question": q["question"][:150] + "..." if len(q["question"]) > 150 else q["question"],
-                "Answer": str(final_answer)[:200],
-                "Status": "Processed"
-            })
-        except Exception as e:
-            logger.error(f"Task {task_id} failed: {e}")
-            answers.append({
-                "task_id": task_id,
-                "answer": f"ERROR: {str(e)}"
-            })
-            results.append({
-                "Task ID": task_id,
-                "Question": "Error",
-                "Answer": f"ERROR: {str(e)}",
                 "Status": "Failed"
-            })
-    # Отправка ответов
-    submission_result, score = runner._submit_answers(username, agent_code, answers)
-    return submission_result, score, total, pd.DataFrame(results)
 # Создание интерфейса
 with gr.Blocks(
@@ -500,7 +518,7 @@ with gr.Blocks(
                     interactive=False
                 )
-            # Упрощенный Dataframe без параметров, вызывающих ошибки
             results_table = gr.Dataframe(
                 label="🔍 Детализация ответов",
                 headers=["Task ID", "Question", "Answer", "Status"],
@@ -528,5 +546,5 @@ if __name__ == "__main__":
         server_port=7860,
         share=False,
         show_error=True,
-        debug=False
     )

 # Конфигурация
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MODEL_NAME = "google/flan-t5-large"  # Упрощенная модель для CPU
 API_RETRIES = 3
 API_TIMEOUT = 45
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"⚡ Инициализация GAIAThoughtProcessor на {self.device.upper()}")
+        try:
+            # Оптимизированная загрузка модели
+            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                MODEL_NAME,
+                device_map="auto" if torch.cuda.is_available() else None,
+                torch_dtype=torch.float16 if "cuda" in self.device else torch.float32,
+                low_cpu_mem_usage=True
+            ).eval()
+            # Создаем пайплайн для генерации текста
+            self.text_generator = pipeline(
+                "text2text-generation",
+                model=self.model,
+                tokenizer=self.tokenizer,
+                device=self.device,
+                max_new_tokens=256
+            )
+            logger.info("✅ GAIAThoughtProcessor готов")
+        except Exception as e:
+            logger.exception("Ошибка инициализации модели")
+            raise RuntimeError(f"Ошибка инициализации: {str(e)}")
     def _math_solver(self, expression: str) -> str:
         """Безопасное вычисление математических выражений"""
             # Базовый анализ изображения
             description = (
                 f"Format: {img.format}, Size: {img.size}, "
+                f"Mode: {img.mode}"
             )
             return description
         except (UnidentifiedImageError, requests.exceptions.RequestException) as e:
     def process_question(self, question: str, task_id: str) -> str:
         """Обработка вопроса с декомпозицией на шаги"""
         try:
+            # Упрощенный промпт для CPU
+            prompt = f"Реши задачу шаг за шагом: {question}\n\nФинальный ответ:"
+            response = self._generate_response(prompt)
             # Извлечение чистого ответа
+            if "final_answer" in response:
+                return json.dumps({"final_answer": response})
             else:
+                return json.dumps({"final_answer": response.strip()})
         except Exception as e:
             logger.exception("Processing failed")
             return json.dumps({
         # Получение вопросов
         questions, status = self._fetch_questions()
         if status != "success":
+            # Возвращаем ошибку в понятном формате
+            error_df = pd.DataFrame([{
+                "Task ID": "ERROR",
+                "Question": status,
+                "Answer": "Не удалось получить вопросы",
+                "Status": "Failed"
+            }])
+            return status, 0, 0, error_df
         # Обработка вопросов
         results = []
                 # Запись результатов
                 results.append({
                     "Task ID": task_id,
+                    "Question": q["question"][:100] + "..." if len(q["question"]) > 100 else q["question"],
+                    "Answer": final_answer[:100] + "..." if len(final_answer) > 100 else final_answer,
                     "Status": "Processed"
                 })
             except Exception as e:
                 })
         # Отправка ответов
+        try:
+            submission_result, score = self._submit_answers(username, agent_code, answers)
+            return submission_result, score, len(questions), pd.DataFrame(results)
+        except Exception as e:
+            error_message = f"Ошибка отправки: {str(e)}"
+            results.append({
+                "Task ID": "SUBMIT_ERROR",
+                "Question": error_message,
+                "Answer": "",
+                "Status": "Failed"
+            })
+            return error_message, 0, len(questions), pd.DataFrame(results)
     def _fetch_questions(self) -> Tuple[list, str]:
         """Получение вопросов с API"""
+        for attempt in range(API_RETRIES):
             try:
                 response = self.session.get(
                     self.questions_url,
                 if response.status_code == 200:
                     questions = response.json()
                     if not isinstance(questions, list):
+                        return [], f"Неверный формат ответа: ожидался список, получен {type(questions)}"
                     # Добавление task_id если отсутствует
                     for q in questions:
                     return questions, "success"
                 elif response.status_code == 429:
+                    wait_time = 2 ** attempt  # Экспоненциальная задержка
+                    logger.warning(f"Rate limited, retrying in {wait_time}s...")
+                    time.sleep(wait_time)
                     continue
                 else:
+                    return [], f"Ошибка API: HTTP {response.status_code} - {response.text}"
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Ошибка соединения: {e}")
+                return [], f"Ошибка сети: {str(e)}"
             except Exception as e:
+                logger.error(f"Неожиданная ошибка: {e}")
+                return [], f"Неожиданная ошибка: {str(e)}"
+        return [], "API недоступен после попыток"
     def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[str, int]:
         """Отправка ответов на сервер"""
                 if response.status_code == 200:
                     result = response.json()
                     score = result.get("score", 0)
+                    return result.get("message", "Ответы успешно отправлены"), score
                 elif response.status_code == 400:
+                    error = response.json().get("error", "Неверный запрос")
+                    logger.error(f"Ошибка валидации: {error}")
+                    return f"Ошибка валидации: {error}", 0
                 elif response.status_code == 429:
+                    wait_time = 5 * (attempt + 1)
+                    logger.warning(f"Rate limited, retrying in {wait_time}s...")
+                    time.sleep(wait_time)
                     continue
                 else:
+                    return f"HTTP Ошибка {response.status_code} - {response.text}", 0
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Ошибка отправки: {e}")
+                return f"Ошибка сети: {str(e)}", 0
             except Exception as e:
+                logger.error(f"Неожиданная ошибка отправки: {e}")
+                return f"Неожиданная ошибка: {str(e)}", 0
+        return "Сбой отправки после попыток", 0
 # === ИНТЕРФЕЙС GRADIO ===
 def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
     try:
+        progress(0, desc="⚡ Инициализация GAIA Mastermind...")
         agent = GAIAThoughtProcessor()
+        progress(0.1, desc="🌐 Подключение к GAIA API...")
+        runner = GAIAEvaluationRunner()
+        # Получение вопросов
+        progress(0.2, desc="📡 Получение вопросов...")
+        questions, status = runner._fetch_questions()
+        if status != "success":
+            error_message = f"Ошибка: {status}"
+            error_df = pd.DataFrame([{
+                "Task ID": "ERROR",
+                "Question": error_message,
+                "Answer": "Не удалось получить вопросы",
                 "Status": "Failed"
+            }])
+            return error_message, 0, 0, error_df
+        total = len(questions)
+        if total == 0:
+            error_message = "Получено 0 вопросов"
+            error_df = pd.DataFrame([{
+                "Task ID": "ERROR",
+                "Question": error_message,
+                "Answer": "Нет данных",
+                "Status": "Failed"
+            }])
+            return error_message, 0, 0, error_df
+        # Обработка вопросов с прогрессом
+        results = []
+        answers = []
+        for i, q in enumerate(questions):
+            progress(i / total, desc=f"🧠 Обработка задачи {i+1}/{total}")
+            try:
+                task_id = q.get("task_id", f"unknown_{i}")
+                json_response = agent.process_question(q["question"], task_id)
+                # Парсинг ответа
+                try:
+                    response_obj = json.loads(json_response)
+                    final_answer = response_obj.get("final_answer", "")
+                except:
+                    final_answer = json_response
+                answers.append({
+                    "task_id": task_id,
+                    "answer": str(final_answer)[:500]
+                })
+                results.append({
+                    "Task ID": task_id,
+                    "Question": q["question"][:100] + "..." if len(q["question"]) > 100 else q["question"],
+                    "Answer": str(final_answer)[:100] + "..." if len(str(final_answer)) > 100 else str(final_answer),
+                    "Status": "Processed"
+                })
+            except Exception as e:
+                logger.error(f"Task {task_id} failed: {e}")
+                answers.append({
+                    "task_id": task_id,
+                    "answer": f"ERROR: {str(e)}"
+                })
+                results.append({
+                    "Task ID": task_id,
+                    "Question": "Error",
+                    "Answer": f"ERROR: {str(e)}",
+                    "Status": "Failed"
+                })
+        # Отправка ответов
+        progress(0.9, desc="📤 Отправка результатов...")
+        submission_result, score = runner._submit_answers(username, agent_code, answers)
+        return submission_result, score, total, pd.DataFrame(results)
+    except Exception as e:
+        logger.exception("Critical error in run_evaluation")
+        error_message = f"Критическая ошибка: {str(e)}"
+        error_df = pd.DataFrame([{
+            "Task ID": "CRITICAL",
+            "Question": error_message,
+            "Answer": "См. логи",
+            "Status": "Failed"
+        }])
+        return error_message, 0, 0, error_df
 # Создание интерфейса
 with gr.Blocks(
                     interactive=False
                 )
+            # Упрощенный Dataframe
             results_table = gr.Dataframe(
                 label="🔍 Детализация ответов",
                 headers=["Task ID", "Question", "Answer", "Status"],
         server_port=7860,
         share=False,
         show_error=True,
+        debug=True  # Включение детального лога
     )