| import re |
| import requests |
| import pandas as pd |
| import torch |
| import gradio as gr |
| from tqdm import tqdm |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
| from typing import List, Dict, Any, Tuple, Optional |
| import json |
| import ast |
| import numpy as np |
| from PIL import Image, UnidentifiedImageError |
| import io |
| import base64 |
| import logging |
| import time |
| import sys |
|
|
| |
# Process-wide logging at INFO level; all components share one named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("GAIA-Mastermind")


# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
# Hugging Face model identifier loaded by GAIAThoughtProcessor.
MODEL_NAME: str = "google/flan-t5-large"
# Upper bound on attempts for each API call.
API_RETRIES: int = 3
# Per-request timeout passed to requests, in seconds (submit uses twice this).
API_TIMEOUT: int = 45
|
|
| |
class GAIAThoughtProcessor:
    """Answer questions with a seq2seq model and a few local helper tools.

    Construction loads the tokenizer and model named by ``MODEL_NAME`` and
    wraps them in a ``text2text-generation`` pipeline.  ``process_question``
    is the public entry point; the ``_math_solver``, ``_table_analyzer``,
    ``_text_processor`` and ``_image_processor`` helpers are reachable by
    name through ``_call_tool``.
    """

    # Names the math tool may evaluate; any other identifier is discarded.
    _MATH_CONTEXT = {
        "sqrt": np.sqrt,
        "log": np.log,
        "log10": np.log10,
        "pi": np.pi,
        "e": np.e,
        "sin": np.sin,
        "cos": np.cos,
        "tan": np.tan,
    }
    # One token of a math expression: number, "**", one-char operator, or name.
    _MATH_TOKEN_RE = re.compile(r"\d+(?:\.\d*)?|\.\d+|\*\*|[-+*/()]|[A-Za-z_]\w*")
    # Optionally signed integer or decimal literal.
    _NUMBER_RE = re.compile(r"[-+]?(?:\d*\.\d+|\d+)")

    def __init__(self):
        """Load tokenizer, model and generation pipeline.

        Raises:
            RuntimeError: if any part of the model stack fails to load; the
                original exception is attached as ``__cause__``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"⚡ Инициализация GAIAThoughtProcessor на {self.device.upper()}")

        try:
            on_gpu = self.device == "cuda"
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                MODEL_NAME,
                device_map="auto" if on_gpu else None,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
                low_cpu_mem_usage=True,
            ).eval()

            pipeline_kwargs = {
                "model": self.model,
                "tokenizer": self.tokenizer,
                "max_new_tokens": 256,
            }
            # A model placed via device_map is already on its device(s).
            # NOTE(review): recent transformers releases are understood to reject
            # an explicit `device` for such models - confirm against the pinned
            # version.  It is therefore only passed on the CPU path.
            if not on_gpu:
                pipeline_kwargs["device"] = self.device
            self.text_generator = pipeline("text2text-generation", **pipeline_kwargs)

            logger.info("✅ GAIAThoughtProcessor готов")
        except Exception as e:
            logger.exception("Ошибка инициализации модели")
            raise RuntimeError(f"Ошибка инициализации: {str(e)}") from e

    def _math_solver(self, expression: str) -> str:
        """Evaluate an arithmetic expression and return the result as text.

        ``^`` is treated as exponentiation, ``√`` as ``sqrt`` and ``π`` as
        ``pi``.  Only numbers, ``+ - * / ** ( )`` and the names listed in
        ``_MATH_CONTEXT`` survive sanitising; everything else (including
        surrounding prose) is dropped.

        Returns:
            ``str(result)``, or ``"Math Error: ..."`` if evaluation fails.
        """
        try:
            expr = expression.replace("^", "**").replace("π", "pi")
            # "√16" -> "sqrt(16)"; a "√" directly before "(" becomes the name.
            expr = re.sub(r"√\s*(\d+(?:\.\d+)?)", r"sqrt(\1)", expr)
            expr = expr.replace("√", "sqrt")

            kept = [
                tok
                for tok in self._MATH_TOKEN_RE.findall(expr)
                if not (tok[0].isalpha() or tok[0] == "_") or tok in self._MATH_CONTEXT
            ]
            clean_expr = "".join(kept)

            # SECURITY: eval() on model/user supplied text.  The token filter
            # above leaves no attribute access, no non-whitelisted names and
            # no builtins, but the expression can still be expensive
            # (e.g. huge powers).  Replace with a real parser if this tool is
            # ever exposed to untrusted callers.
            return str(eval(clean_expr, {"__builtins__": {}}, dict(self._MATH_CONTEXT)))
        except Exception as e:
            logger.error(f"Math error: {e}")
            return f"Math Error: {str(e)}"

    def _table_analyzer(self, table_data: str, query: str) -> str:
        """Parse tabular text and answer a simple aggregate query.

        The separator is guessed: tab, then comma, otherwise fixed-width.
        The first of ``sum``/``mean``/``max``/``min``/``count`` found in the
        lower-cased query selects the aggregate; with none present the
        ``describe()`` summary is returned.

        Returns:
            The aggregate as the text of a dict, the describe table as text,
            or ``"Table Error: ..."`` on failure.
        """
        try:
            buffer = io.StringIO(table_data)
            if "\t" in table_data:
                df = pd.read_csv(buffer, sep="\t")
            elif "," in table_data:
                df = pd.read_csv(buffer)
            else:
                df = pd.read_fwf(buffer)

            query = query.lower()
            # Same precedence as the keyword order below.
            for keyword in ("sum", "mean", "max", "min"):
                if keyword in query:
                    return str(getattr(df, keyword)(numeric_only=True).to_dict())
            if "count" in query:
                return str(df.count().to_dict())
            return df.describe().to_string()
        except Exception as e:
            logger.error(f"Table error: {e}")
            return f"Table Error: {str(e)}"

    def _text_processor(self, text: str, operation: str) -> str:
        """Apply a named text operation (case-insensitive name).

        Supported: ``reverse``, ``count_words``, ``extract_numbers``,
        ``uppercase``, ``lowercase``.  ``extract_numbers`` keeps a leading
        sign on integers as well as on decimals.

        Returns:
            The transformed text, or ``"Unsupported operation: <name>"``.
        """
        operation = operation.lower()
        handlers = {
            "reverse": lambda s: s[::-1],
            "count_words": lambda s: str(len(s.split())),
            "extract_numbers": lambda s: ", ".join(self._NUMBER_RE.findall(s)),
            "uppercase": str.upper,
            "lowercase": str.lower,
        }
        handler = handlers.get(operation)
        if handler is None:
            return f"Unsupported operation: {operation}"
        return handler(text)

    def _image_processor(self, image_input: str) -> str:
        """Describe an image given as an http(s) URL or a ``data:image`` URI.

        Returns:
            ``"Format: ..., Size: ..., Mode: ..."`` on success,
            ``"Invalid image format"`` for any other input prefix, or an
            ``"Image Error: ..."`` / ``"Unexpected Error: ..."`` message.
        """
        try:
            if image_input.startswith("http"):
                response = requests.get(image_input, timeout=30)
                response.raise_for_status()
                img_data = response.content
            elif image_input.startswith("data:image"):
                # Everything after the first comma is the base64 payload.
                _, encoded = image_input.split(",", 1)
                img_data = base64.b64decode(encoded)
            else:
                return "Invalid image format"

            # Context manager releases the decoder once metadata is read.
            with Image.open(io.BytesIO(img_data)) as img:
                return f"Format: {img.format}, Size: {img.size}, Mode: {img.mode}"
        except (UnidentifiedImageError, requests.exceptions.RequestException) as e:
            logger.error(f"Image processing error: {e}")
            return f"Image Error: {str(e)}"
        except Exception as e:
            logger.exception("Unexpected image error")
            return f"Unexpected Error: {str(e)}"

    def _call_tool(self, tool_name: str, arguments: str) -> str:
        """Dispatch to a helper tool by name.

        ``math_solver`` and ``image_processor`` receive the whole argument
        string.  ``table_analyzer`` and ``text_processor`` expect
        ``"<data>, <option>"``; only the LAST comma separates the two, so the
        data part may itself contain commas (CSV rows, sentences).

        Returns:
            The tool's result, ``"Unknown tool: <name>"``, or
            ``"Tool Error: ..."`` if the arguments are malformed.
        """
        try:
            if tool_name == "math_solver":
                return self._math_solver(arguments.strip())
            if tool_name == "image_processor":
                return self._image_processor(arguments.strip())

            if tool_name in ("table_analyzer", "text_processor"):
                data, separator, option = arguments.rpartition(",")
                if not separator:
                    raise ValueError("expected '<data>, <option>'")
                data, option = data.strip(), option.strip()
                if tool_name == "table_analyzer":
                    return self._table_analyzer(data, option)
                return self._text_processor(data, option)

            return f"Unknown tool: {tool_name}"
        except Exception as e:
            return f"Tool Error: {str(e)}"

    def _generate_response(self, prompt: str) -> str:
        """Run the generation pipeline on ``prompt`` and return its text.

        Returns:
            The generated text, or ``"Generation Error: ..."`` on failure.
        """
        try:
            result = self.text_generator(
                prompt,
                max_new_tokens=256,
                num_beams=3,
                early_stopping=True,
                temperature=0.01,
            )
            return result[0]['generated_text']
        except Exception as e:
            logger.error(f"Generation error: {e}")
            return f"Generation Error: {str(e)}"
        finally:
            # Hand cached GPU blocks back after every generation call.
            if "cuda" in self.device:
                torch.cuda.empty_cache()

    def process_question(self, question: str, task_id: str) -> str:
        """Answer one question and return the result as a JSON string.

        Returns:
            ``{"final_answer": <text>}`` on success; on an unexpected error a
            JSON object with ``task_id``, ``error`` and a ``final_answer``
            starting with ``"SYSTEM ERROR:"``.
        """
        try:
            prompt = f"Реши задачу шаг за шагом: {question}\n\nФинальный ответ:"
            response = self._generate_response(prompt)
            # The model text is always wrapped as the final answer.
            return json.dumps({"final_answer": response.strip()})
        except Exception as e:
            logger.exception("Processing failed")
            return json.dumps({
                "task_id": task_id,
                "error": str(e),
                "final_answer": f"SYSTEM ERROR: {str(e)}",
            })
|
|
| |
class GAIAEvaluationRunner:
    """Fetch questions from the scoring API, run an agent, submit answers."""

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Prepare endpoint URLs and a JSON-speaking HTTP session."""
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/json",
            "User-Agent": "GAIA-Mastermind/1.0",
            "Content-Type": "application/json",
        })
        logger.info(f"🌐 Инициализирован GAIAEvaluationRunner для {api_url}")

    @staticmethod
    def _extract_final_answer(raw_response) -> str:
        """Return the ``final_answer`` field of a JSON reply as text.

        Replies that are not JSON, or are JSON but not an object (e.g. a bare
        number), are returned unchanged as text instead of raising.
        """
        try:
            parsed = json.loads(raw_response)
        except (json.JSONDecodeError, TypeError):
            return str(raw_response)
        if isinstance(parsed, dict):
            return str(parsed.get("final_answer", ""))
        return str(raw_response)

    @staticmethod
    def _shorten(text: str, limit: int = 100) -> str:
        """Cut ``text`` to ``limit`` characters, appending "..." if cut."""
        return text[:limit] + "..." if len(text) > limit else text

    def run_evaluation(self, agent, username: str, agent_code: str, progress=tqdm):
        """Run ``agent`` over every question and submit the collected answers.

        Args:
            agent: object exposing ``process_question(question, task_id)``.
            username: account name sent with the submission.
            agent_code: agent URL sent with the submission.
            progress: callable wrapping the question iterable and accepting a
                ``desc`` keyword (``tqdm`` by default).

        Returns:
            ``(status message, score, number of questions, results DataFrame)``.
        """
        questions, status = self._fetch_questions()
        if status != "success":
            error_df = pd.DataFrame([{
                "Task ID": "ERROR",
                "Question": status,
                "Answer": "Не удалось получить вопросы",
                "Status": "Failed",
            }])
            return status, 0, 0, error_df

        results = []
        answers = []
        for i, q in enumerate(progress(questions, desc="🧠 Processing GAIA")):
            # Bound before the try so the except path never sees a stale or
            # undefined id from a previous iteration.
            task_id = f"unknown_{i}"
            try:
                task_id = q.get("task_id", task_id)
                question_text = q["question"]
                final_answer = self._extract_final_answer(
                    agent.process_question(question_text, task_id)
                )
                # Build both records before appending so a failure cannot
                # leave an answer without its matching result row.
                answer_record = {"task_id": task_id, "answer": final_answer[:500]}
                result_record = {
                    "Task ID": task_id,
                    "Question": self._shorten(question_text),
                    "Answer": self._shorten(final_answer),
                    "Status": "Processed",
                }
            except Exception as e:
                logger.error(f"Task {task_id} failed: {e}")
                answer_record = {"task_id": task_id, "answer": f"ERROR: {str(e)}"}
                result_record = {
                    "Task ID": task_id,
                    "Question": "Error",
                    "Answer": f"ERROR: {str(e)}",
                    "Status": "Failed",
                }
            answers.append(answer_record)
            results.append(result_record)

        try:
            submission_result, score = self._submit_answers(username, agent_code, answers)
            return submission_result, score, len(questions), pd.DataFrame(results)
        except Exception as e:
            error_message = f"Ошибка отправки: {str(e)}"
            results.append({
                "Task ID": "SUBMIT_ERROR",
                "Question": error_message,
                "Answer": "",
                "Status": "Failed",
            })
            return error_message, 0, len(questions), pd.DataFrame(results)

    def _fetch_questions(self) -> Tuple[list, str]:
        """Download the question list.

        HTTP 429 and transient connection/timeout errors are retried with
        exponential backoff, up to ``API_RETRIES`` attempts.  Questions that
        lack a ``task_id`` get one derived from a stable digest of their text.

        Returns:
            ``(questions, "success")`` or ``([], <error description>)``.
        """
        import hashlib  # local: only needed for the fallback task id

        last_attempt = API_RETRIES - 1
        for attempt in range(API_RETRIES):
            try:
                response = self.session.get(self.questions_url, timeout=API_TIMEOUT)

                if response.status_code == 429:
                    wait_time = 2 ** attempt
                    logger.warning(f"Rate limited, retrying in {wait_time}s...")
                    if attempt < last_attempt:
                        time.sleep(wait_time)
                    continue

                if response.status_code != 200:
                    return [], f"Ошибка API: HTTP {response.status_code} - {response.text}"

                questions = response.json()
                if not isinstance(questions, list):
                    return [], f"Неверный формат ответа: ожидался список, получен {type(questions)}"

                for q in questions:
                    if not isinstance(q, dict):
                        return [], f"Неверный формат ответа: ожидался список, получен {type(q)}"
                    if "task_id" not in q:
                        # hash() is salted per process, so use a real digest
                        # to keep the fallback id stable between runs.
                        text = str(q.get("question", ""))
                        digest = hashlib.sha1(text.encode("utf-8")).hexdigest()
                        q["task_id"] = f"id_{int(digest, 16) % 100000}"
                return questions, "success"

            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
                # GET is idempotent, so transient network failures are retried.
                logger.error(f"Ошибка соединения: {e}")
                if attempt == last_attempt:
                    return [], f"Ошибка сети: {str(e)}"
                time.sleep(2 ** attempt)
            except requests.exceptions.RequestException as e:
                logger.error(f"Ошибка соединения: {e}")
                return [], f"Ошибка сети: {str(e)}"
            except Exception as e:
                logger.error(f"Неожиданная ошибка: {e}")
                return [], f"Неожиданная ошибка: {str(e)}"

        return [], "API недоступен после попыток"

    def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[str, int]:
        """POST the answers and return ``(message, score)``.

        Only HTTP 429 is retried; network errors are not, because repeating a
        POST whose outcome is unknown could submit twice.

        Returns:
            The server message (or an error description) and the score
            (``0`` on any failure).
        """
        outgoing = []
        for item in answers:
            entry = dict(item)
            # NOTE(review): the course scoring API is understood to read the
            # answer from "submitted_answer" - confirm against the API schema.
            # The original "answer" key is kept, so both spellings are sent.
            entry.setdefault("submitted_answer", entry.get("answer", ""))
            outgoing.append(entry)

        payload = {
            "username": username.strip(),
            "agent_code": agent_code.strip(),
            "answers": outgoing,
        }

        last_attempt = API_RETRIES - 1
        for attempt in range(API_RETRIES):
            try:
                response = self.session.post(
                    self.submit_url,
                    json=payload,
                    timeout=API_TIMEOUT * 2,
                )

                if response.status_code == 200:
                    result = response.json()
                    score = result.get("score", 0)
                    return result.get("message", "Ответы успешно отправлены"), score

                if response.status_code == 400:
                    try:
                        error = response.json().get("error", "Неверный запрос")
                    except (ValueError, AttributeError):
                        # Body was not a JSON object; keep the raw text so
                        # the validation message is not lost.
                        error = response.text or "Неверный запрос"
                    logger.error(f"Ошибка валидации: {error}")
                    return f"Ошибка валидации: {error}", 0

                if response.status_code == 429:
                    wait_time = 5 * (attempt + 1)
                    logger.warning(f"Rate limited, retrying in {wait_time}s...")
                    if attempt < last_attempt:
                        time.sleep(wait_time)
                    continue

                return f"HTTP Ошибка {response.status_code} - {response.text}", 0

            except requests.exceptions.RequestException as e:
                logger.error(f"Ошибка отправки: {e}")
                return f"Ошибка сети: {str(e)}", 0
            except Exception as e:
                logger.error(f"Неожиданная ошибка отправки: {e}")
                return f"Неожиданная ошибка: {str(e)}", 0

        return "Сбой отправки после попыток", 0
|
|
| |
def _evaluation_failure(label: str, message: str, answer: str):
    """Build the 4-tuple the UI expects when evaluation cannot proceed."""
    error_df = pd.DataFrame([{
        "Task ID": label,
        "Question": message,
        "Answer": answer,
        "Status": "Failed",
    }])
    return message, 0, 0, error_df


def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
    """Gradio click handler: load the agent, answer every question, submit.

    Args:
        username: account name sent with the submission.
        agent_code: agent URL sent with the submission.
        progress: Gradio progress tracker, called with a fraction and ``desc``.

    Returns:
        ``(status message, score, number of questions, results DataFrame)``;
        on any failure the score and count are ``0`` and the DataFrame holds a
        single row describing the error.
    """
    try:
        progress(0, desc="⚡ Инициализация GAIA Mastermind...")
        agent = GAIAThoughtProcessor()

        progress(0.1, desc="🌐 Подключение к GAIA API...")
        runner = GAIAEvaluationRunner()

        progress(0.2, desc="📡 Получение вопросов...")
        questions, status = runner._fetch_questions()
        if status != "success":
            return _evaluation_failure("ERROR", f"Ошибка: {status}", "Не удалось получить вопросы")

        total = len(questions)
        if total == 0:
            return _evaluation_failure("ERROR", "Получено 0 вопросов", "Нет данных")

        results = []
        answers = []

        for i, q in enumerate(questions):
            # Question processing fills the 0.2-0.9 span so the bar never
            # moves backwards after the setup steps above.
            progress(0.2 + 0.7 * i / total, desc=f"🧠 Обработка задачи {i+1}/{total}")
            # Bound before the try so the except path always has an id.
            task_id = f"unknown_{i}"
            try:
                task_id = q.get("task_id", task_id)
                question_text = q["question"]
                json_response = agent.process_question(question_text, task_id)

                try:
                    response_obj = json.loads(json_response)
                    final_answer = response_obj.get("final_answer", "")
                except (ValueError, TypeError, AttributeError):
                    # Not JSON, or JSON that is not an object: use raw text.
                    final_answer = json_response

                answer_text = str(final_answer)
                answer_record = {"task_id": task_id, "answer": answer_text[:500]}
                result_record = {
                    "Task ID": task_id,
                    "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
                    "Answer": answer_text[:100] + "..." if len(answer_text) > 100 else answer_text,
                    "Status": "Processed",
                }
            except Exception as e:
                logger.error(f"Task {task_id} failed: {e}")
                answer_record = {"task_id": task_id, "answer": f"ERROR: {str(e)}"}
                result_record = {
                    "Task ID": task_id,
                    "Question": "Error",
                    "Answer": f"ERROR: {str(e)}",
                    "Status": "Failed",
                }
            answers.append(answer_record)
            results.append(result_record)

        progress(0.9, desc="📤 Отправка результатов...")
        submission_result, score = runner._submit_answers(username, agent_code, answers)
        return submission_result, score, total, pd.DataFrame(results)

    except Exception as e:
        logger.exception("Critical error in run_evaluation")
        return _evaluation_failure("CRITICAL", f"Критическая ошибка: {str(e)}", "См. логи")
|
|
| |
# Gradio page: credentials and system status on the left, results on the right.
with gr.Blocks(
    title="🧠 GAIA Mastermind",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {background: linear-gradient(135deg, #1a2a6c, #2c5364)}
    .dark {color: #f0f0f0}
    """,
) as demo:
    # Page banner.
    gr.Markdown("""
    <div style="text-align:center; background: linear-gradient(135deg, #0f2027, #203a43);
    padding: 20px; border-radius: 15px; color: white; box-shadow: 0 10px 20px rgba(0,0,0,0.3);">
    <h1>🧠 GAIA Mastermind</h1>
    <h3>Многошаговое решение задач с декомпозицией</h3>
    <p>Соответствует спецификации GAIA API</p>
    </div>
    """)

    with gr.Row():
        # Left column: submission credentials, run button, system status.
        with gr.Column(scale=1):
            gr.Markdown("### 🔐 Авторизация")
            username = gr.Textbox(
                label="HF Username",
                value="yoshizen",
                info="Ваше имя пользователя Hugging Face",
            )
            agent_code = gr.Textbox(
                label="Agent Code",
                value="https://huggingface.co/spaces/yoshizen/FinalTest",
                info="URL вашего агента",
            )
            run_btn = gr.Button("🚀 Запустить оценку", variant="primary", scale=1)

            gr.Markdown("### ⚙️ Статус системы")
            sys_info = gr.Textbox(label="Системная информация", interactive=False, value="")

        # Right column: submission status, counters and the per-task table.
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Результаты GAIA")
            with gr.Row():
                result_output = gr.Textbox(label="Статус отправки", interactive=False, max_lines=3)
                correct_output = gr.Number(label="✅ Правильные ответы", interactive=False)
                total_output = gr.Number(label="📚 Всего вопросов", interactive=False)

            results_table = gr.Dataframe(
                label="🔍 Детализация ответов",
                headers=["Task ID", "Question", "Answer", "Status"],
                interactive=False,
            )

    def get_system_info():
        """Return a one-line summary of device, model name and API URL."""
        if torch.cuda.is_available():
            device = "GPU ✅"
        else:
            device = "CPU ⚠️"
        return f"Device: {device} | Model: {MODEL_NAME} | API: {DEFAULT_API_URL}"

    # Fill the status box once when the page loads.
    demo.load(get_system_info, inputs=None, outputs=sys_info)

    # One evaluation at a time; the four outputs match run_evaluation's tuple.
    evaluation_inputs = [username, agent_code]
    evaluation_outputs = [result_output, correct_output, total_output, results_table]
    run_btn.click(
        fn=run_evaluation,
        inputs=evaluation_inputs,
        outputs=evaluation_outputs,
        concurrency_limit=1,
        show_progress="minimal",
    )
|
|
if __name__ == "__main__":
    # Bounded request queue first, then serve on every interface at port 7860.
    queued_app = demo.queue(max_size=5)
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "debug": True,
    }
    queued_app.launch(**launch_options)